From da99ddeaa0687a776c3ffc44d04880f450033a96 Mon Sep 17 00:00:00 2001 From: Sam Hatfield Date: Thu, 25 Jul 2024 12:27:20 +0000 Subject: [PATCH 01/86] Deprecate COMBFLEN also for GPU --- src/trans/gpu/external/setup_trans0.F90 | 10 ++++++---- src/trans/gpu/internal/tpm_distr.F90 | 1 - 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/trans/gpu/external/setup_trans0.F90 b/src/trans/gpu/external/setup_trans0.F90 index efa5b87b9..36ddbcfba 100755 --- a/src/trans/gpu/external/setup_trans0.F90 +++ b/src/trans/gpu/external/setup_trans0.F90 @@ -34,7 +34,7 @@ SUBROUTINE SETUP_TRANS0(KOUT,KERR,KPRINTLEV,KMAX_RESOL,KPROMATR,& ! KPRGPNS - splitting level in N-S direction in grid-point space [1] ! KPRGPEW - splitting level in E-W direction in grid-point space [1] ! KPRTRW - splitting level in wave direction in spectral space [1] -! KCOMBFLEN - Size of communication buffer [1800000 (*8bytes) ] +! KCOMBFLEN - Size of communication buffer [1800000 (*8bytes) ] (deprecated) ! LDMPOFF - switch off message passing [false] ! LDSYNC_TRANS - switch to activate barriers in trmtol trltom [false] ! KTRANS_SYNC_LEVEL - use of synchronization/blocking [0] @@ -74,7 +74,7 @@ SUBROUTINE SETUP_TRANS0(KOUT,KERR,KPRINTLEV,KMAX_RESOL,KPROMATR,& USE TPM_GEN ,ONLY : NERR, NOUT, LMPOFF, LSYNC_TRANS, NTRANS_SYNC_LEVEL, MSETUP0, & & NMAX_RESOL, NPRINTLEV, NPROMATR, LALLOPERM -USE TPM_DISTR ,ONLY : LEQ_REGIONS, NCOMBFLEN, NPRGPEW,NPRGPNS, NPRTRW, NPRTRV, MYSETV +USE TPM_DISTR ,ONLY : LEQ_REGIONS, NPRGPEW,NPRGPNS, NPRTRW, NPRTRV, MYSETV USE TPM_CONSTANTS ,ONLY : RA USE MPL_MODULE @@ -171,7 +171,6 @@ SUBROUTINE SETUP_TRANS0(KOUT,KERR,KPRINTLEV,KMAX_RESOL,KPROMATR,& N_REGIONS_NS=1 N_REGIONS_EW=1 NPROMATR = 0 -NCOMBFLEN = 1800000 LMPOFF = .FALSE. LSYNC_TRANS=.FALSE. NTRANS_SYNC_LEVEL=0 @@ -239,7 +238,10 @@ SUBROUTINE SETUP_TRANS0(KOUT,KERR,KPRINTLEV,KMAX_RESOL,KPROMATR,& NPRTRW = KPRTRW ENDIF IF(PRESENT(KCOMBFLEN)) THEN - NCOMBFLEN = KCOMBFLEN + WRITE(NOUT,'(A)') + WRITE(NOUT,'(A)') '*** WARNING ***' + WRITE(NOUT,'(A)') 'KCOMBFLEN argument passed to SETUP_TRANS0 is deprecated' + WRITE(NOUT,'(A)') ENDIF IF(PRESENT(LDMPOFF)) THEN LMPOFF = LDMPOFF diff --git a/src/trans/gpu/internal/tpm_distr.F90 b/src/trans/gpu/internal/tpm_distr.F90 index 088122929..990c80145 100755 --- a/src/trans/gpu/internal/tpm_distr.F90 +++ b/src/trans/gpu/internal/tpm_distr.F90 @@ -33,7 +33,6 @@ MODULE TPM_DISTR INTEGER(KIND=JPIM) :: MYPROC ! My processor number INTEGER(KIND=JPIM) :: MYSETW ! My set number in wave direction (spectral space) INTEGER(KIND=JPIM) :: MYSETV ! My set number in field direction(S.S and F.S) -INTEGER(KIND=JPIM) :: NCOMBFLEN ! Size of communication buffer INTEGER(KIND=JPIM) :: MTAGLETR ! Tag INTEGER(KIND=JPIM) :: MTAGML ! Tag From 4f4c6fdba5ed095bf65af491aa02defeaff62ad5 Mon Sep 17 00:00:00 2001 From: Sam Hatfield Date: Tue, 30 Jul 2024 19:50:53 +0300 Subject: [PATCH 02/86] Tidy up USE statements in GPU code path --- src/trans/cpu/internal/suleg_mod.F90 | 2 - src/trans/gpu/algor/hicblas_mod.F90 | 23 ++++--- src/trans/gpu/algor/seefmm_mix.F90 | 6 +- src/trans/gpu/algor/wts500_mod.F90 | 3 +- src/trans/gpu/external/dir_trans.F90 | 27 ++++---- src/trans/gpu/external/dir_transad.F90 | 4 +- src/trans/gpu/external/dist_grid.F90 | 16 ++--- src/trans/gpu/external/dist_grid_32.F90 | 16 ++--- src/trans/gpu/external/dist_spec.F90 | 19 +++-- src/trans/gpu/external/gath_grid.F90 | 16 ++--- src/trans/gpu/external/gath_grid_32.F90 | 16 ++--- src/trans/gpu/external/gath_spec.F90 | 19 +++-- src/trans/gpu/external/get_current.F90 | 6 +- src/trans/gpu/external/gpnorm_trans.F90 | 33 ++++----- src/trans/gpu/external/gpnorm_trans_gpu.F90 | 28 ++++---- src/trans/gpu/external/ini_spec_dist.F90 | 9 ++- src/trans/gpu/external/inv_trans.F90 | 36 ++++------ src/trans/gpu/external/inv_transad.F90 | 4 +- src/trans/gpu/external/setup_trans.F90 | 69 +++++++++---------- src/trans/gpu/external/setup_trans0.F90 | 25 ++++--- src/trans/gpu/external/specnorm.F90 | 13 ++-- src/trans/gpu/external/sugawc.F90 | 4 +- src/trans/gpu/external/trans_end.F90 | 30 ++++---- src/trans/gpu/external/trans_inq.F90 | 22 +++--- src/trans/gpu/external/trans_pnm.F90 | 23 +++---- src/trans/gpu/external/trans_release.F90 | 4 +- src/trans/gpu/external/vordiv_to_uv.F90 | 15 ++-- src/trans/gpu/internal/abort_trans_mod.F90 | 8 +-- .../gpu/internal/buffered_allocator_mod.F90 | 16 ++--- src/trans/gpu/internal/cdmap_mod.F90 | 12 ++-- src/trans/gpu/internal/cpledn_mod.F90 | 2 +- src/trans/gpu/internal/dealloc_resol_mod.F90 | 25 +++---- src/trans/gpu/internal/dir_trans_ctl_mod.F90 | 28 ++++---- .../gpu/internal/dist_grid_32_ctl_mod.F90 | 17 +++-- src/trans/gpu/internal/dist_grid_ctl_mod.F90 | 18 +++-- .../gpu/internal/dist_spec_control_mod.F90 | 15 ++-- src/trans/gpu/internal/eq_regions_mod.F90 | 12 +--- src/trans/gpu/internal/ext_acc.F90 | 18 ++--- src/trans/gpu/internal/field_split_mod.F90 | 8 +-- src/trans/gpu/internal/fsc_mod.F90 | 21 +++--- src/trans/gpu/internal/ftdir_mod.F90 | 18 ++--- src/trans/gpu/internal/ftinv_mod.F90 | 19 +++-- .../gpu/internal/gath_grid_32_ctl_mod.F90 | 16 ++--- src/trans/gpu/internal/gath_grid_ctl_mod.F90 | 18 ++--- .../gpu/internal/gath_spec_control_mod.F90 | 18 ++--- src/trans/gpu/internal/gawl_mod.F90 | 5 +- src/trans/gpu/internal/inigptr_mod.F90 | 13 ++-- src/trans/gpu/internal/inv_trans_ctl_mod.F90 | 29 ++++---- src/trans/gpu/internal/ledir_mod.F90 | 35 ++++------ src/trans/gpu/internal/leinv_mod.F90 | 33 ++++----- src/trans/gpu/internal/ltdir_mod.F90 | 48 ++++++------- src/trans/gpu/internal/ltinv_mod.F90 | 50 +++++++------- src/trans/gpu/internal/myrecvset_mod.F90 | 4 +- src/trans/gpu/internal/mysendset_mod.F90 | 4 +- src/trans/gpu/internal/parkind_ectrans.F90 | 2 +- src/trans/gpu/internal/pe2set_mod.F90 | 10 ++- src/trans/gpu/internal/pre_suleg_mod.F90 | 12 ++-- src/trans/gpu/internal/prepsnm_mod.F90 | 10 ++- src/trans/gpu/internal/prfi1_mod.F90 | 8 +-- src/trans/gpu/internal/prfi1b_mod.F90 | 9 +-- src/trans/gpu/internal/read_legpol_mod.F90 | 21 +++--- src/trans/gpu/internal/set2pe_mod.F90 | 9 ++- src/trans/gpu/internal/set_resol_mod.F90 | 24 +++---- src/trans/gpu/internal/setup_dims_mod.F90 | 7 +- src/trans/gpu/internal/setup_geom_mod.F90 | 13 ++-- src/trans/gpu/internal/shuffle_mod.F90 | 7 +- src/trans/gpu/internal/spnorm_ctl_mod.F90 | 12 ++-- src/trans/gpu/internal/spnormc_mod.F90 | 11 ++- src/trans/gpu/internal/spnormd_mod.F90 | 7 +- src/trans/gpu/internal/spnsde_mod.F90 | 12 ++-- src/trans/gpu/internal/sufft_mod.F90 | 13 ++-- src/trans/gpu/internal/sugaw_mod.F90 | 18 +++-- src/trans/gpu/internal/suleg_mod.F90 | 52 +++++++------- src/trans/gpu/internal/sump_trans0_mod.F90 | 24 +++---- src/trans/gpu/internal/sump_trans_mod.F90 | 27 +++----- .../gpu/internal/sump_trans_preleg_mod.F90 | 13 ++-- src/trans/gpu/internal/sumplat_mod.F90 | 14 ++-- src/trans/gpu/internal/sumplatb_mod.F90 | 6 +- src/trans/gpu/internal/sumplatbeq_mod.F90 | 9 ++- src/trans/gpu/internal/sumplatf_mod.F90 | 8 +-- src/trans/gpu/internal/supol_mod.F90 | 4 +- src/trans/gpu/internal/supolf_mod.F90 | 5 +- src/trans/gpu/internal/sustaonl_mod.F90 | 21 +++--- src/trans/gpu/internal/sutrle_mod.F90 | 18 +++-- src/trans/gpu/internal/suwavedi_mod.F90 | 2 +- src/trans/gpu/internal/tpm_constants.F90 | 2 +- src/trans/gpu/internal/tpm_ctl.F90 | 4 +- src/trans/gpu/internal/tpm_dim.F90 | 2 +- src/trans/gpu/internal/tpm_distr.F90 | 2 +- src/trans/gpu/internal/tpm_fft.F90 | 2 +- src/trans/gpu/internal/tpm_fields.F90 | 2 +- src/trans/gpu/internal/tpm_flt.F90 | 4 +- src/trans/gpu/internal/tpm_gen.F90 | 2 +- src/trans/gpu/internal/tpm_geometry.F90 | 2 +- src/trans/gpu/internal/tpm_hicfft.F90 | 13 ++-- src/trans/gpu/internal/tpm_pol.F90 | 2 +- src/trans/gpu/internal/tpm_stats.F90 | 6 +- src/trans/gpu/internal/tpm_trans.F90 | 3 +- src/trans/gpu/internal/trgtol_mod.F90 | 45 ++++++------ src/trans/gpu/internal/trltog_mod.F90 | 43 ++++++------ src/trans/gpu/internal/trltom_mod.F90 | 26 +++---- src/trans/gpu/internal/trltom_pack_unpack.F90 | 43 ++++++------ src/trans/gpu/internal/trmtol_mod.F90 | 26 +++---- src/trans/gpu/internal/trmtol_pack_unpack.F90 | 42 +++++------ src/trans/gpu/internal/updsp_mod.F90 | 10 ++- src/trans/gpu/internal/updspb_mod.F90 | 8 +-- src/trans/gpu/internal/uvtvd_mod.F90 | 10 ++- src/trans/gpu/internal/vd2uv_ctl_mod.F90 | 8 +-- src/trans/gpu/internal/vd2uv_mod.F90 | 18 +++-- src/trans/gpu/internal/vdtuv_mod.F90 | 11 ++- src/trans/gpu/internal/write_legpol_mod.F90 | 17 +++-- 111 files changed, 806 insertions(+), 953 deletions(-) diff --git a/src/trans/cpu/internal/suleg_mod.F90 b/src/trans/cpu/internal/suleg_mod.F90 index 619666c6d..c51a3574d 100644 --- a/src/trans/cpu/internal/suleg_mod.F90 +++ b/src/trans/cpu/internal/suleg_mod.F90 @@ -17,7 +17,6 @@ SUBROUTINE SULEG USE PARKIND2 ,ONLY : JPRH USE MPL_MODULE ,ONLY : MPL_BYTES, MPL_BARRIER, JP_NON_BLOCKING_STANDARD, MPL_RECV, & & MPL_SEND, MPL_WAIT - USE TPM_GEN ,ONLY : NOUT, LMPOFF, NPRINTLEV USE TPM_DIM ,ONLY : R USE TPM_CONSTANTS ,ONLY : RA @@ -27,7 +26,6 @@ SUBROUTINE SULEG USE TPM_GEOMETRY ,ONLY : G USE TPM_CTL ,ONLY : C USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS - USE PRE_SULEG_MOD ,ONLY : PRE_SULEG USE SUGAW_MOD ,ONLY : SUGAW USE SUPOL_MOD ,ONLY : SUPOL diff --git a/src/trans/gpu/algor/hicblas_mod.F90 b/src/trans/gpu/algor/hicblas_mod.F90 index d6e2a5a35..8e900856f 100644 --- a/src/trans/gpu/algor/hicblas_mod.F90 +++ b/src/trans/gpu/algor/hicblas_mod.F90 @@ -16,9 +16,8 @@ MODULE HICBLAS_MOD -USE PARKIND1, ONLY : JPIM, JPRM, JPRD +USE PARKIND1, ONLY: JPIM, JPRM, JPRD USE GROWING_ALLOCATOR_MOD, ONLY: GROWING_ALLOCATION_TYPE -USE ISO_C_BINDING USE OPENACC_LIB, ONLY: ACC_GET_HIP_STREAM IMPLICIT NONE @@ -41,7 +40,7 @@ MODULE HICBLAS_MOD ! SUBROUTINE HIP_SGEMM(CTA, CTB, M, N, K,& ALPHA, A, LDA, B, LDB, BETA, C, LDC) BIND(C,NAME='hipblasSgemm') -USE ISO_C_BINDING +USE ISO_C_BINDING, ONLY: C_CHAR, C_INT, C_FLOAT CHARACTER(1,C_CHAR),VALUE :: CTA, CTB INTEGER(C_INT), VALUE :: M,N,K,LDA,LDB,LDC REAL(C_FLOAT), VALUE :: ALPHA,BETA @@ -57,7 +56,7 @@ END SUBROUTINE HIP_SGEMM ! SUBROUTINE HIP_DGEMM(CTA, CTB, M, N, K,& ALPHA, A, LDA, B, LDB, BETA, C, LDC) BIND(C,NAME='hipblasDgemm') -USE ISO_C_BINDING +USE ISO_C_BINDING, ONLY: C_CHAR, C_INT, C_DOUBLE CHARACTER(1,C_CHAR),VALUE :: CTA, CTB INTEGER(C_INT), VALUE :: M,N,K,LDA,LDB,LDC REAL(C_DOUBLE), VALUE :: ALPHA,BETA @@ -78,7 +77,7 @@ SUBROUTINE HIP_DGEMM_BATCHED( & & C, LDC, TDC, & & BATCHCOUNT, STREAM, ALLOC & &) BIND(C, NAME='hipblas_dgemm_wrapper') - USE ISO_C_BINDING + USE ISO_C_BINDING, ONLY: C_CHAR, C_INT, C_DOUBLE, C_SIZE_T, C_PTR CHARACTER(1,C_CHAR), VALUE :: CTA, CTB INTEGER(C_INT), VALUE :: M, N, K, LDA, LDB, LDC, TDA, TDB, TDC, BATCHCOUNT REAL(C_DOUBLE), VALUE :: ALPHA,BETA @@ -101,7 +100,7 @@ SUBROUTINE HIP_DGEMM_STRIDED_BATCHED(& & C, LDC, TDC, & & BATCHCOUNT, STREAM & &) BIND(C, NAME='hipblasDgemmStridedBatched_wrapper') - USE ISO_C_BINDING + USE ISO_C_BINDING, ONLY: C_CHAR, C_INT, C_DOUBLE, C_SIZE_T CHARACTER(1,C_CHAR), VALUE :: CTA, CTB INTEGER(C_INT), VALUE :: M, N, K, LDA, LDB, LDC, BATCHCOUNT INTEGER(C_INT), VALUE :: TDA,TDB,TDC @@ -129,7 +128,7 @@ SUBROUTINE HIP_SGEMM_BATCHED( & & C, LDC, TDC, & & BATCHCOUNT, STREAM, ALLOC & &) BIND(C, NAME='hipblas_sgemm_wrapper') - USE ISO_C_BINDING + USE ISO_C_BINDING, ONLY: C_CHAR, C_INT, C_FLOAT, C_SIZE_T, C_PTR CHARACTER(1,C_CHAR), VALUE :: CTA, CTB INTEGER(C_INT), VALUE :: M, N, K, LDA, LDB, LDC, TDA, TDB, TDC, BATCHCOUNT REAL(C_FLOAT), VALUE :: ALPHA, BETA @@ -152,7 +151,7 @@ SUBROUTINE HIP_SGEMM_STRIDED_BATCHED(& & C, LDC, TDC, & & BATCHCOUNT, STREAM & &) BIND(C, NAME='hipblasSgemmStridedBatched_wrapper') - USE ISO_C_BINDING + USE ISO_C_BINDING, ONLY: C_CHAR, C_INT, C_FLOAT, C_SIZE_T CHARACTER(1,C_CHAR), VALUE :: CTA, CTB INTEGER(C_INT), VALUE :: M, N, K, LDA, LDB, LDC, BATCHCOUNT INTEGER(C_INT), VALUE :: TDA,TDB,TDC @@ -180,7 +179,7 @@ SUBROUTINE HIP_DGEMM_GROUPED( & & C, LDC, OFFSETC, & & BATCHCOUNT, STREAM, ALLOC & &) BIND(C, NAME='blas_dgemm_wrapper_grouped') - USE ISO_C_BINDING + USE ISO_C_BINDING, ONLY: C_CHAR, C_INT, C_DOUBLE, C_SIZE_T, C_PTR CHARACTER(1,C_CHAR), VALUE :: CTA, CTB INTEGER(C_INT), VALUE :: BLAS_ID, M, LDA, LDB, LDC, BATCHCOUNT INTEGER(C_INT) :: N(*), K(*), OFFSETA(*), OFFSETB(*), OFFSETC(*) @@ -199,7 +198,7 @@ SUBROUTINE HIP_SGEMM_GROUPED( & & C, LDC, OFFSETC, & & BATCHCOUNT, STREAM, ALLOC & &) BIND(C, NAME='blas_sgemm_wrapper_grouped') - USE ISO_C_BINDING + USE ISO_C_BINDING, ONLY: C_CHAR, C_INT, C_FLOAT, C_SIZE_T, C_PTR CHARACTER(1,C_CHAR), VALUE :: CTA, CTB INTEGER(C_INT), VALUE :: BLAS_ID, M, LDA, LDB, LDC, BATCHCOUNT INTEGER(C_INT) :: N(*), K(*), OFFSETA(*), OFFSETB(*), OFFSETC(*) @@ -221,6 +220,7 @@ SUBROUTINE HIP_DGEMM_BATCHED_OVERLOAD( & & BETA, & & CARRAY, LDC, STRIDEC, & & BATCHCOUNT, STREAM, ALLOC) + USE ISO_C_BINDING, ONLY: C_CHAR, C_INT, C_LONG, C_LOC CHARACTER(1,C_CHAR), VALUE :: TRANSA, TRANSB INTEGER(KIND=JPIM) :: M INTEGER(KIND=JPIM) :: N @@ -270,6 +270,7 @@ SUBROUTINE HIP_SGEMM_BATCHED_OVERLOAD( & & BETA, & & CARRAY, LDC, STRIDEC, & & BATCHCOUNT, STREAM, ALLOC) + USE ISO_C_BINDING, ONLY: C_CHAR, C_INT, C_LONG, C_LOC CHARACTER(1,C_CHAR), VALUE :: TRANSA, TRANSB INTEGER(KIND=JPIM) :: M INTEGER(KIND=JPIM) :: N @@ -314,6 +315,7 @@ SUBROUTINE HIP_DGEMM_GROUPED_OVERLOAD( & & BETA, & & CARRAY, LDC, OFFSETC, & & BATCHCOUNT, STREAM, ALLOC) + USE ISO_C_BINDING, ONLY: C_INT, C_CHAR, C_LONG, C_LOC INTEGER(KIND=C_INT), INTENT(IN) :: BLAS_ID CHARACTER(1,C_CHAR), VALUE :: TRANSA, TRANSB INTEGER(KIND=JPIM) :: M @@ -359,6 +361,7 @@ SUBROUTINE HIP_SGEMM_GROUPED_OVERLOAD(& & BETA, & & CARRAY, LDC, OFFSETC, & & BATCHCOUNT, STREAM, ALLOC) + USE ISO_C_BINDING, ONLY: C_INT, C_CHAR, C_LONG, C_LOC INTEGER(KIND=C_INT), INTENT(IN) :: BLAS_ID CHARACTER(1,C_CHAR), VALUE :: TRANSA, TRANSB INTEGER(KIND=JPIM) :: M diff --git a/src/trans/gpu/algor/seefmm_mix.F90 b/src/trans/gpu/algor/seefmm_mix.F90 index 5fa125350..2cf7e2714 100644 --- a/src/trans/gpu/algor/seefmm_mix.F90 +++ b/src/trans/gpu/algor/seefmm_mix.F90 @@ -38,9 +38,9 @@ module seefmm_mix ! ------------------------------------------------------------------ -use parkind_ectrans,only : jpim ,JPRBT, jprd -use ecsort_mix -use wts500_mod +use parkind_ectrans, only: jpim, jprbt, jprd +use ecsort_mix, only: keysort +use wts500_mod, only: wts500 integer(kind=jpim) :: nfmm_lim=200 ! Appr. break-even limit for FMM integer(kind=jpim),parameter :: nquadEm14=28 ! Quadrature size for eps~=1.e-14 diff --git a/src/trans/gpu/algor/wts500_mod.F90 b/src/trans/gpu/algor/wts500_mod.F90 index 0859d78e0..dc45a4091 100644 --- a/src/trans/gpu/algor/wts500_mod.F90 +++ b/src/trans/gpu/algor/wts500_mod.F90 @@ -11,7 +11,8 @@ MODULE WTS500_MOD CONTAINS SUBROUTINE WTS500(PX,PW,KN) -USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRBT +USE EC_PARKIND, ONLY: JPIM +USE PARKIND_ECTRANS, ONLY: JPRBT IMPLICIT NONE INTEGER(KIND=JPIM), INTENT(IN) :: KN diff --git a/src/trans/gpu/external/dir_trans.F90 b/src/trans/gpu/external/dir_trans.F90 index e3541a5d7..5cf48fddf 100755 --- a/src/trans/gpu/external/dir_trans.F90 +++ b/src/trans/gpu/external/dir_trans.F90 @@ -108,23 +108,22 @@ SUBROUTINE DIR_TRANS(PSPVOR,PSPDIV,PSPSCALAR,PSPSC3A,PSPSC3B,PSPSC2,& ! ------------------------------------------------------------------ -USE PARKIND1 ,ONLY : JPIM ,JPRB +USE PARKIND1, ONLY: JPIM, JPRB !ifndef INTERFACE -USE TPM_GEN ,ONLY : NERR, NOUT, LSYNC_TRANS -USE TPM_TRANS ,ONLY : LDIVGP, LSCDERS, LUVDER, LVORGP, LATLON, & - & NF_SC2, NF_SC3A, NF_SC3B, & - & NGPBLKS, NPROMA -USE TPM_DISTR ,ONLY : D, NPRTRV, MYSETV -USE TPM_FLT ,ONLY : S -USE TPM_GEOMETRY ,ONLY : G -USE SET_RESOL_MOD ,ONLY : SET_RESOL -USE DIR_TRANS_CTL_MOD,ONLY : DIR_TRANS_CTL -USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS -USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK -USE MPL_MODULE ,ONLY : MPL_BARRIER -USE TPM_STATS ,ONLY : GSTATS => GSTATS_NVTX +USE TPM_GEN, ONLY: NERR, NOUT, LSYNC_TRANS +USE TPM_TRANS, ONLY: LDIVGP, LSCDERS, LUVDER, LVORGP, LATLON, NF_SC2, NF_SC3A, NF_SC3B, & + & NGPBLKS, NPROMA +USE TPM_DISTR, ONLY: D, NPRTRV, MYSETV +USE TPM_FLT, ONLY: S +USE TPM_GEOMETRY, ONLY: G +USE SET_RESOL_MOD, ONLY: SET_RESOL +USE DIR_TRANS_CTL_MOD, ONLY: DIR_TRANS_CTL +USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS +USE YOMHOOK, ONLY: LHOOK, DR_HOOK, JPHOOK +USE MPL_MODULE, ONLY: MPL_BARRIER +USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX !endif INTERFACE diff --git a/src/trans/gpu/external/dir_transad.F90 b/src/trans/gpu/external/dir_transad.F90 index ee2f0723f..39d8c3190 100755 --- a/src/trans/gpu/external/dir_transad.F90 +++ b/src/trans/gpu/external/dir_transad.F90 @@ -106,11 +106,11 @@ SUBROUTINE DIR_TRANSAD(PSPVOR,PSPDIV,PSPSCALAR,PSPSC3A,PSPSC3B,PSPSC2,& ! ------------------------------------------------------------------ -USE PARKIND1 ,ONLY : JPIM ,JPRB +USE PARKIND1, ONLY: JPIM, JPRB !ifndef INTERFACE -USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS !endif INTERFACE diff --git a/src/trans/gpu/external/dist_grid.F90 b/src/trans/gpu/external/dist_grid.F90 index 05f34ee5a..bdf0d6a48 100755 --- a/src/trans/gpu/external/dist_grid.F90 +++ b/src/trans/gpu/external/dist_grid.F90 @@ -48,18 +48,16 @@ SUBROUTINE DIST_GRID(PGPG,KPROMA,KFDISTG,KFROM,KRESOL,PGP,KSORT) ! ------------------------------------------------------------------ -USE PARKIND1 ,ONLY : JPIM ,JPRB +USE PARKIND1, ONLY: JPIM, JPRB !ifndef INTERFACE -USE TPM_GEN ,ONLY : NERR, NOUT -!USE TPM_DIM -USE TPM_DISTR ,ONLY : D, MYPROC, NPROC - -USE SET_RESOL_MOD ,ONLY : SET_RESOL -USE DIST_GRID_CTL_MOD ,ONLY : DIST_GRID_CTL -USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS -USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK +USE TPM_GEN, ONLY: NERR, NOUT +USE TPM_DISTR, ONLY: D, MYPROC, NPROC +USE SET_RESOL_MOD, ONLY: SET_RESOL +USE DIST_GRID_CTL_MOD, ONLY: DIST_GRID_CTL +USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS +USE YOMHOOK, ONLY: LHOOK, DR_HOOK, JPHOOK !endif INTERFACE diff --git a/src/trans/gpu/external/dist_grid_32.F90 b/src/trans/gpu/external/dist_grid_32.F90 index ab3d3f00e..b60288acf 100755 --- a/src/trans/gpu/external/dist_grid_32.F90 +++ b/src/trans/gpu/external/dist_grid_32.F90 @@ -46,18 +46,16 @@ SUBROUTINE DIST_GRID_32(PGPG,KPROMA,KFDISTG,KFROM,KRESOL,PGP) ! ------------------------------------------------------------------ -USE PARKIND1 ,ONLY : JPIM ,JPRM ,JPRB +USE PARKIND1, ONLY: JPIM, JPRM !ifndef INTERFACE -USE TPM_GEN -USE TPM_DIM -USE TPM_DISTR - -USE SET_RESOL_MOD -USE DIST_GRID_32_CTL_MOD -USE ABORT_TRANS_MOD -USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK +USE TPM_GEN, ONLY: NERR, NOUT +USE TPM_DISTR, ONLY: D, NPROC, MYPROC +USE SET_RESOL_MOD, ONLY: SET_RESOL +USE DIST_GRID_32_CTL_MOD, ONLY: DIST_GRID_32_CTL +USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS +USE YOMHOOK, ONLY: LHOOK, DR_HOOK, JPHOOK !endif INTERFACE diff --git a/src/trans/gpu/external/dist_spec.F90 b/src/trans/gpu/external/dist_spec.F90 index 084c7474b..b823aeae4 100755 --- a/src/trans/gpu/external/dist_spec.F90 +++ b/src/trans/gpu/external/dist_spec.F90 @@ -49,19 +49,18 @@ SUBROUTINE DIST_SPEC(PSPECG,KFDISTG,KFROM,KVSET,KRESOL,PSPEC,& ! ------------------------------------------------------------------ -USE PARKIND1 ,ONLY : JPIM ,JPRB +USE PARKIND1, ONLY: JPIM, JPRB !ifndef INTERFACE -USE TPM_GEN ,ONLY : NERR -USE TPM_DIM ,ONLY : R -USE TPM_DISTR ,ONLY : D, NPRTRV, NPRTRW, MYSETV, MYSETW, MYPROC, NPROC - -USE SET_RESOL_MOD ,ONLY : SET_RESOL -USE DIST_SPEC_CONTROL_MOD ,ONLY : DIST_SPEC_CONTROL -USE SUWAVEDI_MOD ,ONLY : SUWAVEDI -USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS -USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK +USE TPM_GEN, ONLY: NERR +USE TPM_DIM, ONLY: R +USE TPM_DISTR, ONLY: D, NPRTRV, NPRTRW, MYSETV, MYSETW, MYPROC, NPROC +USE SET_RESOL_MOD, ONLY: SET_RESOL +USE DIST_SPEC_CONTROL_MOD, ONLY: DIST_SPEC_CONTROL +USE SUWAVEDI_MOD, ONLY: SUWAVEDI +USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS +USE YOMHOOK, ONLY: LHOOK, DR_HOOK, JPHOOK !endif INTERFACE diff --git a/src/trans/gpu/external/gath_grid.F90 b/src/trans/gpu/external/gath_grid.F90 index 8cfcc40e5..798224945 100755 --- a/src/trans/gpu/external/gath_grid.F90 +++ b/src/trans/gpu/external/gath_grid.F90 @@ -46,18 +46,16 @@ SUBROUTINE GATH_GRID(PGPG,KPROMA,KFGATHG,KTO,KRESOL,PGP) ! ------------------------------------------------------------------ -USE PARKIND1 ,ONLY : JPIM ,JPRB +USE PARKIND1, ONLY: JPIM, JPRB !ifndef INTERFACE -USE TPM_GEN ,ONLY : NERR, NOUT -!USE TPM_DIM -USE TPM_DISTR ,ONLY : D, MYPROC, NPROC - -USE SET_RESOL_MOD ,ONLY : SET_RESOL -USE GATH_GRID_CTL_MOD ,ONLY : GATH_GRID_CTL -USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS -USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK +USE TPM_GEN, ONLY: NERR, NOUT +USE TPM_DISTR, ONLY: D, MYPROC, NPROC +USE SET_RESOL_MOD, ONLY: SET_RESOL +USE GATH_GRID_CTL_MOD, ONLY: GATH_GRID_CTL +USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS +USE YOMHOOK, ONLY: LHOOK, DR_HOOK, JPHOOK !endif INTERFACE diff --git a/src/trans/gpu/external/gath_grid_32.F90 b/src/trans/gpu/external/gath_grid_32.F90 index 052552f33..6ba1675c2 100755 --- a/src/trans/gpu/external/gath_grid_32.F90 +++ b/src/trans/gpu/external/gath_grid_32.F90 @@ -46,18 +46,16 @@ SUBROUTINE GATH_GRID_32(PGPG,KPROMA,KFGATHG,KTO,KRESOL,PGP) ! ------------------------------------------------------------------ -USE PARKIND1 ,ONLY : JPIM ,JPRB, JPRM +USE PARKIND1, ONLY: JPIM, JPRB, JPRM !ifndef INTERFACE -USE TPM_GEN -USE TPM_DIM -USE TPM_DISTR - -USE SET_RESOL_MOD -USE GATH_GRID_32_CTL_MOD -USE ABORT_TRANS_MOD -USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK +USE TPM_GEN, ONLY: NERR,NOUT +USE TPM_DISTR, ONLY: D, NPROC, MYPROC +USE SET_RESOL_MOD, ONLY: SET_RESOL +USE GATH_GRID_32_CTL_MOD, ONLY: GATH_GRID_32_CTL +USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS +USE YOMHOOK, ONLY: LHOOK, DR_HOOK, JPHOOK !endif INTERFACE diff --git a/src/trans/gpu/external/gath_spec.F90 b/src/trans/gpu/external/gath_spec.F90 index 9af1f81ab..2e7746780 100755 --- a/src/trans/gpu/external/gath_spec.F90 +++ b/src/trans/gpu/external/gath_spec.F90 @@ -48,19 +48,18 @@ SUBROUTINE GATH_SPEC(PSPECG,KFGATHG,KTO,KVSET,KRESOL,PSPEC,LDIM1_IS_FLD,KSMAX,LD ! Modified 13-10-10 P. Marguinaud add LDZA0IP option ! ------------------------------------------------------------------ -USE PARKIND1 ,ONLY : JPIM ,JPRB +USE PARKIND1, ONLY: JPIM, JPRB !ifndef INTERFACE -USE TPM_GEN ,ONLY : NERR -USE TPM_DIM ,ONLY : R -USE TPM_DISTR ,ONLY : D, NPRTRV, NPRTRW, MYSETV, MYSETW, MYPROC, NPROC - -USE SET_RESOL_MOD ,ONLY : SET_RESOL -USE GATH_SPEC_CONTROL_MOD ,ONLY : GATH_SPEC_CONTROL -USE SUWAVEDI_MOD ,ONLY : SUWAVEDI -USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS -USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK +USE TPM_GEN, ONLY: NERR +USE TPM_DIM, ONLY: R +USE TPM_DISTR, ONLY: D, NPRTRV, NPRTRW, MYSETV, MYSETW, MYPROC, NPROC +USE SET_RESOL_MOD, ONLY: SET_RESOL +USE GATH_SPEC_CONTROL_MOD, ONLY: GATH_SPEC_CONTROL +USE SUWAVEDI_MOD, ONLY: SUWAVEDI +USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS +USE YOMHOOK, ONLY: LHOOK, DR_HOOK, JPHOOK !endif INTERFACE diff --git a/src/trans/gpu/external/get_current.F90 b/src/trans/gpu/external/get_current.F90 index 802701ade..71a5ee154 100755 --- a/src/trans/gpu/external/get_current.F90 +++ b/src/trans/gpu/external/get_current.F90 @@ -41,12 +41,12 @@ SUBROUTINE GET_CURRENT(KRESOL,LDLAM) ! ------------------------------------------------------------------ -USE PARKIND1 ,ONLY : JPIM +USE PARKIND1, ONLY: JPIM !ifndef INTERFACE -USE TPM_GEN -USE TPM_GEOMETRY +USE TPM_GEN, ONLY: NCUR_RESOL +USE TPM_GEOMETRY, ONLY: G !endif INTERFACE diff --git a/src/trans/gpu/external/gpnorm_trans.F90 b/src/trans/gpu/external/gpnorm_trans.F90 index 7b8e5e009..86ba77863 100755 --- a/src/trans/gpu/external/gpnorm_trans.F90 +++ b/src/trans/gpu/external/gpnorm_trans.F90 @@ -51,25 +51,26 @@ SUBROUTINE GPNORM_TRANS(PGP,KFIELDS,KPROMA,PAVE,PMIN,PMAX,LDAVE_ONLY,KRESOL) ! ------------------------------------------------------------------ -USE PARKIND1 ,ONLY : JPIM ,JPRB , JPRD -USE PARKIND_ECTRANS ,ONLY : JPRBT +USE PARKIND1, ONLY: JPIM, JPRB, JPRD +USE PARKIND_ECTRANS, ONLY: JPRBT !ifndef INTERFACE -USE TPM_GEN ,ONLY : NOUT -USE TPM_DIM ,ONLY : R -USE TPM_TRANS ,ONLY : LGPNORM, NGPBLKS, NPROMA -USE TPM_DISTR ,ONLY : D, NPRCIDS, NPRTRV, NPRTRW, MYSETV, MYSETW, NPROC, D_NSTAGTF,D_NPTRLS, MYPROC -USE TPM_GEOMETRY ,ONLY : G,G_NLOEN -USE TPM_FIELDS ,ONLY : F,F_RW -USE SET_RESOL_MOD ,ONLY : SET_RESOL -USE SET2PE_MOD ,ONLY : SET2PE -USE MPL_MODULE ,ONLY : MPL_RECV, MPL_SEND, JP_BLOCKING_STANDARD -USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS -USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK -USE TRGTOL_MOD -USE TPM_TRANS, ONLY:GROWING_ALLOCATION -USE BUFFERED_ALLOCATOR_MOD +USE TPM_GEN, ONLY: NOUT +USE TPM_DIM, ONLY: R +USE TPM_TRANS, ONLY: LGPNORM, NGPBLKS, NPROMA +USE TPM_DISTR, ONLY: D, NPRCIDS, NPRTRV, NPRTRW, MYSETV, MYSETW, NPROC, D_NSTAGTF, & + & D_NPTRLS, MYPROC +USE TPM_GEOMETRY, ONLY: G, G_NLOEN +USE TPM_FIELDS, ONLY: F, F_RW +USE SET_RESOL_MOD, ONLY: SET_RESOL +USE SET2PE_MOD, ONLY: SET2PE +USE MPL_MODULE, ONLY: MPL_RECV, MPL_SEND, JP_BLOCKING_STANDARD +USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS +USE YOMHOOK, ONLY: LHOOK, DR_HOOK, JPHOOK +USE TRGTOL_MOD, ONLY: TRGTOL_HANDLE, PREPARE_TRGTOL, TRGTOL +USE TPM_TRANS, ONLY: GROWING_ALLOCATION +USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, MAKE_BUFFERED_ALLOCATOR !endif INTERFACE diff --git a/src/trans/gpu/external/gpnorm_trans_gpu.F90 b/src/trans/gpu/external/gpnorm_trans_gpu.F90 index 8d941f78d..61bf243c4 100755 --- a/src/trans/gpu/external/gpnorm_trans_gpu.F90 +++ b/src/trans/gpu/external/gpnorm_trans_gpu.F90 @@ -50,23 +50,23 @@ SUBROUTINE GPNORM_TRANS_GPU(PGP,KFIELDS,KPROMA,PAVE,PMIN,PMAX,LDAVE_ONLY,KRESOL) ! ------------------------------------------------------------------ -USE PARKIND1 ,ONLY : JPIM ,JPRB , JPRD -USE PARKIND_ECTRANS ,ONLY : JPRBT +USE PARKIND1, ONLY: JPIM, JPRB, JPRD +USE PARKIND_ECTRANS, ONLY: JPRBT !ifndef INTERFACE -USE TPM_GEN ,ONLY : NOUT -USE TPM_DIM ,ONLY : R -USE TPM_TRANS ,ONLY : LGPNORM, NGPBLKS, NPROMA -USE TPM_DISTR ,ONLY : D, NPRCIDS, NPRTRV, NPRTRW, MYSETV, MYSETW, NPROC, D_NSTAGTF,D_NPTRLS, MYPROC -USE TPM_GEOMETRY ,ONLY : G,G_NLOEN,G_NLOEN_MAX -USE TPM_FIELDS ,ONLY : F_RW -USE SET_RESOL_MOD ,ONLY : SET_RESOL -USE TRGTOL_MOD ,ONLY : TRGTOL -USE SET2PE_MOD ,ONLY : SET2PE -USE MPL_MODULE ,ONLY : MPL_RECV, MPL_SEND, JP_BLOCKING_STANDARD -USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS -USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK +USE TPM_GEN, ONLY: NOUT +USE TPM_DIM, ONLY: R +USE TPM_TRANS, ONLY: LGPNORM, NGPBLKS, NPROMA +USE TPM_DISTR, ONLY: D, NPRCIDS, NPRTRV, NPRTRW, MYSETV, MYSETW, NPROC, D_NSTAGTF, D_NPTRLS +USE TPM_GEOMETRY, ONLY: G, G_NLOEN, G_NLOEN_MAX +USE TPM_FIELDS, ONLY: F_RW +USE SET_RESOL_MOD, ONLY: SET_RESOL +USE TRGTOL_MOD, ONLY: TRGTOL +USE SET2PE_MOD, ONLY: SET2PE +USE MPL_MODULE, ONLY: MPL_RECV, MPL_SEND, JP_BLOCKING_STANDARD +USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS +USE YOMHOOK, ONLY: LHOOK, DR_HOOK, JPHOOK !endif INTERFACE diff --git a/src/trans/gpu/external/ini_spec_dist.F90 b/src/trans/gpu/external/ini_spec_dist.F90 index 7dfe61d5f..cde0805f0 100755 --- a/src/trans/gpu/external/ini_spec_dist.F90 +++ b/src/trans/gpu/external/ini_spec_dist.F90 @@ -54,10 +54,13 @@ SUBROUTINE INI_SPEC_DIST(KSMAX,KTMAX,KPRTRW,KMYSETW,KASM0,KSPOLEGL,KPROCM,& ! ------------------------------------------------------------------ -USE PARKIND1 ,ONLY : JPIM ,JPRB +USE PARKIND1, ONLY: JPIM, JPRB + !ifndef INTERFACE -USE SUWAVEDI_MOD ,ONLY : SUWAVEDI -USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK + +USE SUWAVEDI_MOD, ONLY: SUWAVEDI +USE YOMHOOK, ONLY: LHOOK, DR_HOOK, JPHOOK + !endif INTERFACE IMPLICIT NONE diff --git a/src/trans/gpu/external/inv_trans.F90 b/src/trans/gpu/external/inv_trans.F90 index a0ac0c317..a85d7ebdb 100755 --- a/src/trans/gpu/external/inv_trans.F90 +++ b/src/trans/gpu/external/inv_trans.F90 @@ -124,32 +124,22 @@ SUBROUTINE INV_TRANS(PSPVOR,PSPDIV,PSPSCALAR,PSPSC3A,PSPSC3B,PSPSC2,& ! ------------------------------------------------------------------ -USE PARKIND1 ,ONLY : JPIM ,JPRB +USE PARKIND1, ONLY: JPIM, JPRB !ifndef INTERFACE -USE TPM_GEN ,ONLY : NERR, NOUT, NPROMATR, LSYNC_TRANS -!USE TPM_DIM -USE TPM_TRANS ,ONLY : LDIVGP, LSCDERS, LUVDER, LVORGP, LATLON, & - & NF_SC2, NF_SC3A, NF_SC3B, NGPBLKS, NPROMA - -USE TPM_FLT ,ONLY : S -USE TPM_GEOMETRY ,ONLY : G -!USE TPM_GEOMETRY -!USE TPM_FIELDS -!USE TPM_FFT -USE TPM_DISTR ,ONLY : D, NPRTRV, MYSETV, MYPROC -USE SET_RESOL_MOD ,ONLY : SET_RESOL -USE INV_TRANS_CTL_MOD ,ONLY : INV_TRANS_CTL -USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS -USE MPL_MODULE ,ONLY : MPL_BARRIER -USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK -USE TPM_STATS ,ONLY : GSTATS => GSTATS_NVTX - -#ifdef _OPENACC -USE OPENACC -!USE ACCEL_LIB !only for NVIDIA GPUs -#endif +USE TPM_GEN, ONLY: NERR, NOUT, NPROMATR, LSYNC_TRANS +USE TPM_TRANS, ONLY: LDIVGP, LSCDERS, LUVDER, LVORGP, LATLON, NF_SC2, NF_SC3A, NF_SC3B, & + & NGPBLKS, NPROMA +USE TPM_FLT, ONLY: S +USE TPM_GEOMETRY, ONLY: G +USE TPM_DISTR, ONLY: D, NPRTRV, MYSETV, MYPROC +USE SET_RESOL_MOD, ONLY: SET_RESOL +USE INV_TRANS_CTL_MOD, ONLY: INV_TRANS_CTL +USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS +USE MPL_MODULE, ONLY: MPL_BARRIER +USE YOMHOOK, ONLY: LHOOK, DR_HOOK, JPHOOK +USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX !endif INTERFACE diff --git a/src/trans/gpu/external/inv_transad.F90 b/src/trans/gpu/external/inv_transad.F90 index 0635b20f0..eade20194 100755 --- a/src/trans/gpu/external/inv_transad.F90 +++ b/src/trans/gpu/external/inv_transad.F90 @@ -121,11 +121,11 @@ SUBROUTINE INV_TRANSAD(PSPVOR,PSPDIV,PSPSCALAR,PSPSC3A,PSPSC3B,PSPSC2,& ! ------------------------------------------------------------------ -USE PARKIND1 ,ONLY : JPIM ,JPRB +USE PARKIND1, ONLY: JPIM, JPRB !ifndef INTERFACE -USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS !endif INTERFACE diff --git a/src/trans/gpu/external/setup_trans.F90 b/src/trans/gpu/external/setup_trans.F90 index c9a0055f7..5799069ad 100755 --- a/src/trans/gpu/external/setup_trans.F90 +++ b/src/trans/gpu/external/setup_trans.F90 @@ -98,49 +98,48 @@ SUBROUTINE SETUP_TRANS(KSMAX,KDGL,KDLON,KLOEN,LDSPLIT,PSTRET,& ! R. El Khatib 07-Mar-2016 Better flexibility for Legendre polynomials computation in stretched mode ! ------------------------------------------------------------------ -USE PARKIND1 ,ONLY : JPIM ,JPRB , JPRD -USE PARKIND_ECTRANS ,ONLY : JPRBT -USE EC_ENV_MOD ,ONLY : EC_GETENV -USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_PTR, C_INT,C_ASSOCIATED,C_SIZE_T +USE PARKIND1, ONLY: JPIM, JPRB, JPRD +USE PARKIND_ECTRANS, ONLY: JPRBT !ifndef INTERFACE -USE TPM_GEN ,ONLY : NOUT, MSETUP0, NCUR_RESOL, NDEF_RESOL, & - & NMAX_RESOL, NPRINTLEV, LENABLED, NERR -USE TPM_DIM ,ONLY : R, DIM_RESOL, R_NSMAX,R_NTMAX, R_NDGNH, R_NDGL -USE TPM_DISTR ,ONLY : D, DISTR_RESOL,NPROC,NPRTRV, D_NUMP,D_NDGL_FS,D_MYMS,D_NSTAGT0B,D_NSTAGT1B,D_NPROCL,D_NPNTGTB1, D_NASM0, & -& D_NSTAGTF,D_MSTABF,D_NPNTGTB0,D_NPROCM,D_NPTRLS,MYSETV,MYSETW, MYPROC,D_OFFSETS_GEMM1, D_OFFSETS_GEMM2 -USE TPM_GEOMETRY ,ONLY : G, GEOM_RESOL, G_NDGLU, G_NMEN, G_NMEN_MAX,G_NLOEN, G_NLOEN_MAX -USE TPM_FIELDS ,ONLY : FIELDS_RESOL, F,F_RW, F_RN, F_RLAPIN, F_RACTHE, ZEPSNM, & -& ZAA,ZAS,& -& ZAA0,ZAS0,KMLOC0 -! IZBA,IZCAT -USE TPM_FFT ,ONLY : T, FFT_RESOL -USE TPM_HICFFT ,ONLY : HICT, HICFFT_RESOL - +USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_PTR, C_INT, C_ASSOCIATED, C_SIZE_T +USE EC_ENV_MOD, ONLY: EC_GETENV +USE TPM_GEN, ONLY: NOUT, MSETUP0, NCUR_RESOL, NDEF_RESOL, & + & NMAX_RESOL, NPRINTLEV, LENABLED, NERR +USE TPM_DIM, ONLY: R, DIM_RESOL, R_NSMAX,R_NTMAX, R_NDGNH, R_NDGL +USE TPM_DISTR, ONLY: D, DISTR_RESOL, NPROC, NPRTRV, D_NUMP, D_NDGL_FS, D_MYMS, & + & D_NSTAGT0B, D_NSTAGT1B, D_NPROCL, D_NPNTGTB1, D_NASM0, & + & D_NSTAGTF, D_MSTABF, D_NPNTGTB0, D_NPROCM, D_NPTRLS, & + & MYPROC, D_OFFSETS_GEMM1, D_OFFSETS_GEMM2 +USE TPM_GEOMETRY, ONLY: G, GEOM_RESOL, G_NDGLU, G_NMEN, G_NMEN_MAX, G_NLOEN, & + & G_NLOEN_MAX +USE TPM_FIELDS, ONLY: FIELDS_RESOL, F, F_RW, F_RN, F_RLAPIN, F_RACTHE, ZEPSNM, & + & ZAA, ZAS, ZAA0, ZAS0, KMLOC0 +USE TPM_FFT, ONLY: T, FFT_RESOL +USE TPM_HICFFT, ONLY: HICT, HICFFT_RESOL #ifdef WITH_FFTW -USE TPM_FFTW ,ONLY : TW, FFTW_RESOL +USE TPM_FFTW, ONLY: TW, FFTW_RESOL #endif -USE TPM_FLT -USE TPM_CTL - -USE SET_RESOL_MOD ,ONLY : SET_RESOL -USE SETUP_DIMS_MOD ,ONLY : SETUP_DIMS -USE SUMP_TRANS_MOD ,ONLY : SUMP_TRANS -USE SUMP_TRANS_PRELEG_MOD ,ONLY : SUMP_TRANS_PRELEG -USE SULEG_MOD ,ONLY : SULEG -USE PRE_SULEG_MOD ,ONLY : PRE_SULEG -USE SUFFT_MOD ,ONLY : SUFFT -USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS -USE SHAREDMEM_MOD ,ONLY : SHAREDMEM_CREATE -USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK -!USE CUDA_DEVICE_MOD ! only for NVIDIA GPUs -USE PREPSNM_MOD ,ONLY : PREPSNM +USE TPM_FLT, ONLY: FLT_RESOL, S +USE TPM_CTL, ONLY: CTL_RESOL, C +USE SET_RESOL_MOD, ONLY: SET_RESOL +USE SETUP_DIMS_MOD, ONLY: SETUP_DIMS +USE SUMP_TRANS_MOD, ONLY: SUMP_TRANS +USE SUMP_TRANS_PRELEG_MOD, ONLY: SUMP_TRANS_PRELEG +USE SULEG_MOD, ONLY: SULEG +USE PRE_SULEG_MOD, ONLY: PRE_SULEG +USE SUFFT_MOD, ONLY: SUFFT +USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS +USE SHAREDMEM_MOD, ONLY: SHAREDMEM_CREATE +USE YOMHOOK, ONLY: LHOOK, DR_HOOK, JPHOOK +USE PREPSNM_MOD, ONLY: PREPSNM #ifdef ACCGPU -USE OPENACC +USE OPENACC, ONLY: ACC_DEVICE_KIND #endif #ifdef OMPGPU -USE OMP_LIB +! TODO: add OMP equivalents to ACC library routines +!USE OMP_LIB #endif !endif INTERFACE diff --git a/src/trans/gpu/external/setup_trans0.F90 b/src/trans/gpu/external/setup_trans0.F90 index 36ddbcfba..549f2c316 100755 --- a/src/trans/gpu/external/setup_trans0.F90 +++ b/src/trans/gpu/external/setup_trans0.F90 @@ -68,23 +68,22 @@ SUBROUTINE SETUP_TRANS0(KOUT,KERR,KPRINTLEV,KMAX_RESOL,KPROMATR,& ! ------------------------------------------------------------------ -USE PARKIND1 ,ONLY : JPIM ,JPRB, JPRD +USE PARKIND1, ONLY: JPIM, JPRB, JPRD !ifndef INTERFACE -USE TPM_GEN ,ONLY : NERR, NOUT, LMPOFF, LSYNC_TRANS, NTRANS_SYNC_LEVEL, MSETUP0, & - & NMAX_RESOL, NPRINTLEV, NPROMATR, LALLOPERM -USE TPM_DISTR ,ONLY : LEQ_REGIONS, NPRGPEW,NPRGPNS, NPRTRW, NPRTRV, MYSETV -USE TPM_CONSTANTS ,ONLY : RA -USE MPL_MODULE - -USE SUMP_TRANS0_MOD ,ONLY : SUMP_TRANS0 -USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS -USE EQ_REGIONS_MOD ,ONLY : N_REGIONS, N_REGIONS_EW, N_REGIONS_NS -USE ECTRANS_VERSION_MOD ,ONLY : ECTRANS_VERSION_STR, ECTRANS_GIT_SHA1 -USE EC_ENV_MOD ,ONLY : EC_GETENV +USE TPM_GEN, ONLY: NERR, NOUT, LMPOFF, LSYNC_TRANS, NTRANS_SYNC_LEVEL, MSETUP0, & + & NMAX_RESOL, NPRINTLEV, NPROMATR, LALLOPERM +USE TPM_DISTR, ONLY: LEQ_REGIONS, NPRGPEW, NPRGPNS, NPRTRW, NPRTRV, MYSETV +USE TPM_CONSTANTS, ONLY: RA +USE MPL_MODULE, ONLY: MPL_MYRANK +USE SUMP_TRANS0_MOD, ONLY: SUMP_TRANS0 +USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS +USE EQ_REGIONS_MOD, ONLY: N_REGIONS, N_REGIONS_EW, N_REGIONS_NS +USE ECTRANS_VERSION_MOD, ONLY: ECTRANS_VERSION_STR, ECTRANS_GIT_SHA1 +USE EC_ENV_MOD, ONLY: EC_GETENV #ifdef _OPENACC -USE OPENACC +USE OPENACC, ONLY: ACC_DEVICE_KIND #endif !endif INTERFACE diff --git a/src/trans/gpu/external/specnorm.F90 b/src/trans/gpu/external/specnorm.F90 index 1e032da70..95ade4354 100755 --- a/src/trans/gpu/external/specnorm.F90 +++ b/src/trans/gpu/external/specnorm.F90 @@ -46,16 +46,15 @@ SUBROUTINE SPECNORM(PNORM,PSPEC,KVSET,KMASTER,KRESOL,PMET) ! ------------------------------------------------------------------ -USE PARKIND1 ,ONLY : JPIM ,JPRB +USE PARKIND1, ONLY: JPIM, JPRB !ifndef INTERFACE -USE TPM_GEN ,ONLY : NERR -USE TPM_DISTR ,ONLY : D, NPRTRV, MYSETV, MYPROC - -USE SET_RESOL_MOD ,ONLY : SET_RESOL -USE SPNORM_CTL_MOD ,ONLY : SPNORM_CTL -USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +USE TPM_GEN, ONLY: NERR +USE TPM_DISTR, ONLY: D, NPRTRV, MYSETV, MYPROC +USE SET_RESOL_MOD, ONLY: SET_RESOL +USE SPNORM_CTL_MOD, ONLY: SPNORM_CTL +USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS !endif INTERFACE diff --git a/src/trans/gpu/external/sugawc.F90 b/src/trans/gpu/external/sugawc.F90 index 8f23f491b..59e3ecb12 100755 --- a/src/trans/gpu/external/sugawc.F90 +++ b/src/trans/gpu/external/sugawc.F90 @@ -46,11 +46,11 @@ SUBROUTINE SUGAWC(KDGLG,PMU,PW) ! ------------------------------------------------------------------ -USE EC_PARKIND ,ONLY : JPRD, JPIM +USE EC_PARKIND, ONLY: JPRD, JPIM !ifndef INTERFACE -USE SUGAW_MOD, ONLY : SUGAW +USE SUGAW_MOD, ONLY: SUGAW !endif INTERFACE diff --git a/src/trans/gpu/external/trans_end.F90 b/src/trans/gpu/external/trans_end.F90 index e3f6f8b92..20a037629 100755 --- a/src/trans/gpu/external/trans_end.F90 +++ b/src/trans/gpu/external/trans_end.F90 @@ -42,24 +42,24 @@ SUBROUTINE TRANS_END(CDMODE) ! ------------------------------------------------------------------ -USE PARKIND1 ,ONLY : JPIM, JPRB +USE PARKIND1, ONLY: JPIM, JPRB !ifndef INTERFACE -USE TPM_GEN ,ONLY : MSETUP0, NCUR_RESOL, NMAX_RESOL, LENABLED,NDEF_RESOL -USE TPM_DIM ,ONLY : R, DIM_RESOL, R_NSMAX,R_NTMAX, R_NDGNH, R_NDGL -USE TPM_DISTR ,ONLY : D, DISTR_RESOL, NPRCIDS,D_NUMP,D_MYMS,D_NSTAGT0B,D_NSTAGT1B,D_NPROCL,D_NPNTGTB1, D_NASM0, & -& D_NSTAGTF,D_MSTABF,D_NPNTGTB0,D_NPROCM,D_NPTRLS -USE TPM_GEOMETRY ,ONLY : G, GEOM_RESOL, G_NDGLU, G_NMEN, G_NMEN_MAX,G_NLOEN, G_NLOEN_MAX -USE TPM_FIELDS ,ONLY : F, FIELDS_RESOL,F_RW,ZEPSNM,ZAA,ZAS,ZAA0,ZAS0 -USE TPM_FFT ,ONLY : T, FFT_RESOL -USE TPM_CTL ,ONLY : C, CTL_RESOL -USE TPM_FLT -USE TPM_TRANS ,ONLY : FOUBUF, FOUBUF_IN - -USE EQ_REGIONS_MOD ,ONLY : N_REGIONS -USE SET_RESOL_MOD ,ONLY : SET_RESOL -USE DEALLOC_RESOL_MOD ,ONLY : DEALLOC_RESOL +USE TPM_GEN, ONLY: MSETUP0, NCUR_RESOL, NMAX_RESOL, LENABLED, NDEF_RESOL +USE TPM_DIM, ONLY: R, DIM_RESOL, R_NSMAX, R_NTMAX, R_NDGNH, R_NDGL +USE TPM_DISTR, ONLY: D, DISTR_RESOL, NPRCIDS, D_NUMP, D_MYMS, D_NSTAGT0B, D_NSTAGT1B, & + & D_NPROCL, D_NPNTGTB1, D_NASM0, D_NSTAGTF, D_MSTABF, D_NPNTGTB0, & + & D_NPROCM, D_NPTRLS +USE TPM_GEOMETRY, ONLY: G, GEOM_RESOL, G_NDGLU, G_NMEN, G_NMEN_MAX,G_NLOEN, G_NLOEN_MAX +USE TPM_FIELDS, ONLY: F, FIELDS_RESOL, F_RW, ZEPSNM, ZAA, ZAS, ZAA0, ZAS0 +USE TPM_FFT, ONLY: T, FFT_RESOL +USE TPM_CTL, ONLY: C, CTL_RESOL +USE TPM_FLT, ONLY: S, FLT_RESOL +USE TPM_TRANS, ONLY: FOUBUF, FOUBUF_IN +USE EQ_REGIONS_MOD, ONLY: N_REGIONS +USE SET_RESOL_MOD, ONLY: SET_RESOL +USE DEALLOC_RESOL_MOD, ONLY: DEALLOC_RESOL ! IMPLICIT NONE diff --git a/src/trans/gpu/external/trans_inq.F90 b/src/trans/gpu/external/trans_inq.F90 index b0c19042f..6000d1d62 100755 --- a/src/trans/gpu/external/trans_inq.F90 +++ b/src/trans/gpu/external/trans_inq.F90 @@ -118,21 +118,19 @@ SUBROUTINE TRANS_INQ(KRESOL,KSPEC,KSPEC2,KSPEC2G,KSPEC2MX,KNUMP,& ! ------------------------------------------------------------------ -USE PARKIND1 ,ONLY : JPIM ,JPRB, JPRD +USE PARKIND1, ONLY: JPIM, JPRB, JPRD !ifndef INTERFACE -USE TPM_GEN ,ONLY : NDEF_RESOL -USE TPM_DIM ,ONLY : R -USE TPM_DISTR ,ONLY : D, NPRTRNS, NPRTRW, MYSETV, MYSETW, NPRTRV -USE TPM_GEOMETRY ,ONLY : G -USE TPM_FIELDS ,ONLY : F -USE TPM_FLT - -USE SET_RESOL_MOD ,ONLY : SET_RESOL -USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS -USE EQ_REGIONS_MOD ,ONLY : MY_REGION_EW, MY_REGION_NS, & - & N_REGIONS_EW, N_REGIONS_NS +USE TPM_GEN, ONLY: NDEF_RESOL +USE TPM_DIM, ONLY: R +USE TPM_DISTR, ONLY: D, NPRTRNS, NPRTRW, MYSETV, MYSETW, NPRTRV +USE TPM_GEOMETRY, ONLY: G +USE TPM_FIELDS, ONLY: F +USE TPM_FLT, ONLY: S +USE SET_RESOL_MOD, ONLY: SET_RESOL +USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS +USE EQ_REGIONS_MOD, ONLY: MY_REGION_EW, MY_REGION_NS, N_REGIONS_EW, N_REGIONS_NS !endif INTERFACE diff --git a/src/trans/gpu/external/trans_pnm.F90 b/src/trans/gpu/external/trans_pnm.F90 index c8ccd0c03..cd89a85c7 100755 --- a/src/trans/gpu/external/trans_pnm.F90 +++ b/src/trans/gpu/external/trans_pnm.F90 @@ -45,21 +45,20 @@ SUBROUTINE TRANS_PNM(KRESOL,KM,PRPNM,LDTRANSPOSE,LDCHEAP) ! ------------------------------------------------------------------ -USE PARKIND1 ,ONLY : JPRD, JPIM -USE PARKIND_ECTRANS,ONLY : JPRBT +USE PARKIND1, ONLY: JPRD, JPIM +USE PARKIND_ECTRANS, ONLY: JPRBT !ifndef INTERFACE -USE TPM_DIM ,ONLY : R -USE TPM_DISTR ,ONLY : D -USE TPM_GEOMETRY ,ONLY : G -USE TPM_FIELDS ,ONLY : F -USE TPM_FLT ,ONLY : S - -USE SET_RESOL_MOD ,ONLY : SET_RESOL -USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS -USE TPM_POL -USE SUPOLF_MOD +USE TPM_DIM, ONLY: R +USE TPM_DISTR, ONLY: D +USE TPM_GEOMETRY, ONLY: G +USE TPM_FIELDS, ONLY: F +USE TPM_FLT, ONLY: S +USE SET_RESOL_MOD, ONLY: SET_RESOL +USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS +USE TPM_POL, ONLY: INI_POL +USE SUPOLF_MOD, ONLY: SUPOLF !endif INTERFACE diff --git a/src/trans/gpu/external/trans_release.F90 b/src/trans/gpu/external/trans_release.F90 index ea97b3cff..29ccab61c 100755 --- a/src/trans/gpu/external/trans_release.F90 +++ b/src/trans/gpu/external/trans_release.F90 @@ -39,11 +39,11 @@ SUBROUTINE TRANS_RELEASE(KRESOL) ! ------------------------------------------------------------------ -USE PARKIND1 ,ONLY : JPIM +USE PARKIND1, ONLY: JPIM !ifndef INTERFACE -USE DEALLOC_RESOL_MOD ,ONLY : DEALLOC_RESOL +USE DEALLOC_RESOL_MOD, ONLY: DEALLOC_RESOL ! IMPLICIT NONE diff --git a/src/trans/gpu/external/vordiv_to_uv.F90 b/src/trans/gpu/external/vordiv_to_uv.F90 index 7aa8342c8..c8667a2f0 100755 --- a/src/trans/gpu/external/vordiv_to_uv.F90 +++ b/src/trans/gpu/external/vordiv_to_uv.F90 @@ -50,17 +50,16 @@ SUBROUTINE VORDIV_TO_UV(PSPVOR,PSPDIV,PSPU,PSPV,KSMAX,KVSETUV) ! ------------------------------------------------------------------ -USE PARKIND1 ,ONLY : JPIM ,JPRB +USE PARKIND1, ONLY: JPIM, JPRB !ifndef INTERFACE -USE TPM_GEN ,ONLY : NERR, NOUT,MSETUP0 -USE TPM_DISTR ,ONLY : D, NPRTRV, MYSETV - -USE SET_RESOL_MOD ,ONLY : SET_RESOL -USE VD2UV_CTL_MOD ,ONLY : VD2UV_CTL -USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS -USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK +USE TPM_GEN, ONLY: NERR, NOUT,MSETUP0 +USE TPM_DISTR, ONLY: D, NPRTRV, MYSETV +USE SET_RESOL_MOD, ONLY: SET_RESOL +USE VD2UV_CTL_MOD, ONLY: VD2UV_CTL +USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS +USE YOMHOOK, ONLY: LHOOK, DR_HOOK, JPHOOK !endif INTERFACE diff --git a/src/trans/gpu/internal/abort_trans_mod.F90 b/src/trans/gpu/internal/abort_trans_mod.F90 index b92131d14..2fe9e7830 100755 --- a/src/trans/gpu/internal/abort_trans_mod.F90 +++ b/src/trans/gpu/internal/abort_trans_mod.F90 @@ -12,10 +12,10 @@ MODULE ABORT_TRANS_MOD CONTAINS SUBROUTINE ABORT_TRANS(CDTEXT) -USE TPM_GEN ,ONLY : NOUT,NERR -USE TPM_DISTR ,ONLY : NPROC,MYPROC -USE MPL_MODULE ,ONLY : MPL_ABORT -USE SDL_MOD ,ONLY : SDL_TRACEBACK, SDL_SRLABORT +USE TPM_GEN, ONLY: NOUT,NERR +USE TPM_DISTR, ONLY: NPROC,MYPROC +USE MPL_MODULE, ONLY: MPL_ABORT +USE SDL_MOD, ONLY: SDL_TRACEBACK, SDL_SRLABORT IMPLICIT NONE diff --git a/src/trans/gpu/internal/buffered_allocator_mod.F90 b/src/trans/gpu/internal/buffered_allocator_mod.F90 index 86149ea5f..fa891b8d2 100644 --- a/src/trans/gpu/internal/buffered_allocator_mod.F90 +++ b/src/trans/gpu/internal/buffered_allocator_mod.F90 @@ -8,10 +8,11 @@ #define ALIGN(I, A) (((I)+(A)-1)/(A)*(A)) MODULE BUFFERED_ALLOCATOR_MOD - USE PARKIND_ECTRANS ,ONLY : JPIM - USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS - USE ISO_C_BINDING ,ONLY : C_INT8_T, C_SIZE_T, C_LOC, C_F_POINTER - USE GROWING_ALLOCATOR_MOD + USE PARKIND_ECTRANS, ONLY: JPIM + USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS + USE ISO_C_BINDING, ONLY: C_INT8_T, C_SIZE_T, C_LOC, C_F_POINTER + USE GROWING_ALLOCATOR_MOD, ONLY: GROWING_ALLOCATION_TYPE + USE OPENACC, ONLY: ACC_ASYNC_SYNC IMPLICIT NONE @@ -83,6 +84,7 @@ FUNCTION RESERVE(ALLOCATOR, SZ) END FUNCTION RESERVE SUBROUTINE INSTANTIATE_ALLOCATOR(ALLOCATOR, GROWING_ALLOCATION) + USE GROWING_ALLOCATOR_MOD, ONLY: REALLOCATE_GROWING_ALLOCATION IMPLICIT NONE TYPE(BUFFERED_ALLOCATOR), INTENT(INOUT) :: ALLOCATOR !!TYPE(GROWING_ALLOCATION_TYPE), INTENT(IN), POINTER :: GROWING_ALLOCATION @@ -116,8 +118,7 @@ FUNCTION GET_ALLOCATION(ALLOCATOR, RESERVATION) END FUNCTION GET_ALLOCATION SUBROUTINE ASSIGN_PTR_FLOAT(DST, SRC, START_IN_BYTES, LENGTH_IN_BYTES, SET_VALUE, SET_STREAM) - USE ISO_C_BINDING - USE OPENACC, ONLY: ACC_ASYNC_SYNC + USE ISO_C_BINDING, ONLY: C_FLOAT IMPLICIT NONE INTEGER(KIND=C_INT8_T), POINTER, INTENT(INOUT) :: SRC(:) REAL(KIND=C_FLOAT), POINTER, INTENT(OUT) :: DST(:) @@ -150,8 +151,7 @@ SUBROUTINE ASSIGN_PTR_FLOAT(DST, SRC, START_IN_BYTES, LENGTH_IN_BYTES, SET_VALUE & [SIZEOF(SRC(START_IN_BYTES:START_IN_BYTES+LENGTH_IN_BYTES-1))/SIZEOF(DST(0))]) END SUBROUTINE ASSIGN_PTR_FLOAT SUBROUTINE ASSIGN_PTR_DOUBLE(DST, SRC, START_IN_BYTES, LENGTH_IN_BYTES, SET_VALUE, SET_STREAM) - USE ISO_C_BINDING - USE OPENACC, ONLY: ACC_ASYNC_SYNC + USE ISO_C_BINDING, ONLY: C_DOUBLE IMPLICIT NONE INTEGER(KIND=C_INT8_T), POINTER, INTENT(INOUT) :: SRC(:) REAL(KIND=C_DOUBLE), POINTER, INTENT(OUT) :: DST(:) diff --git a/src/trans/gpu/internal/cdmap_mod.F90 b/src/trans/gpu/internal/cdmap_mod.F90 index 10648a06c..649aefa65 100755 --- a/src/trans/gpu/internal/cdmap_mod.F90 +++ b/src/trans/gpu/internal/cdmap_mod.F90 @@ -13,13 +13,11 @@ MODULE CDMAP_MOD SUBROUTINE CDMAP(KM,KMLOC,KSL,KSLO,PEPSNM, KDIR, KDGNH, KDGNHD,& & KFIELDS, PCOEFA, PCOEFS) -USE PARKIND_ECTRANS ,ONLY : JPIM, JPRB, JPRBT -USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK -USE TPM_FLT -USE TPM_GEOMETRY -USE TPM_DISTR ,ONLY : D -USE TPM_TRANS ,ONLY : FOUBUF_IN, FOUBUF -USE SEEFMM_MIX +USE PARKIND_ECTRANS, ONLY: JPIM, JPRB, JPRBT +USE YOMHOOK, ONLY: LHOOK, DR_HOOK, JPHOOK +USE TPM_FLT, ONLY: S +USE TPM_DISTR, ONLY: D +USE TPM_TRANS, ONLY: FOUBUF_IN, FOUBUF !**** *CDMAP* - REMAP ROOTS ! diff --git a/src/trans/gpu/internal/cpledn_mod.F90 b/src/trans/gpu/internal/cpledn_mod.F90 index 17f7504ca..e0f054782 100755 --- a/src/trans/gpu/internal/cpledn_mod.F90 +++ b/src/trans/gpu/internal/cpledn_mod.F90 @@ -63,7 +63,7 @@ SUBROUTINE CPLEDN(KN,KODD,PFN,PX,KFLAG,PW,PXN,PXMOD) ! F. Vana 05-Mar-2015 Support for single precision ! ------------------------------------------------------------------ -USE EC_PARKIND ,ONLY : JPRD, JPIM +USE EC_PARKIND, ONLY: JPRD, JPIM ! ------------------------------------------------------------------ diff --git a/src/trans/gpu/internal/dealloc_resol_mod.F90 b/src/trans/gpu/internal/dealloc_resol_mod.F90 index 6a6bd20e9..792fc46d8 100755 --- a/src/trans/gpu/internal/dealloc_resol_mod.F90 +++ b/src/trans/gpu/internal/dealloc_resol_mod.F90 @@ -41,20 +41,17 @@ SUBROUTINE DEALLOC_RESOL(KRESOL) ! ------------------------------------------------------------------ -USE PARKIND1 ,ONLY : JPIM - -USE TPM_DIM ,ONLY : R -USE TPM_GEN ,ONLY : LENABLED, NOUT,NDEF_RESOL -USE TPM_DISTR ,ONLY : D,NPRTRV -USE TPM_GEOMETRY ,ONLY : G -USE TPM_FIELDS ,ONLY : F -USE TPM_FFT ,ONLY : T -USE TPM_FLT ,ONLY : S -USE TPM_CTL ,ONLY : C -USE TPM_HICFFT ,ONLY : DESTROY_ALL_PLANS_FFT -USE SEEFMM_MIX - -USE SET_RESOL_MOD ,ONLY : SET_RESOL +USE PARKIND_ECTRANS, ONLY: JPIM +USE TPM_DIM, ONLY: R +USE TPM_GEN, ONLY: LENABLED, NOUT, NDEF_RESOL +USE TPM_DISTR, ONLY: D, NPRTRV +USE TPM_GEOMETRY, ONLY: G +USE TPM_FIELDS, ONLY: F +USE TPM_FFT, ONLY: T +USE TPM_FLT, ONLY: S +USE TPM_CTL, ONLY: C +USE TPM_HICFFT, ONLY: DESTROY_ALL_PLANS_FFT +USE SET_RESOL_MOD, ONLY: SET_RESOL ! IMPLICIT NONE diff --git a/src/trans/gpu/internal/dir_trans_ctl_mod.F90 b/src/trans/gpu/internal/dir_trans_ctl_mod.F90 index a3ba882d5..55b60b3b8 100755 --- a/src/trans/gpu/internal/dir_trans_ctl_mod.F90 +++ b/src/trans/gpu/internal/dir_trans_ctl_mod.F90 @@ -73,20 +73,20 @@ SUBROUTINE DIR_TRANS_CTL(KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS,KF_UV,KF_SCALARS,& ! ------------------------------------------------------------------ - USE PARKIND_ECTRANS ,ONLY : JPRBT, JPRD, JPRB, JPIM - - USE TPM_GEN ,ONLY : NPROMATR, NOUT - USE TPM_DISTR, ONLY: NPROC - USE MPL_MODULE ,ONLY : MPL_BARRIER - USE TPM_TRANS, ONLY: GROWING_ALLOCATION - USE TPM_GEN - USE BUFFERED_ALLOCATOR_MOD - - USE FTDIR_MOD - USE LTDIR_MOD - USE TRGTOL_MOD - USE TRLTOM_MOD - USE TRLTOM_PACK_UNPACK + USE PARKIND_ECTRANS, ONLY: JPRBT, JPRD, JPRB, JPIM + USE TPM_GEN, ONLY: NPROMATR, NOUT + USE TPM_DISTR, ONLY: NPROC + USE MPL_MODULE, ONLY: MPL_BARRIER + USE TPM_TRANS, ONLY: GROWING_ALLOCATION + USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, MAKE_BUFFERED_ALLOCATOR, & + & INSTANTIATE_ALLOCATOR + USE FTDIR_MOD, ONLY: FTDIR_HANDLE, PREPARE_FTDIR, FTDIR + USE LTDIR_MOD, ONLY: LTDIR_HANDLE, PREPARE_LTDIR, LTDIR + USE TRGTOL_MOD, ONLY: TRGTOL_HANDLE, PREPARE_TRGTOL, TRGTOL + USE TRLTOM_MOD, ONLY: TRLTOM_HANDLE, PREPARE_TRLTOM, TRLTOM + USE TRLTOM_PACK_UNPACK, ONLY: TRLTOM_PACK_HANDLE, TRLTOM_UNPACK_HANDLE, & + & PREPARE_TRLTOM_PACK, PREPARE_TRLTOM_UNPACK, TRLTOM_PACK, & + & TRLTOM_UNPACK IMPLICIT NONE diff --git a/src/trans/gpu/internal/dist_grid_32_ctl_mod.F90 b/src/trans/gpu/internal/dist_grid_32_ctl_mod.F90 index 792d9896a..1c28c33e7 100755 --- a/src/trans/gpu/internal/dist_grid_32_ctl_mod.F90 +++ b/src/trans/gpu/internal/dist_grid_32_ctl_mod.F90 @@ -43,15 +43,14 @@ SUBROUTINE DIST_GRID_32_CTL(PGPG,KFDISTG,KPROMA,KFROM,PGP) ! ------------------------------------------------------------------ -USE PARKIND1 ,ONLY : JPIM ,JPRM -USE MPL_MODULE - -USE TPM_DISTR -USE TPM_GEOMETRY - -USE SET2PE_MOD -USE ABORT_TRANS_MOD -USE EQ_REGIONS_MOD +USE PARKIND_ECTRANS, ONLY: JPIM, JPRM +USE MPL_MODULE, ONLY: MPL_RECV, JP_BLOCKING_STANDARD, MPL_SEND, JP_NON_BLOCKING_STANDARD, & + & MPL_WAIT, MPL_BARRIER +USE TPM_DISTR, ONLY: D, NPROC, MYPROC, MTAGDISTGP, NPRCIDS +USE TPM_GEOMETRY, ONLY: G +USE SET2PE_MOD, ONLY: SET2PE +USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS +USE EQ_REGIONS_MOD, ONLY: N_REGIONS_NS, N_REGIONS IMPLICIT NONE diff --git a/src/trans/gpu/internal/dist_grid_ctl_mod.F90 b/src/trans/gpu/internal/dist_grid_ctl_mod.F90 index 184a71845..d9711b755 100755 --- a/src/trans/gpu/internal/dist_grid_ctl_mod.F90 +++ b/src/trans/gpu/internal/dist_grid_ctl_mod.F90 @@ -45,16 +45,14 @@ SUBROUTINE DIST_GRID_CTL(PGPG,KFDISTG,KPROMA,KFROM,PGP,KSORT) ! ------------------------------------------------------------------ -USE PARKIND1 ,ONLY : JPIM ,JPRB -USE MPL_MODULE ,ONLY : MPL_RECV, MPL_SEND, MPL_BARRIER, MPL_WAIT, & - & JP_BLOCKING_STANDARD, JP_NON_BLOCKING_STANDARD - -USE TPM_DISTR ,ONLY : D, MTAGDISTGP, NPRCIDS, MYPROC, NPROC -USE TPM_GEOMETRY ,ONLY : G - -USE SET2PE_MOD ,ONLY : SET2PE -USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS -USE EQ_REGIONS_MOD ,ONLY : N_REGIONS, N_REGIONS_NS +USE PARKIND1, ONLY: JPIM, JPRB +USE MPL_MODULE, ONLY: MPL_RECV, MPL_SEND, MPL_BARRIER, MPL_WAIT, JP_BLOCKING_STANDARD, & + & JP_NON_BLOCKING_STANDARD +USE TPM_DISTR, ONLY: D, MTAGDISTGP, NPRCIDS, MYPROC, NPROC +USE TPM_GEOMETRY, ONLY: G +USE SET2PE_MOD, ONLY: SET2PE +USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS +USE EQ_REGIONS_MOD, ONLY: N_REGIONS, N_REGIONS_NS ! IMPLICIT NONE diff --git a/src/trans/gpu/internal/dist_spec_control_mod.F90 b/src/trans/gpu/internal/dist_spec_control_mod.F90 index 449889a90..a0c418a68 100755 --- a/src/trans/gpu/internal/dist_spec_control_mod.F90 +++ b/src/trans/gpu/internal/dist_spec_control_mod.F90 @@ -47,16 +47,11 @@ SUBROUTINE DIST_SPEC_CONTROL(PSPECG,KFDISTG,KFROM,KVSET,PSPEC,LDIM1_IS_FLD,& ! ------------------------------------------------------------------ -USE PARKIND1 ,ONLY : JPIM ,JPRB -USE MPL_MODULE ,ONLY : MPL_RECV, MPL_SEND, MPL_BARRIER, MPL_WAIT, & - & JP_NON_BLOCKING_STANDARD - -!USE TPM_GEN -!USE TPM_DIM -USE TPM_DISTR ,ONLY : MTAGDISTSP, MYSETV, NPRCIDS, NPRTRW, MYPROC, NPROC - -USE SET2PE_MOD ,ONLY : SET2PE -USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +USE PARKIND1, ONLY: JPIM,JPRB +USE MPL_MODULE, ONLY: MPL_RECV, MPL_SEND, MPL_BARRIER, MPL_WAIT, JP_NON_BLOCKING_STANDARD +USE TPM_DISTR, ONLY: MTAGDISTSP, MYSETV, NPRCIDS, NPRTRW, MYPROC, NPROC +USE SET2PE_MOD, ONLY: SET2PE +USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS ! IMPLICIT NONE diff --git a/src/trans/gpu/internal/eq_regions_mod.F90 b/src/trans/gpu/internal/eq_regions_mod.F90 index 5888c1070..78e2af964 100755 --- a/src/trans/gpu/internal/eq_regions_mod.F90 +++ b/src/trans/gpu/internal/eq_regions_mod.F90 @@ -70,7 +70,7 @@ MODULE eq_regions_mod ! !-------------------------------------------------------------------------------- ! -USE PARKIND_ECTRANS ,ONLY : JPIM, JPRBT +USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT IMPLICIT NONE @@ -144,7 +144,6 @@ subroutine eq_regions(N) ! eq_regions uses the zonal equal area sphere partitioning algorithm to partition ! the surface of a sphere into N regions of equal area and small diameter. ! -USE PARKIND_ECTRANS ,ONLY : JPIM, JPRBT IMPLICIT NONE integer(kind=jpim),intent(in) :: N integer(kind=jpim) :: n_collars,j @@ -223,7 +222,6 @@ function num_collars(N,c_polar,a_ideal) result(num_c) ! Given N, an ideal angle, and c_polar, ! determine n_collars, the number of collars between the polar caps. ! -USE PARKIND_ECTRANS ,ONLY : JPIM, JPRBT IMPLICIT NONE integer(kind=jpim),intent(in) :: N real(kind=JPRBT),intent(in) :: a_ideal,c_polar @@ -251,7 +249,6 @@ subroutine ideal_region_list(N,c_polar,n_collars,r_regions) ! r_regions[n_collars+2] is 1. ! The sum of r_regions is N. ! -USE PARKIND_ECTRANS ,ONLY : JPIM, JPRBT IMPLICIT NONE integer(kind=jpim),intent(in) :: N,n_collars real(kind=JPRBT),intent(in) :: c_polar @@ -285,7 +282,6 @@ function ideal_collar_angle(N) result(ideal) ! IDEAL_COLLAR_ANGLE(N) sets ANGLE to the ideal angle for the ! spherical collars of an EQ partition of the unit sphere S^2 into N regions. ! -USE PARKIND_ECTRANS ,ONLY : JPIM, JPRBT IMPLICIT NONE integer(kind=jpim),intent(in) :: N real(kind=JPRBT) :: ideal @@ -305,7 +301,6 @@ subroutine round_to_naturals(N,n_collars,r_regions) ! n_regions[n_collars+2] is 1. ! The sum of n_regions is N. ! -USE PARKIND_ECTRANS ,ONLY : JPIM, JPRBT IMPLICIT NONE integer(kind=jpim),intent(in) :: N,n_collars real(kind=JPRBT),intent(in) :: r_regions(n_collars+2) @@ -324,7 +319,6 @@ function polar_colat(N) result(polar_c) ! ! Given N, determine the colatitude of the North polar spherical cap. ! -USE PARKIND_ECTRANS ,ONLY : JPIM, JPRBT IMPLICIT NONE integer(kind=jpim),intent(in) :: N real(kind=JPRBT) :: area @@ -343,7 +337,6 @@ function area_of_ideal_region(N) result(area) ! AREA_OF_IDEAL_REGION(N) sets AREA to be the area of one of N equal ! area regions on S^2, that is 1/N times AREA_OF_SPHERE. ! -USE PARKIND_ECTRANS ,ONLY : JPIM, JPRBT IMPLICIT NONE integer(kind=jpim),intent(in) :: N real(kind=JPRBT) :: area_of_sphere @@ -358,7 +351,6 @@ function sradius_of_cap(area) result(sradius) ! SRADIUS_OF_CAP(AREA) returns the spherical radius of ! an S^2 spherical cap of area AREA. ! -USE PARKIND_ECTRANS ,ONLY : JPIM, JPRBT IMPLICIT NONE real(kind=JPRBT),intent(in) :: area real(kind=JPRBT) :: sradius @@ -374,7 +366,6 @@ function area_of_collar(a_top, a_bot) result(area) ! collar specified by A_TOP, A_BOT, where A_TOP is top (smaller) spherical radius, ! A_BOT is bottom (larger) spherical radius. ! -USE PARKIND_ECTRANS ,ONLY : JPIM, JPRBT IMPLICIT NONE real(kind=JPRBT),intent(in) :: a_top,a_bot real(kind=JPRBT) area @@ -397,7 +388,6 @@ end function area_of_cap function gamma(x) result(gamma_res) ! -USE PARKIND_ECTRANS ,ONLY : JPIM, JPRBT IMPLICIT NONE real(kind=JPRBT),intent(in) :: x real(kind=JPRBT) :: gamma_res diff --git a/src/trans/gpu/internal/ext_acc.F90 b/src/trans/gpu/internal/ext_acc.F90 index bf42d9a5f..b2ea8687a 100644 --- a/src/trans/gpu/internal/ext_acc.F90 +++ b/src/trans/gpu/internal/ext_acc.F90 @@ -6,7 +6,7 @@ ! granted to it by virtue of its status as an intergovernmental organisation ! nor does it submit to any jurisdiction. module openacc_ext_type - use iso_c_binding + use iso_c_binding, only: c_size_t implicit none private public :: ext_acc_arr_desc @@ -17,10 +17,9 @@ module openacc_ext_type end type end module module openacc_ext - use iso_c_binding - use iso_fortran_env - use openacc, only : acc_create, acc_copyin, acc_handle_kind - use openacc_ext_type + use iso_c_binding, only: c_ptr, c_size_t, c_loc + use openacc, only: acc_create, acc_copyin, acc_handle_kind + use openacc_ext_type, only: ext_acc_arr_desc implicit none private @@ -248,7 +247,8 @@ function get_common_pointers(in_ptrs, out_ptrs) result(num_ranges) enddo end function subroutine ext_acc_create(ptrs, stream) - use openacc, only : acc_create, acc_async_sync + use openacc, only: acc_create, acc_async_sync + use iso_fortran_env, only: int32 implicit none type(ext_acc_arr_desc), intent(in) :: ptrs(:) integer(acc_handle_kind), optional :: stream @@ -274,7 +274,7 @@ subroutine ext_acc_create(ptrs, stream) enddo end subroutine subroutine ext_acc_copyin(ptrs, stream) - use openacc + use openacc, only: acc_async_sync implicit none type(ext_acc_arr_desc), intent(in) :: ptrs(:) integer(acc_handle_kind), optional :: stream @@ -301,7 +301,7 @@ subroutine ext_acc_copyin(ptrs, stream) enddo end subroutine subroutine ext_acc_copyout(ptrs, stream) - use openacc + use openacc, only: acc_async_sync, acc_copyout implicit none type(ext_acc_arr_desc), intent(in) :: ptrs(:) integer(acc_handle_kind), optional :: stream @@ -328,7 +328,7 @@ subroutine ext_acc_copyout(ptrs, stream) enddo end subroutine subroutine ext_acc_delete(ptrs, stream) - use openacc + use openacc, only: acc_async_sync implicit none type(ext_acc_arr_desc), intent(in) :: ptrs(:) integer(acc_handle_kind), optional :: stream diff --git a/src/trans/gpu/internal/field_split_mod.F90 b/src/trans/gpu/internal/field_split_mod.F90 index 20719e6c1..cb9664782 100755 --- a/src/trans/gpu/internal/field_split_mod.F90 +++ b/src/trans/gpu/internal/field_split_mod.F90 @@ -58,11 +58,9 @@ SUBROUTINE FIELD_SPLIT(KBLK,KF_GP,KKF_UV_G,KVSETUV,KVSETSC,& ! Original : 01-01-03 ! ------------------------------------------------------------------ -USE PARKIND1 ,ONLY : JPIM - -USE TPM_GEN ,ONLY : NPROMATR -!USE TPM_TRANS -USE TPM_DISTR ,ONLY : MYSETV, NPRTRV +USE PARKIND1, ONLY: JPIM +USE TPM_GEN, ONLY: NPROMATR +USE TPM_DISTR, ONLY: MYSETV, NPRTRV ! IMPLICIT NONE diff --git a/src/trans/gpu/internal/fsc_mod.F90 b/src/trans/gpu/internal/fsc_mod.F90 index 6c0a2d162..d5a06641b 100755 --- a/src/trans/gpu/internal/fsc_mod.F90 +++ b/src/trans/gpu/internal/fsc_mod.F90 @@ -10,7 +10,9 @@ ! MODULE FSC_MOD - USE BUFFERED_ALLOCATOR_MOD + USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR + USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT + USE TPM_DISTR, ONLY: D IMPLICIT NONE PRIVATE @@ -21,9 +23,6 @@ MODULE FSC_MOD CONTAINS FUNCTION PREPARE_FSC(ALLOCATOR) RESULT(HFSC) - USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT - USE TPM_DISTR, ONLY: D - IMPLICIT NONE TYPE(BUFFERED_ALLOCATOR), INTENT(INOUT) :: ALLOCATOR @@ -65,14 +64,12 @@ SUBROUTINE FSC(ALLOCATOR,HFSC,PREEL_COMPLEX, KF_FS, KF_UV, KF_SCALARS, KUV_OFFSE ! ------------------------------------------------------------------ -USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRBT - -USE TPM_TRANS ,ONLY : LATLON -USE TPM_DISTR ,ONLY : D, MYSETW, MYPROC, NPROC, D_NUMP, D_NPTRLS, D_NSTAGTF -USE TPM_GEOMETRY ,ONLY : G_NMEN, G_NLOEN, G_NLOEN_MAX -USE TPM_FIELDS ,ONLY : F_RACTHE -USE TPM_GEN ,ONLY : NOUT -USE TPM_DIM ,ONLY : R_NSMAX +USE TPM_TRANS, ONLY: LATLON +USE TPM_DISTR, ONLY: MYSETW, MYPROC, NPROC, D_NUMP, D_NPTRLS, D_NSTAGTF +USE TPM_GEOMETRY, ONLY: G_NMEN, G_NLOEN, G_NLOEN_MAX +USE TPM_FIELDS, ONLY: F_RACTHE +USE TPM_GEN, ONLY: NOUT +USE TPM_DIM, ONLY: R_NSMAX ! IMPLICIT NONE diff --git a/src/trans/gpu/internal/ftdir_mod.F90 b/src/trans/gpu/internal/ftdir_mod.F90 index b131947c7..da5910c77 100755 --- a/src/trans/gpu/internal/ftdir_mod.F90 +++ b/src/trans/gpu/internal/ftdir_mod.F90 @@ -10,7 +10,6 @@ ! MODULE FTDIR_MOD - USE BUFFERED_ALLOCATOR_MOD IMPLICIT NONE PRIVATE @@ -58,14 +57,15 @@ SUBROUTINE FTDIR(ALLOCATOR,HFTDIR,PREEL_REAL,PREEL_COMPLEX,KFIELD) ! G. Mozdzynski (Jun 2015): Support alternative FFTs to FFTW ! ------------------------------------------------------------------ - USE TPM_GEN ,ONLY : LSYNC_TRANS - USE PARKIND_ECTRANS ,ONLY : JPIM, JPRBT - - USE TPM_DISTR ,ONLY : MYSETW, MYPROC, NPROC, D_NSTAGT0B, D_NSTAGTF,D_NPTRLS, D_NPNTGTB0, D_NPROCM, D_NDGL_FS - USE TPM_GEOMETRY ,ONLY : G_NMEN, G_NLOEN - USE TPM_HICFFT ,ONLY : EXECUTE_DIR_FFT - USE MPL_MODULE ,ONLY : MPL_BARRIER - USE TPM_STATS, ONLY : GSTATS => GSTATS_NVTX + USE TPM_GEN, ONLY: LSYNC_TRANS + USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT + USE TPM_DISTR, ONLY: MYSETW, MYPROC, NPROC, D_NSTAGT0B, D_NSTAGTF,D_NPTRLS, & + & D_NPNTGTB0, D_NPROCM, D_NDGL_FS + USE TPM_GEOMETRY, ONLY: G_NMEN, G_NLOEN + USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR + USE TPM_HICFFT, ONLY: EXECUTE_DIR_FFT + USE MPL_MODULE, ONLY: MPL_BARRIER + USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX IMPLICIT NONE diff --git a/src/trans/gpu/internal/ftinv_mod.F90 b/src/trans/gpu/internal/ftinv_mod.F90 index 77a8cf223..e0e3804e3 100755 --- a/src/trans/gpu/internal/ftinv_mod.F90 +++ b/src/trans/gpu/internal/ftinv_mod.F90 @@ -10,7 +10,7 @@ ! MODULE FTINV_MOD - USE BUFFERED_ALLOCATOR_MOD + USE BUFFERED_ALLOCATOR_MOD ,ONLY : BUFFERED_ALLOCATOR IMPLICIT NONE PRIVATE @@ -20,8 +20,6 @@ MODULE FTINV_MOD END TYPE CONTAINS FUNCTION PREPARE_FTINV(ALLOCATOR) RESULT(HFTINV) - USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT - IMPLICIT NONE TYPE(BUFFERED_ALLOCATOR), INTENT(INOUT) :: ALLOCATOR @@ -61,14 +59,13 @@ SUBROUTINE FTINV(ALLOCATOR,HFTINV,PREEL_COMPLEX,PREEL_REAL,KFIELD) ! G. Mozdzynski (Jun 2015): Support alternative FFTs to FFTW ! ------------------------------------------------------------------ - USE TPM_GEN ,ONLY : LSYNC_TRANS - USE PARKIND_ECTRANS ,ONLY : JPIM, JPRBT - - USE TPM_DISTR ,ONLY : MYSETW, MYPROC, NPROC, D_NPTRLS, D_NDGL_FS, D_NSTAGTF - USE TPM_GEOMETRY ,ONLY : G_NLOEN - USE TPM_HICFFT ,ONLY : EXECUTE_INV_FFT - USE MPL_MODULE ,ONLY : MPL_BARRIER - USE TPM_STATS, ONLY : GSTATS => GSTATS_NVTX + USE TPM_GEN, ONLY: LSYNC_TRANS + USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT + USE TPM_DISTR, ONLY: MYSETW, D_NPTRLS, D_NDGL_FS, D_NSTAGTF + USE TPM_GEOMETRY, ONLY: G_NLOEN + USE TPM_HICFFT, ONLY: EXECUTE_INV_FFT + USE MPL_MODULE, ONLY: MPL_BARRIER + USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX IMPLICIT NONE diff --git a/src/trans/gpu/internal/gath_grid_32_ctl_mod.F90 b/src/trans/gpu/internal/gath_grid_32_ctl_mod.F90 index 9af5b0180..0b1fd230d 100755 --- a/src/trans/gpu/internal/gath_grid_32_ctl_mod.F90 +++ b/src/trans/gpu/internal/gath_grid_32_ctl_mod.F90 @@ -32,16 +32,12 @@ SUBROUTINE GATH_GRID_32_CTL(PGPG,KFGATHG,KPROMA,KTO,PGP) ! ------------------------------------------------------------------ -USE PARKIND1 ,ONLY : JPIM ,JPRM -USE MPL_MODULE - -USE TPM_GEN -USE TPM_DIM -USE TPM_GEOMETRY -USE TPM_DISTR - -USE SET2PE_MOD -USE EQ_REGIONS_MOD +USE PARKIND1, ONLY: JPIM, JPRM +USE MPL_MODULE, ONLY: MPL_SEND, JP_NON_BLOCKING_STANDARD, MPL_RECV, JP_BLOCKING_STANDARD, & + & MPL_WAIT, MPL_ALLTOALLV +USE TPM_GEOMETRY, ONLY: G +USE TPM_DISTR, ONLY: D, NPROC, MYPROC, MTAGDISTSP, NPRCIDS +USE EQ_REGIONS_MOD, ONLY: N_REGIONS_NS, N_REGIONS IMPLICIT NONE diff --git a/src/trans/gpu/internal/gath_grid_ctl_mod.F90 b/src/trans/gpu/internal/gath_grid_ctl_mod.F90 index c8f5a3d4e..d6d9496a4 100755 --- a/src/trans/gpu/internal/gath_grid_ctl_mod.F90 +++ b/src/trans/gpu/internal/gath_grid_ctl_mod.F90 @@ -33,17 +33,13 @@ SUBROUTINE GATH_GRID_CTL(PGPG,KFGATHG,KPROMA,KTO,PGP) ! ------------------------------------------------------------------ -USE PARKIND1 ,ONLY : JPIM ,JPRB -USE MPL_MODULE ,ONLY : MPL_ALLTOALLV, MPL_RECV, MPL_SEND, MPL_WAIT, & - & JP_BLOCKING_STANDARD, JP_NON_BLOCKING_STANDARD - -!USE TPM_GEN -!USE TPM_DIM -USE TPM_GEOMETRY ,ONLY : G -USE TPM_DISTR ,ONLY : D, MTAGDISTSP, NPRCIDS, MYPROC, NPROC - -USE SET2PE_MOD ,ONLY : SET2PE -USE EQ_REGIONS_MOD ,ONLY : N_REGIONS, N_REGIONS_NS +USE PARKIND1, ONLY: JPIM, JPRB +USE MPL_MODULE, ONLY: MPL_ALLTOALLV, MPL_RECV, MPL_SEND, MPL_WAIT, JP_BLOCKING_STANDARD, & + & JP_NON_BLOCKING_STANDARD +USE TPM_GEOMETRY, ONLY: G +USE TPM_DISTR, ONLY: D, MTAGDISTSP, NPRCIDS, MYPROC, NPROC +USE SET2PE_MOD, ONLY: SET2PE +USE EQ_REGIONS_MOD, ONLY: N_REGIONS, N_REGIONS_NS IMPLICIT NONE diff --git a/src/trans/gpu/internal/gath_spec_control_mod.F90 b/src/trans/gpu/internal/gath_spec_control_mod.F90 index 88f7d213c..da94be477 100755 --- a/src/trans/gpu/internal/gath_spec_control_mod.F90 +++ b/src/trans/gpu/internal/gath_spec_control_mod.F90 @@ -35,18 +35,12 @@ SUBROUTINE GATH_SPEC_CONTROL(PSPECG,KFGATHG,KTO,KVSET,PSPEC,LDIM1_IS_FLD,& ! ------------------------------------------------------------------ -USE PARKIND1 ,ONLY : JPIM ,JPRB -USE MPL_MODULE ,ONLY : MPL_RECV, MPL_SEND, MPL_BARRIER, MPL_WAIT, & - & JP_BLOCKING_STANDARD, JP_NON_BLOCKING_STANDARD - -!USE TPM_GEN -!USE TPM_DIM -USE TPM_DISTR ,ONLY : MTAGDISTSP, NPRCIDS, NPRTRW, & - & MYSETV, MYSETW, MYPROC, NPROC -USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS - -USE SET2PE_MOD ,ONLY : SET2PE -!USE SUWAVEDI_MOD +USE PARKIND1, ONLY: JPIM, JPRB +USE MPL_MODULE, ONLY: MPL_RECV, MPL_SEND, MPL_BARRIER, MPL_WAIT, JP_BLOCKING_STANDARD, & + & JP_NON_BLOCKING_STANDARD +USE TPM_DISTR, ONLY: MTAGDISTSP, NPRCIDS, NPRTRW, MYSETV, MYSETW, MYPROC, NPROC +USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS +USE SET2PE_MOD, ONLY: SET2PE ! IMPLICIT NONE diff --git a/src/trans/gpu/internal/gawl_mod.F90 b/src/trans/gpu/internal/gawl_mod.F90 index b42178f0e..a236638f4 100755 --- a/src/trans/gpu/internal/gawl_mod.F90 +++ b/src/trans/gpu/internal/gawl_mod.F90 @@ -61,9 +61,8 @@ SUBROUTINE GAWL(PFN,PL,PW,PEPS,KN,KITER,PMOD) ! F. Vana 05-Mar-2015 Support for single precision ! ------------------------------------------------------------------ -USE EC_PARKIND ,ONLY : JPRD, JPIM - -USE CPLEDN_MOD ,ONLY : CPLEDN +USE EC_PARKIND, ONLY: JPRD, JPIM +USE CPLEDN_MOD, ONLY: CPLEDN ! ------------------------------------------------------------------ diff --git a/src/trans/gpu/internal/inigptr_mod.F90 b/src/trans/gpu/internal/inigptr_mod.F90 index e1fcd7818..53a836431 100755 --- a/src/trans/gpu/internal/inigptr_mod.F90 +++ b/src/trans/gpu/internal/inigptr_mod.F90 @@ -15,13 +15,12 @@ SUBROUTINE INIGPTR(KGPTRSEND,KGPTRRECV) ! Compute tables to assist GP to/from Fourier space transpositions -USE PARKIND1 ,ONLY : JPIM ,JPRB - -USE TPM_GEN ,ONLY : NOUT -USE TPM_DISTR ,ONLY : D, NPRTRNS -USE TPM_TRANS ,ONLY : NGPBLKS, NPROMA -USE EQ_REGIONS_MOD ,ONLY : MY_REGION_EW -USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +USE PARKIND1, ONLY: JPIM, JPRB +USE TPM_GEN, ONLY: NOUT +USE TPM_DISTR, ONLY: D, NPRTRNS +USE TPM_TRANS, ONLY: NGPBLKS, NPROMA +USE EQ_REGIONS_MOD, ONLY: MY_REGION_EW +USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS ! IMPLICIT NONE diff --git a/src/trans/gpu/internal/inv_trans_ctl_mod.F90 b/src/trans/gpu/internal/inv_trans_ctl_mod.F90 index d00e53796..80fbc3932 100644 --- a/src/trans/gpu/internal/inv_trans_ctl_mod.F90 +++ b/src/trans/gpu/internal/inv_trans_ctl_mod.F90 @@ -85,20 +85,21 @@ SUBROUTINE INV_TRANS_CTL(KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS,KF_OUT_LT,& ! ------------------------------------------------------------------ - USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRB, JPRBT, JPRD - USE ISO_C_BINDING, ONLY: C_INT8_T - - USE TPM_GEN ,ONLY : NPROMATR, NOUT - USE TPM_TRANS ,ONLY : LDIVGP, LSCDERS, LUVDER, LVORGP, GROWING_ALLOCATION - USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS - USE BUFFERED_ALLOCATOR_MOD - - USE TRMTOL_MOD - USE LTINV_MOD - USE TRMTOL_PACK_UNPACK - USE FSC_MOD - USE FTINV_MOD - USE TRLTOG_MOD + USE PARKIND_ECTRANS, ONLY: JPIM, JPRB, JPRBT, JPRD + USE ISO_C_BINDING, ONLY: C_INT8_T + USE TPM_GEN, ONLY: NPROMATR, NOUT + USE TPM_TRANS, ONLY: LDIVGP, LSCDERS, LUVDER, LVORGP, GROWING_ALLOCATION + USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS + USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, MAKE_BUFFERED_ALLOCATOR, & + & INSTANTIATE_ALLOCATOR + USE TRMTOL_MOD, ONLY: PREPARE_TRMTOL, TRMTOL_HANDLE, TRMTOL + USE LTINV_MOD, ONLY: PREPARE_LTINV, LTINV_HANDLE, LTINV + USE TRMTOL_PACK_UNPACK, ONLY: TRMTOL_PACK_HANDLE, TRMTOL_UNPACK_HANDLE, & + & PREPARE_TRMTOL_PACK, PREPARE_TRMTOL_UNPACK, TRMTOL_PACK, & + & TRMTOL_UNPACK + USE FSC_MOD, ONLY: FSC_HANDLE, PREPARE_FSC, FSC + USE FTINV_MOD, ONLY: FTINV_HANDLE, PREPARE_FTINV, FTINV + USE TRLTOG_MOD, ONLY: TRLTOG_HANDLE, PREPARE_TRLTOG, TRLTOG IMPLICIT NONE diff --git a/src/trans/gpu/internal/ledir_mod.F90 b/src/trans/gpu/internal/ledir_mod.F90 index 1effbae63..1e7700076 100755 --- a/src/trans/gpu/internal/ledir_mod.F90 +++ b/src/trans/gpu/internal/ledir_mod.F90 @@ -11,9 +11,8 @@ ! MODULE LEDIR_MOD - USE PARKIND_ECTRANS ,ONLY : JPIM -! USE TPM_TRANS, ONLY: LEDIR_CONFIG - USE BUFFERED_ALLOCATOR_MOD + USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT, JPRD + USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR IMPLICIT NONE PRIVATE @@ -23,9 +22,8 @@ MODULE LEDIR_MOD CONTAINS SUBROUTINE LEDIR_STRIDES(KF_FS,IOUT_STRIDES0,IOUT_SIZE,IIN_STRIDES0,IIN_SIZE,& IOUT0_STRIDES0,IOUT0_SIZE,IIN0_STRIDES0,IIN0_SIZE) - USE PARKIND_ECTRANS ,ONLY : JPIM, JPRBT, JPRD - USE TPM_DIM ,ONLY : R - USE TPM_DISTR, ONLY: D,D_OFFSETS_GEMM1,D_OFFSETS_GEMM2 + USE TPM_DIM, ONLY: R + USE TPM_DISTR, ONLY: D, D_OFFSETS_GEMM1, D_OFFSETS_GEMM2 IMPLICIT NONE @@ -96,20 +94,17 @@ SUBROUTINE LEDIR(ALLOCATOR,ZINPS,ZINPA,ZINPS0,ZINPA0,ZOUT,ZOUT0,POA1,KF_FS) ! F. Vana 05-Mar-2015 Support for single precision ! ------------------------------------------------------------------ - USE TPM_GEN ,ONLY : LSYNC_TRANS - USE PARKIND_ECTRANS ,ONLY : JPIM, JPRBT, JPRD - USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK - USE TPM_DIM ,ONLY : R_NDGNH,R_NSMAX,R_NTMAX,R_NDGL - USE TPM_GEOMETRY ,ONLY : G_NDGLU - USE TPM_FIELDS ,ONLY : ZAA,ZAS,ZAA0,ZAS0,KMLOC0 - USE TPM_DISTR ,ONLY : D_NUMP,D_MYMS,D_OFFSETS_GEMM1,D_OFFSETS_GEMM2 - USE HICBLAS_MOD ,ONLY : HIP_GEMM_BATCHED, HIP_DGEMM_BATCHED_OVERLOAD, & - & HIP_DGEMM_GROUPED_OVERLOAD, HIP_SGEMM_GROUPED_OVERLOAD - USE MPL_MODULE ,ONLY : MPL_BARRIER - USE TPM_STATS, ONLY : GSTATS => GSTATS_NVTX - USE, INTRINSIC :: ISO_C_BINDING - USE IEEE_ARITHMETIC - USE OPENACC + USE TPM_GEN, ONLY: LSYNC_TRANS + USE YOMHOOK, ONLY: LHOOK, DR_HOOK, JPHOOK + USE TPM_DIM, ONLY: R_NDGNH,R_NSMAX,R_NTMAX,R_NDGL + USE TPM_GEOMETRY, ONLY: G_NDGLU + USE TPM_FIELDS, ONLY: ZAA,ZAS,ZAA0,ZAS0,KMLOC0 + USE TPM_DISTR, ONLY: D_NUMP, D_MYMS, D_OFFSETS_GEMM1, D_OFFSETS_GEMM2 + USE HICBLAS_MOD, ONLY: HIP_GEMM_BATCHED, HIP_DGEMM_BATCHED_OVERLOAD, & + & HIP_DGEMM_GROUPED_OVERLOAD, HIP_SGEMM_GROUPED_OVERLOAD + USE MPL_MODULE, ONLY: MPL_BARRIER + USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX + USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_INT #ifdef TRANS_SINGLE #define HIP_GEMM HIP_SGEMM_GROUPED_OVERLOAD diff --git a/src/trans/gpu/internal/leinv_mod.F90 b/src/trans/gpu/internal/leinv_mod.F90 index 3d9c56425..c18cd59a4 100755 --- a/src/trans/gpu/internal/leinv_mod.F90 +++ b/src/trans/gpu/internal/leinv_mod.F90 @@ -11,8 +11,8 @@ ! MODULE LEINV_MOD - USE PARKIND_ECTRANS ,ONLY : JPIM - USE BUFFERED_ALLOCATOR_MOD + USE PARKIND_ECTRANS, ONLY: JPIM, JPRB, JPRBT, JPRD + USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR IMPLICIT NONE PRIVATE @@ -23,9 +23,8 @@ MODULE LEINV_MOD CONTAINS SUBROUTINE LEINV_STRIDES(KF_LEG,IOUT_STRIDES0,IOUT_SIZE,IIN_STRIDES0,IIN_SIZE,& IOUT0_STRIDES0,IOUT0_SIZE,IIN0_STRIDES0,IIN0_SIZE) - USE PARKIND_ECTRANS ,ONLY : JPIM, JPRBT, JPRD - USE TPM_DIM ,ONLY : R - USE TPM_DISTR, ONLY: D,D_OFFSETS_GEMM1,D_OFFSETS_GEMM2 + USE TPM_DIM, ONLY: R + USE TPM_DISTR, ONLY: D, D_OFFSETS_GEMM1, D_OFFSETS_GEMM2 IMPLICIT NONE @@ -94,25 +93,23 @@ SUBROUTINE LEINV(ALLOCATOR,PIA,ZINP,ZINP0,ZOUTS,ZOUTA,ZOUTS0,ZOUTA0,KF_LEG) ! F. Vana 05-Mar-2015 Support for single precision ! ------------------------------------------------------------------ - USE TPM_GEN ,ONLY : LSYNC_TRANS - USE PARKIND_ECTRANS ,ONLY : JPIM,JPRB, JPRBT, JPRD - USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK - USE TPM_DIM ,ONLY : R_NDGNH,R_NSMAX, R_NDGL - USE TPM_GEOMETRY ,ONLY : G_NDGLU - USE TPM_FIELDS ,ONLY : ZAA,ZAS,ZAA0,ZAS0,KMLOC0 - USE TPM_DISTR ,ONLY : D_NUMP,D_MYMS,MYPROC,D_OFFSETS_GEMM1,D_OFFSETS_GEMM2 - USE HICBLAS_MOD ,ONLY : HIP_GEMM_BATCHED, HIP_DGEMM_BATCHED_OVERLOAD, & - & HIP_DGEMM_GROUPED_OVERLOAD, HIP_SGEMM_GROUPED_OVERLOAD + USE TPM_GEN, ONLY: LSYNC_TRANS + USE YOMHOOK, ONLY: LHOOK, DR_HOOK, JPHOOK + USE TPM_DIM, ONLY: R_NDGNH, R_NSMAX, R_NDGL + USE TPM_GEOMETRY, ONLY: G_NDGLU + USE TPM_FIELDS, ONLY: ZAA, ZAS, ZAA0, ZAS0, KMLOC0 + USE TPM_DISTR, ONLY: D_NUMP, D_MYMS, MYPROC, D_OFFSETS_GEMM1, D_OFFSETS_GEMM2 + USE HICBLAS_MOD, ONLY: HIP_GEMM_BATCHED, HIP_DGEMM_BATCHED_OVERLOAD, & + & HIP_DGEMM_GROUPED_OVERLOAD, HIP_SGEMM_GROUPED_OVERLOAD + USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_INT + USE MPL_MODULE, ONLY: MPL_BARRIER + USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX #ifdef TRANS_SINGLE #define HIP_GEMM HIP_SGEMM_GROUPED_OVERLOAD #else #define HIP_GEMM HIP_DGEMM_GROUPED_OVERLOAD #endif - USE, INTRINSIC :: ISO_C_BINDING - USE MPL_MODULE ,ONLY : MPL_BARRIER - USE TPM_STATS, ONLY : GSTATS => GSTATS_NVTX - IMPLICIT NONE REAL(KIND=JPRB), INTENT(IN) :: PIA(:,:,:) diff --git a/src/trans/gpu/internal/ltdir_mod.F90 b/src/trans/gpu/internal/ltdir_mod.F90 index 3f3f2eb07..918c887a9 100755 --- a/src/trans/gpu/internal/ltdir_mod.F90 +++ b/src/trans/gpu/internal/ltdir_mod.F90 @@ -11,7 +11,8 @@ ! MODULE LTDIR_MOD - USE BUFFERED_ALLOCATOR_MOD + USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT, JPRB, JPRD + USE BUFFERED_ALLOCATOR_MOD, ONLY: ALLOCATION_RESERVATION_HANDLE IMPLICIT NONE PRIVATE @@ -23,12 +24,11 @@ MODULE LTDIR_MOD CONTAINS FUNCTION PREPARE_LTDIR(ALLOCATOR, KF_FS, KF_UV) RESULT(HLTDIR) - USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT, JPRD - USE TPM_DISTR, ONLY: D - USE TPM_DIM, ONLY: R - USE ISO_C_BINDING - USE LEDIR_MOD - USE BUFFERED_ALLOCATOR_MOD + USE TPM_DISTR, ONLY: D + USE TPM_DIM, ONLY: R + USE ISO_C_BINDING, ONLY: C_SIZE_T + USE LEDIR_MOD, ONLY: LEDIR_STRIDES + USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, RESERVE IMPLICIT NONE @@ -63,25 +63,21 @@ SUBROUTINE LTDIR(ALLOCATOR,HLTDIR,ZINPS,ZINPA,ZINPS0,ZINPA0,KF_FS,KF_UV,KF_SCALA & PSPSC3A,PSPSC3B,PSPSC2, & & KFLDPTRUV,KFLDPTRSC) - USE PARKIND_ECTRANS ,ONLY : JPIM, JPRBT, JPRD, JPRB - USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK - - USE TPM_DIM ,ONLY : R - USE TPM_DISTR ,ONLY : D - USE TPM_GEOMETRY - - USE PREPSNM_MOD ,ONLY : PREPSNM - USE LEDIR_MOD - USE UVTVD_MOD - USE UPDSP_MOD ,ONLY : UPDSP - USE UPDSPB_MOD ,ONLY : UPDSPB - USE MPL_MODULE ,ONLY : MPL_BARRIER - USE TPM_GEN ,ONLY : LSYNC_TRANS - USE TPM_TRANS ,ONLY : NF_SC2, NF_SC3A, NF_SC3B - USE TPM_STATS, ONLY : GSTATS => GSTATS_NVTX - USE BUFFERED_ALLOCATOR_MOD - USE ISO_C_BINDING, ONLY: C_SIZE_T, C_F_POINTER, C_LOC - + USE YOMHOOK, ONLY: LHOOK, DR_HOOK, JPHOOK + USE TPM_DIM, ONLY: R + USE TPM_DISTR, ONLY: D + USE TPM_GEOMETRY, ONLY: G + USE PREPSNM_MOD, ONLY: PREPSNM + USE LEDIR_MOD, ONLY: LEDIR_STRIDES, LEDIR + USE UVTVD_MOD, ONLY: UVTVD + USE UPDSP_MOD, ONLY: UPDSP + USE UPDSPB_MOD, ONLY: UPDSPB + USE MPL_MODULE, ONLY: MPL_BARRIER + USE TPM_GEN, ONLY: LSYNC_TRANS + USE TPM_TRANS, ONLY: NF_SC2, NF_SC3A, NF_SC3B + USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX + USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, ASSIGN_PTR, GET_ALLOCATION + USE ISO_C_BINDING, ONLY: C_SIZE_T, C_F_POINTER, C_LOC !**** *LTDIR* - Control of Direct Legendre transform step diff --git a/src/trans/gpu/internal/ltinv_mod.F90 b/src/trans/gpu/internal/ltinv_mod.F90 index 861aea97b..54c1a19b1 100755 --- a/src/trans/gpu/internal/ltinv_mod.F90 +++ b/src/trans/gpu/internal/ltinv_mod.F90 @@ -11,7 +11,7 @@ ! MODULE LTINV_MOD - USE BUFFERED_ALLOCATOR_MOD + USE BUFFERED_ALLOCATOR_MOD, ONLY: ALLOCATION_RESERVATION_HANDLE IMPLICIT NONE @@ -25,12 +25,12 @@ MODULE LTINV_MOD CONTAINS FUNCTION PREPARE_LTINV(ALLOCATOR,KF_UV,KF_SCALARS,LVORGP,LDIVGP,LSCDERS) RESULT(HLTINV) - USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT, JPRD - USE TPM_DISTR, ONLY: D - USE TPM_DIM, ONLY: R - USE ISO_C_BINDING - USE LEINV_MOD - USE BUFFERED_ALLOCATOR_MOD + USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT, JPRD + USE TPM_DISTR, ONLY: D + USE TPM_DIM, ONLY: R + USE ISO_C_BINDING, ONLY: C_SIZE_T + USE LEINV_MOD, ONLY: LEINV_STRIDES + USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, RESERVE IMPLICIT NONE @@ -101,25 +101,23 @@ SUBROUTINE LTINV(ALLOCATOR,HLTINV,KF_UV,KF_SCALARS,& & PSPVOR,PSPDIV,PSPSCALAR,PSPSC3A,PSPSC3B,PSPSC2, & & ZOUTS,ZOUTA,ZOUTS0,ZOUTA0) - USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRB, JPRBT - USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK - - USE TPM_DIM ,ONLY : R - USE TPM_TRANS ,ONLY : LDIVGP, LVORGP, NF_SC2, NF_SC3A, NF_SC3B, LSCDERS - USE TPM_FLT - USE TPM_GEOMETRY - USE TPM_DISTR ,ONLY : D - USE PRFI1B_MOD ,ONLY : PRFI1B - USE VDTUV_MOD ,ONLY : VDTUV - USE SPNSDE_MOD ,ONLY : SPNSDE - USE LEINV_MOD - USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS - use ieee_arithmetic - USE TPM_FIELDS ,ONLY : F,ZEPSNM - USE MPL_MODULE ,ONLY : MPL_BARRIER - USE TPM_GEN ,ONLY : LSYNC_TRANS - USE TPM_STATS, ONLY : GSTATS => GSTATS_NVTX - + USE PARKIND_ECTRANS, ONLY: JPIM, JPRB, JPRBT, JPRD + USE YOMHOOK, ONLY: LHOOK, DR_HOOK, JPHOOK + USE TPM_DIM, ONLY: R + USE TPM_TRANS, ONLY: LDIVGP, LVORGP, NF_SC2, NF_SC3A, NF_SC3B, LSCDERS + USE TPM_GEOMETRY, ONLY: G + USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, ASSIGN_PTR, GET_ALLOCATION + USE TPM_DISTR, ONLY: D + USE PRFI1B_MOD, ONLY: PRFI1B + USE VDTUV_MOD, ONLY: VDTUV + USE SPNSDE_MOD, ONLY: SPNSDE + USE LEINV_MOD, ONLY: LEINV_STRIDES, LEINV + USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS + USE TPM_FIELDS, ONLY: F,ZEPSNM + USE MPL_MODULE, ONLY: MPL_BARRIER + USE TPM_GEN, ONLY: LSYNC_TRANS + USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX + USE ISO_C_BINDING, ONLY: C_SIZE_T, C_LOC !**** *LTINV* - Inverse Legendre transform ! diff --git a/src/trans/gpu/internal/myrecvset_mod.F90 b/src/trans/gpu/internal/myrecvset_mod.F90 index 093323f8e..fd01109c5 100755 --- a/src/trans/gpu/internal/myrecvset_mod.F90 +++ b/src/trans/gpu/internal/myrecvset_mod.F90 @@ -52,8 +52,8 @@ FUNCTION MYRECVSET(KSETS,KMYSET,KSET) ! ------------------------------------------------------------------ -USE PARKIND1 ,ONLY : JPIM ,JPRB -USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +USE PARKIND1, ONLY: JPIM +USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS ! IMPLICIT NONE diff --git a/src/trans/gpu/internal/mysendset_mod.F90 b/src/trans/gpu/internal/mysendset_mod.F90 index 636025e3d..59be163b6 100755 --- a/src/trans/gpu/internal/mysendset_mod.F90 +++ b/src/trans/gpu/internal/mysendset_mod.F90 @@ -50,8 +50,8 @@ FUNCTION MYSENDSET(KSETS,KMYSET,KSET) ! ------------------------------------------------------------------ -USE PARKIND1 ,ONLY : JPIM ,JPRB -USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +USE PARKIND1, ONLY: JPIM +USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS IMPLICIT NONE INTEGER(KIND=JPIM) :: MYSENDSET diff --git a/src/trans/gpu/internal/parkind_ectrans.F90 b/src/trans/gpu/internal/parkind_ectrans.F90 index d2d880333..cae345bbb 100644 --- a/src/trans/gpu/internal/parkind_ectrans.F90 +++ b/src/trans/gpu/internal/parkind_ectrans.F90 @@ -12,7 +12,7 @@ MODULE PARKIND_ECTRANS ! Re-export precision-related symbols defined in fiat / parkind1, ! and add ECTRANS-internal precision-related symbols -USE PARKIND1 +USE PARKIND1 ! Import everything from PARKIND1 ! IMPLICIT NONE SAVE diff --git a/src/trans/gpu/internal/pe2set_mod.F90 b/src/trans/gpu/internal/pe2set_mod.F90 index 9a8ce8d57..c430b7506 100755 --- a/src/trans/gpu/internal/pe2set_mod.F90 +++ b/src/trans/gpu/internal/pe2set_mod.F90 @@ -12,7 +12,6 @@ MODULE PE2SET_MOD CONTAINS SUBROUTINE PE2SET(KPE,KPRGPNS,KPRGPEW,KPRTRW,KPRTRV) - !**** *PE2SET* - Convert from PE number to set numbers ! Purpose. @@ -70,11 +69,10 @@ SUBROUTINE PE2SET(KPE,KPRGPNS,KPRGPEW,KPRTRW,KPRTRV) ! Revision : 98-10-13 row ordering ! ------------------------------------------------------------------ -USE PARKIND1 ,ONLY : JPIM ,JPRB - -USE TPM_DISTR ,ONLY : LEQ_REGIONS, NPRGPEW, NPROC, NPRTRV -USE EQ_REGIONS_MOD ,ONLY : N_REGIONS, N_REGIONS_NS -USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +USE PARKIND1, ONLY: JPIM, JPRB +USE TPM_DISTR, ONLY: LEQ_REGIONS, NPRGPEW, NPROC, NPRTRV +USE EQ_REGIONS_MOD, ONLY: N_REGIONS, N_REGIONS_NS +USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS ! IMPLICIT NONE diff --git a/src/trans/gpu/internal/pre_suleg_mod.F90 b/src/trans/gpu/internal/pre_suleg_mod.F90 index 024341091..82fded5fd 100755 --- a/src/trans/gpu/internal/pre_suleg_mod.F90 +++ b/src/trans/gpu/internal/pre_suleg_mod.F90 @@ -12,12 +12,12 @@ MODULE PRE_SULEG_MOD IMPLICIT NONE CONTAINS SUBROUTINE PRE_SULEG -USE PARKIND1 ,ONLY : JPRD, JPIM -USE TPM_GEN ,ONLY : NPRINTLEV,NOUT -USE TPM_DIM ,ONLY : R -USE TPM_CONSTANTS ,ONLY: RA -USE TPM_DISTR ,ONLY : D -USE TPM_FIELDS,ONLY : F +USE PARKIND1, ONLY: JPRD, JPIM +USE TPM_GEN, ONLY: NPRINTLEV, NOUT +USE TPM_DIM, ONLY: R +USE TPM_CONSTANTS, ONLY: RA +USE TPM_DISTR, ONLY: D +USE TPM_FIELDS, ONLY: F INTEGER(KIND=JPIM) :: IM, ICOUNT,JMLOC,JN LOGICAL :: LLP1,LLP2 diff --git a/src/trans/gpu/internal/prepsnm_mod.F90 b/src/trans/gpu/internal/prepsnm_mod.F90 index 0f79edbfc..b6f165df7 100755 --- a/src/trans/gpu/internal/prepsnm_mod.F90 +++ b/src/trans/gpu/internal/prepsnm_mod.F90 @@ -50,12 +50,10 @@ SUBROUTINE PREPSNM ! ------------------------------------------------------------------ - USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRBT - - USE TPM_DIM ,ONLY : R - USE TPM_FIELDS ,ONLY : F, ZEPSNM - USE TPM_DISTR ,ONLY : D - USE TPM_GEN ,ONLY : NOUT + USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT + USE TPM_DIM, ONLY: R + USE TPM_FIELDS, ONLY: F, ZEPSNM + USE TPM_DISTR, ONLY: D ! IMPLICIT NONE diff --git a/src/trans/gpu/internal/prfi1_mod.F90 b/src/trans/gpu/internal/prfi1_mod.F90 index ca07a7f00..38661d24c 100755 --- a/src/trans/gpu/internal/prfi1_mod.F90 +++ b/src/trans/gpu/internal/prfi1_mod.F90 @@ -13,12 +13,8 @@ MODULE PRFI1_MOD SUBROUTINE PRFI1(KM,KF_UV,KF_SCALARS,PIA,PSPVOR,PSPDIV,PSPSCALAR,& & KFLDPTRUV,KFLDPTRSC) -USE PARKIND1 ,ONLY : JPIM ,JPRB - -!USE TPM_DISTR -!USE TPM_TRANS - -USE PRFI1B_MOD ,ONLY : PRFI1B +USE PARKIND1, ONLY: JPIM, JPRB +USE PRFI1B_MOD, ONLY: PRFI1B !**** *PRFI1* - Prepare spectral fields for inverse Legendre transform diff --git a/src/trans/gpu/internal/prfi1b_mod.F90 b/src/trans/gpu/internal/prfi1b_mod.F90 index 73098879b..8c0fe9a34 100755 --- a/src/trans/gpu/internal/prfi1b_mod.F90 +++ b/src/trans/gpu/internal/prfi1b_mod.F90 @@ -13,12 +13,9 @@ MODULE PRFI1B_MOD CONTAINS SUBROUTINE PRFI1B(PIA,PSPEC,KFIELDS,KDIM,KFLDPTR) - USE PARKIND1 ,ONLY : JPIM ,JPRB - - USE TPM_GEN ,ONLY : NOUT - USE TPM_DIM ,ONLY : R,R_NSMAX - USE TPM_DISTR ,ONLY : D,D_NUMP,D_MYMS,D_NASM0 - USE IEEE_ARITHMETIC + USE PARKIND1, ONLY: JPIM, JPRB + USE TPM_DIM, ONLY: R, R_NSMAX + USE TPM_DISTR, ONLY: D, D_NUMP, D_MYMS, D_NASM0 !**** *PRFI1* - Prepare spectral fields for inverse Legendre transform diff --git a/src/trans/gpu/internal/read_legpol_mod.F90 b/src/trans/gpu/internal/read_legpol_mod.F90 index 7f145c6e6..7d0f5853d 100755 --- a/src/trans/gpu/internal/read_legpol_mod.F90 +++ b/src/trans/gpu/internal/read_legpol_mod.F90 @@ -11,16 +11,17 @@ MODULE READ_LEGPOL_MOD CONTAINS SUBROUTINE READ_LEGPOL -USE PARKIND_ECTRANS ,ONLY : JPIM, JPRBT ,JPRD -USE TPM_GEN -USE TPM_DISTR -USE TPM_DIM -USE TPM_GEOMETRY -USE TPM_FLT -USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS -USE TPM_CTL -USE BYTES_IO_MOD -USE SHAREDMEM_MOD + +USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT +USE TPM_GEN, ONLY: NERR +USE TPM_DISTR, ONLY: D, NPRTRV +USE TPM_DIM, ONLY: R +USE TPM_GEOMETRY, ONLY: G +USE TPM_FLT, ONLY: S +USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS +USE TPM_CTL, ONLY: C +USE BYTES_IO_MOD, ONLY: BYTES_IO_READ, JPBYTES_IO_SUCCESS, BYTES_IO_CLOSE, BYTES_IO_OPEN +USE SHAREDMEM_MOD, ONLY: SHAREDMEM_ASSOCIATE !**** *READ_LEGPOL * - read in Leg.Pol. and assocciated arrays from file or memory segment diff --git a/src/trans/gpu/internal/set2pe_mod.F90 b/src/trans/gpu/internal/set2pe_mod.F90 index c7f69d31a..380a27698 100755 --- a/src/trans/gpu/internal/set2pe_mod.F90 +++ b/src/trans/gpu/internal/set2pe_mod.F90 @@ -67,11 +67,10 @@ SUBROUTINE SET2PE(KPE,KPRGPNS,KPRGPEW,KPRTRW,KPRTRV) ! ------------------------------------------------------------------ -USE PARKIND1 ,ONLY : JPIM - -USE TPM_DISTR ,ONLY : LEQ_REGIONS, NPRGPEW, NPRGPNS, NPRTRV, NPRTRW -USE EQ_REGIONS_MOD ,ONLY : N_REGIONS, N_REGIONS_NS -USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +USE PARKIND1, ONLY: JPIM +USE TPM_DISTR, ONLY: LEQ_REGIONS, NPRGPEW, NPRGPNS, NPRTRV, NPRTRW +USE EQ_REGIONS_MOD , ONLY: N_REGIONS, N_REGIONS_NS +USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS ! IMPLICIT NONE diff --git a/src/trans/gpu/internal/set_resol_mod.F90 b/src/trans/gpu/internal/set_resol_mod.F90 index 9fb18ac66..f1ac30cff 100755 --- a/src/trans/gpu/internal/set_resol_mod.F90 +++ b/src/trans/gpu/internal/set_resol_mod.F90 @@ -12,19 +12,17 @@ MODULE SET_RESOL_MOD CONTAINS SUBROUTINE SET_RESOL(KRESOL,LDSETUP) -USE PARKIND1 ,ONLY : JPIM - -USE TPM_GEN ,ONLY : NOUT, MSETUP0, NCUR_RESOL, NMAX_RESOL,LENABLED -USE TPM_DIM ,ONLY : R, DIM_RESOL -!USE TPM_TRANS -USE TPM_DISTR ,ONLY : D, DISTR_RESOL -USE TPM_GEOMETRY ,ONLY : G, GEOM_RESOL -USE TPM_FIELDS ,ONLY : F, FIELDS_RESOL -USE TPM_FFT ,ONLY : T, FFT_RESOL -USE TPM_HICFFT ,ONLY : HICT, HICFFT_RESOL -USE TPM_FLT -USE TPM_CTL ,ONLY : C, CTL_RESOL -USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +USE PARKIND1, ONLY: JPIM +USE TPM_GEN, ONLY: NOUT, MSETUP0, NCUR_RESOL, NMAX_RESOL, LENABLED +USE TPM_DIM, ONLY: R, DIM_RESOL +USE TPM_DISTR, ONLY: D, DISTR_RESOL +USE TPM_GEOMETRY, ONLY: G, GEOM_RESOL +USE TPM_FIELDS, ONLY: F, FIELDS_RESOL +USE TPM_FFT, ONLY: T, FFT_RESOL +USE TPM_HICFFT, ONLY: HICT, HICFFT_RESOL +USE TPM_FLT, ONLY: S, FLT_RESOL +USE TPM_CTL, ONLY: C, CTL_RESOL +USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS ! IMPLICIT NONE diff --git a/src/trans/gpu/internal/setup_dims_mod.F90 b/src/trans/gpu/internal/setup_dims_mod.F90 index c0277d3d6..db6e47bfb 100755 --- a/src/trans/gpu/internal/setup_dims_mod.F90 +++ b/src/trans/gpu/internal/setup_dims_mod.F90 @@ -12,10 +12,9 @@ MODULE SETUP_DIMS_MOD CONTAINS SUBROUTINE SETUP_DIMS -USE PARKIND1 ,ONLY : JPIM - -USE TPM_DIM ,ONLY : R -USE TPM_FLT ,ONLY : S +USE PARKIND1, ONLY: JPIM +USE TPM_DIM, ONLY: R +USE TPM_FLT, ONLY: S ! IMPLICIT NONE diff --git a/src/trans/gpu/internal/setup_geom_mod.F90 b/src/trans/gpu/internal/setup_geom_mod.F90 index 9c688302d..12d89af51 100755 --- a/src/trans/gpu/internal/setup_geom_mod.F90 +++ b/src/trans/gpu/internal/setup_geom_mod.F90 @@ -12,13 +12,12 @@ MODULE SETUP_GEOM_MOD CONTAINS SUBROUTINE SETUP_GEOM -USE PARKIND1 ,ONLY : JPRD, JPIM - -USE TPM_GEN ,ONLY : NOUT, NPRINTLEV -USE TPM_DIM ,ONLY : R -USE TPM_DISTR ,ONLY : D -USE TPM_FIELDS ,ONLY : F -USE TPM_GEOMETRY ,ONLY : G +USE PARKIND1, ONLY: JPRD, JPIM +USE TPM_GEN, ONLY: NOUT, NPRINTLEV +USE TPM_DIM, ONLY: R +USE TPM_DISTR, ONLY: D +USE TPM_FIELDS, ONLY: F +USE TPM_GEOMETRY, ONLY: G ! IMPLICIT NONE diff --git a/src/trans/gpu/internal/shuffle_mod.F90 b/src/trans/gpu/internal/shuffle_mod.F90 index 5cfd1738c..8ef2ee06c 100755 --- a/src/trans/gpu/internal/shuffle_mod.F90 +++ b/src/trans/gpu/internal/shuffle_mod.F90 @@ -55,11 +55,8 @@ SUBROUTINE SHUFFLE(KF_UV_G,KF_SCALARS_G,KSHFUV_G,KIVSETUV,KSHFSC_G,KIVSETSC,& ! Original : 01-01-03 ! ------------------------------------------------------------------ -USE PARKIND1 ,ONLY : JPIM - -!USE TPM_GEN -!USE TPM_TRANS -USE TPM_DISTR ,ONLY : NPRTRV +USE PARKIND1, ONLY: JPIM +USE TPM_DISTR, ONLY: NPRTRV ! IMPLICIT NONE diff --git a/src/trans/gpu/internal/spnorm_ctl_mod.F90 b/src/trans/gpu/internal/spnorm_ctl_mod.F90 index 6d1025599..aa05c9327 100755 --- a/src/trans/gpu/internal/spnorm_ctl_mod.F90 +++ b/src/trans/gpu/internal/spnorm_ctl_mod.F90 @@ -12,13 +12,11 @@ MODULE SPNORM_CTL_MOD CONTAINS SUBROUTINE SPNORM_CTL(PNORM,PSPEC,KFLD,KFLD_G,KVSET,KMASTER,PMET) -USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRB, JPRBT - -USE TPM_DIM ,ONLY : R -USE TPM_DISTR ,ONLY : D, MYPROC, MYSETV - -USE SPNORMD_MOD ,ONLY : SPNORMD -USE SPNORMC_MOD ,ONLY : SPNORMC +USE PARKIND_ECTRANS, ONLY: JPIM, JPRB, JPRBT +USE TPM_DIM, ONLY: R +USE TPM_DISTR, ONLY: D, MYPROC, MYSETV +USE SPNORMD_MOD, ONLY: SPNORMD +USE SPNORMC_MOD, ONLY: SPNORMC ! IMPLICIT NONE diff --git a/src/trans/gpu/internal/spnormc_mod.F90 b/src/trans/gpu/internal/spnormc_mod.F90 index 337685cdd..397934351 100755 --- a/src/trans/gpu/internal/spnormc_mod.F90 +++ b/src/trans/gpu/internal/spnormc_mod.F90 @@ -12,13 +12,10 @@ MODULE SPNORMC_MOD CONTAINS SUBROUTINE SPNORMC(PSM,KFLD_G,KVSET,KMASTER,KSMAX,PGM) - -USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRBT -USE MPL_MODULE ,ONLY : MPL_RECV, MPL_SEND, MPL_BARRIER - -USE TPM_DISTR ,ONLY : D, NPRCIDS, NPRTRV, MYPROC, NPROC - -USE PE2SET_MOD ,ONLY : PE2SET +USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT +USE MPL_MODULE, ONLY: MPL_RECV, MPL_SEND, MPL_BARRIER +USE TPM_DISTR, ONLY: D, NPRCIDS, NPRTRV, MYPROC, NPROC +USE PE2SET_MOD, ONLY: PE2SET IMPLICIT NONE diff --git a/src/trans/gpu/internal/spnormd_mod.F90 b/src/trans/gpu/internal/spnormd_mod.F90 index 77aafd3f5..e6419d71e 100755 --- a/src/trans/gpu/internal/spnormd_mod.F90 +++ b/src/trans/gpu/internal/spnormd_mod.F90 @@ -12,10 +12,9 @@ MODULE SPNORMD_MOD CONTAINS SUBROUTINE SPNORMD(PSPEC,KFLD,PMET,PSM) -USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRB, JPRBT - -USE TPM_DIM ,ONLY : R -USE TPM_DISTR ,ONLY : D +USE PARKIND_ECTRANS, ONLY: JPIM, JPRB, JPRBT +USE TPM_DIM, ONLY: R +USE TPM_DISTR, ONLY: D ! IMPLICIT NONE diff --git a/src/trans/gpu/internal/spnsde_mod.F90 b/src/trans/gpu/internal/spnsde_mod.F90 index 5354728a3..dea51b549 100755 --- a/src/trans/gpu/internal/spnsde_mod.F90 +++ b/src/trans/gpu/internal/spnsde_mod.F90 @@ -13,14 +13,10 @@ MODULE SPNSDE_MOD CONTAINS SUBROUTINE SPNSDE(KF_SCALARS,PEPSNM,PF,PNSD) -USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRB, JPRBT - -USE TPM_GEN ,ONLY : NOUT -USE TPM_DIM ,ONLY : R, R_NTMAX -USE TPM_DISTR ,ONLY : D, D_MYMS, D_NUMP -USE TPM_FIELDS ,ONLY : ZEPSNM -!USE TPM_TRANS - +USE PARKIND_ECTRANS, ONLY: JPIM, JPRB, JPRBT +USE TPM_DIM, ONLY: R, R_NTMAX +USE TPM_DISTR, ONLY: D, D_MYMS, D_NUMP +USE TPM_FIELDS, ONLY: ZEPSNM !**** *SPNSDE* - Compute North-South derivative in spectral space diff --git a/src/trans/gpu/internal/sufft_mod.F90 b/src/trans/gpu/internal/sufft_mod.F90 index 707243a85..b279f7278 100755 --- a/src/trans/gpu/internal/sufft_mod.F90 +++ b/src/trans/gpu/internal/sufft_mod.F90 @@ -13,14 +13,11 @@ MODULE SUFFT_MOD CONTAINS SUBROUTINE SUFFT - USE PARKIND1 ,ONLY : JPIM - - USE TPM_DIM ,ONLY : R - USE TPM_GEN ,ONLY : NOUT, NPRINTLEV - USE TPM_DISTR ,ONLY : D, MYSETW - USE TPM_GEOMETRY ,ONLY : G - USE TPM_FFT ,ONLY : T - USE TPM_HICFFT ,ONLY : HICT, INIT_PLANS_FFT + USE PARKIND1, ONLY: JPIM + USE TPM_DIM, ONLY: R + USE TPM_GEN, ONLY: NOUT, NPRINTLEV + USE TPM_DISTR, ONLY: D + USE TPM_HICFFT, ONLY: INIT_PLANS_FFT ! IMPLICIT NONE diff --git a/src/trans/gpu/internal/sugaw_mod.F90 b/src/trans/gpu/internal/sugaw_mod.F90 index ef9b892fb..c5752fc97 100755 --- a/src/trans/gpu/internal/sugaw_mod.F90 +++ b/src/trans/gpu/internal/sugaw_mod.F90 @@ -12,16 +12,14 @@ MODULE SUGAW_MOD CONTAINS SUBROUTINE SUGAW(KDGL,KM,KN,PL,PW,PANM,PFN) -USE PARKIND1 ,ONLY : JPRD, JPIM -USE PARKIND2 ,ONLY : JPRH - -USE TPM_CONSTANTS ,ONLY : RA - -USE TPM_GEN ,ONLY : NOUT -USE GAWL_MOD ,ONLY : GAWL -USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS -USE SUPOLF_MOD -USE TPM_POL +USE PARKIND1, ONLY: JPRD, JPIM +USE PARKIND2, ONLY: JPRH +USE TPM_CONSTANTS, ONLY: RA +USE TPM_GEN, ONLY: NOUT +USE GAWL_MOD, ONLY: GAWL +USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS +USE SUPOLF_MOD, ONLY: SUPOLF +USE TPM_POL, ONLY: DDI !**** *SUGAW * - Routine to initialize the Gaussian ! abcissa and the associated weights diff --git a/src/trans/gpu/internal/suleg_mod.F90 b/src/trans/gpu/internal/suleg_mod.F90 index 09ea4ce55..3f90e62ad 100755 --- a/src/trans/gpu/internal/suleg_mod.F90 +++ b/src/trans/gpu/internal/suleg_mod.F90 @@ -16,33 +16,31 @@ MODULE SULEG_MOD SUBROUTINE SULEG !DEC$ OPTIMIZE:1 -USE PARKIND_ECTRANS ,ONLY : JPRD, JPIM, JPRBT -USE PARKIND2 ,ONLY : JPRH -USE MPL_MODULE - -USE TPM_GEN -USE TPM_DIM -USE TPM_CONSTANTS -USE TPM_DISTR -USE TPM_FIELDS ,ONLY : FIELDS_RESOL, F -USE TPM_FLT -USE TPM_GEOMETRY -USE TPM_CTL -USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS - -USE PRE_SULEG_MOD -USE SUGAW_MOD -USE SUPOL_MOD -USE SUPOLF_MOD -USE TPM_POL -USE SUTRLE_MOD -USE SETUP_GEOM_MOD -USE SEEFMM_MIX -USE SET2PE_MOD -USE ABORT_TRANS_MOD -USE PREPSNM_MOD ,ONLY : PREPSNM -USE WRITE_LEGPOL_MOD -USE READ_LEGPOL_MOD +USE PARKIND_ECTRANS, ONLY: JPRD, JPIM, JPRBT +USE PARKIND2, ONLY: JPRH +USE MPL_MODULE, ONLY: MPL_BYTES, MPL_BARRIER, JP_NON_BLOCKING_STANDARD, MPL_RECV, MPL_SEND, & + & MPL_WAIT +USE TPM_GEN, ONLY: NOUT, LMPOFF, NPRINTLEV +USE TPM_DIM, ONLY: R +USE TPM_CONSTANTS, ONLY: RA +USE TPM_DISTR, ONLY: NPRTRV, NPRTRW, NPROC, D, MTAGLETR, MYSETV, MYSETW, NPRCIDS +USE TPM_FIELDS, ONLY: F +USE TPM_FLT, ONLY: S +USE TPM_GEOMETRY, ONLY: G +USE TPM_CTL, ONLY: C +USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS +USE PRE_SULEG_MOD, ONLY: PRE_SULEG +USE SUGAW_MOD, ONLY: SUGAW +USE SUPOL_MOD, ONLY: SUPOL +USE SUPOLF_MOD, ONLY: SUPOLF +USE TPM_POL, ONLY: INI_POL, END_POL +USE SUTRLE_MOD, ONLY: SUTRLE +USE SETUP_GEOM_MOD, ONLY: SETUP_GEOM +USE SEEFMM_MIX, ONLY: SETUP_SEEFMM +USE SET2PE_MOD, ONLY: SET2PE +USE PREPSNM_MOD, ONLY: PREPSNM +USE WRITE_LEGPOL_MOD, ONLY: WRITE_LEGPOL +USE READ_LEGPOL_MOD, ONLY: READ_LEGPOL !**** *SULEG * - initialize the Legendre polynomials diff --git a/src/trans/gpu/internal/sump_trans0_mod.F90 b/src/trans/gpu/internal/sump_trans0_mod.F90 index e61269e70..aa8b94926 100755 --- a/src/trans/gpu/internal/sump_trans0_mod.F90 +++ b/src/trans/gpu/internal/sump_trans0_mod.F90 @@ -14,20 +14,16 @@ SUBROUTINE SUMP_TRANS0 ! Set up distributed environment for the transform package (part 0) -USE PARKIND1 ,ONLY : JPIM -USE MPL_MODULE ,ONLY : MPL_GROUPS_CREATE, MPL_MYRANK, MPL_NPROC - -USE TPM_GEN ,ONLY : NOUT, LMPOFF, NPRINTLEV -USE TPM_DISTR ,ONLY : LEQ_REGIONS, MTAGDISTGP, MTAGDISTSP, MTAGGL, & - & MTAGLETR, MTAGLG, MTAGLM, MTAGML, MTAGPART, & - & MYSETV, MYSETW, NPRCIDS, & - & NPRGPEW, NPRGPNS, NPRTRNS, NPRTRV, NPRTRW, & - & MYPROC, NPROC - -USE EQ_REGIONS_MOD ,ONLY : EQ_REGIONS, MY_REGION_EW, MY_REGION_NS, & - & N_REGIONS, N_REGIONS_EW, N_REGIONS_NS -USE PE2SET_MOD ,ONLY : PE2SET -USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +USE PARKIND1, ONLY: JPIM +USE MPL_MODULE, ONLY: MPL_GROUPS_CREATE, MPL_MYRANK, MPL_NPROC +USE TPM_GEN, ONLY: NOUT, LMPOFF, NPRINTLEV +USE TPM_DISTR, ONLY: LEQ_REGIONS, MTAGDISTGP, MTAGDISTSP, MTAGGL, MTAGLETR, MTAGLG, MTAGLM, & + & MTAGML, MTAGPART, MYSETV, MYSETW, NPRCIDS, NPRGPEW, NPRGPNS, NPRTRNS, & + & NPRTRV, NPRTRW, MYPROC, NPROC +USE EQ_REGIONS_MOD, ONLY: EQ_REGIONS, MY_REGION_EW, MY_REGION_NS, N_REGIONS, N_REGIONS_EW, & + & N_REGIONS_NS +USE PE2SET_MOD, ONLY: PE2SET +USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS IMPLICIT NONE diff --git a/src/trans/gpu/internal/sump_trans_mod.F90 b/src/trans/gpu/internal/sump_trans_mod.F90 index 64c907f4f..074013786 100755 --- a/src/trans/gpu/internal/sump_trans_mod.F90 +++ b/src/trans/gpu/internal/sump_trans_mod.F90 @@ -19,22 +19,17 @@ SUBROUTINE SUMP_TRANS ! Modifications : ! P.Marguinaud : 11-Sep-2012 : Fix twice allocated pointer -USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRBT, JPRD - -USE TPM_GEN ,ONLY : NOUT, NPRINTLEV -USE TPM_DIM ,ONLY : R -USE TPM_GEOMETRY ,ONLY : G -USE TPM_DISTR ,ONLY : D, LEQ_REGIONS, MYSETW, NPRTRNS, NPRTRW, NPROC, MYPROC - -!USE SUWAVEDI_MOD -!USE PE2SET_MOD -USE SUMPLATF_MOD ,ONLY : SUMPLATF -USE SUMPLAT_MOD ,ONLY : SUMPLAT -USE SUSTAONL_MOD ,ONLY : SUSTAONL -USE MYSENDSET_MOD ,ONLY : MYSENDSET -USE MYRECVSET_MOD ,ONLY : MYRECVSET -USE EQ_REGIONS_MOD ,ONLY : MY_REGION_NS, MY_REGION_EW, & - & N_REGIONS, N_REGIONS_EW, N_REGIONS_NS +USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT, JPRD +USE TPM_GEN, ONLY: NOUT, NPRINTLEV +USE TPM_DIM, ONLY: R +USE TPM_GEOMETRY, ONLY: G +USE TPM_DISTR, ONLY: D, LEQ_REGIONS, MYSETW, NPRTRNS, NPRTRW, NPROC, MYPROC +USE SUMPLATF_MOD, ONLY: SUMPLATF +USE SUMPLAT_MOD, ONLY: SUMPLAT +USE SUSTAONL_MOD, ONLY: SUSTAONL +USE MYSENDSET_MOD, ONLY: MYSENDSET +USE MYRECVSET_MOD, ONLY: MYRECVSET +USE EQ_REGIONS_MOD, ONLY: MY_REGION_NS, MY_REGION_EW, N_REGIONS, N_REGIONS_EW, N_REGIONS_NS ! IMPLICIT NONE diff --git a/src/trans/gpu/internal/sump_trans_preleg_mod.F90 b/src/trans/gpu/internal/sump_trans_preleg_mod.F90 index 78038f4ef..0b342f6db 100755 --- a/src/trans/gpu/internal/sump_trans_preleg_mod.F90 +++ b/src/trans/gpu/internal/sump_trans_preleg_mod.F90 @@ -14,14 +14,11 @@ SUBROUTINE SUMP_TRANS_PRELEG ! Set up distributed environment for the transform package (part 1) -USE PARKIND1 ,ONLY : JPIM - -USE TPM_GEN ,ONLY : NOUT, NPRINTLEV -USE TPM_DIM ,ONLY : R -USE TPM_DISTR ,ONLY : D, NPRTRW, NPRTRV, MYSETW - -USE SUWAVEDI_MOD ,ONLY : SUWAVEDI -!USE ABORT_TRANS_MOD +USE PARKIND1, ONLY: JPIM +USE TPM_GEN, ONLY: NOUT, NPRINTLEV +USE TPM_DIM, ONLY: R +USE TPM_DISTR, ONLY: D, NPRTRW, NPRTRV, MYSETW +USE SUWAVEDI_MOD, ONLY: SUWAVEDI ! IMPLICIT NONE diff --git a/src/trans/gpu/internal/sumplat_mod.F90 b/src/trans/gpu/internal/sumplat_mod.F90 index effffa239..4b444d333 100755 --- a/src/trans/gpu/internal/sumplat_mod.F90 +++ b/src/trans/gpu/internal/sumplat_mod.F90 @@ -88,14 +88,12 @@ SUBROUTINE SUMPLAT(KDGL,KPROC,KPROCA,KMYSETA,LDSPLIT,LDEQ_REGIONS,& ! not lelam features in new routine sumplatb.F ! ------------------------------------------------------------------ -USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRBT - -USE TPM_GEOMETRY ,ONLY : G -USE TPM_DISTR ,ONLY : MYPROC - -USE SUMPLATB_MOD ,ONLY : SUMPLATB -USE SUMPLATBEQ_MOD ,ONLY : SUMPLATBEQ -USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT +USE TPM_GEOMETRY, ONLY: G +USE TPM_DISTR, ONLY: MYPROC +USE SUMPLATB_MOD, ONLY: SUMPLATB +USE SUMPLATBEQ_MOD, ONLY: SUMPLATBEQ +USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS ! IMPLICIT NONE diff --git a/src/trans/gpu/internal/sumplatb_mod.F90 b/src/trans/gpu/internal/sumplatb_mod.F90 index fb5033acf..ab2954f0c 100755 --- a/src/trans/gpu/internal/sumplatb_mod.F90 +++ b/src/trans/gpu/internal/sumplatb_mod.F90 @@ -66,10 +66,8 @@ SUBROUTINE SUMPLATB(KDGSA,KDGL,KPROCA,KLOENG,LDSPLIT,LDFOURIER,& ! ------------------------------------------------------------------ -USE PARKIND_ECTRANS ,ONLY : JPIM, JPIB, JPRBT - -USE TPM_DISTR -USE ABORT_TRANS_MOD +USE PARKIND_ECTRANS, ONLY: JPIM, JPIB, JPRBT +!USE TPM_DISTR IMPLICIT NONE diff --git a/src/trans/gpu/internal/sumplatbeq_mod.F90 b/src/trans/gpu/internal/sumplatbeq_mod.F90 index 173877368..88703bce0 100755 --- a/src/trans/gpu/internal/sumplatbeq_mod.F90 +++ b/src/trans/gpu/internal/sumplatbeq_mod.F90 @@ -72,11 +72,10 @@ SUBROUTINE SUMPLATBEQ(KDGSA,KDGL,KPROC,KPROCA,KLOENG,LDSPLIT,LDEQ_REGIONS,& ! ------------------------------------------------------------------ -USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRBT - -USE TPM_DISTR ,ONLY : MYPROC -USE EQ_REGIONS_MOD ,ONLY : N_REGIONS -USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT +USE TPM_DISTR, ONLY: MYPROC +USE EQ_REGIONS_MOD, ONLY: N_REGIONS +USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS ! IMPLICIT NONE diff --git a/src/trans/gpu/internal/sumplatf_mod.F90 b/src/trans/gpu/internal/sumplatf_mod.F90 index 7a5545fc8..80d657293 100755 --- a/src/trans/gpu/internal/sumplatf_mod.F90 +++ b/src/trans/gpu/internal/sumplatf_mod.F90 @@ -75,11 +75,9 @@ SUBROUTINE SUMPLATF(KDGL,KPROCA,KMYSETA,& ! not lelam features in new routine sumplatb.F ! ------------------------------------------------------------------ -USE PARKIND1 ,ONLY : JPIM - -USE TPM_GEOMETRY ,ONLY : G - -USE SUMPLATB_MOD ,ONLY : SUMPLATB +USE PARKIND1, ONLY: JPIM +USE TPM_GEOMETRY, ONLY: G +USE SUMPLATB_MOD, ONLY: SUMPLATB ! IMPLICIT NONE diff --git a/src/trans/gpu/internal/supol_mod.F90 b/src/trans/gpu/internal/supol_mod.F90 index 327ec60e0..df6540069 100755 --- a/src/trans/gpu/internal/supol_mod.F90 +++ b/src/trans/gpu/internal/supol_mod.F90 @@ -62,8 +62,8 @@ SUBROUTINE SUPOL(KNSMAX,PDDMU,PFN,PDDPOL) ! F. Vana 05-Mar-2015 Support for single precision ! ------------------------------------------------------------------ -USE EC_PARKIND ,ONLY : JPRD, JPIM -USE TPM_POL ,ONLY : DDI, DDA, DDH, DDE, DDC, DDD +USE EC_PARKIND, ONLY: JPRD, JPIM +USE TPM_POL, ONLY: DDI, DDA, DDH, DDE, DDC, DDD IMPLICIT NONE diff --git a/src/trans/gpu/internal/supolf_mod.F90 b/src/trans/gpu/internal/supolf_mod.F90 index eb3e25b42..9c374b95d 100755 --- a/src/trans/gpu/internal/supolf_mod.F90 +++ b/src/trans/gpu/internal/supolf_mod.F90 @@ -59,9 +59,8 @@ SUBROUTINE SUPOLF(KM,KNSMAX,DDMU,DDPOL,KCHEAP) ! F. Vana 05-Mar-2015 Support for single precision ! ------------------------------------------------------------------ -USE EC_PARKIND ,ONLY : JPRD, JPIM - -USE TPM_POL ,ONLY : DFI, DFB, DFG, DFA, DFF +USE EC_PARKIND, ONLY: JPRD, JPIM +USE TPM_POL, ONLY: DFI, DFB, DFG, DFA, DFF IMPLICIT NONE diff --git a/src/trans/gpu/internal/sustaonl_mod.F90 b/src/trans/gpu/internal/sustaonl_mod.F90 index 9b01daee8..b5e744c93 100755 --- a/src/trans/gpu/internal/sustaonl_mod.F90 +++ b/src/trans/gpu/internal/sustaonl_mod.F90 @@ -64,18 +64,15 @@ SUBROUTINE SUSTAONL(KMEDIAP,KRESTM,LDWEIGHTED_DISTR,PWEIGHT,PMEDIAP,KPROCAGP) ! R. El Khatib 26-Apr-2018 vectorization ! ------------------------------------------------------------------ -USE PARKIND_ECTRANS ,ONLY : JPIM, JPRB, JPRBT, JPRD -USE MPL_MODULE ,ONLY : MPL_ALLGATHERV, MPL_RECV, MPL_SEND - -USE TPM_GEN ,ONLY : NOUT, NPRINTLEV -USE TPM_DIM ,ONLY : R -USE TPM_GEOMETRY ,ONLY : G -USE TPM_DISTR ,ONLY : D, LEQ_REGIONS, MTAGPART, NPRCIDS, MYPROC, NPROC - -USE SET2PE_MOD ,ONLY : SET2PE -USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS -USE EQ_REGIONS_MOD ,ONLY : MY_REGION_NS, MY_REGION_EW, & - & N_REGIONS, N_REGIONS_EW, N_REGIONS_NS +USE PARKIND_ECTRANS, ONLY: JPIM, JPRB, JPRBT, JPRD +USE MPL_MODULE, ONLY: MPL_ALLGATHERV, MPL_RECV, MPL_SEND +USE TPM_GEN, ONLY: NOUT, NPRINTLEV +USE TPM_DIM, ONLY: R +USE TPM_GEOMETRY, ONLY: G +USE TPM_DISTR, ONLY: D, LEQ_REGIONS, MTAGPART, NPRCIDS, MYPROC, NPROC +USE SET2PE_MOD, ONLY: SET2PE +USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS +USE EQ_REGIONS_MOD, ONLY: MY_REGION_NS, MY_REGION_EW, N_REGIONS, N_REGIONS_EW, N_REGIONS_NS ! IMPLICIT NONE diff --git a/src/trans/gpu/internal/sutrle_mod.F90 b/src/trans/gpu/internal/sutrle_mod.F90 index 3ceefed1d..6fed2d784 100755 --- a/src/trans/gpu/internal/sutrle_mod.F90 +++ b/src/trans/gpu/internal/sutrle_mod.F90 @@ -52,16 +52,14 @@ SUBROUTINE SUTRLE(PNM,KGL,KLOOP) ! ------------------------------------------------------------------ -USE EC_PARKIND ,ONLY : JPRD, JPIM -USE MPL_MODULE ,ONLY : MPL_ALLREDUCE, MPL_RECV, MPL_SEND, MPL_BARRIER, MPL_WAIT, & - & JP_NON_BLOCKING_STANDARD - -USE TPM_DIM ,ONLY : R -USE TPM_DISTR ,ONLY : D, MTAGLETR, NPRCIDS, NPRTRW, NPRTRV, & - & MYSETV, MYSETW, NPROC -USE TPM_FIELDS ,ONLY : F -USE SET2PE_MOD ,ONLY : SET2PE -USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS +USE EC_PARKIND, ONLY: JPRD, JPIM +USE MPL_MODULE, ONLY: MPL_ALLREDUCE, MPL_RECV, MPL_SEND, MPL_BARRIER, MPL_WAIT, & + & JP_NON_BLOCKING_STANDARD +USE TPM_DIM, ONLY: R +USE TPM_DISTR, ONLY: D, MTAGLETR, NPRCIDS, NPRTRW, NPRTRV, MYSETV, MYSETW, NPROC +USE TPM_FIELDS, ONLY: F +USE SET2PE_MOD, ONLY: SET2PE +USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS ! IMPLICIT NONE diff --git a/src/trans/gpu/internal/suwavedi_mod.F90 b/src/trans/gpu/internal/suwavedi_mod.F90 index 6995b2ee1..8f87010c6 100755 --- a/src/trans/gpu/internal/suwavedi_mod.F90 +++ b/src/trans/gpu/internal/suwavedi_mod.F90 @@ -68,7 +68,7 @@ SUBROUTINE SUWAVEDI(KSMAX,KTMAX,KPRTRW,KMYSETW,KASM0,KSPOLEGL,KPROCM,& ! K.YESSAD : 97-02-18 - Add KTMAX, bug correction for KSPOLEGL. ! ------------------------------------------------------------------ -USE EC_PARKIND ,ONLY : JPIM +USE EC_PARKIND, ONLY: JPIM IMPLICIT NONE diff --git a/src/trans/gpu/internal/tpm_constants.F90 b/src/trans/gpu/internal/tpm_constants.F90 index 6f8ab2b70..67ee2e571 100755 --- a/src/trans/gpu/internal/tpm_constants.F90 +++ b/src/trans/gpu/internal/tpm_constants.F90 @@ -9,7 +9,7 @@ ! MODULE TPM_CONSTANTS -USE PARKIND_ECTRANS ,ONLY : JPRBT +USE PARKIND_ECTRANS, ONLY: JPRBT IMPLICIT NONE diff --git a/src/trans/gpu/internal/tpm_ctl.F90 b/src/trans/gpu/internal/tpm_ctl.F90 index 7b967ee02..6f218ab82 100755 --- a/src/trans/gpu/internal/tpm_ctl.F90 +++ b/src/trans/gpu/internal/tpm_ctl.F90 @@ -10,9 +10,7 @@ MODULE TPM_CTL -USE PARKIND1 ,ONLY : JPIM -USE, INTRINSIC :: iso_c_binding, ONLY: C_PTR, C_NULL_PTR -USE SHAREDMEM_MOD ,ONLY : SHAREDMEM +USE SHAREDMEM_MOD, ONLY: SHAREDMEM IMPLICIT NONE SAVE diff --git a/src/trans/gpu/internal/tpm_dim.F90 b/src/trans/gpu/internal/tpm_dim.F90 index 181e4bd77..83f760913 100755 --- a/src/trans/gpu/internal/tpm_dim.F90 +++ b/src/trans/gpu/internal/tpm_dim.F90 @@ -13,7 +13,7 @@ MODULE TPM_DIM ! Module for dimensions. -USE PARKIND1 ,ONLY : JPIM +USE PARKIND1, ONLY: JPIM IMPLICIT NONE diff --git a/src/trans/gpu/internal/tpm_distr.F90 b/src/trans/gpu/internal/tpm_distr.F90 index 990c80145..be744bf40 100755 --- a/src/trans/gpu/internal/tpm_distr.F90 +++ b/src/trans/gpu/internal/tpm_distr.F90 @@ -13,7 +13,7 @@ MODULE TPM_DISTR ! Module for distributed memory environment. -USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRBT +USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT IMPLICIT NONE diff --git a/src/trans/gpu/internal/tpm_fft.F90 b/src/trans/gpu/internal/tpm_fft.F90 index 01594a808..dece21b08 100755 --- a/src/trans/gpu/internal/tpm_fft.F90 +++ b/src/trans/gpu/internal/tpm_fft.F90 @@ -9,7 +9,7 @@ ! MODULE TPM_FFT -USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRBT +USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT ! Module for Fourier transforms. diff --git a/src/trans/gpu/internal/tpm_fields.F90 b/src/trans/gpu/internal/tpm_fields.F90 index 1f917730d..f7ecd425a 100755 --- a/src/trans/gpu/internal/tpm_fields.F90 +++ b/src/trans/gpu/internal/tpm_fields.F90 @@ -11,7 +11,7 @@ MODULE TPM_FIELDS -USE PARKIND_ECTRANS ,ONLY : JPIM, JPIB, JPRB, JPRBT, JPRD +USE PARKIND_ECTRANS, ONLY: JPIM, JPRB, JPRBT, JPRD USE ISO_C_BINDING IMPLICIT NONE diff --git a/src/trans/gpu/internal/tpm_flt.F90 b/src/trans/gpu/internal/tpm_flt.F90 index c1fb4be1d..58e910667 100755 --- a/src/trans/gpu/internal/tpm_flt.F90 +++ b/src/trans/gpu/internal/tpm_flt.F90 @@ -10,8 +10,8 @@ MODULE TPM_FLT -USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRBT, JPRD -USE SEEFMM_MIX +USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT, JPRD +USE SEEFMM_MIX, ONLY: FMM_TYPE IMPLICIT NONE SAVE diff --git a/src/trans/gpu/internal/tpm_gen.F90 b/src/trans/gpu/internal/tpm_gen.F90 index cf38f749b..2a8b42c8c 100755 --- a/src/trans/gpu/internal/tpm_gen.F90 +++ b/src/trans/gpu/internal/tpm_gen.F90 @@ -12,7 +12,7 @@ MODULE TPM_GEN ! Module for general control variables. -USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRBT +USE PARKIND_ECTRANS, ONLY: JPIM IMPLICIT NONE diff --git a/src/trans/gpu/internal/tpm_geometry.F90 b/src/trans/gpu/internal/tpm_geometry.F90 index ce1de2f79..93f888911 100755 --- a/src/trans/gpu/internal/tpm_geometry.F90 +++ b/src/trans/gpu/internal/tpm_geometry.F90 @@ -13,7 +13,7 @@ MODULE TPM_GEOMETRY ! Module containing data describing Gaussian grid. -USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRBT +USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT IMPLICIT NONE diff --git a/src/trans/gpu/internal/tpm_hicfft.F90 b/src/trans/gpu/internal/tpm_hicfft.F90 index c59df0be1..a4278d024 100755 --- a/src/trans/gpu/internal/tpm_hicfft.F90 +++ b/src/trans/gpu/internal/tpm_hicfft.F90 @@ -19,10 +19,9 @@ MODULE TPM_HICFFT ! Original October 2014 ! HICFFT abstraction for CUDA and HIP August 2023 B. Reuter - USE, INTRINSIC :: ISO_C_BINDING - - USE PARKIND_ECTRANS ,ONLY : JPIM, JPRBT - USE GROWING_ALLOCATOR_MOD, ONLY: GROWING_ALLOCATION_TYPE + USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_INT, C_PTR, C_LOC, C_FLOAT, C_DOUBLE + USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT + USE GROWING_ALLOCATOR_MOD, ONLY: GROWING_ALLOCATION_TYPE IMPLICIT NONE @@ -258,7 +257,7 @@ SUBROUTINE EXECUTE_DIR_FFT_DOUBLE(PREEL_REAL,PREEL_COMPLEX,KFIELD,LOENS,OFFSETS, INTERFACE SUBROUTINE EXECUTE_DIR_FFT_DOUBLE_C(PREEL_REAL,PREEL_COMPLEX,KFIELD,LOENS,OFFSETS,NFFT,ALLOC) & & BIND(C, NAME="execute_dir_fft_double") - USE ISO_C_BINDING + USE ISO_C_BINDING, ONLY: C_DOUBLE, C_INT, C_PTR REAL(KIND=C_DOUBLE), INTENT(IN) :: PREEL_REAL(*) REAL(KIND=C_DOUBLE), INTENT(OUT) :: PREEL_COMPLEX(*) INTEGER(KIND=C_INT),INTENT(IN),VALUE :: KFIELD @@ -291,7 +290,7 @@ SUBROUTINE EXECUTE_INV_FFT_FLOAT(PREEL_COMPLEX,PREEL_REAL,KFIELD,LOENS,OFFSETS,A INTERFACE SUBROUTINE EXECUTE_INV_FFT_FLOAT_C(PREEL_COMPLEX,PREEL_REAL,KFIELD,LOENS,OFFSETS,NFFT,ALLOC) & & BIND(C, NAME="execute_inv_fft_float") - USE ISO_C_BINDING + USE ISO_C_BINDING, ONLY: C_FLOAT, C_INT, C_PTR REAL(KIND=C_FLOAT), INTENT(IN) :: PREEL_COMPLEX(*) REAL(KIND=C_FLOAT), INTENT(OUT) :: PREEL_REAL(*) INTEGER(KIND=C_INT),INTENT(IN),VALUE :: KFIELD @@ -323,7 +322,7 @@ SUBROUTINE EXECUTE_INV_FFT_DOUBLE(PREEL_COMPLEX,PREEL_REAL,KFIELD,LOENS,OFFSETS, INTERFACE SUBROUTINE EXECUTE_INV_FFT_DOUBLE_C(PREEL_COMPLEX,PREEL_REAL,KFIELD,LOENS,OFFSETS,NFFT,ALLOC) & & BIND(C, NAME="execute_inv_fft_double") - USE ISO_C_BINDING + USE ISO_C_BINDING, ONLY: C_DOUBLE, C_INT, C_PTR REAL(KIND=C_DOUBLE), INTENT(IN) :: PREEL_COMPLEX(*) REAL(KIND=C_DOUBLE), INTENT(OUT) :: PREEL_REAL(*) INTEGER(KIND=C_INT),INTENT(IN),VALUE :: KFIELD diff --git a/src/trans/gpu/internal/tpm_pol.F90 b/src/trans/gpu/internal/tpm_pol.F90 index f563d9609..b1f7ed222 100755 --- a/src/trans/gpu/internal/tpm_pol.F90 +++ b/src/trans/gpu/internal/tpm_pol.F90 @@ -15,7 +15,7 @@ MODULE TPM_POL ! R. El Khatib 17-Feb-2016 Optional allocation/computation of DDC/DDD/DDE ! since they are (big and) not used in supolf. -USE EC_PARKIND ,ONLY : JPRD, JPIM +USE EC_PARKIND, ONLY: JPRD, JPIM IMPLICIT NONE diff --git a/src/trans/gpu/internal/tpm_stats.F90 b/src/trans/gpu/internal/tpm_stats.F90 index c72a3bc70..492f8e505 100644 --- a/src/trans/gpu/internal/tpm_stats.F90 +++ b/src/trans/gpu/internal/tpm_stats.F90 @@ -18,7 +18,7 @@ MODULE TPM_STATS CONTAINS SUBROUTINE GSTATS_LABEL_NVTX(KNUM,CTYPE,CDESC) -USE EC_PARKIND ,ONLY : JPIM +USE EC_PARKIND, ONLY: JPIM IMPLICIT NONE INTEGER(KIND=JPIM) :: KNUM CHARACTER(*) CDESC @@ -31,9 +31,9 @@ SUBROUTINE GSTATS_LABEL_NVTX(KNUM,CTYPE,CDESC) END SUBROUTINE SUBROUTINE GSTATS_NVTX(KNUM,KSWITCH) - USE PARKIND_ECTRANS ,ONLY : JPIM, JPRBT + USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT #if defined(__NVCOMPILER) - USE NVTX + USE NVTX, ONLY: NVTXSTARTRANGE, NVTXENDRANGE #endif IMPLICIT NONE diff --git a/src/trans/gpu/internal/tpm_trans.F90 b/src/trans/gpu/internal/tpm_trans.F90 index 363d1b9c2..9d1262d10 100755 --- a/src/trans/gpu/internal/tpm_trans.F90 +++ b/src/trans/gpu/internal/tpm_trans.F90 @@ -14,9 +14,8 @@ MODULE TPM_TRANS ! Module to contain variables "local" to a specific call to a transform ! -USE PARKIND_ECTRANS ,ONLY : JPIM, JPRBT +USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT USE GROWING_ALLOCATOR_MOD, ONLY: GROWING_ALLOCATION_TYPE -USE ISO_C_BINDING, ONLY: C_INT8_T IMPLICIT NONE diff --git a/src/trans/gpu/internal/trgtol_mod.F90 b/src/trans/gpu/internal/trgtol_mod.F90 index 02cc5be30..45dea22e6 100755 --- a/src/trans/gpu/internal/trgtol_mod.F90 +++ b/src/trans/gpu/internal/trgtol_mod.F90 @@ -11,7 +11,7 @@ ! MODULE TRGTOL_MOD - USE BUFFERED_ALLOCATOR_MOD + USE BUFFERED_ALLOCATOR_MOD, ONLY: ALLOCATION_RESERVATION_HANDLE IMPLICIT NONE PRIVATE @@ -22,10 +22,10 @@ MODULE TRGTOL_MOD END TYPE CONTAINS FUNCTION PREPARE_TRGTOL(ALLOCATOR,KF_GP,KF_FS) RESULT(HTRGTOL) - USE PARKIND_ECTRANS, ONLY : JPIM, JPRB, JPRBT - USE TPM_DISTR, ONLY : D - USE BUFFERED_ALLOCATOR_MOD - USE ISO_C_BINDING, ONLY: C_SIZE_T + USE PARKIND_ECTRANS, ONLY: JPIM, JPRB, JPRBT + USE TPM_DISTR, ONLY: D + USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, RESERVE + USE ISO_C_BINDING, ONLY: C_SIZE_T IMPLICIT NONE @@ -103,23 +103,24 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, ! 09-01-02 G.Mozdzynski: use non-blocking recv and send ! ------------------------------------------------------------------ - - - USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRB , JPRBT, jprd - USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK - USE MPL_MODULE ,ONLY : MPL_WAIT, MPL_BARRIER - USE TPM_GEN ,ONLY : LSYNC_TRANS - USE EQ_REGIONS_MOD ,ONLY : MY_REGION_EW, MY_REGION_NS - USE TPM_DISTR ,ONLY : D,MYSETV, MYSETW, MTAGLG,NPRCIDS,MYPROC,NPROC,NPRTRW,NPRTRV - USE PE2SET_MOD ,ONLY : PE2SET - USE MPL_DATA_MODULE ,ONLY : MPL_COMM_OML - USE OML_MOD ,ONLY : OML_MY_THREAD - USE MPI_F08 - USE TPM_STATS ,ONLY : GSTATS => GSTATS_NVTX - USE TPM_TRANS ,ONLY : NPROMA - USE ISO_C_BINDING ,ONLY : C_SIZE_T, c_float, c_double, c_int8_t - USE BUFFERED_ALLOCATOR_MOD - USE OPENACC_EXT + USE PARKIND_ECTRANS, ONLY: JPIM, JPRB, JPRBT + USE YOMHOOK, ONLY: LHOOK, DR_HOOK, JPHOOK + USE MPL_MODULE, ONLY: MPL_WAIT, MPL_BARRIER + USE TPM_GEN, ONLY: LSYNC_TRANS + USE EQ_REGIONS_MOD, ONLY: MY_REGION_EW, MY_REGION_NS + USE TPM_DISTR, ONLY: D, MYSETV, MYSETW, MTAGLG, NPRCIDS, MYPROC, NPROC, NPRTRW, & + & NPRTRV + USE PE2SET_MOD, ONLY: PE2SET + USE MPL_DATA_MODULE, ONLY: MPL_COMM_OML + USE OML_MOD, ONLY: OML_MY_THREAD + USE MPI_F08, ONLY: MPI_COMM, MPI_REQUEST, MPI_FLOAT, MPI_DOUBLE + USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX + USE TPM_TRANS, ONLY: NPROMA + USE ISO_C_BINDING, ONLY: C_SIZE_T, C_FLOAT, C_DOUBLE, C_INT8_T + USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, ASSIGN_PTR, GET_ALLOCATION + USE OPENACC_EXT, ONLY: EXT_ACC_ARR_DESC, EXT_ACC_PASS, EXT_ACC_CREATE, & + & EXT_ACC_DELETE + USE OPENACC, ONLY: ACC_HANDLE_KIND IMPLICIT NONE diff --git a/src/trans/gpu/internal/trltog_mod.F90 b/src/trans/gpu/internal/trltog_mod.F90 index 22efe3c3f..b51244a21 100755 --- a/src/trans/gpu/internal/trltog_mod.F90 +++ b/src/trans/gpu/internal/trltog_mod.F90 @@ -11,7 +11,7 @@ ! MODULE TRLTOG_MOD - USE BUFFERED_ALLOCATOR_MOD + USE BUFFERED_ALLOCATOR_MOD, ONLY: ALLOCATION_RESERVATION_HANDLE IMPLICIT NONE PRIVATE @@ -22,9 +22,10 @@ MODULE TRLTOG_MOD END TYPE CONTAINS FUNCTION PREPARE_TRLTOG(ALLOCATOR,KF_FS,KF_GP) RESULT(HTRLTOG) - USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT - USE TPM_DISTR, ONLY: D - USE ISO_C_BINDING, ONLY: C_SIZE_T + USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT + USE TPM_DISTR, ONLY: D + USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, RESERVE + USE ISO_C_BINDING, ONLY: C_SIZE_T IMPLICIT NONE @@ -103,22 +104,24 @@ SUBROUTINE TRLTOG(ALLOCATOR,HTRLTOG,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, ! 09-01-02 G.Mozdzynski: use non-blocking recv and send ! ------------------------------------------------------------------ - USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRB , JPRBT - USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK - USE MPL_MODULE ,ONLY : MPL_WAIT, MPL_BARRIER - USE TPM_GEN ,ONLY : LSYNC_TRANS - USE EQ_REGIONS_MOD ,ONLY : MY_REGION_EW, MY_REGION_NS - USE TPM_DISTR ,ONLY : D,MYSETV, MYSETW, MTAGLG,NPRCIDS,MYPROC,NPROC,NPRTRW,NPRTRV - USE PE2SET_MOD ,ONLY : PE2SET - USE MPL_DATA_MODULE ,ONLY : MPL_COMM_OML - USE OML_MOD ,ONLY : OML_MY_THREAD - USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS - USE MPI_F08 - USE TPM_STATS ,ONLY : GSTATS => GSTATS_NVTX - - USE TPM_TRANS ,ONLY : LDIVGP, LSCDERS, LUVDER, LVORGP, NPROMA - USE ISO_C_BINDING ,ONLY : C_SIZE_T - USE OPENACC_EXT + USE PARKIND_ECTRANS, ONLY: JPIM, JPRB, JPRBT + USE YOMHOOK, ONLY: LHOOK, DR_HOOK, JPHOOK + USE MPL_MODULE, ONLY: MPL_WAIT, MPL_BARRIER + USE TPM_GEN, ONLY: LSYNC_TRANS + USE EQ_REGIONS_MOD, ONLY: MY_REGION_EW, MY_REGION_NS + USE TPM_DISTR, ONLY: D,MYSETV, MYSETW, MTAGLG,NPRCIDS,MYPROC,NPROC,NPRTRW,NPRTRV + USE PE2SET_MOD, ONLY: PE2SET + USE MPL_DATA_MODULE, ONLY: MPL_COMM_OML + USE OML_MOD, ONLY: OML_MY_THREAD + USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS + USE MPI_F08, ONLY: MPI_COMM, MPI_REQUEST, MPI_FLOAT, MPI_DOUBLE + USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX + USE TPM_TRANS, ONLY: LDIVGP, LSCDERS, LUVDER, LVORGP, NPROMA + USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, ASSIGN_PTR, GET_ALLOCATION + USE ISO_C_BINDING, ONLY: C_SIZE_T + USE OPENACC_EXT, ONLY: EXT_ACC_ARR_DESC, EXT_ACC_PASS, EXT_ACC_CREATE, & + & EXT_ACC_DELETE + USE OPENACC, ONLY: ACC_HANDLE_KIND IMPLICIT NONE diff --git a/src/trans/gpu/internal/trltom_mod.F90 b/src/trans/gpu/internal/trltom_mod.F90 index 45bdf1353..d1611652e 100755 --- a/src/trans/gpu/internal/trltom_mod.F90 +++ b/src/trans/gpu/internal/trltom_mod.F90 @@ -10,7 +10,7 @@ ! MODULE TRLTOM_MOD - USE BUFFERED_ALLOCATOR_MOD + USE BUFFERED_ALLOCATOR_MOD, ONLY: ALLOCATION_RESERVATION_HANDLE IMPLICIT NONE PRIVATE @@ -21,9 +21,10 @@ MODULE TRLTOM_MOD END TYPE CONTAINS FUNCTION PREPARE_TRLTOM(ALLOCATOR, KF_FS) RESULT(HTRLTOM) - USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT - USE TPM_DISTR, ONLY: D - USE ISO_C_BINDING, ONLY: C_SIZE_T + USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT + USE TPM_DISTR, ONLY: D + USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, RESERVE + USE ISO_C_BINDING, ONLY: C_SIZE_T IMPLICIT NONE @@ -87,14 +88,15 @@ SUBROUTINE TRLTOM(ALLOCATOR,HTRLTOM,PFBUF_IN,PFBUF,KF_FS) ! Y.Seity : 07-08-30 Add barrier synchronisation under LSYNC_TRANS ! ------------------------------------------------------------------ - USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRBT - USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK - USE MPL_MODULE ,ONLY : MPL_ALLTOALLV, MPL_BARRIER, MPL_ALL_MS_COMM, MPL_MYRANK - USE TPM_DISTR ,ONLY : D, NPRTRW, NPROC, MYPROC, MYSETW - USE TPM_GEN ,ONLY : LSYNC_TRANS - USE MPI_F08 - USE TPM_STATS, ONLY : GSTATS => GSTATS_NVTX - USE ISO_C_BINDING, ONLY : C_SIZE_T + USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT + USE YOMHOOK, ONLY: LHOOK, DR_HOOK, JPHOOK + USE MPL_MODULE, ONLY: MPL_ALLTOALLV, MPL_BARRIER, MPL_ALL_MS_COMM, MPL_MYRANK + USE TPM_DISTR, ONLY: D, NPRTRW, NPROC, MYPROC, MYSETW + USE TPM_GEN, ONLY: LSYNC_TRANS + USE MPI_F08, ONLY: MPI_COMM, MPI_FLOAT, MPI_DOUBLE, MPI_ALLTOALLV + USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX + USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, ASSIGN_PTR, GET_ALLOCATION + USE ISO_C_BINDING, ONLY: C_SIZE_T IMPLICIT NONE diff --git a/src/trans/gpu/internal/trltom_pack_unpack.F90 b/src/trans/gpu/internal/trltom_pack_unpack.F90 index b14a0188b..25172b288 100755 --- a/src/trans/gpu/internal/trltom_pack_unpack.F90 +++ b/src/trans/gpu/internal/trltom_pack_unpack.F90 @@ -11,7 +11,7 @@ ! MODULE TRLTOM_PACK_UNPACK - USE BUFFERED_ALLOCATOR_MOD + USE BUFFERED_ALLOCATOR_MOD, ONLY: ALLOCATION_RESERVATION_HANDLE IMPLICIT NONE PRIVATE @@ -26,9 +26,10 @@ MODULE TRLTOM_PACK_UNPACK END TYPE CONTAINS FUNCTION PREPARE_TRLTOM_PACK(ALLOCATOR, KF_FS) RESULT(HTRLTOM_PACK) - USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT - USE TPM_DISTR, ONLY: D - USE ISO_C_BINDING, ONLY: C_SIZE_T + USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT + USE TPM_DISTR, ONLY: D + USE ISO_C_BINDING, ONLY: C_SIZE_T + USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, RESERVE IMPLICIT NONE @@ -64,12 +65,12 @@ SUBROUTINE TRLTOM_PACK(ALLOCATOR,HTRLTOM_PACK,PREEL_COMPLEX,FOUBUF_IN,KF_FS) ! ------------------------------------------------------------------ - USE BUFFERED_ALLOCATOR_MOD - USE PARKIND_ECTRANS, ONLY : JPIM,JPRBT - USE TPM_DISTR, ONLY : D,MYSETW,D_NSTAGTF,D_NPNTGTB0,D_NPTRLS,D_NDGL_FS - USE TPM_GEOMETRY, ONLY : G_NMEN,G_NLOEN - USE TPM_DIM, ONLY: R_NSMAX - USE ISO_C_BINDING + USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, ASSIGN_PTR, GET_ALLOCATION + USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT + USE TPM_DISTR, ONLY: D, MYSETW, D_NSTAGTF, D_NPNTGTB0, D_NPTRLS, D_NDGL_FS + USE TPM_GEOMETRY, ONLY: G_NMEN, G_NLOEN + USE TPM_DIM, ONLY: R_NSMAX + USE ISO_C_BINDING, ONLY: C_SIZE_T ! IMPLICIT NONE @@ -129,9 +130,10 @@ SUBROUTINE TRLTOM_PACK(ALLOCATOR,HTRLTOM_PACK,PREEL_COMPLEX,FOUBUF_IN,KF_FS) END SUBROUTINE TRLTOM_PACK FUNCTION PREPARE_TRLTOM_UNPACK(ALLOCATOR, KF_FS) RESULT(HTRLTOM_UNPACK) - USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT, JPRD - USE LEDIR_MOD, ONLY: LEDIR_STRIDES - USE ISO_C_BINDING, ONLY: C_SIZE_T + USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT, JPRD + USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, RESERVE + USE LEDIR_MOD, ONLY: LEDIR_STRIDES + USE ISO_C_BINDING, ONLY: C_SIZE_T IMPLICIT NONE @@ -159,13 +161,14 @@ FUNCTION PREPARE_TRLTOM_UNPACK(ALLOCATOR, KF_FS) RESULT(HTRLTOM_UNPACK) END FUNCTION PREPARE_TRLTOM_UNPACK SUBROUTINE TRLTOM_UNPACK(ALLOCATOR,HTRLTOM_UNPACK,FOUBUF,ZINPS,ZINPA,ZINPS0,ZINPA0,KF_FS,KF_UV) - USE PARKIND_ECTRANS, ONLY : JPIM, JPRBT, JPRD - USE TPM_DIM, ONLY : R_NDGNH, R_NDGL - USE TPM_GEOMETRY, ONLY : G_NDGLU - USE TPM_FIELDS, ONLY : F_RW, F_RACTHE - USE TPM_DISTR, ONLY : D_NUMP,D_MYMS, D_NPNTGTB1,D_OFFSETS_GEMM1 - USE LEDIR_MOD, ONLY : LEDIR_STRIDES - USE, INTRINSIC :: ISO_C_BINDING + USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT, JPRD + USE TPM_DIM, ONLY: R_NDGNH, R_NDGL + USE TPM_GEOMETRY, ONLY: G_NDGLU + USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, ASSIGN_PTR, GET_ALLOCATION + USE TPM_FIELDS, ONLY: F_RW, F_RACTHE + USE TPM_DISTR, ONLY: D_NUMP, D_MYMS, D_NPNTGTB1, D_OFFSETS_GEMM1 + USE LEDIR_MOD, ONLY: LEDIR_STRIDES + USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_SIZE_T IMPLICIT NONE diff --git a/src/trans/gpu/internal/trmtol_mod.F90 b/src/trans/gpu/internal/trmtol_mod.F90 index 6d5fc29b6..a2d8d40ca 100755 --- a/src/trans/gpu/internal/trmtol_mod.F90 +++ b/src/trans/gpu/internal/trmtol_mod.F90 @@ -10,7 +10,7 @@ ! MODULE TRMTOL_MOD - USE BUFFERED_ALLOCATOR_MOD + USE BUFFERED_ALLOCATOR_MOD, ONLY: ALLOCATION_RESERVATION_HANDLE IMPLICIT NONE PRIVATE @@ -21,9 +21,10 @@ MODULE TRMTOL_MOD END TYPE CONTAINS FUNCTION PREPARE_TRMTOL(ALLOCATOR, KF_LEG) RESULT(HTRMTOL) - USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT - USE TPM_DISTR, ONLY: D - USE ISO_C_BINDING, ONLY: C_SIZE_T + USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT + USE TPM_DISTR, ONLY: D + USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, RESERVE + USE ISO_C_BINDING, ONLY: C_SIZE_T IMPLICIT NONE @@ -87,14 +88,15 @@ SUBROUTINE TRMTOL(ALLOCATOR,HTRMTOL,PFBUF_IN,PFBUF,KF_LEG) ! Y.Seity : 07-08-31 add barrier synchronisation under LSYNC_TRANS ! ------------------------------------------------------------------ - USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRBT - USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK - USE MPL_MODULE ,ONLY : MPL_ALLTOALLV, MPL_BARRIER, MPL_ALL_MS_COMM, MPL_MYRANK - USE TPM_DISTR ,ONLY : D, NPRTRW, NPROC, MYPROC, MYSETW - USE TPM_GEN ,ONLY : LSYNC_TRANS - USE MPI_F08 - USE TPM_STATS, ONLY : GSTATS => GSTATS_NVTX - USE ISO_C_BINDING, ONLY: C_SIZE_T + USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT + USE YOMHOOK, ONLY: LHOOK, DR_HOOK, JPHOOK + USE MPL_MODULE, ONLY: MPL_ALLTOALLV, MPL_BARRIER, MPL_ALL_MS_COMM, MPL_MYRANK + USE TPM_DISTR, ONLY: D, NPRTRW, NPROC, MYPROC, MYSETW + USE TPM_GEN, ONLY: LSYNC_TRANS + USE MPI_F08, ONLY: MPI_COMM, MPI_FLOAT, MPI_DOUBLE + USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, ASSIGN_PTR, GET_ALLOCATION + USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX + USE ISO_C_BINDING, ONLY: C_SIZE_T IMPLICIT NONE diff --git a/src/trans/gpu/internal/trmtol_pack_unpack.F90 b/src/trans/gpu/internal/trmtol_pack_unpack.F90 index e7076693e..c4bfe7b89 100755 --- a/src/trans/gpu/internal/trmtol_pack_unpack.F90 +++ b/src/trans/gpu/internal/trmtol_pack_unpack.F90 @@ -10,7 +10,7 @@ ! MODULE TRMTOL_PACK_UNPACK - USE BUFFERED_ALLOCATOR_MOD + USE BUFFERED_ALLOCATOR_MOD, ONLY: ALLOCATION_RESERVATION_HANDLE IMPLICIT NONE PRIVATE @@ -26,11 +26,10 @@ MODULE TRMTOL_PACK_UNPACK CONTAINS FUNCTION PREPARE_TRMTOL_PACK(ALLOCATOR,KF_LEG) RESULT(HTRMTOL_PACK) - USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT, JPRD - USE TPM_DISTR, ONLY: D - USE ISO_C_BINDING - USE LEINV_MOD - USE BUFFERED_ALLOCATOR_MOD + USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT + USE TPM_DISTR, ONLY: D + USE ISO_C_BINDING, ONLY: C_SIZE_T + USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, RESERVE IMPLICIT NONE @@ -85,13 +84,14 @@ SUBROUTINE TRMTOL_PACK(ALLOCATOR,HTRMTOL_PACK,ZOUTS,ZOUTA,ZOUTS0,ZOUTA0,FOUBUF_I ! F. Vana 05-Mar-2015 Support for single precision ! ------------------------------------------------------------------ - USE PARKIND_ECTRANS ,ONLY : JPIM,JPRB,JPRBT,JPRD - USE YOMHOOK, ONLY : LHOOK,DR_HOOK, JPHOOK - USE TPM_DIM, ONLY : R_NDGNH,R_NDGL - USE TPM_GEOMETRY,ONLY : G_NDGLU - USE TPM_DISTR, ONLY : D,D_NUMP,D_MYMS,D_NPNTGTB1,D_OFFSETS_GEMM1 - USE LEINV_MOD, ONLY: LEINV_STRIDES - USE ISO_C_BINDING, ONLY: C_SIZE_T + USE PARKIND_ECTRANS, ONLY: JPIM, JPRB, JPRBT, JPRD + USE YOMHOOK, ONLY: LHOOK, DR_HOOK, JPHOOK + USE TPM_DIM, ONLY: R_NDGNH, R_NDGL + USE TPM_GEOMETRY, ONLY: G_NDGLU + USE TPM_DISTR, ONLY: D, D_NUMP, D_MYMS, D_NPNTGTB1, D_OFFSETS_GEMM1 + USE LEINV_MOD, ONLY: LEINV_STRIDES + USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, ASSIGN_PTR, GET_ALLOCATION + USE ISO_C_BINDING, ONLY: C_SIZE_T IMPLICIT NONE @@ -177,9 +177,10 @@ SUBROUTINE TRMTOL_PACK(ALLOCATOR,HTRMTOL_PACK,ZOUTS,ZOUTA,ZOUTS0,ZOUTA0,FOUBUF_I END SUBROUTINE TRMTOL_PACK FUNCTION PREPARE_TRMTOL_UNPACK(ALLOCATOR,KF_FS) RESULT(HTRMTOL_UNPACK) - USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT - USE TPM_DISTR, ONLY: D - USE ISO_C_BINDING, ONLY: C_SIZE_T + USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT + USE TPM_DISTR, ONLY: D + USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, RESERVE + USE ISO_C_BINDING, ONLY: C_SIZE_T IMPLICIT NONE @@ -222,10 +223,11 @@ SUBROUTINE TRMTOL_UNPACK(ALLOCATOR,HTRMTOL_UNPACK,FOUBUF,PREEL_COMPLEX,KF_CURREN ! ------------------------------------------------------------------ -USE PARKIND_ECTRANS ,ONLY : JPIM,JPRBT -USE TPM_DISTR ,ONLY : D,MYSETW,MYPROC, NPROC, D_NSTAGTF, D_NPNTGTB0,D_NPTRLS,D_NDGL_FS -USE TPM_GEOMETRY ,ONLY : G_NMEN,G_NLOEN,G_NLOEN_MAX -USE ISO_C_BINDING ,ONLY : C_SIZE_T +USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT +USE TPM_DISTR, ONLY: D, MYSETW, D_NSTAGTF, D_NPNTGTB0, D_NPTRLS, D_NDGL_FS +USE TPM_GEOMETRY, ONLY: G_NMEN, G_NLOEN, G_NLOEN_MAX +USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, ASSIGN_PTR, GET_ALLOCATION +USE ISO_C_BINDING, ONLY: C_SIZE_T ! IMPLICIT NONE diff --git a/src/trans/gpu/internal/updsp_mod.F90 b/src/trans/gpu/internal/updsp_mod.F90 index 3c1209f91..c82789917 100755 --- a/src/trans/gpu/internal/updsp_mod.F90 +++ b/src/trans/gpu/internal/updsp_mod.F90 @@ -61,12 +61,10 @@ SUBROUTINE UPDSP(KF_UV,KF_SCALARS,POA1, & ! MPP Group: 95-10-01 Support for Distributed Memory version ! ------------------------------------------------------------------ -USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRB, JPRBT - -USE TPM_TRANS ,ONLY : NF_SC2, NF_SC3A, NF_SC3B -USE TPM_DISTR ,ONLY : D - -USE UPDSPB_MOD ,ONLY : UPDSPB +USE PARKIND_ECTRANS, ONLY: JPIM ,JPRB, JPRBT +USE TPM_TRANS, ONLY: NF_SC2, NF_SC3A, NF_SC3B +USE TPM_DISTR, ONLY: D +USE UPDSPB_MOD, ONLY: UPDSPB IMPLICIT NONE diff --git a/src/trans/gpu/internal/updspb_mod.F90 b/src/trans/gpu/internal/updspb_mod.F90 index 79c5326e1..0f59694ed 100755 --- a/src/trans/gpu/internal/updspb_mod.F90 +++ b/src/trans/gpu/internal/updspb_mod.F90 @@ -56,11 +56,9 @@ SUBROUTINE UPDSPB(KFIELD,POA,PSPEC,KFLDPTR) ! L. Isaksen : 95-06-06 Reordering of spectral arrays ! ------------------------------------------------------------------ -USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRB, JPRBT - - USE TPM_DIM ,ONLY : R_NTMAX - !USE TPM_FIELDS - USE TPM_DISTR ,ONLY : D_NUMP,D_MYMS,D_NASM0 + USE PARKIND_ECTRANS, ONLY: JPIM, JPRB, JPRBT + USE TPM_DIM, ONLY: R_NTMAX + USE TPM_DISTR, ONLY: D_NUMP, D_MYMS, D_NASM0 ! IMPLICIT NONE diff --git a/src/trans/gpu/internal/uvtvd_mod.F90 b/src/trans/gpu/internal/uvtvd_mod.F90 index e234533d3..3ee8521a1 100755 --- a/src/trans/gpu/internal/uvtvd_mod.F90 +++ b/src/trans/gpu/internal/uvtvd_mod.F90 @@ -58,12 +58,10 @@ SUBROUTINE UVTVD(KF_UV,PU,PV,PVOR,PDIV) ! D. Giard : NTMAX instead of NSMAX ! ------------------------------------------------------------------ -USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRBT - -USE TPM_DIM ,ONLY : R, R_NTMAX -USE TPM_FIELDS ,ONLY : F_RN -USE TPM_DISTR ,ONLY : D,D_NUMP,D_MYMS -USE TPM_FIELDS ,ONLY : ZEPSNM +USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT +USE TPM_DIM, ONLY: R, R_NTMAX +USE TPM_DISTR, ONLY: D, D_NUMP, D_MYMS +USE TPM_FIELDS, ONLY: ZEPSNM ! IMPLICIT NONE diff --git a/src/trans/gpu/internal/vd2uv_ctl_mod.F90 b/src/trans/gpu/internal/vd2uv_ctl_mod.F90 index b875505ef..6874774c1 100755 --- a/src/trans/gpu/internal/vd2uv_ctl_mod.F90 +++ b/src/trans/gpu/internal/vd2uv_ctl_mod.F90 @@ -44,11 +44,9 @@ SUBROUTINE VD2UV_CTL(KF_UV,PSPVOR,PSPDIV,PU,PV) ! ------------------------------------------------------------------ -USE PARKIND1 ,ONLY : JPIM ,JPRB - -USE TPM_DISTR ,ONLY : D - -USE VD2UV_MOD ,ONLY : VD2UV +USE PARKIND1, ONLY: JPIM, JPRB +USE TPM_DISTR, ONLY: D +USE VD2UV_MOD, ONLY: VD2UV IMPLICIT NONE diff --git a/src/trans/gpu/internal/vd2uv_mod.F90 b/src/trans/gpu/internal/vd2uv_mod.F90 index 0806699ba..984083312 100755 --- a/src/trans/gpu/internal/vd2uv_mod.F90 +++ b/src/trans/gpu/internal/vd2uv_mod.F90 @@ -12,16 +12,14 @@ MODULE VD2UV_MOD CONTAINS SUBROUTINE VD2UV(KM,KMLOC,KF_UV,KLEI2,PSPVOR,PSPDIV,PU,PV) -USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRB, JPRBT -USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK - -USE TPM_CONSTANTS -USE TPM_DIM ,ONLY : R -USE TPM_DISTR ,ONLY : D - -USE PREPSNM_MOD ,ONLY : PREPSNM -USE PRFI1B_MOD ,ONLY : PRFI1B -USE VDTUV_MOD ,ONLY : VDTUV +USE PARKIND_ECTRANS, ONLY: JPIM, JPRB, JPRBT +USE YOMHOOK, ONLY: LHOOK, DR_HOOK, JPHOOK +USE TPM_CONSTANTS, ONLY: RA +USE TPM_DIM, ONLY: R +USE TPM_DISTR, ONLY: D +USE PREPSNM_MOD, ONLY: PREPSNM +USE PRFI1B_MOD, ONLY: PRFI1B +USE VDTUV_MOD, ONLY: VDTUV !**** *VD2UV* - U and V from Vor/div diff --git a/src/trans/gpu/internal/vdtuv_mod.F90 b/src/trans/gpu/internal/vdtuv_mod.F90 index 2873adf47..f008914d3 100755 --- a/src/trans/gpu/internal/vdtuv_mod.F90 +++ b/src/trans/gpu/internal/vdtuv_mod.F90 @@ -13,13 +13,10 @@ MODULE VDTUV_MOD CONTAINS SUBROUTINE VDTUV(KFIELD,PEPSNM,PVOR,PDIV,PU,PV) -USE PARKIND_ECTRANS ,ONLY : JPIM ,JPRB, JPRBT - -USE TPM_DIM ,ONLY : R, R_NTMAX -USE TPM_FIELDS ,ONLY : F, F_RLAPIN, F_RN -USE TPM_DISTR ,ONLY : D,D_NUMP,D_MYMS -USE TPM_GEN ,ONLY : NOUT - +USE PARKIND_ECTRANS, ONLY: JPIM, JPRB, JPRBT +USE TPM_DIM, ONLY: R, R_NTMAX +USE TPM_FIELDS, ONLY: F, F_RLAPIN, F_RN +USE TPM_DISTR, ONLY: D, D_NUMP, D_MYMS !**** *VDTUV* - Compute U,V in spectral space diff --git a/src/trans/gpu/internal/write_legpol_mod.F90 b/src/trans/gpu/internal/write_legpol_mod.F90 index a3cf5d7eb..a4ab0c0e8 100755 --- a/src/trans/gpu/internal/write_legpol_mod.F90 +++ b/src/trans/gpu/internal/write_legpol_mod.F90 @@ -11,15 +11,14 @@ MODULE WRITE_LEGPOL_MOD CONTAINS SUBROUTINE WRITE_LEGPOL -USE PARKIND_ECTRANS ,ONLY : JPIM, JPRBT -USE TPM_GEN -USE TPM_DISTR -USE TPM_DIM -USE TPM_GEOMETRY -USE TPM_FLT -USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS -USE TPM_CTL -USE BYTES_IO_MOD +USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT +USE TPM_DISTR, ONLY: D, NPRTRV +USE TPM_DIM, ONLY: R +USE TPM_GEOMETRY, ONLY: G +USE TPM_FLT, ONLY: S +USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS +USE TPM_CTL, ONLY: C +USE BYTES_IO_MOD, ONLY: JPBYTES_IO_SUCCESS, BYTES_IO_CLOSE, BYTES_IO_OPEN, BYTES_IO_WRITE !**** *WRITE_LEGPOL * - write out Leg.Pol. and assocciated arrays to file From 512c2349c4cd1c91a1746a4610512badcfc5052e Mon Sep 17 00:00:00 2001 From: Sam Hatfield Date: Wed, 31 Jul 2024 14:01:22 +0300 Subject: [PATCH 03/86] Fix all PRINT statements in GPU version --- src/trans/gpu/external/setup_trans.F90 | 4 ++-- src/trans/gpu/external/setup_trans0.F90 | 2 +- src/trans/gpu/internal/dir_trans_ctl_mod.F90 | 6 +++--- src/trans/gpu/internal/growing_allocator_mod.F90 | 7 ++++--- src/trans/gpu/internal/inv_trans_ctl_mod.F90 | 5 ++--- src/trans/gpu/internal/ledir_mod.F90 | 4 ++-- src/trans/gpu/internal/leinv_mod.F90 | 4 ++-- src/trans/gpu/internal/prfi1b_mod.F90 | 10 +++++----- src/trans/gpu/internal/trgtol_mod.F90 | 5 ++--- src/trans/gpu/internal/trltog_mod.F90 | 7 +++---- src/trans/gpu/internal/trltom_mod.F90 | 7 ++++--- src/trans/gpu/internal/trmtol_mod.F90 | 7 ++++--- 12 files changed, 34 insertions(+), 34 deletions(-) diff --git a/src/trans/gpu/external/setup_trans.F90 b/src/trans/gpu/external/setup_trans.F90 index 5799069ad..b38ce981d 100755 --- a/src/trans/gpu/external/setup_trans.F90 +++ b/src/trans/gpu/external/setup_trans.F90 @@ -454,8 +454,8 @@ SUBROUTINE SETUP_TRANS(KSMAX,KDGL,KDLON,KLOEN,LDSPLIT,PSTRET,& !ISTAT = CUDA_GETDEVICE(IDEV) #endif -print*,'R%NTMAX=',R%NTMAX -print*,'R%NSMAX=',R%NSMAX +WRITE(NOUT,*) 'R%NTMAX=',R%NTMAX +WRITE(NOUT,*) 'R%NSMAX=',R%NSMAX #ifdef ACCGPU !$ACC ENTER DATA COPYIN(F,S,D,R,G) diff --git a/src/trans/gpu/external/setup_trans0.F90 b/src/trans/gpu/external/setup_trans0.F90 index 549f2c316..0d136b0d3 100755 --- a/src/trans/gpu/external/setup_trans0.F90 +++ b/src/trans/gpu/external/setup_trans0.F90 @@ -131,7 +131,7 @@ SUBROUTINE SETUP_TRANS0(KOUT,KERR,KPRINTLEV,KMAX_RESOL,KPROMATR,& MYGPU = MOD(MYPROC-1,NUMDEVS) CALL ACC_SET_DEVICE_NUM(MYGPU, IDEVTYPE) MYGPU = ACC_GET_DEVICE_NUM(IDEVTYPE) -WRITE(*,*) 'MYPROC:',MYPROC, 'GPU:', MYGPU, 'of ', NUMDEVS +WRITE(NOUT,*) 'MYPROC:',MYPROC, 'GPU:', MYGPU, 'of ', NUMDEVS #endif CL_NPROC_PERNODE=' ' diff --git a/src/trans/gpu/internal/dir_trans_ctl_mod.F90 b/src/trans/gpu/internal/dir_trans_ctl_mod.F90 index 55b60b3b8..65c9e4e85 100755 --- a/src/trans/gpu/internal/dir_trans_ctl_mod.F90 +++ b/src/trans/gpu/internal/dir_trans_ctl_mod.F90 @@ -87,6 +87,7 @@ SUBROUTINE DIR_TRANS_CTL(KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS,KF_UV,KF_SCALARS,& USE TRLTOM_PACK_UNPACK, ONLY: TRLTOM_PACK_HANDLE, TRLTOM_UNPACK_HANDLE, & & PREPARE_TRLTOM_PACK, PREPARE_TRLTOM_UNPACK, TRLTOM_PACK, & & TRLTOM_UNPACK + USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS IMPLICIT NONE @@ -138,9 +139,8 @@ SUBROUTINE DIR_TRANS_CTL(KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS,KF_UV,KF_SCALARS,& TYPE(TRLTOM_UNPACK_HANDLE) :: HTRLTOM_UNPACK TYPE(LTDIR_HANDLE) :: HLTDIR - IF(NPROMATR > 0) THEN - PRINT *, "ERROR, not implemented right now (NPROMATR > 0)" - STOP 4 + IF (NPROMATR > 0) THEN + CALL ABORT_TRANS("NPROMATR > 0 not supported for GPU") ENDIF ! Prepare everything diff --git a/src/trans/gpu/internal/growing_allocator_mod.F90 b/src/trans/gpu/internal/growing_allocator_mod.F90 index b04863076..f8de0fc90 100644 --- a/src/trans/gpu/internal/growing_allocator_mod.F90 +++ b/src/trans/gpu/internal/growing_allocator_mod.F90 @@ -29,6 +29,7 @@ SUBROUTINE FREE_FUNC_PROC(PTR, SZ) BIND(C) SUBROUTINE REALLOCATE_GROWING_ALLOCATION(ALLOC, SZ) USE ISO_C_BINDING, ONLY: C_SIZE_T + USE TPM_GEN, ONLY: NOUT IMPLICIT NONE TYPE(GROWING_ALLOCATION_TYPE), INTENT(INOUT) :: ALLOC INTEGER(C_SIZE_T) :: SZ @@ -36,7 +37,7 @@ SUBROUTINE REALLOCATE_GROWING_ALLOCATION(ALLOC, SZ) ! Deallocate existing pointer IF (ASSOCIATED(ALLOC%PTR) .AND. SZ > SIZE(ALLOC%PTR, 1, C_SIZE_T)) THEN - PRINT *, "WARNING: REALLOCATING GROWING POINTER CAUSING GRAPH REINSTANTIATION" + WRITE(NOUT,*) "WARNING: REALLOCATING GROWING POINTER CAUSING GRAPH REINSTANTIATION" DO I = 1, ALLOC%FREE_FUNCS_SZ CALL ALLOC%FREE_FUNCS(I)%FUNC(ALLOC%PTR, & SIZE(ALLOC%PTR, 1, C_SIZE_T)) @@ -54,6 +55,7 @@ SUBROUTINE REALLOCATE_GROWING_ALLOCATION(ALLOC, SZ) END SUBROUTINE SUBROUTINE REGISTER_FREE_FUNCTION(ALLOC, FREE_FUNC) + USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS IMPLICIT NONE TYPE(GROWING_ALLOCATION_TYPE) :: ALLOC PROCEDURE(FREE_FUNC_PROC) :: FREE_FUNC @@ -67,8 +69,7 @@ SUBROUTINE REGISTER_FREE_FUNCTION(ALLOC, FREE_FUNC) ALLOC%FREE_FUNCS_SZ = ALLOC%FREE_FUNCS_SZ + 1 IF (ALLOC%FREE_FUNCS_SZ > SIZE(ALLOC%FREE_FUNCS)) THEN - PRINT *, "TOO MANY FREE FUNCTIONS REGISTERED" - STOP 4 + CALL ABORT_TRANS("REGISTER_FREE_FUNCTION: ERROR - Too many free functions registered") ENDIF ALLOC%FREE_FUNCS(ALLOC%FREE_FUNCS_SZ)%FUNC => FREE_FUNC END SUBROUTINE diff --git a/src/trans/gpu/internal/inv_trans_ctl_mod.F90 b/src/trans/gpu/internal/inv_trans_ctl_mod.F90 index 80fbc3932..3029b3f68 100644 --- a/src/trans/gpu/internal/inv_trans_ctl_mod.F90 +++ b/src/trans/gpu/internal/inv_trans_ctl_mod.F90 @@ -157,9 +157,8 @@ SUBROUTINE INV_TRANS_CTL(KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS,KF_OUT_LT,& ! ------------------------------------------------------------------ - IF(NPROMATR > 0) THEN - print *, "This is currently not supported and/or tested (NPROMATR > 0j" - stop 24 + IF (NPROMATR > 0) THEN + CALL ABORT_TRANS("NPROMATR > 0 not supported for GPU") ENDIF ! Compute Vertical domain decomposition diff --git a/src/trans/gpu/internal/ledir_mod.F90 b/src/trans/gpu/internal/ledir_mod.F90 index 1e7700076..98c932562 100755 --- a/src/trans/gpu/internal/ledir_mod.F90 +++ b/src/trans/gpu/internal/ledir_mod.F90 @@ -94,7 +94,7 @@ SUBROUTINE LEDIR(ALLOCATOR,ZINPS,ZINPA,ZINPS0,ZINPA0,ZOUT,ZOUT0,POA1,KF_FS) ! F. Vana 05-Mar-2015 Support for single precision ! ------------------------------------------------------------------ - USE TPM_GEN, ONLY: LSYNC_TRANS + USE TPM_GEN, ONLY: LSYNC_TRANS, NOUT USE YOMHOOK, ONLY: LHOOK, DR_HOOK, JPHOOK USE TPM_DIM, ONLY: R_NDGNH,R_NSMAX,R_NTMAX,R_NDGL USE TPM_GEOMETRY, ONLY: G_NDGLU @@ -156,7 +156,7 @@ SUBROUTINE LEDIR(ALLOCATOR,ZINPS,ZINPA,ZINPS0,ZINPA0,ZOUT,ZOUT0,POA1,KF_FS) ! anti-symmetric IF(KMLOC0 > 0) THEN - PRINT*,'computing m=0 in double precision' + WRITE(NOUT,*) 'computing m=0 in double precision' ENDIF IF (LSYNC_TRANS) THEN diff --git a/src/trans/gpu/internal/leinv_mod.F90 b/src/trans/gpu/internal/leinv_mod.F90 index c18cd59a4..88d63506a 100755 --- a/src/trans/gpu/internal/leinv_mod.F90 +++ b/src/trans/gpu/internal/leinv_mod.F90 @@ -93,7 +93,7 @@ SUBROUTINE LEINV(ALLOCATOR,PIA,ZINP,ZINP0,ZOUTS,ZOUTA,ZOUTS0,ZOUTA0,KF_LEG) ! F. Vana 05-Mar-2015 Support for single precision ! ------------------------------------------------------------------ - USE TPM_GEN, ONLY: LSYNC_TRANS + USE TPM_GEN, ONLY: LSYNC_TRANS, NOUT USE YOMHOOK, ONLY: LHOOK, DR_HOOK, JPHOOK USE TPM_DIM, ONLY: R_NDGNH, R_NSMAX, R_NDGL USE TPM_GEOMETRY, ONLY: G_NDGLU @@ -153,7 +153,7 @@ SUBROUTINE LEINV(ALLOCATOR,PIA,ZINP,ZINP0,ZOUTS,ZOUTA,ZOUTS0,ZOUTA0,KF_LEG) #endif IF (KMLOC0 > 0) THEN - print*,'computing m=0 in double precision' + WRITE(NOUT,*) 'computing m=0 in double precision' ENDIF ! READ 2:NSMAX+3 diff --git a/src/trans/gpu/internal/prfi1b_mod.F90 b/src/trans/gpu/internal/prfi1b_mod.F90 index 8c0fe9a34..00a2985b3 100755 --- a/src/trans/gpu/internal/prfi1b_mod.F90 +++ b/src/trans/gpu/internal/prfi1b_mod.F90 @@ -13,9 +13,10 @@ MODULE PRFI1B_MOD CONTAINS SUBROUTINE PRFI1B(PIA,PSPEC,KFIELDS,KDIM,KFLDPTR) - USE PARKIND1, ONLY: JPIM, JPRB - USE TPM_DIM, ONLY: R, R_NSMAX - USE TPM_DISTR, ONLY: D, D_NUMP, D_MYMS, D_NASM0 + USE PARKIND1, ONLY: JPIM, JPRB + USE TPM_DIM, ONLY: R, R_NSMAX + USE TPM_DISTR, ONLY: D, D_NUMP, D_MYMS, D_NASM0 + USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS !**** *PRFI1* - Prepare spectral fields for inverse Legendre transform @@ -96,8 +97,7 @@ SUBROUTINE PRFI1B(PIA,PSPEC,KFIELDS,KDIM,KFLDPTR) IF(PRESENT(KFLDPTR)) THEN - PRINT *, "Not implemented" - STOP 4 + CALL ABORT_TRANS("PRFI1B not implemented for GPU") !loop over wavenumber #ifdef OMPGPU diff --git a/src/trans/gpu/internal/trgtol_mod.F90 b/src/trans/gpu/internal/trgtol_mod.F90 index 45dea22e6..92ce4a89d 100755 --- a/src/trans/gpu/internal/trgtol_mod.F90 +++ b/src/trans/gpu/internal/trgtol_mod.F90 @@ -121,6 +121,7 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, USE OPENACC_EXT, ONLY: EXT_ACC_ARR_DESC, EXT_ACC_PASS, EXT_ACC_CREATE, & & EXT_ACC_DELETE USE OPENACC, ONLY: ACC_HANDLE_KIND + USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS IMPLICIT NONE @@ -229,9 +230,7 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, ENDIF IF (IOFF /= 2*KF_UV_G+KF_SCALARS_G) THEN - PRINT*, "TRGTOL: ERROR IN IVSET COMPUTATION" - FLUSH(6) - STOP 38 + CALL ABORT_TRANS("TRGTOL: ERROR in IVSET computation") ENDIF IF (LHOOK) CALL DR_HOOK('TRGTOL',0,ZHOOK_HANDLE) diff --git a/src/trans/gpu/internal/trltog_mod.F90 b/src/trans/gpu/internal/trltog_mod.F90 index b51244a21..242f701bf 100755 --- a/src/trans/gpu/internal/trltog_mod.F90 +++ b/src/trans/gpu/internal/trltog_mod.F90 @@ -107,7 +107,7 @@ SUBROUTINE TRLTOG(ALLOCATOR,HTRLTOG,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, USE PARKIND_ECTRANS, ONLY: JPIM, JPRB, JPRBT USE YOMHOOK, ONLY: LHOOK, DR_HOOK, JPHOOK USE MPL_MODULE, ONLY: MPL_WAIT, MPL_BARRIER - USE TPM_GEN, ONLY: LSYNC_TRANS + USE TPM_GEN, ONLY: LSYNC_TRANS, NERR USE EQ_REGIONS_MOD, ONLY: MY_REGION_EW, MY_REGION_NS USE TPM_DISTR, ONLY: D,MYSETV, MYSETW, MTAGLG,NPRCIDS,MYPROC,NPROC,NPRTRW,NPRTRV USE PE2SET_MOD, ONLY: PE2SET @@ -240,8 +240,7 @@ SUBROUTINE TRLTOG(ALLOCATOR,HTRLTOG,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, ENDDO ENDIF IF (IOFF > 0 .AND. IOFF /= KF_SCALARS_G ) THEN - PRINT*, "TRLTOG: ERROR IN IVSETSC COMPUTATION" - STOP 39 + CALL ABORT_TRANS("TRLTOG: Error in IVSETSC computation") ENDIF ENDIF @@ -305,7 +304,7 @@ SUBROUTINE TRLTOG(ALLOCATOR,HTRLTOG,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, IF(LSCDERS) IGP3BPAR = IGP3BPAR/3 ENDIF IF (IGP2PAR + IGP3ALEV*IGP3APAR + IGP3BPAR*IGP3BLEV /= KF_SCALARS_G) THEN - PRINT *, IGP2PAR, IGP3APAR, IGP3ALEV, IGP3BPAR, IGP3BLEV + WRITE(NERR,*) IGP2PAR, IGP3APAR, IGP3ALEV, IGP3BPAR, IGP3BLEV CALL ABORT_TRANS("INCONSISTENCY IN SCALARS") ENDIF diff --git a/src/trans/gpu/internal/trltom_mod.F90 b/src/trans/gpu/internal/trltom_mod.F90 index d1611652e..be3a4929d 100755 --- a/src/trans/gpu/internal/trltom_mod.F90 +++ b/src/trans/gpu/internal/trltom_mod.F90 @@ -92,11 +92,12 @@ SUBROUTINE TRLTOM(ALLOCATOR,HTRLTOM,PFBUF_IN,PFBUF,KF_FS) USE YOMHOOK, ONLY: LHOOK, DR_HOOK, JPHOOK USE MPL_MODULE, ONLY: MPL_ALLTOALLV, MPL_BARRIER, MPL_ALL_MS_COMM, MPL_MYRANK USE TPM_DISTR, ONLY: D, NPRTRW, NPROC, MYPROC, MYSETW - USE TPM_GEN, ONLY: LSYNC_TRANS + USE TPM_GEN, ONLY: LSYNC_TRANS, NERR USE MPI_F08, ONLY: MPI_COMM, MPI_FLOAT, MPI_DOUBLE, MPI_ALLTOALLV USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, ASSIGN_PTR, GET_ALLOCATION USE ISO_C_BINDING, ONLY: C_SIZE_T + USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS IMPLICIT NONE @@ -144,8 +145,8 @@ SUBROUTINE TRLTOM(ALLOCATOR,HTRLTOM,PFBUF_IN,PFBUF,KF_FS) ! copy to self workaround IRANK = MPL_MYRANK(MPL_ALL_MS_COMM) IF (ILENS(IRANK) .ne. ILENR(IRANK)) THEN - PRINT *, "ERROR", ILENS(IRANK), ILENR(IRANK) - stop 1 + WRITE(NERR,*) "ERROR", ILENS(IRANK), ILENR(IRANK) + CALL ABORT_TRANS("TRLTOM: Error - ILENS(IRANK) /= ILENR(IRANK)") ENDIF IF (ILENS(IRANK) > 0) THEN FROM_SEND = IOFFS(IRANK) + 1 diff --git a/src/trans/gpu/internal/trmtol_mod.F90 b/src/trans/gpu/internal/trmtol_mod.F90 index a2d8d40ca..4298d790e 100755 --- a/src/trans/gpu/internal/trmtol_mod.F90 +++ b/src/trans/gpu/internal/trmtol_mod.F90 @@ -92,11 +92,12 @@ SUBROUTINE TRMTOL(ALLOCATOR,HTRMTOL,PFBUF_IN,PFBUF,KF_LEG) USE YOMHOOK, ONLY: LHOOK, DR_HOOK, JPHOOK USE MPL_MODULE, ONLY: MPL_ALLTOALLV, MPL_BARRIER, MPL_ALL_MS_COMM, MPL_MYRANK USE TPM_DISTR, ONLY: D, NPRTRW, NPROC, MYPROC, MYSETW - USE TPM_GEN, ONLY: LSYNC_TRANS + USE TPM_GEN, ONLY: LSYNC_TRANS, NERR USE MPI_F08, ONLY: MPI_COMM, MPI_FLOAT, MPI_DOUBLE USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, ASSIGN_PTR, GET_ALLOCATION USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX USE ISO_C_BINDING, ONLY: C_SIZE_T + USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS IMPLICIT NONE @@ -138,8 +139,8 @@ SUBROUTINE TRMTOL(ALLOCATOR,HTRMTOL,PFBUF_IN,PFBUF,KF_LEG) ! copy to self workaround IRANK = MPL_MYRANK(MPL_ALL_MS_COMM) IF (ILENS(IRANK) .ne. ILENR(IRANK)) THEN - PRINT *, "ERROR", ILENS(IRANK), ILENR(IRANK) - stop 1 + WRITE(NERR,*) "ERROR", ILENS(IRANK), ILENR(IRANK) + CALL ABORT_TRANS("TRMTOL: ILENS(IRANK) /= ILENR(IRANK)") ENDIF IF (ILENS(IRANK) > 0) THEN FROM_SEND = IOFFS(IRANK) + 1 From 5da79be9bf6af8fe13878972843912b6fc55e3d7 Mon Sep 17 00:00:00 2001 From: Sam Hatfield Date: Wed, 31 Jul 2024 15:12:50 +0300 Subject: [PATCH 04/86] Remove unnecessary WITH_FFTW --- src/trans/gpu/external/setup_trans.F90 | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/trans/gpu/external/setup_trans.F90 b/src/trans/gpu/external/setup_trans.F90 index b38ce981d..8eaf0efce 100755 --- a/src/trans/gpu/external/setup_trans.F90 +++ b/src/trans/gpu/external/setup_trans.F90 @@ -118,9 +118,6 @@ SUBROUTINE SETUP_TRANS(KSMAX,KDGL,KDLON,KLOEN,LDSPLIT,PSTRET,& & ZAA, ZAS, ZAA0, ZAS0, KMLOC0 USE TPM_FFT, ONLY: T, FFT_RESOL USE TPM_HICFFT, ONLY: HICT, HICFFT_RESOL -#ifdef WITH_FFTW -USE TPM_FFTW, ONLY: TW, FFTW_RESOL -#endif USE TPM_FLT, ONLY: FLT_RESOL, S USE TPM_CTL, ONLY: CTL_RESOL, C USE SET_RESOL_MOD, ONLY: SET_RESOL From c7e773daeeeaab280a1331becc477da91ff1ba76 Mon Sep 17 00:00:00 2001 From: marsdeno <41438629+marsdeno@users.noreply.github.com> Date: Mon, 5 Aug 2024 10:24:17 +0100 Subject: [PATCH 05/86] Fix incorrect abort message in inv_trans.F90 --- src/trans/cpu/external/inv_trans.F90 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/trans/cpu/external/inv_trans.F90 b/src/trans/cpu/external/inv_trans.F90 index 2d47592f5..4a518aaa0 100644 --- a/src/trans/cpu/external/inv_trans.F90 +++ b/src/trans/cpu/external/inv_trans.F90 @@ -509,8 +509,8 @@ SUBROUTINE INV_TRANS(PSPVOR,PSPDIV,PSPSCALAR,PSPSC3A,PSPSC3B,PSPSC2,& CALL ABORT_TRANS('INV_TRANS:THIRD DIMENSION OF PGPUV TOO SMALL ') ENDIF IF(IUBOUND(4) < NGPBLKS) THEN - WRITE(NOUT,*)'INV_TRANS:THIRD DIM. OF PGPUV TOO SMALL ',IUBOUND(4),NGPBLKS - CALL ABORT_TRANS('INV_TRANS:THIRD DIMENSION OF PGPUV TOO SMALL ') + WRITE(NOUT,*)'INV_TRANS:FOURTH DIM. OF PGPUV TOO SMALL ',IUBOUND(4),NGPBLKS + CALL ABORT_TRANS('INV_TRANS:FOURTH DIMENSION OF PGPUV TOO SMALL ') ENDIF ENDIF IF(PRESENT(PGP2)) THEN From e8f92156e6fe042e7f90a061959597a3ed9bb90f Mon Sep 17 00:00:00 2001 From: marsdeno <41438629+marsdeno@users.noreply.github.com> Date: Tue, 6 Aug 2024 10:16:56 +0100 Subject: [PATCH 06/86] Update build.yml --- .github/workflows/build.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 90261e7db..f0980fa2e 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -171,13 +171,9 @@ jobs: recreate_cache: ${{ matrix.caching == false }} dependencies: | ecmwf/ecbuild - ecmwf/eckit - ecmwf/fckit ecmwf-ifs/fiat dependency_branch: develop dependency_cmake_options: | - ecmwf/eckit: "-G Ninja -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -DENABLE_TESTS=OFF -DENABLE_ECKIT_CMD=OFF -DENABLE_ECKIT_SQL=OFF -DENABLE_MPI=ON -DENABLE_OMP=OFF" - ecmwf/fckit: "-G Ninja -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -DENABLE_TESTS=OFF" ecmwf-ifs/fiat: "-G Ninja -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -DENABLE_TESTS=OFF -DENABLE_MPI=ON" cmake_options: "-G Ninja -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} ${{ matrix.cmake_options }} -DENABLE_MPI=ON -DENABLE_FFTW=ON" ctest_options: "${{ matrix.ctest_options }}" From d18de02913716b64e41c5ec54caf28078a086249 Mon Sep 17 00:00:00 2001 From: Sam Hatfield Date: Thu, 1 Aug 2024 14:43:58 +0000 Subject: [PATCH 07/86] Add missing USE statements --- src/trans/gpu/external/gpnorm_trans.F90 | 2 +- src/trans/gpu/external/setup_trans.F90 | 3 ++- src/trans/gpu/external/setup_trans0.F90 | 3 ++- src/trans/gpu/external/trans_pnm.F90 | 2 +- src/trans/gpu/internal/cdmap_mod.F90 | 1 + src/trans/gpu/internal/dealloc_resol_mod.F90 | 1 + src/trans/gpu/internal/gath_grid_32_ctl_mod.F90 | 1 + 7 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/trans/gpu/external/gpnorm_trans.F90 b/src/trans/gpu/external/gpnorm_trans.F90 index 86ba77863..e61d070c1 100755 --- a/src/trans/gpu/external/gpnorm_trans.F90 +++ b/src/trans/gpu/external/gpnorm_trans.F90 @@ -70,7 +70,7 @@ SUBROUTINE GPNORM_TRANS(PGP,KFIELDS,KPROMA,PAVE,PMIN,PMAX,LDAVE_ONLY,KRESOL) USE YOMHOOK, ONLY: LHOOK, DR_HOOK, JPHOOK USE TRGTOL_MOD, ONLY: TRGTOL_HANDLE, PREPARE_TRGTOL, TRGTOL USE TPM_TRANS, ONLY: GROWING_ALLOCATION -USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, MAKE_BUFFERED_ALLOCATOR +USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, MAKE_BUFFERED_ALLOCATOR, INSTANTIATE_ALLOCATOR !endif INTERFACE diff --git a/src/trans/gpu/external/setup_trans.F90 b/src/trans/gpu/external/setup_trans.F90 index 8eaf0efce..b359edc28 100755 --- a/src/trans/gpu/external/setup_trans.F90 +++ b/src/trans/gpu/external/setup_trans.F90 @@ -132,7 +132,8 @@ SUBROUTINE SETUP_TRANS(KSMAX,KDGL,KDLON,KLOEN,LDSPLIT,PSTRET,& USE YOMHOOK, ONLY: LHOOK, DR_HOOK, JPHOOK USE PREPSNM_MOD, ONLY: PREPSNM #ifdef ACCGPU -USE OPENACC, ONLY: ACC_DEVICE_KIND +USE OPENACC, ONLY: ACC_DEVICE_KIND, ACC_GET_DEVICE_TYPE, ACC_GET_NUM_DEVICES, & + & ACC_SET_DEVICE_NUM, ACC_GET_DEVICE_NUM #endif #ifdef OMPGPU ! TODO: add OMP equivalents to ACC library routines diff --git a/src/trans/gpu/external/setup_trans0.F90 b/src/trans/gpu/external/setup_trans0.F90 index 0d136b0d3..b749f5c5c 100755 --- a/src/trans/gpu/external/setup_trans0.F90 +++ b/src/trans/gpu/external/setup_trans0.F90 @@ -83,7 +83,8 @@ SUBROUTINE SETUP_TRANS0(KOUT,KERR,KPRINTLEV,KMAX_RESOL,KPROMATR,& USE ECTRANS_VERSION_MOD, ONLY: ECTRANS_VERSION_STR, ECTRANS_GIT_SHA1 USE EC_ENV_MOD, ONLY: EC_GETENV #ifdef _OPENACC -USE OPENACC, ONLY: ACC_DEVICE_KIND +USE OPENACC, ONLY: ACC_DEVICE_KIND, ACC_GET_DEVICE_TYPE, ACC_GET_NUM_DEVICES, & + & ACC_SET_DEVICE_NUM, ACC_GET_DEVICE_NUM, ACC_INIT #endif !endif INTERFACE diff --git a/src/trans/gpu/external/trans_pnm.F90 b/src/trans/gpu/external/trans_pnm.F90 index cd89a85c7..421cbfd2d 100755 --- a/src/trans/gpu/external/trans_pnm.F90 +++ b/src/trans/gpu/external/trans_pnm.F90 @@ -57,7 +57,7 @@ SUBROUTINE TRANS_PNM(KRESOL,KM,PRPNM,LDTRANSPOSE,LDCHEAP) USE TPM_FLT, ONLY: S USE SET_RESOL_MOD, ONLY: SET_RESOL USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS -USE TPM_POL, ONLY: INI_POL +USE TPM_POL, ONLY: INI_POL, END_POL USE SUPOLF_MOD, ONLY: SUPOLF !endif INTERFACE diff --git a/src/trans/gpu/internal/cdmap_mod.F90 b/src/trans/gpu/internal/cdmap_mod.F90 index 649aefa65..e0e7d0b55 100755 --- a/src/trans/gpu/internal/cdmap_mod.F90 +++ b/src/trans/gpu/internal/cdmap_mod.F90 @@ -18,6 +18,7 @@ SUBROUTINE CDMAP(KM,KMLOC,KSL,KSLO,PEPSNM, KDIR, KDGNH, KDGNHD,& USE TPM_FLT, ONLY: S USE TPM_DISTR, ONLY: D USE TPM_TRANS, ONLY: FOUBUF_IN, FOUBUF +USE SEEFMM_MIX, ONLY: SEEFMM_MULM !**** *CDMAP* - REMAP ROOTS ! diff --git a/src/trans/gpu/internal/dealloc_resol_mod.F90 b/src/trans/gpu/internal/dealloc_resol_mod.F90 index 792fc46d8..343f46c41 100755 --- a/src/trans/gpu/internal/dealloc_resol_mod.F90 +++ b/src/trans/gpu/internal/dealloc_resol_mod.F90 @@ -51,6 +51,7 @@ SUBROUTINE DEALLOC_RESOL(KRESOL) USE TPM_FLT, ONLY: S USE TPM_CTL, ONLY: C USE TPM_HICFFT, ONLY: DESTROY_ALL_PLANS_FFT +USE SEEFMM_MIX, ONLY: FREE_SEEFMM USE SET_RESOL_MOD, ONLY: SET_RESOL ! diff --git a/src/trans/gpu/internal/gath_grid_32_ctl_mod.F90 b/src/trans/gpu/internal/gath_grid_32_ctl_mod.F90 index 0b1fd230d..99c6cbbd4 100755 --- a/src/trans/gpu/internal/gath_grid_32_ctl_mod.F90 +++ b/src/trans/gpu/internal/gath_grid_32_ctl_mod.F90 @@ -37,6 +37,7 @@ SUBROUTINE GATH_GRID_32_CTL(PGPG,KFGATHG,KPROMA,KTO,PGP) & MPL_WAIT, MPL_ALLTOALLV USE TPM_GEOMETRY, ONLY: G USE TPM_DISTR, ONLY: D, NPROC, MYPROC, MTAGDISTSP, NPRCIDS +USE SET2PE_MOD, ONLY: SET2PE USE EQ_REGIONS_MOD, ONLY: N_REGIONS_NS, N_REGIONS IMPLICIT NONE From c4281661250397659ca23caab0d30cd253af2d38 Mon Sep 17 00:00:00 2001 From: Sam Hatfield Date: Fri, 2 Aug 2024 16:02:26 +0000 Subject: [PATCH 08/86] Implement workaround for obscure partially present NV error For some reason if I qualify the `USE` statements on these lines by only importing the actually used objects, I get this error: ``` Present table errors: (null) lives at 0x7ffd5283efb0 size 140725987282868 partially present in host:0x7ffd5283efb0 device:0x14717b3ab400 size:108160 presentcount:0+1 line:-1 name:(null) file:(null) FATAL ERROR: variable in data clause is partially present on the device: name=(unknown) file:(OpenACC API) acc_delete line:-1 ``` --- src/trans/gpu/internal/ext_acc.F90 | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/trans/gpu/internal/ext_acc.F90 b/src/trans/gpu/internal/ext_acc.F90 index b2ea8687a..874d2b117 100644 --- a/src/trans/gpu/internal/ext_acc.F90 +++ b/src/trans/gpu/internal/ext_acc.F90 @@ -274,7 +274,7 @@ subroutine ext_acc_create(ptrs, stream) enddo end subroutine subroutine ext_acc_copyin(ptrs, stream) - use openacc, only: acc_async_sync + use openacc implicit none type(ext_acc_arr_desc), intent(in) :: ptrs(:) integer(acc_handle_kind), optional :: stream @@ -301,7 +301,7 @@ subroutine ext_acc_copyin(ptrs, stream) enddo end subroutine subroutine ext_acc_copyout(ptrs, stream) - use openacc, only: acc_async_sync, acc_copyout + use openacc implicit none type(ext_acc_arr_desc), intent(in) :: ptrs(:) integer(acc_handle_kind), optional :: stream @@ -328,7 +328,7 @@ subroutine ext_acc_copyout(ptrs, stream) enddo end subroutine subroutine ext_acc_delete(ptrs, stream) - use openacc, only: acc_async_sync + use openacc implicit none type(ext_acc_arr_desc), intent(in) :: ptrs(:) integer(acc_handle_kind), optional :: stream From 8b09448acfc477faf006ac4a27c0d93dc8f6d1ba Mon Sep 17 00:00:00 2001 From: Sam Hatfield Date: Tue, 6 Aug 2024 09:05:04 +0100 Subject: [PATCH 09/86] Add back specific imports from openacc Co-authored-by: lukasm91 --- src/trans/gpu/internal/ext_acc.F90 | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/trans/gpu/internal/ext_acc.F90 b/src/trans/gpu/internal/ext_acc.F90 index 874d2b117..98cee38ce 100644 --- a/src/trans/gpu/internal/ext_acc.F90 +++ b/src/trans/gpu/internal/ext_acc.F90 @@ -274,7 +274,7 @@ subroutine ext_acc_create(ptrs, stream) enddo end subroutine subroutine ext_acc_copyin(ptrs, stream) - use openacc + use openacc, only: acc_async_sync implicit none type(ext_acc_arr_desc), intent(in) :: ptrs(:) integer(acc_handle_kind), optional :: stream @@ -301,7 +301,7 @@ subroutine ext_acc_copyin(ptrs, stream) enddo end subroutine subroutine ext_acc_copyout(ptrs, stream) - use openacc + use openacc, only: acc_async_sync, acc_copyout implicit none type(ext_acc_arr_desc), intent(in) :: ptrs(:) integer(acc_handle_kind), optional :: stream @@ -328,7 +328,7 @@ subroutine ext_acc_copyout(ptrs, stream) enddo end subroutine subroutine ext_acc_delete(ptrs, stream) - use openacc + use openacc, only: acc_async_sync, acc_delete implicit none type(ext_acc_arr_desc), intent(in) :: ptrs(:) integer(acc_handle_kind), optional :: stream From 69474cffe174070f26feda90df244b70e7a4fd7c Mon Sep 17 00:00:00 2001 From: Lukas Mosimann Date: Mon, 10 Jun 2024 11:33:16 -0700 Subject: [PATCH 10/86] set some extra entries in zinp/zinp0 to 0 to avoid 'nan' propagation --- src/trans/gpu/internal/leinv_mod.F90 | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/trans/gpu/internal/leinv_mod.F90 b/src/trans/gpu/internal/leinv_mod.F90 index 88d63506a..ce3cf32fc 100755 --- a/src/trans/gpu/internal/leinv_mod.F90 +++ b/src/trans/gpu/internal/leinv_mod.F90 @@ -185,6 +185,11 @@ SUBROUTINE LEINV(ALLOCATOR,PIA,ZINP,ZINP0,ZOUTS,ZOUTA,ZOUTS0,ZOUTA0,KF_LEG) DO J=1,(R_NSMAX-KM+2)/2 ZINP(JK+(J-1)*IIN_STRIDES0+D_OFFSETS_GEMM2(KMLOC)*IIN_STRIDES0)=PIA(JK,IA+1+(J-1)*2,KMLOC) ENDDO + ! those are - in principle - only needed with tensor cores (zinp might contain NaNs!) + !$ACC LOOP SEQ + DO J=(R_NSMAX-KM+2)/2+1,ALIGN((R_NSMAX-KM+2)/2,A) + ZINP(JK+(J-1)*IIN_STRIDES0+D_OFFSETS_GEMM2(KMLOC)*IIN_STRIDES0)=0 + ENDDO ELSEIF (MOD((JK-1),2) .EQ. 0) THEN ! every other field is sufficient because Im(KM=0) == 0 #ifdef OMPGPU @@ -195,6 +200,11 @@ SUBROUTINE LEINV(ALLOCATOR,PIA,ZINP,ZINP0,ZOUTS,ZOUTA,ZOUTS0,ZOUTA0,KF_LEG) DO J=1,(R_NSMAX+2)/2 ZINP0((JK-1)/2+1+(J-1)*IIN0_STRIDES0) = PIA(JK,IA+1+(J-1)*2,KMLOC) ENDDO + ! those are - in principle - only needed with tensor cores (zinp might contain NaNs!) + !$ACC LOOP SEQ + DO J=(R_NSMAX+2)/2+1,ALIGN((R_NSMAX+2)/2,A) + ZINP0((JK-1)/2+1+(J-1)*IIN0_STRIDES0) = 0 + ENDDO ENDIF ENDDO ENDDO @@ -310,6 +320,11 @@ SUBROUTINE LEINV(ALLOCATOR,PIA,ZINP,ZINP0,ZOUTS,ZOUTA,ZOUTS0,ZOUTA0,KF_LEG) DO J=1,(R_NSMAX-KM+3)/2 ZINP(JK+(J-1)*IIN_STRIDES0+D_OFFSETS_GEMM2(KMLOC)*IIN_STRIDES0)=PIA(JK,IS+1+(J-1)*2,KMLOC) ENDDO + ! those are - in principle - only needed with tensor cores (zinp might contain NaNs!) + !$ACC LOOP SEQ + DO J=(R_NSMAX-KM+3)/2+1,ALIGN((R_NSMAX-KM+3)/2,A) + ZINP(JK+(J-1)*IIN_STRIDES0+D_OFFSETS_GEMM2(KMLOC)*IIN_STRIDES0)=0 + ENDDO ELSEIF (MOD((JK-1),2) == 0) THEN #ifdef OMPGPU #endif @@ -319,6 +334,11 @@ SUBROUTINE LEINV(ALLOCATOR,PIA,ZINP,ZINP0,ZOUTS,ZOUTA,ZOUTS0,ZOUTA0,KF_LEG) DO J=1,(R_NSMAX+3)/2 ZINP0((JK-1)/2+1+(J-1)*IIN0_STRIDES0) = PIA(JK,IS+1+(J-1)*2,KMLOC) ENDDO + ! those are - in principle - only needed with tensor cores (zinp might contain NaNs!) + !$ACC LOOP SEQ + DO J=(R_NSMAX+3)/2+1,ALIGN((R_NSMAX+3)/2,A) + ZINP0((JK-1)/2+1+(J-1)*IIN0_STRIDES0) = 0 + ENDDO ENDIF ENDDO ENDDO From ec9a6f313047097b088cfdbfc63f0a5f3c840400 Mon Sep 17 00:00:00 2001 From: Lukas Mosimann Date: Mon, 12 Aug 2024 04:40:12 -0700 Subject: [PATCH 11/86] zero padding only needed with cutlass --- src/trans/gpu/internal/leinv_mod.F90 | 16 +++++++--- src/trans/gpu/internal/trltom_pack_unpack.F90 | 31 +++++++++++++++++-- 2 files changed, 40 insertions(+), 7 deletions(-) diff --git a/src/trans/gpu/internal/leinv_mod.F90 b/src/trans/gpu/internal/leinv_mod.F90 index ce3cf32fc..ab8d133ac 100755 --- a/src/trans/gpu/internal/leinv_mod.F90 +++ b/src/trans/gpu/internal/leinv_mod.F90 @@ -185,11 +185,13 @@ SUBROUTINE LEINV(ALLOCATOR,PIA,ZINP,ZINP0,ZOUTS,ZOUTA,ZOUTS0,ZOUTA0,KF_LEG) DO J=1,(R_NSMAX-KM+2)/2 ZINP(JK+(J-1)*IIN_STRIDES0+D_OFFSETS_GEMM2(KMLOC)*IIN_STRIDES0)=PIA(JK,IA+1+(J-1)*2,KMLOC) ENDDO - ! those are - in principle - only needed with tensor cores (zinp might contain NaNs!) + ! those are only needed with tensor cores (zinp might contain NaNs!) +#if defined(USE_CUTLASS) && defined(USE_CUTLASS_3XTF32) !$ACC LOOP SEQ DO J=(R_NSMAX-KM+2)/2+1,ALIGN((R_NSMAX-KM+2)/2,A) ZINP(JK+(J-1)*IIN_STRIDES0+D_OFFSETS_GEMM2(KMLOC)*IIN_STRIDES0)=0 ENDDO +#endif ELSEIF (MOD((JK-1),2) .EQ. 0) THEN ! every other field is sufficient because Im(KM=0) == 0 #ifdef OMPGPU @@ -200,11 +202,13 @@ SUBROUTINE LEINV(ALLOCATOR,PIA,ZINP,ZINP0,ZOUTS,ZOUTA,ZOUTS0,ZOUTA0,KF_LEG) DO J=1,(R_NSMAX+2)/2 ZINP0((JK-1)/2+1+(J-1)*IIN0_STRIDES0) = PIA(JK,IA+1+(J-1)*2,KMLOC) ENDDO - ! those are - in principle - only needed with tensor cores (zinp might contain NaNs!) + ! those are only needed with tensor cores (zinp might contain NaNs!) +#if defined(USE_CUTLASS) && defined(USE_CUTLASS_3XTF32) !$ACC LOOP SEQ DO J=(R_NSMAX+2)/2+1,ALIGN((R_NSMAX+2)/2,A) ZINP0((JK-1)/2+1+(J-1)*IIN0_STRIDES0) = 0 ENDDO +#endif ENDIF ENDDO ENDDO @@ -320,11 +324,13 @@ SUBROUTINE LEINV(ALLOCATOR,PIA,ZINP,ZINP0,ZOUTS,ZOUTA,ZOUTS0,ZOUTA0,KF_LEG) DO J=1,(R_NSMAX-KM+3)/2 ZINP(JK+(J-1)*IIN_STRIDES0+D_OFFSETS_GEMM2(KMLOC)*IIN_STRIDES0)=PIA(JK,IS+1+(J-1)*2,KMLOC) ENDDO - ! those are - in principle - only needed with tensor cores (zinp might contain NaNs!) +#if defined(USE_CUTLASS) && defined(USE_CUTLASS_3XTF32) + ! those are only needed with tensor cores (zinp might contain NaNs!) !$ACC LOOP SEQ DO J=(R_NSMAX-KM+3)/2+1,ALIGN((R_NSMAX-KM+3)/2,A) ZINP(JK+(J-1)*IIN_STRIDES0+D_OFFSETS_GEMM2(KMLOC)*IIN_STRIDES0)=0 ENDDO +#endif ELSEIF (MOD((JK-1),2) == 0) THEN #ifdef OMPGPU #endif @@ -334,11 +340,13 @@ SUBROUTINE LEINV(ALLOCATOR,PIA,ZINP,ZINP0,ZOUTS,ZOUTA,ZOUTS0,ZOUTA0,KF_LEG) DO J=1,(R_NSMAX+3)/2 ZINP0((JK-1)/2+1+(J-1)*IIN0_STRIDES0) = PIA(JK,IS+1+(J-1)*2,KMLOC) ENDDO - ! those are - in principle - only needed with tensor cores (zinp might contain NaNs!) + ! those are only needed with tensor cores (zinp might contain NaNs!) +#if defined(USE_CUTLASS) && defined(USE_CUTLASS_3XTF32) !$ACC LOOP SEQ DO J=(R_NSMAX+3)/2+1,ALIGN((R_NSMAX+3)/2,A) ZINP0((JK-1)/2+1+(J-1)*IIN0_STRIDES0) = 0 ENDDO +#endif ENDIF ENDDO ENDDO diff --git a/src/trans/gpu/internal/trltom_pack_unpack.F90 b/src/trans/gpu/internal/trltom_pack_unpack.F90 index 25172b288..2da3d9f38 100755 --- a/src/trans/gpu/internal/trltom_pack_unpack.F90 +++ b/src/trans/gpu/internal/trltom_pack_unpack.F90 @@ -12,6 +12,7 @@ MODULE TRLTOM_PACK_UNPACK USE BUFFERED_ALLOCATOR_MOD, ONLY: ALLOCATION_RESERVATION_HANDLE + USE PARKIND_ECTRANS, ONLY: JPIM IMPLICIT NONE PRIVATE @@ -24,6 +25,9 @@ MODULE TRLTOM_PACK_UNPACK TYPE TRLTOM_UNPACK_HANDLE TYPE(ALLOCATION_RESERVATION_HANDLE) :: HINPS_AND_ZINPA END TYPE + + INTEGER(KIND=JPIM) :: A = 8 !Alignment + CONTAINS FUNCTION PREPARE_TRLTOM_PACK(ALLOCATOR, KF_FS) RESULT(HTRLTOM_PACK) USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT @@ -223,9 +227,8 @@ SUBROUTINE TRLTOM_UNPACK(ALLOCATOR,HTRLTOM_UNPACK,FOUBUF,ZINPS,ZINPA,ZINPS0,ZINP !$ACC& PRESENT(ZINPS,ZINPA,ZINPS0,ZINPA0) & !$ACC& PRESENT(F_RW,F_RACTHE) & !$ACC& PRESENT(D_MYMS,D_NUMP,R_NDGNH,R_NDGL,G_NDGLU) & - !$ACC& PRESENT(D_NPNTGTB1) + !$ACC& PRESENT(D_NPNTGTB1,D_OFFSETS_GEMM1,FOUBUF) - !$ACC DATA PRESENT(FOUBUF,D_OFFSETS_GEMM1) !$ACC PARALLEL LOOP DEFAULT(NONE) COLLAPSE(3) PRIVATE(KM,ISL,IGLS,OFFSET1,OFFSET2,JGL,PAIA,PAIS) & !$ACC& FIRSTPRIVATE(KF_FS,KF_UV,IIN_STRIDES0,IIN0_STRIDES0) ASYNC(1) #endif @@ -258,13 +261,35 @@ SUBROUTINE TRLTOM_UNPACK(ALLOCATOR,HTRLTOM_UNPACK,FOUBUF,ZINPS,ZINPA,ZINPS0,ZINP ENDDO ENDDO END DO + #ifdef OMPGPU #endif + +#if defined(USE_CUTLASS) && defined(USE_CUTLASS_3XTF32) #ifdef ACCGPU - !$ACC END DATA + !$ACC PARALLEL LOOP DEFAULT(NONE) COLLAPSE(2) PRIVATE(KM,JGL) & + !$ACC& FIRSTPRIVATE(KF_FS,IIN_STRIDES0) ASYNC(1) +#endif + DO KMLOC=1,D_NUMP + DO JF=1,KF_FS*2 + KM = D_MYMS(KMLOC) + !$ACC LOOP SEQ + DO JGL=G_NDGLU(KM),ALIGN(G_NDGLU(KM),A)-1 + IF (KM /= 0) THEN + ZINPA(JF+JGL*IIN_STRIDES0+IIN_STRIDES0*D_OFFSETS_GEMM1(KMLOC))=0_JPRB + ZINPS(JF+JGL*IIN_STRIDES0+IIN_STRIDES0*D_OFFSETS_GEMM1(KMLOC))=0_JPRB + ENDIF + ENDDO + ENDDO + END DO +#endif +#ifdef OMPGPU +#endif +#ifdef ACCGPU !$ACC END DATA #endif + END SUBROUTINE TRLTOM_UNPACK END MODULE TRLTOM_PACK_UNPACK From 267b2e9895194179128be722271ee06da806e42a Mon Sep 17 00:00:00 2001 From: Lukas Mosimann Date: Mon, 12 Aug 2024 04:43:13 -0700 Subject: [PATCH 12/86] fix conversion --- src/trans/gpu/internal/trltom_pack_unpack.F90 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/trans/gpu/internal/trltom_pack_unpack.F90 b/src/trans/gpu/internal/trltom_pack_unpack.F90 index 2da3d9f38..3425f01fc 100755 --- a/src/trans/gpu/internal/trltom_pack_unpack.F90 +++ b/src/trans/gpu/internal/trltom_pack_unpack.F90 @@ -276,8 +276,8 @@ SUBROUTINE TRLTOM_UNPACK(ALLOCATOR,HTRLTOM_UNPACK,FOUBUF,ZINPS,ZINPA,ZINPS0,ZINP !$ACC LOOP SEQ DO JGL=G_NDGLU(KM),ALIGN(G_NDGLU(KM),A)-1 IF (KM /= 0) THEN - ZINPA(JF+JGL*IIN_STRIDES0+IIN_STRIDES0*D_OFFSETS_GEMM1(KMLOC))=0_JPRB - ZINPS(JF+JGL*IIN_STRIDES0+IIN_STRIDES0*D_OFFSETS_GEMM1(KMLOC))=0_JPRB + ZINPA(JF+JGL*IIN_STRIDES0+IIN_STRIDES0*D_OFFSETS_GEMM1(KMLOC))=0.0_JPRB + ZINPS(JF+JGL*IIN_STRIDES0+IIN_STRIDES0*D_OFFSETS_GEMM1(KMLOC))=0.0_JPRB ENDIF ENDDO ENDDO From 29230ab870d4a560f824aa701c593677fcf9d7f5 Mon Sep 17 00:00:00 2001 From: Lukas Mosimann Date: Mon, 12 Aug 2024 04:49:56 -0700 Subject: [PATCH 13/86] add warning --- src/trans/gpu/algor/hicblas_cutlass.cuda.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/trans/gpu/algor/hicblas_cutlass.cuda.h b/src/trans/gpu/algor/hicblas_cutlass.cuda.h index a0bd6dd1a..7a842a808 100644 --- a/src/trans/gpu/algor/hicblas_cutlass.cuda.h +++ b/src/trans/gpu/algor/hicblas_cutlass.cuda.h @@ -67,6 +67,8 @@ class cutlass_sgemm_grouped { true, // MyOp // >; + // Note that when setting this alignment > 1 the inputs must be properly + // zero padded, otherwise NaNs might propagate. static constexpr int sz_align = 8; public: @@ -122,6 +124,8 @@ class cutlass_sgemm_grouped { true, // MyOp // >; + // Note that when setting this alignment > 1 the inputs must be properly + // zero padded, otherwise NaNs might propagate. static constexpr int sz_align = 1; public: From 7372c6b1bbbca2f152bff90c6bcc8b2e427a66de Mon Sep 17 00:00:00 2001 From: Sam Hatfield Date: Mon, 12 Aug 2024 13:14:13 +0000 Subject: [PATCH 14/86] Add ASYNC(1) so host updates are synchronised properly Without these the UPDATE can be carried out before the device arrays have been properly filled by the preceding ASYNC(1) unpacking DO loop. --- src/trans/gpu/internal/trltog_mod.F90 | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/trans/gpu/internal/trltog_mod.F90 b/src/trans/gpu/internal/trltog_mod.F90 index 242f701bf..250c48e22 100755 --- a/src/trans/gpu/internal/trltog_mod.F90 +++ b/src/trans/gpu/internal/trltog_mod.F90 @@ -861,35 +861,35 @@ SUBROUTINE TRLTOG(ALLOCATOR,HTRLTOG,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, #ifdef OMPGPU #endif #ifdef ACCGPU - !$ACC UPDATE HOST(PGP) + !$ACC UPDATE HOST(PGP) ASYNC(1) #endif ENDIF IF (PRESENT(PGPUV)) THEN #ifdef OMPGPU #endif #ifdef ACCGPU - !$ACC UPDATE HOST(PGPUV) + !$ACC UPDATE HOST(PGPUV) ASYNC(1) #endif ENDIF IF (PRESENT(PGP2)) THEN #ifdef OMPGPU #endif #ifdef ACCGPU - !$ACC UPDATE HOST(PGP2) + !$ACC UPDATE HOST(PGP2) ASYNC(1) #endif ENDIF IF (PRESENT(PGP3A)) THEN #ifdef OMPGPU #endif #ifdef ACCGPU - !$ACC UPDATE HOST(PGP3A) + !$ACC UPDATE HOST(PGP3A) ASYNC(1) #endif ENDIF IF (PRESENT(PGP3B)) THEN #ifdef OMPGPU #endif #ifdef ACCGPU - !$ACC UPDATE HOST(PGP3B) + !$ACC UPDATE HOST(PGP3B) ASYNC(1) #endif ENDIF IF (ACC_POINTERS_CNT > 0) CALL EXT_ACC_DELETE(ACC_POINTERS(1:ACC_POINTERS_CNT),STREAM=1_ACC_HANDLE_KIND) From e6b76917447e75dacea6277d053693e9e35616c4 Mon Sep 17 00:00:00 2001 From: Olivier Marsden Date: Thu, 15 Aug 2024 21:21:49 +0000 Subject: [PATCH 15/86] Allow NOPT_MEMORY_TR to be controlled by driver command-line option. *Allows choosing between heap and stack for ZCOMBUF arrays in GTOL and LTOG transposition routines on the CPU. Silently ignored on the GPU. --- src/programs/ectrans-benchmark.F90 | 59 ++++++++++++++++-------------- 1 file changed, 32 insertions(+), 27 deletions(-) diff --git a/src/programs/ectrans-benchmark.F90 b/src/programs/ectrans-benchmark.F90 index b84e58bef..05b9caa43 100644 --- a/src/programs/ectrans-benchmark.F90 +++ b/src/programs/ectrans-benchmark.F90 @@ -133,6 +133,7 @@ program ectrans_benchmark integer(kind=jpim) :: nstats_mem = 0 integer(kind=jpim) :: ntrace_stats = 0 integer(kind=jpim) :: nprnt_stats = 1 +integer(kind=jpim) :: nopt_mem_tr = 0 ! The multiplier of the machine epsilon used as a tolerance for correctness checking ! ncheck = 0 (the default) means that correctness checking is disabled @@ -220,7 +221,7 @@ program ectrans_benchmark ! Setup call get_command_line_arguments(nsmax, cgrid, iters, nfld, nlev, lvordiv, lscders, luvders, & - & luseflt, nproma, verbosity, ldump_values, lprint_norms, lmeminfo, nprtrv, nprtrw, ncheck) + & luseflt, nopt_mem_tr, nproma, verbosity, ldump_values, lprint_norms, lmeminfo, nprtrv, nprtrw, ncheck) if (cgrid == '') cgrid = cubic_octahedral_gaussian_grid(nsmax) call parse_grid(cgrid, ndgl, nloen) nflevg = nlev @@ -376,10 +377,11 @@ program ectrans_benchmark if (verbosity >= 1) write(nout,'(a)')'======= Setup ecTrans =======' call gstats(1, 0) -call setup_trans0(kout=nout, kerr=nerr, kprintlev=merge(2, 0, verbosity == 1), & - & kmax_resol=nmax_resol, kpromatr=npromatr, kprgpns=nprgpns, kprgpew=nprgpew, & - & kprtrw=nprtrw, ldsync_trans=lsync_trans, & - & ldeq_regions=leq_regions, prad=zra, ldalloperm=.true., ldmpoff=.not.luse_mpi) +call setup_trans0(kout=nout, kerr=nerr, kprintlev=merge(2, 0, verbosity == 1), & + & kmax_resol=nmax_resol, kpromatr=npromatr, kprgpns=nprgpns, kprgpew=nprgpew, & + & kprtrw=nprtrw, ldsync_trans=lsync_trans, & + & ldeq_regions=leq_regions, prad=zra, ldalloperm=.true., ldmpoff=.not.luse_mpi,& + & kopt_memory_tr=nopt_mem_tr) call gstats(1, 1) call gstats(2, 0) @@ -410,27 +412,28 @@ program ectrans_benchmark write(nout,'(" ")') write(nout,'(a)')'======= Start of runtime parameters =======' write(nout,'(" ")') - write(nout,'("nsmax ",i0)') nsmax - write(nout,'("grid ",a)') trim(cgrid) - write(nout,'("ndgl ",i0)') ndgl - write(nout,'("nproc ",i0)') nproc - write(nout,'("nthread ",i0)') nthread - write(nout,'("nprgpns ",i0)') nprgpns - write(nout,'("nprgpew ",i0)') nprgpew - write(nout,'("nprtrw ",i0)') nprtrw - write(nout,'("nprtrv ",i0)') nprtrv - write(nout,'("ngptot ",i0)') ngptot - write(nout,'("ngptotg ",i0)') ngptotg - write(nout,'("nfld ",i0)') nfld - write(nout,'("nlev ",i0)') nlev - write(nout,'("nproma ",i0)') nproma - write(nout,'("ngpblks ",i0)') ngpblks - write(nout,'("nspec2 ",i0)') nspec2 - write(nout,'("nspec2g ",i0)') nspec2g - write(nout,'("luseflt ",l1)') luseflt - write(nout,'("lvordiv ",l1)') lvordiv - write(nout,'("lscders ",l1)') lscders - write(nout,'("luvders ",l1)') luvders + write(nout,'("nsmax ",i0)') nsmax + write(nout,'("grid ",a)') trim(cgrid) + write(nout,'("ndgl ",i0)') ndgl + write(nout,'("nproc ",i0)') nproc + write(nout,'("nthread ",i0)') nthread + write(nout,'("nprgpns ",i0)') nprgpns + write(nout,'("nprgpew ",i0)') nprgpew + write(nout,'("nprtrw ",i0)') nprtrw + write(nout,'("nprtrv ",i0)') nprtrv + write(nout,'("ngptot ",i0)') ngptot + write(nout,'("ngptotg ",i0)') ngptotg + write(nout,'("nfld ",i0)') nfld + write(nout,'("nlev ",i0)') nlev + write(nout,'("nproma ",i0)') nproma + write(nout,'("ngpblks ",i0)') ngpblks + write(nout,'("nspec2 ",i0)') nspec2 + write(nout,'("nspec2g ",i0)') nspec2g + write(nout,'("luseflt ",l1)') luseflt + write(nout,'("nopt_mem_tr",i0)') nopt_mem_tr + write(nout,'("lvordiv ",l1)') lvordiv + write(nout,'("lscders ",l1)') lscders + write(nout,'("luvders ",l1)') luvders write(nout,'(" ")') write(nout,'(a)') '======= End of runtime parameters =======' write(nout,'(" ")') @@ -1058,7 +1061,7 @@ subroutine parsing_failed(message) !=================================================================================================== subroutine get_command_line_arguments(nsmax, cgrid, iters, nfld, nlev, lvordiv, lscders, luvders, & - & luseflt, nproma, verbosity, ldump_values, lprint_norms, & + & luseflt, nopt_mem_tr, nproma, verbosity, ldump_values, lprint_norms, & & lmeminfo, nprtrv, nprtrw, ncheck) integer, intent(inout) :: nsmax ! Spectral truncation @@ -1070,6 +1073,7 @@ subroutine get_command_line_arguments(nsmax, cgrid, iters, nfld, nlev, lvordiv, logical, intent(inout) :: lscders ! Compute scalar derivatives logical, intent(inout) :: luvders ! Compute uv East-West derivatives logical, intent(inout) :: luseflt ! Use fast Legendre transforms + integer, intent(inout) :: nopt_mem_tr ! Use of heap or stack memory for ZCOMBUF arrays in transposition arrays integer, intent(inout) :: nproma ! NPROMA integer, intent(inout) :: verbosity ! Level of verbosity logical, intent(inout) :: ldump_values ! Dump values of grid point fields for debugging @@ -1120,6 +1124,7 @@ subroutine get_command_line_arguments(nsmax, cgrid, iters, nfld, nlev, lvordiv, case('--scders'); lscders = .True. case('--uvders'); luvders = .True. case('--flt'); luseflt = .True. + case('--mem-tr'); nopt_mem_tr = get_int_value('--mem-tr', iarg) case('--nproma'); nproma = get_int_value('--nproma', iarg) case('--dump-values'); ldump_values = .true. case('--norms'); lprint_norms = .true. From 16097d141b27d7bd682b102f18c79f4cfca156a8 Mon Sep 17 00:00:00 2001 From: marsdeno <41438629+marsdeno@users.noreply.github.com> Date: Fri, 16 Aug 2024 09:59:45 +0100 Subject: [PATCH 16/86] Free up space for nvhpc builds --- .github/workflows/build.yml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index f0980fa2e..c08473597 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -124,6 +124,25 @@ jobs: path: ${{ env.DEPS_DIR }} key: deps-${{ matrix.os }}-${{ matrix.compiler }}-${{ matrix.build_type }}-${{ env.CACHE_SUFFIX }} + # Free up disk space for nvhpc + - name: Free Disk Space (Ubuntu) + uses: jlumbroso/free-disk-space@main + if: contains( matrix.compiler, 'nvhpc' ) + continue-on-error: true + with: + # this might remove tools that are actually needed, + # if set to "true" but frees about 6 GB + tool-cache: false + + # all of these default to true, but feel free to set to + # "false" if necessary for your workflow + android: true + dotnet: true + haskell: true + large-packages: true + docker-images: true + swap-storage: true + - name: Install NVHPC compiler if: contains( matrix.compiler, 'nvhpc' ) shell: bash -eux {0} From cc377009fb69b778f1194bd495250c92549518e4 Mon Sep 17 00:00:00 2001 From: marsdeno <41438629+marsdeno@users.noreply.github.com> Date: Tue, 20 Aug 2024 10:13:46 +0100 Subject: [PATCH 17/86] Update src/programs/ectrans-benchmark.F90 Co-authored-by: Sam Hatfield --- src/programs/ectrans-benchmark.F90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/programs/ectrans-benchmark.F90 b/src/programs/ectrans-benchmark.F90 index 05b9caa43..e2469a0b8 100644 --- a/src/programs/ectrans-benchmark.F90 +++ b/src/programs/ectrans-benchmark.F90 @@ -1073,7 +1073,7 @@ subroutine get_command_line_arguments(nsmax, cgrid, iters, nfld, nlev, lvordiv, logical, intent(inout) :: lscders ! Compute scalar derivatives logical, intent(inout) :: luvders ! Compute uv East-West derivatives logical, intent(inout) :: luseflt ! Use fast Legendre transforms - integer, intent(inout) :: nopt_mem_tr ! Use of heap or stack memory for ZCOMBUF arrays in transposition arrays + integer, intent(inout) :: nopt_mem_tr ! Use of heap or stack memory for ZCOMBUF arrays in transposition arrays (0 for heap, 1 for stack) integer, intent(inout) :: nproma ! NPROMA integer, intent(inout) :: verbosity ! Level of verbosity logical, intent(inout) :: ldump_values ! Dump values of grid point fields for debugging From 4c5957b0a0a763ae23ae52898d5a0fa486d4306e Mon Sep 17 00:00:00 2001 From: marsdeno <41438629+marsdeno@users.noreply.github.com> Date: Tue, 20 Aug 2024 10:29:41 +0100 Subject: [PATCH 18/86] Printing bug in setup_trans0 We are checking preprocessor macro USE_3XTF32 instead of USE_CUTLASS_3XTF32 --- src/trans/gpu/external/setup_trans0.F90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/trans/gpu/external/setup_trans0.F90 b/src/trans/gpu/external/setup_trans0.F90 index b749f5c5c..20540c369 100755 --- a/src/trans/gpu/external/setup_trans0.F90 +++ b/src/trans/gpu/external/setup_trans0.F90 @@ -210,7 +210,7 @@ SUBROUTINE SETUP_TRANS0(KOUT,KERR,KPRINTLEV,KMAX_RESOL,KPROMATR,& #ifdef USE_CUTLASS WRITE(NOUT,'(A)') " - Cutlass-based GEMM operations" #endif -#ifdef USE_3XTF32 +#ifdef USE_CUTLASS_3XTF32 WRITE(NOUT,'(A)') " - tensor-core usage for 32b Cutlass operations" #endif WRITE(NOUT,'(A)') From 8d8b82501e8197b80a90b2ed9e81348a0c04aa4b Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Wed, 28 Aug 2024 15:53:18 +0200 Subject: [PATCH 19/86] Enable GPU feature without MPI requirement (#140) --- CMakeLists.txt | 4 ++-- src/programs/ectrans-benchmark.F90 | 1 + src/trans/gpu/CMakeLists.txt | 1 + src/trans/gpu/internal/trgtol_mod.F90 | 15 +++++++++++++++ src/trans/gpu/internal/trltog_mod.F90 | 15 ++++++++++++++- src/trans/gpu/internal/trltom_mod.F90 | 11 +++++++++++ src/trans/gpu/internal/trmtol_mod.F90 | 14 ++++++++++++++ 7 files changed, 58 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 45f3cb7cc..a8652716a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -104,8 +104,8 @@ endif() ecbuild_add_option( FEATURE GPU DEFAULT OFF - DESCRIPTION "Compile GPU version of ectrans (Requires OpenACC or sufficient OpenMP offloading support and MPI)" - CONDITION (HAVE_HIP OR HAVE_CUDA) AND (HAVE_ACC OR HAVE_OMP) AND HAVE_MPI ) + DESCRIPTION "Compile GPU version of ectrans (Requires OpenACC or sufficient OpenMP offloading support)" + CONDITION (HAVE_HIP OR HAVE_CUDA) AND (HAVE_ACC OR HAVE_OMP) ) if( HAVE_GPU ) if( HAVE_ACC ) diff --git a/src/programs/ectrans-benchmark.F90 b/src/programs/ectrans-benchmark.F90 index e2469a0b8..b1d393d2d 100644 --- a/src/programs/ectrans-benchmark.F90 +++ b/src/programs/ectrans-benchmark.F90 @@ -236,6 +236,7 @@ program ectrans_benchmark nproc = 1 myproc = 1 mpl_comm = -1 + lsync_trans = .false. endif nthread = oml_max_threads() diff --git a/src/trans/gpu/CMakeLists.txt b/src/trans/gpu/CMakeLists.txt index c6c0daaea..85fa2fcd5 100644 --- a/src/trans/gpu/CMakeLists.txt +++ b/src/trans/gpu/CMakeLists.txt @@ -90,6 +90,7 @@ foreach( prec dp sp ) $<${HAVE_CUTLASS}:nvidia::cutlass::cutlass> PRIVATE_DEFINITIONS ${GPU_OFFLOAD}GPU ${GPU_RUNTIME}GPU + ECTRANS_HAVE_MPI=${ectrans_HAVE_MPI} $<${HAVE_GPU_AWARE_MPI}:USE_GPU_AWARE_MPI> $<${HAVE_CUTLASS}:USE_CUTLASS> $<${HAVE_CUTLASS_3XTF32}:USE_CUTLASS_3XTF32> diff --git a/src/trans/gpu/internal/trgtol_mod.F90 b/src/trans/gpu/internal/trgtol_mod.F90 index 92ce4a89d..7e173bc95 100755 --- a/src/trans/gpu/internal/trgtol_mod.F90 +++ b/src/trans/gpu/internal/trgtol_mod.F90 @@ -113,7 +113,9 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, USE PE2SET_MOD, ONLY: PE2SET USE MPL_DATA_MODULE, ONLY: MPL_COMM_OML USE OML_MOD, ONLY: OML_MY_THREAD +#if ECTRANS_HAVE_MPI USE MPI_F08, ONLY: MPI_COMM, MPI_REQUEST, MPI_FLOAT, MPI_DOUBLE +#endif USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX USE TPM_TRANS, ONLY: NPROMA USE ISO_C_BINDING, ONLY: C_SIZE_T, C_FLOAT, C_DOUBLE, C_INT8_T @@ -176,15 +178,20 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, TYPE(EXT_ACC_ARR_DESC) :: ACC_POINTERS(5) ! at most 5 copyins... INTEGER(KIND=JPIM) :: ACC_POINTERS_CNT = 0 +#if ECTRANS_HAVE_MPI TYPE(MPI_COMM) :: LOCAL_COMM TYPE(MPI_REQUEST) :: IREQUEST(2*NPROC) +#endif #ifdef PARKINDTRANS_SINGLE #define TRGTOL_DTYPE MPI_FLOAT #else #define TRGTOL_DTYPE MPI_DOUBLE #endif + +#if ECTRANS_HAVE_MPI LOCAL_COMM%MPI_VAL = MPL_COMM_OML( OML_MY_THREAD() ) +#endif ! ------------------------------------------------------------------ @@ -580,18 +587,26 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, DO INR=1,IRECV_COUNTS IR=IR+1 IPROC=IRECV_TO_PROC(INR) +#if ECTRANS_HAVE_MPI CALL MPI_IRECV(ZCOMBUFR(ICOMBUFR_OFFSET(INR)+1:ICOMBUFR_OFFSET(INR+1)),IRECVTOT(IPROC), & & TRGTOL_DTYPE,NPRCIDS(IPROC)-1,MTAGLG,LOCAL_COMM,IREQUEST(IR),IERROR) IREQ(IR) = IREQUEST(IR)%MPI_VAL +#else + CALL ABORT_TRANS("Should not be here: MPI is disabled") +#endif ENDDO !....Send loop......................................................... DO INS=1,ISEND_COUNTS IR=IR+1 ISEND=ISEND_TO_PROC(INS) +#if ECTRANS_HAVE_MPI CALL MPI_ISEND(ZCOMBUFS(ICOMBUFS_OFFSET(INS)+1:ICOMBUFS_OFFSET(INS+1)),ISENDTOT(ISEND), & & TRGTOL_DTYPE,NPRCIDS(ISEND)-1,MTAGLG,LOCAL_COMM,IREQUEST(IR),IERROR) IREQ(IR) = IREQUEST(IR)%MPI_VAL +#else + CALL ABORT_TRANS("Should not be here: MPI is disabled") +#endif ENDDO ! Copy local contribution diff --git a/src/trans/gpu/internal/trltog_mod.F90 b/src/trans/gpu/internal/trltog_mod.F90 index 250c48e22..f60aa55cc 100755 --- a/src/trans/gpu/internal/trltog_mod.F90 +++ b/src/trans/gpu/internal/trltog_mod.F90 @@ -114,7 +114,9 @@ SUBROUTINE TRLTOG(ALLOCATOR,HTRLTOG,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, USE MPL_DATA_MODULE, ONLY: MPL_COMM_OML USE OML_MOD, ONLY: OML_MY_THREAD USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS +#if ECTRANS_HAVE_MPI USE MPI_F08, ONLY: MPI_COMM, MPI_REQUEST, MPI_FLOAT, MPI_DOUBLE +#endif USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX USE TPM_TRANS, ONLY: LDIVGP, LSCDERS, LUVDER, LVORGP, NPROMA USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, ASSIGN_PTR, GET_ALLOCATION @@ -188,16 +190,19 @@ SUBROUTINE TRLTOG(ALLOCATOR,HTRLTOG,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, TYPE(EXT_ACC_ARR_DESC) :: ACC_POINTERS(5) ! at most 5 copyins... INTEGER(KIND=JPIM) :: ACC_POINTERS_CNT = 0 +#if ECTRANS_HAVE_MPI TYPE(MPI_COMM) :: LOCAL_COMM TYPE(MPI_REQUEST) :: IREQUEST(NPROC*2) +#endif #ifdef PARKINDTRANS_SINGLE #define TRLTOG_DTYPE MPI_FLOAT #else #define TRLTOG_DTYPE MPI_DOUBLE #endif +#if ECTRANS_HAVE_MPI LOCAL_COMM%MPI_VAL = MPL_COMM_OML( OML_MY_THREAD() ) - +#endif ! ------------------------------------------------------------------ !* 0. Some initializations @@ -707,21 +712,29 @@ SUBROUTINE TRLTOG(ALLOCATOR,HTRLTOG,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, DO INR=1,IRECV_COUNTS IR=IR+1 IRECV=IRECV_TO_PROC(INR) +#if ECTRANS_HAVE_MPI CALL MPI_IRECV(ZCOMBUFR(ICOMBUFR_OFFSET(INR)+1:ICOMBUFR_OFFSET(INR+1)), & & IRECVTOT(IRECV), & & TRLTOG_DTYPE,NPRCIDS(IRECV)-1, & & MTAGLG, LOCAL_COMM, IREQUEST(IR), & & IERROR ) IREQ(IR) = IREQUEST(IR)%MPI_VAL +#else + CALL ABORT_TRANS("Should not be here: MPI is disabled") +#endif ENDDO !...Send loop......................................................... DO INS=1,ISEND_COUNTS IR=IR+1 ISEND=ISEND_TO_PROC(INS) +#if ECTRANS_HAVE_MPI CALL MPI_ISEND(ZCOMBUFS(ICOMBUFS_OFFSET(INS)+1:ICOMBUFS_OFFSET(INS+1)),ISENDTOT(ISEND), & & TRLTOG_DTYPE, NPRCIDS(ISEND)-1,MTAGLG,LOCAL_COMM,IREQUEST(IR),IERROR) IREQ(IR) = IREQUEST(IR)%MPI_VAL +#else + CALL ABORT_TRANS("Should not be here: MPI is disabled") +#endif ENDDO IF(IR > 0) THEN diff --git a/src/trans/gpu/internal/trltom_mod.F90 b/src/trans/gpu/internal/trltom_mod.F90 index be3a4929d..2c689763b 100755 --- a/src/trans/gpu/internal/trltom_mod.F90 +++ b/src/trans/gpu/internal/trltom_mod.F90 @@ -93,7 +93,9 @@ SUBROUTINE TRLTOM(ALLOCATOR,HTRLTOM,PFBUF_IN,PFBUF,KF_FS) USE MPL_MODULE, ONLY: MPL_ALLTOALLV, MPL_BARRIER, MPL_ALL_MS_COMM, MPL_MYRANK USE TPM_DISTR, ONLY: D, NPRTRW, NPROC, MYPROC, MYSETW USE TPM_GEN, ONLY: LSYNC_TRANS, NERR +#if ECTRANS_HAVE_MPI USE MPI_F08, ONLY: MPI_COMM, MPI_FLOAT, MPI_DOUBLE, MPI_ALLTOALLV +#endif USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, ASSIGN_PTR, GET_ALLOCATION USE ISO_C_BINDING, ONLY: C_SIZE_T @@ -112,14 +114,19 @@ SUBROUTINE TRLTOM(ALLOCATOR,HTRLTOM,PFBUF_IN,PFBUF,KF_FS) TYPE(BUFFERED_ALLOCATOR), INTENT(IN) :: ALLOCATOR TYPE(TRLTOM_HANDLE), INTENT(IN) :: HTRLTOM +#if ECTRANS_HAVE_MPI TYPE(MPI_COMM) :: LOCAL_COMM +#endif #ifdef PARKINDTRANS_SINGLE #define TRLTOM_DTYPE MPI_FLOAT #else #define TRLTOM_DTYPE MPI_DOUBLE #endif + +#if ECTRANS_HAVE_MPI LOCAL_COMM%MPI_VAL = MPL_ALL_MS_COMM +#endif IF (LHOOK) CALL DR_HOOK('TRLTOM',0,ZHOOK_HANDLE) @@ -184,9 +191,13 @@ SUBROUTINE TRLTOM(ALLOCATOR,HTRLTOM,PFBUF_IN,PFBUF,KF_FS) !! this is safe-but-slow fallback for running without GPU-aware MPI !$ACC UPDATE HOST(PFBUF_IN,PFBUF) #endif +#if ECTRANS_HAVE_MPI CALL MPI_ALLTOALLV(PFBUF_IN,ILENS,IOFFS,TRLTOM_DTYPE,& & PFBUF,ILENR,IOFFR, TRLTOM_DTYPE, & & LOCAL_COMM,IERROR) +#else + CALL ABORT_TRANS("Should not be here: MPI is disabled") +#endif #ifdef USE_GPU_AWARE_MPI #ifdef OMPGPU #endif diff --git a/src/trans/gpu/internal/trmtol_mod.F90 b/src/trans/gpu/internal/trmtol_mod.F90 index 4298d790e..98420900e 100755 --- a/src/trans/gpu/internal/trmtol_mod.F90 +++ b/src/trans/gpu/internal/trmtol_mod.F90 @@ -93,7 +93,9 @@ SUBROUTINE TRMTOL(ALLOCATOR,HTRMTOL,PFBUF_IN,PFBUF,KF_LEG) USE MPL_MODULE, ONLY: MPL_ALLTOALLV, MPL_BARRIER, MPL_ALL_MS_COMM, MPL_MYRANK USE TPM_DISTR, ONLY: D, NPRTRW, NPROC, MYPROC, MYSETW USE TPM_GEN, ONLY: LSYNC_TRANS, NERR +#if ECTRANS_HAVE_MPI USE MPI_F08, ONLY: MPI_COMM, MPI_FLOAT, MPI_DOUBLE +#endif USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, ASSIGN_PTR, GET_ALLOCATION USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX USE ISO_C_BINDING, ONLY: C_SIZE_T @@ -112,14 +114,20 @@ SUBROUTINE TRMTOL(ALLOCATOR,HTRMTOL,PFBUF_IN,PFBUF,KF_LEG) TYPE(BUFFERED_ALLOCATOR), INTENT(IN) :: ALLOCATOR TYPE(TRMTOL_HANDLE), INTENT(IN) :: HTRMTOL + +#if ECTRANS_HAVE_MPI TYPE(MPI_COMM) :: LOCAL_COMM +#endif #ifdef PARKINDTRANS_SINGLE #define TRMTOL_DTYPE MPI_FLOAT #else #define TRMTOL_DTYPE MPI_DOUBLE #endif + +#if ECTRANS_HAVE_MPI LOCAL_COMM%MPI_VAL = MPL_ALL_MS_COMM +#endif IF (LHOOK) CALL DR_HOOK('TRMTOL',0,ZHOOK_HANDLE) @@ -178,9 +186,15 @@ SUBROUTINE TRMTOL(ALLOCATOR,HTRMTOL,PFBUF_IN,PFBUF,KF_LEG) !! this is safe-but-slow fallback for running without GPU-aware MPI !$ACC UPDATE HOST(PFBUF_IN,PFBUF) #endif + +#if ECTRANS_HAVE_MPI CALL MPI_ALLTOALLV(PFBUF_IN,ILENS,IOFFS,TRMTOL_DTYPE,& & PFBUF,ILENR,IOFFR,TRMTOL_DTYPE,& & LOCAL_COMM,IERROR) +#else + CALL ABORT_TRANS("Should not be here: MPI is disabled") +#endif + #ifdef USE_GPU_AWARE_MPI #ifdef OMPGPU #endif From 9f02164c3d2d366f0a6e6290835ecc1d99d7b479 Mon Sep 17 00:00:00 2001 From: lukasm91 Date: Mon, 2 Sep 2024 15:22:37 +0200 Subject: [PATCH 20/86] Clean up hicfft (#143) Removes dead code paths from early implementations. --- .../gpu/algor/hicfft_execute_plan.hip.cpp | 32 ---- src/trans/gpu/external/setup_trans.F90 | 2 - src/trans/gpu/internal/dealloc_resol_mod.F90 | 3 - src/trans/gpu/internal/set_resol_mod.F90 | 2 - src/trans/gpu/internal/sufft_mod.F90 | 3 - src/trans/gpu/internal/tpm_hicfft.F90 | 169 +----------------- 6 files changed, 1 insertion(+), 210 deletions(-) diff --git a/src/trans/gpu/algor/hicfft_execute_plan.hip.cpp b/src/trans/gpu/algor/hicfft_execute_plan.hip.cpp index 4a92e1fd0..345cd73d2 100644 --- a/src/trans/gpu/algor/hicfft_execute_plan.hip.cpp +++ b/src/trans/gpu/algor/hicfft_execute_plan.hip.cpp @@ -38,38 +38,6 @@ __global__ void debugFloat(int varId, int N, HIP_DATA_TYPE_REAL *x) { } } -extern "C" -void -hicfft_execute_plan_(int ISIGNp, int N, DATA_TYPE *data_in_host, DATA_TYPE *data_out_host, long *iplan) -{ - HIP_DATA_TYPE_COMPLEX *data_in = reinterpret_cast(data_in_host); - HIP_DATA_TYPE_COMPLEX *data_out = reinterpret_cast(data_out_host); - hipfftHandle* PLANp = reinterpret_cast(iplan); - hipfftHandle plan = *PLANp; - int ISIGN = ISIGNp; - - /*if (hipDeviceSynchronize() != hipSuccess){ - fprintf(stderr, "GPU runtime error: Failed to synchronize\n"); - return; - }*/ - - if( ISIGN== -1 ){ - fftSafeCall(fftExecDir(plan, (HIP_DATA_TYPE_REAL*)data_in, data_out)); - } - else if( ISIGN== 1){ - fftSafeCall(fftExecInv(plan, data_in, (HIP_DATA_TYPE_REAL*)data_out)); - } - else { - abort(); - } - - if (hipDeviceSynchronize() != hipSuccess){ - fprintf(stderr, "GPU runtime error: Failed to synchronize\n"); - return; - } - -} - namespace { struct Double { using real = double; diff --git a/src/trans/gpu/external/setup_trans.F90 b/src/trans/gpu/external/setup_trans.F90 index b359edc28..b5d2ff732 100755 --- a/src/trans/gpu/external/setup_trans.F90 +++ b/src/trans/gpu/external/setup_trans.F90 @@ -117,7 +117,6 @@ SUBROUTINE SETUP_TRANS(KSMAX,KDGL,KDLON,KLOEN,LDSPLIT,PSTRET,& USE TPM_FIELDS, ONLY: FIELDS_RESOL, F, F_RW, F_RN, F_RLAPIN, F_RACTHE, ZEPSNM, & & ZAA, ZAS, ZAA0, ZAS0, KMLOC0 USE TPM_FFT, ONLY: T, FFT_RESOL -USE TPM_HICFFT, ONLY: HICT, HICFFT_RESOL USE TPM_FLT, ONLY: FLT_RESOL, S USE TPM_CTL, ONLY: CTL_RESOL, C USE SET_RESOL_MOD, ONLY: SET_RESOL @@ -207,7 +206,6 @@ SUBROUTINE SETUP_TRANS(KSMAX,KDGL,KDLON,KLOEN,LDSPLIT,PSTRET,& ALLOCATE(GEOM_RESOL(NMAX_RESOL)) ALLOCATE(DISTR_RESOL(NMAX_RESOL)) ALLOCATE(FFT_RESOL(NMAX_RESOL)) - ALLOCATE(HICFFT_RESOL(NMAX_RESOL)) ALLOCATE(FLT_RESOL(NMAX_RESOL)) ALLOCATE(CTL_RESOL(NMAX_RESOL)) GEOM_RESOL(:)%LAM=.FALSE. diff --git a/src/trans/gpu/internal/dealloc_resol_mod.F90 b/src/trans/gpu/internal/dealloc_resol_mod.F90 index 343f46c41..cdbb4eada 100755 --- a/src/trans/gpu/internal/dealloc_resol_mod.F90 +++ b/src/trans/gpu/internal/dealloc_resol_mod.F90 @@ -50,7 +50,6 @@ SUBROUTINE DEALLOC_RESOL(KRESOL) USE TPM_FFT, ONLY: T USE TPM_FLT, ONLY: S USE TPM_CTL, ONLY: C -USE TPM_HICFFT, ONLY: DESTROY_ALL_PLANS_FFT USE SEEFMM_MIX, ONLY: FREE_SEEFMM USE SET_RESOL_MOD, ONLY: SET_RESOL ! @@ -70,8 +69,6 @@ SUBROUTINE DEALLOC_RESOL(KRESOL) CALL SET_RESOL(KRESOL) - CALL DESTROY_ALL_PLANS_FFT() - !TPM_FLT IF( ALLOCATED(S%FA) ) THEN DO JMLOC=1,D%NUMP,NPRTRV ! +++++++++++++++++++++ JMLOC LOOP ++++++++++ diff --git a/src/trans/gpu/internal/set_resol_mod.F90 b/src/trans/gpu/internal/set_resol_mod.F90 index f1ac30cff..f75583499 100755 --- a/src/trans/gpu/internal/set_resol_mod.F90 +++ b/src/trans/gpu/internal/set_resol_mod.F90 @@ -19,7 +19,6 @@ SUBROUTINE SET_RESOL(KRESOL,LDSETUP) USE TPM_GEOMETRY, ONLY: G, GEOM_RESOL USE TPM_FIELDS, ONLY: F, FIELDS_RESOL USE TPM_FFT, ONLY: T, FFT_RESOL -USE TPM_HICFFT, ONLY: HICT, HICFFT_RESOL USE TPM_FLT, ONLY: S, FLT_RESOL USE TPM_CTL, ONLY: C, CTL_RESOL USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS @@ -62,7 +61,6 @@ SUBROUTINE SET_RESOL(KRESOL,LDSETUP) G => GEOM_RESOL(NCUR_RESOL) D => DISTR_RESOL(NCUR_RESOL) T => FFT_RESOL(NCUR_RESOL) - HICT => HICFFT_RESOL(NCUR_RESOL) S => FLT_RESOL(NCUR_RESOL) C => CTL_RESOL(NCUR_RESOL) ENDIF diff --git a/src/trans/gpu/internal/sufft_mod.F90 b/src/trans/gpu/internal/sufft_mod.F90 index b279f7278..91b6fd262 100755 --- a/src/trans/gpu/internal/sufft_mod.F90 +++ b/src/trans/gpu/internal/sufft_mod.F90 @@ -17,7 +17,6 @@ SUBROUTINE SUFFT USE TPM_DIM, ONLY: R USE TPM_GEN, ONLY: NOUT, NPRINTLEV USE TPM_DISTR, ONLY: D - USE TPM_HICFFT, ONLY: INIT_PLANS_FFT ! IMPLICIT NONE @@ -33,8 +32,6 @@ SUBROUTINE SUFFT LLP2 = NPRINTLEV>1 IF(LLP1) WRITE(NOUT,*) '=== ENTER ROUTINE SUFFT ===' - CALL INIT_PLANS_FFT(R%NDLON) - ENDIF ! ------------------------------------------------------------------ diff --git a/src/trans/gpu/internal/tpm_hicfft.F90 b/src/trans/gpu/internal/tpm_hicfft.F90 index a4278d024..696d2b0b6 100755 --- a/src/trans/gpu/internal/tpm_hicfft.F90 +++ b/src/trans/gpu/internal/tpm_hicfft.F90 @@ -28,26 +28,7 @@ MODULE TPM_HICFFT SAVE PRIVATE - PUBLIC CREATE_PLAN_FFT, DESTROY_PLAN_FFT, DESTROY_ALL_PLANS_FFT, INIT_PLANS_FFT, EXECUTE_PLAN_FFT, & - & HICFFT_RESOL, HICT, EXECUTE_DIR_FFT, EXECUTE_INV_FFT - - TYPE HICFFT_TYPE - INTEGER(KIND=C_INT),POINTER :: N_PLANS(:) - TYPE(HICFFT_PLAN),POINTER :: HICFFT_PLANS(:) - INTEGER(KIND=C_INT) :: N_MAX=0 - END TYPE HICFFT_TYPE - - - TYPE HICFFT_PLAN - TYPE(C_PTR) :: NPLAN - INTEGER(KIND=C_INT) :: NLOT - INTEGER(KIND=C_INT) :: NSTRIDE - INTEGER(KIND=C_INT) :: NTYPE - TYPE(HICFFT_PLAN),POINTER :: NEXT_PLAN => NULL() - END TYPE HICFFT_PLAN - - TYPE(HICFFT_TYPE),ALLOCATABLE,TARGET :: HICFFT_RESOL(:) - TYPE(HICFFT_TYPE),POINTER :: HICT + PUBLIC EXECUTE_DIR_FFT, EXECUTE_INV_FFT INTERFACE EXECUTE_DIR_FFT MODULE PROCEDURE EXECUTE_DIR_FFT_FLOAT,EXECUTE_DIR_FFT_DOUBLE @@ -64,154 +45,6 @@ MODULE TPM_HICFFT ! ------------------------------------------------------------------ - SUBROUTINE INIT_PLANS_FFT(KDLON) - INTEGER(KIND=C_INT),INTENT(IN) :: KDLON - - HICT%N_MAX=KDLON - ALLOCATE(HICT%HICFFT_PLANS(HICT%N_MAX)) - ALLOCATE(HICT%N_PLANS(HICT%N_MAX)) - HICT%N_PLANS(:)=0 - RETURN - END SUBROUTINE INIT_PLANS_FFT - - - SUBROUTINE CREATE_PLAN_FFT(KPLAN,KTYPE,KN,KLOT,KSTRIDE) - TYPE(C_PTR),INTENT(OUT) :: KPLAN - INTEGER(KIND=C_INT),INTENT(IN) :: KTYPE,KN,KLOT,KSTRIDE - - TYPE(C_PTR) :: IPLAN - INTEGER(KIND=C_INT) :: IRANK, ISTRIDE - INTEGER(KIND=C_INT) :: JL, JN - INTEGER(KIND=C_INT) :: IRDIST,ICDIST,IN(1),IEMBED(1) - LOGICAL :: LLFOUND - TYPE(HICFFT_PLAN),POINTER :: CURR_HICFFT_PLAN,START_HICFFT_PLAN - - INTERFACE - SUBROUTINE HICFFT_CREATE_PLAN(KPLAN,KTYPE,KN,KLOT,KSTRIDE) BIND(C,NAME="hicfft_create_plan_") - USE, INTRINSIC :: ISO_C_BINDING - TYPE(C_PTR), INTENT(OUT) :: KPLAN - INTEGER(C_INT), INTENT(IN) :: KTYPE,KN,KLOT,KSTRIDE - END SUBROUTINE HICFFT_CREATE_PLAN - END INTERFACE - - IF( KN > HICT%N_MAX )THEN - stop 'CREATE_PLAN_FFT: KN > N_MAX THAT WAS INITIALISED IN INIT_PLANS_FFT' - ENDIF - - IRANK=1 - ISTRIDE=1 - IN(1)=KN - IEMBED(1)=IN(1) - ICDIST=KN/2+1 - IRDIST=ICDIST*2 - - !!$OMP CRITICAL - LLFOUND=.FALSE. - CURR_HICFFT_PLAN=>HICT%HICFFT_PLANS(KN) - ! search for plan in existing plans - DO JL=1,HICT%N_PLANS(KN) - IF( KLOT == CURR_HICFFT_PLAN%NLOT .AND. KTYPE == CURR_HICFFT_PLAN%NTYPE & - & .AND. KSTRIDE == CURR_HICFFT_PLAN%NSTRIDE)THEN - LLFOUND=.TRUE. - IPLAN=CURR_HICFFT_PLAN%NPLAN - EXIT - ELSEIF( JL /= HICT%N_PLANS(KN) )THEN - CURR_HICFFT_PLAN=>CURR_HICFFT_PLAN%NEXT_PLAN - ENDIF - ENDDO - IF( .NOT.LLFOUND )THEN - CALL HICFFT_CREATE_PLAN(IPLAN,KTYPE,KN,KLOT,KSTRIDE) - KPLAN=IPLAN - HICT%N_PLANS(KN)=HICT%N_PLANS(KN)+1 - IF( HICT%N_PLANS(KN) /= 1 )THEN - ALLOCATE(CURR_HICFFT_PLAN%NEXT_PLAN) - CURR_HICFFT_PLAN=>CURR_HICFFT_PLAN%NEXT_PLAN - ENDIF - CURR_HICFFT_PLAN%NPLAN=IPLAN - CURR_HICFFT_PLAN%NLOT=KLOT - CURR_HICFFT_PLAN%NSTRIDE=KSTRIDE - CURR_HICFFT_PLAN%NTYPE=KTYPE - CURR_HICFFT_PLAN%NEXT_PLAN=>NULL() - ELSE - KPLAN=IPLAN - ENDIF - !!$OMP END CRITICAL - END SUBROUTINE CREATE_PLAN_FFT - - - SUBROUTINE DESTROY_PLAN_FFT(KPLAN) - TYPE(C_PTR),INTENT(IN) :: KPLAN - INTERFACE - SUBROUTINE HICFFT_DESTROY_PLAN(KPLAN) BIND(C, NAME="hicfft_destroy_plan_") - USE, INTRINSIC :: ISO_C_BINDING - TYPE(C_PTR), VALUE, INTENT(IN) :: KPLAN - END SUBROUTINE HICFFT_DESTROY_PLAN - END INTERFACE - - CALL HICFFT_DESTROY_PLAN(KPLAN) - END SUBROUTINE DESTROY_PLAN_FFT - - - SUBROUTINE DESTROY_ALL_PLANS_FFT - INTEGER(KIND=C_INT) :: JL, JN - TYPE(HICFFT_PLAN),POINTER :: CURR_HICFFT_PLAN - - IF( .NOT. ASSOCIATED(HICT) ) THEN - RETURN - ENDIF - - IF ( .NOT. ASSOCIATED(HICT%HICFFT_PLANS) .OR. .NOT. ASSOCIATED(HICT%N_PLANS) ) THEN - RETURN - ENDIF - - DO JN = 1, HICT%N_MAX - CURR_HICFFT_PLAN=>HICT%HICFFT_PLANS(JN) - DO JL = 1, HICT%N_PLANS(JN) - IF( ASSOCIATED(CURR_HICFFT_PLAN) ) THEN - CALL DESTROY_PLAN_FFT(CURR_HICFFT_PLAN%NPLAN) - CURR_HICFFT_PLAN=>CURR_HICFFT_PLAN%NEXT_PLAN - ENDIF - ENDDO - ENDDO - - DEALLOCATE(HICT%HICFFT_PLANS) - DEALLOCATE(HICT%N_PLANS) - END SUBROUTINE DESTROY_ALL_PLANS_FFT - - SUBROUTINE EXECUTE_PLAN_FFT(KN,N,X_IN,X_OUT,PLAN_PTR) - TYPE(C_PTR) :: PLAN_PTR - INTEGER(KIND=C_INT) :: KN - INTEGER(KIND=C_INT) :: N - REAL(KIND=JPRBT), TARGET :: X_IN - REAL(KIND=JPRBT), TARGET :: X_OUT - - INTERFACE - SUBROUTINE HICFFT_EXECUTE_PLAN (KN, N, X_IN_PTR, X_OUT_PTR, PLAN_PTR) & - & BIND(C,NAME="hicfft_execute_plan_") - USE, INTRINSIC :: ISO_C_BINDING - TYPE(C_PTR), VALUE :: PLAN_PTR - INTEGER(KIND=C_INT), VALUE :: KN - INTEGER(KIND=C_INT), VALUE :: N - TYPE(C_PTR), VALUE :: X_IN_PTR, X_OUT_PTR - END SUBROUTINE HICFFT_EXECUTE_PLAN - END INTERFACE - -#ifdef OMPGPU - !$OMP TARGET DATA USE_DEVICE_PTR(X_IN,X_OUT) -#endif -#ifdef ACCGPU - !$ACC HOST_DATA USE_DEVICE(X_IN,X_OUT) -#endif - CALL HICFFT_EXECUTE_PLAN(KN,N,C_LOC(X_IN),C_LOC(X_OUT),PLAN_PTR) -#ifdef ACCGPU - !$ACC END HOST_DATA -#endif -#ifdef OMPGPU - !$OMP END TARGET DATA -#endif - - END SUBROUTINE EXECUTE_PLAN_FFT - SUBROUTINE EXECUTE_DIR_FFT_FLOAT(PREEL_REAL,PREEL_COMPLEX,KFIELD,LOENS,OFFSETS,ALLOC) USE PARKIND_ECTRANS ,ONLY : JPIM From 51cd73c0e959f3ead89a2033fa6f3553609e885a Mon Sep 17 00:00:00 2001 From: lukasm91 Date: Mon, 2 Sep 2024 15:25:20 +0200 Subject: [PATCH 21/86] Remove flag -gpu=pinned (#142) --- src/trans/gpu/CMakeLists.txt | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/src/trans/gpu/CMakeLists.txt b/src/trans/gpu/CMakeLists.txt index 85fa2fcd5..c417da5eb 100644 --- a/src/trans/gpu/CMakeLists.txt +++ b/src/trans/gpu/CMakeLists.txt @@ -9,19 +9,6 @@ # Preprocess module file containing version information configure_file( internal/ectrans_version_mod.F90.in internal/ectrans_version_mod.F90 ) -## Apply workarounds for some known compilers - -if(CMAKE_Fortran_COMPILER_ID MATCHES "NVHPC") - - # Compile setup_trans with pinned memory to improve data movement performance. - ectrans_add_compile_options( - SOURCES external/setup_trans.F90 - #FLAGS "-gpu=pinned,deepcopy,fastmath,nordc") - FLAGS "-gpu=pinned,fastmath") - # TODO: check if it is sufficient to only set "-gpu=pinned" which appends rather than overwrites - -endif() - ## Assemble sources ecbuild_list_add_pattern( LIST trans_src From e268466271a61b5933a1a66826a3b4c85c79205e Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Mon, 2 Sep 2024 14:37:29 +0000 Subject: [PATCH 22/86] Amend "9f02164 - Clean up hicfft (#143)" with removal of more dead code --- src/trans/gpu/algor/hicfft.cuda.cu | 1 + ...ft_execute_plan.hip.cpp => hicfft.hip.cpp} | 52 +++++------- .../gpu/algor/hicfft_create_plan.cuda.cu | 1 - .../gpu/algor/hicfft_create_plan.hip.cpp | 79 ------------------- .../gpu/algor/hicfft_destroy_plan.cuda.cu | 1 - .../gpu/algor/hicfft_destroy_plan.hip.cpp | 23 ------ .../gpu/algor/hicfft_execute_plan.cuda.cu | 1 - 7 files changed, 20 insertions(+), 138 deletions(-) create mode 120000 src/trans/gpu/algor/hicfft.cuda.cu rename src/trans/gpu/algor/{hicfft_execute_plan.hip.cpp => hicfft.hip.cpp} (87%) delete mode 120000 src/trans/gpu/algor/hicfft_create_plan.cuda.cu delete mode 100644 src/trans/gpu/algor/hicfft_create_plan.hip.cpp delete mode 120000 src/trans/gpu/algor/hicfft_destroy_plan.cuda.cu delete mode 100644 src/trans/gpu/algor/hicfft_destroy_plan.hip.cpp delete mode 120000 src/trans/gpu/algor/hicfft_execute_plan.cuda.cu diff --git a/src/trans/gpu/algor/hicfft.cuda.cu b/src/trans/gpu/algor/hicfft.cuda.cu new file mode 120000 index 000000000..54e8f2322 --- /dev/null +++ b/src/trans/gpu/algor/hicfft.cuda.cu @@ -0,0 +1 @@ +hicfft.hip.cpp \ No newline at end of file diff --git a/src/trans/gpu/algor/hicfft_execute_plan.hip.cpp b/src/trans/gpu/algor/hicfft.hip.cpp similarity index 87% rename from src/trans/gpu/algor/hicfft_execute_plan.hip.cpp rename to src/trans/gpu/algor/hicfft.hip.cpp index 345cd73d2..b7a8581a8 100644 --- a/src/trans/gpu/algor/hicfft_execute_plan.hip.cpp +++ b/src/trans/gpu/algor/hicfft.hip.cpp @@ -4,39 +4,25 @@ #define fftSafeCall(err) __fftSafeCall(err, __FILE__, __LINE__) -#ifdef TRANS_SINGLE -typedef float DATA_TYPE; -typedef hipfftComplex HIP_DATA_TYPE_COMPLEX; -typedef hipfftReal HIP_DATA_TYPE_REAL; -#define fftExecDir hipfftExecR2C -#define fftExecInv hipfftExecC2R -#else -typedef double DATA_TYPE; -typedef hipfftDoubleComplex HIP_DATA_TYPE_COMPLEX; -typedef hipfftDoubleReal HIP_DATA_TYPE_REAL; -#define fftExecDir hipfftExecD2Z -#define fftExecInv hipfftExecZ2D -#endif - -__global__ void debug(int varId, int N, HIP_DATA_TYPE_COMPLEX *x) { - for (int i = 0; i < N; i++) - { - HIP_DATA_TYPE_COMPLEX a = x[i]; - double b = (double)a.x; - double c = (double)a.y; - if (varId == 0) printf("GPU: input[%d]=(%2.4f,%2.4f)\n",i+1,b,c); - if (varId == 1) printf("GPU: output[%d]=(%2.4f,%2.4f)\n",i+1,b,c); - } -} - -__global__ void debugFloat(int varId, int N, HIP_DATA_TYPE_REAL *x) { - for (int i = 0; i < N; i++) - { - double a = (double)x[i]; - if (varId == 0) printf("GPU: input[%d]=%2.4f\n",i+1,a); - if (varId == 1) printf("GPU: output[%d]=%2.4f\n",i+1,a); - } -} +// __global__ void debug(int varId, int N, HIP_DATA_TYPE_COMPLEX *x) { +// for (int i = 0; i < N; i++) +// { +// HIP_DATA_TYPE_COMPLEX a = x[i]; +// double b = (double)a.x; +// double c = (double)a.y; +// if (varId == 0) printf("GPU: input[%d]=(%2.4f,%2.4f)\n",i+1,b,c); +// if (varId == 1) printf("GPU: output[%d]=(%2.4f,%2.4f)\n",i+1,b,c); +// } +// } + +// __global__ void debugFloat(int varId, int N, HIP_DATA_TYPE_REAL *x) { +// for (int i = 0; i < N; i++) +// { +// double a = (double)x[i]; +// if (varId == 0) printf("GPU: input[%d]=%2.4f\n",i+1,a); +// if (varId == 1) printf("GPU: output[%d]=%2.4f\n",i+1,a); +// } +// } namespace { struct Double { diff --git a/src/trans/gpu/algor/hicfft_create_plan.cuda.cu b/src/trans/gpu/algor/hicfft_create_plan.cuda.cu deleted file mode 120000 index fdc9ab897..000000000 --- a/src/trans/gpu/algor/hicfft_create_plan.cuda.cu +++ /dev/null @@ -1 +0,0 @@ -hicfft_create_plan.hip.cpp \ No newline at end of file diff --git a/src/trans/gpu/algor/hicfft_create_plan.hip.cpp b/src/trans/gpu/algor/hicfft_create_plan.hip.cpp deleted file mode 100644 index a1aa074ce..000000000 --- a/src/trans/gpu/algor/hicfft_create_plan.hip.cpp +++ /dev/null @@ -1,79 +0,0 @@ -#include "hicfft.h" - -#define fftSafeCall(err) __fftSafeCall(err, __FILE__, __LINE__) - -// static int allocatedWorkspace=0; -// static void* planWorkspace; -// static int planWorkspaceSize=100*1024*1024; //100MB -void *planWorkspace = nullptr; -static int currentWorkspaceSize = 0; - -extern "C" -void -hicfft_create_plan_(hipfftHandle * *plan, int *ISIGNp, int *Np, int *LOTp, int *stridep, int *plan_size) -{ - int ISIGN = *ISIGNp; - int N = *Np; - int LOT = *LOTp; - int stride = *stridep; - - *plan = new hipfftHandle; - - if (hipDeviceSynchronize() != hipSuccess){ - fprintf(stderr, "GPU runtime error: Failed to synchronize\n"); - return; - } - - int embed[1]; - int dist; - - #ifdef TRANS_SINGLE - hipfftType fft_dir = HIPFFT_R2C; - hipfftType fft_inv = HIPFFT_C2R; - #else - hipfftType fft_dir = HIPFFT_D2Z; - hipfftType fft_inv = HIPFFT_Z2D; - #endif - - embed[0] = 1; - dist = 1; - - fftSafeCall(hipfftCreate(*plan)); - - // Disable auto allocation - fftSafeCall(hipfftSetAutoAllocation(**plan, false)); - - if( ISIGN== -1 ){ - fftSafeCall(hipfftPlanMany(*plan, 1, &N, - embed, stride, dist, - embed, stride, dist, - fft_dir, LOT)); - } else if( ISIGN== 1){ - fftSafeCall(hipfftPlanMany(*plan, 1, &N, - embed, stride, dist, - embed, stride, dist, - fft_inv, LOT)); - } else { - abort(); - } - - // get size used by this plan - size_t thisWorkplanSize; - hipfftGetSize(**plan, &thisWorkplanSize); - - // check if this the work space is sufficiently large - if (thisWorkplanSize > currentWorkspaceSize) { - hipDeviceSynchronize(); - hipFree(planWorkspace); - hipMalloc(&planWorkspace, thisWorkplanSize); - currentWorkspaceSize = thisWorkplanSize; - } - - if (hipDeviceSynchronize() != hipSuccess){ - fprintf(stderr, "GPU runtime error: Failed to synchronize\n"); - return; - } - - return; - -} diff --git a/src/trans/gpu/algor/hicfft_destroy_plan.cuda.cu b/src/trans/gpu/algor/hicfft_destroy_plan.cuda.cu deleted file mode 120000 index 398313b5c..000000000 --- a/src/trans/gpu/algor/hicfft_destroy_plan.cuda.cu +++ /dev/null @@ -1 +0,0 @@ -hicfft_destroy_plan.hip.cpp \ No newline at end of file diff --git a/src/trans/gpu/algor/hicfft_destroy_plan.hip.cpp b/src/trans/gpu/algor/hicfft_destroy_plan.hip.cpp deleted file mode 100644 index 1e5e1fb1b..000000000 --- a/src/trans/gpu/algor/hicfft_destroy_plan.hip.cpp +++ /dev/null @@ -1,23 +0,0 @@ -#include "hicfft.h" - -#define fftSafeCall(err) __fftSafeCall(err, __FILE__, __LINE__) - -extern "C" -void -hicfft_destroy_plan_(hipfftHandle *PLANp) -{ - hipfftHandle plan = *PLANp; - - if (hipDeviceSynchronize() != hipSuccess){ - fprintf(stderr, "GPU runtime error: Failed to synchronize\n"); - return; - } - - fftSafeCall(hipfftDestroy(plan)); - - if (hipDeviceSynchronize() != hipSuccess){ - fprintf(stderr, "GPU runtime error: Failed to synchronize\n"); - return; - } - -} diff --git a/src/trans/gpu/algor/hicfft_execute_plan.cuda.cu b/src/trans/gpu/algor/hicfft_execute_plan.cuda.cu deleted file mode 120000 index 3dd81ece3..000000000 --- a/src/trans/gpu/algor/hicfft_execute_plan.cuda.cu +++ /dev/null @@ -1 +0,0 @@ -hicfft_execute_plan.hip.cpp \ No newline at end of file From 09ebed675783fa3a5bb2ce1bf42ebd671356cb89 Mon Sep 17 00:00:00 2001 From: Lukas Mosimann Date: Tue, 3 Sep 2024 01:03:40 -0700 Subject: [PATCH 23/86] fix barrier usage --- src/trans/gpu/internal/dir_trans_ctl_mod.F90 | 1 - src/trans/gpu/internal/ftdir_mod.F90 | 6 +++--- src/trans/gpu/internal/ftinv_mod.F90 | 6 +++--- src/trans/gpu/internal/ledir_mod.F90 | 10 +++++----- src/trans/gpu/internal/leinv_mod.F90 | 10 +++++----- src/trans/gpu/internal/ltdir_mod.F90 | 6 +++--- src/trans/gpu/internal/ltinv_mod.F90 | 6 +++--- src/trans/gpu/internal/trltom_mod.F90 | 4 ++-- src/trans/gpu/internal/trmtol_mod.F90 | 4 ++-- 9 files changed, 26 insertions(+), 27 deletions(-) diff --git a/src/trans/gpu/internal/dir_trans_ctl_mod.F90 b/src/trans/gpu/internal/dir_trans_ctl_mod.F90 index 65c9e4e85..ea2b4d873 100755 --- a/src/trans/gpu/internal/dir_trans_ctl_mod.F90 +++ b/src/trans/gpu/internal/dir_trans_ctl_mod.F90 @@ -76,7 +76,6 @@ SUBROUTINE DIR_TRANS_CTL(KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS,KF_UV,KF_SCALARS,& USE PARKIND_ECTRANS, ONLY: JPRBT, JPRD, JPRB, JPIM USE TPM_GEN, ONLY: NPROMATR, NOUT USE TPM_DISTR, ONLY: NPROC - USE MPL_MODULE, ONLY: MPL_BARRIER USE TPM_TRANS, ONLY: GROWING_ALLOCATION USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, MAKE_BUFFERED_ALLOCATOR, & & INSTANTIATE_ALLOCATOR diff --git a/src/trans/gpu/internal/ftdir_mod.F90 b/src/trans/gpu/internal/ftdir_mod.F90 index da5910c77..65e50138a 100755 --- a/src/trans/gpu/internal/ftdir_mod.F90 +++ b/src/trans/gpu/internal/ftdir_mod.F90 @@ -64,7 +64,7 @@ SUBROUTINE FTDIR(ALLOCATOR,HFTDIR,PREEL_REAL,PREEL_COMPLEX,KFIELD) USE TPM_GEOMETRY, ONLY: G_NMEN, G_NLOEN USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR USE TPM_HICFFT, ONLY: EXECUTE_DIR_FFT - USE MPL_MODULE, ONLY: MPL_BARRIER + USE MPL_MODULE, ONLY: MPL_BARRIER,MPL_ALL_MS_COMM USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX IMPLICIT NONE @@ -89,7 +89,7 @@ SUBROUTINE FTDIR(ALLOCATOR,HFTDIR,PREEL_REAL,PREEL_COMPLEX,KFIELD) IF (LSYNC_TRANS) THEN CALL GSTATS(430,0) - CALL MPL_BARRIER(CDSTRING='') + CALL MPL_BARRIER(MPL_ALL_MS_COMM,CDSTRING='') CALL GSTATS(430,1) ENDIF CALL GSTATS(413,0) @@ -99,7 +99,7 @@ SUBROUTINE FTDIR(ALLOCATOR,HFTDIR,PREEL_REAL,PREEL_COMPLEX,KFIELD) IF (LSYNC_TRANS) THEN CALL GSTATS(433,0) - CALL MPL_BARRIER(CDSTRING='') + CALL MPL_BARRIER(MPL_ALL_MS_COMM,CDSTRING='') CALL GSTATS(433,1) ENDIF CALL GSTATS(413,1) diff --git a/src/trans/gpu/internal/ftinv_mod.F90 b/src/trans/gpu/internal/ftinv_mod.F90 index e0e3804e3..e939f42fc 100755 --- a/src/trans/gpu/internal/ftinv_mod.F90 +++ b/src/trans/gpu/internal/ftinv_mod.F90 @@ -64,7 +64,7 @@ SUBROUTINE FTINV(ALLOCATOR,HFTINV,PREEL_COMPLEX,PREEL_REAL,KFIELD) USE TPM_DISTR, ONLY: MYSETW, D_NPTRLS, D_NDGL_FS, D_NSTAGTF USE TPM_GEOMETRY, ONLY: G_NLOEN USE TPM_HICFFT, ONLY: EXECUTE_INV_FFT - USE MPL_MODULE, ONLY: MPL_BARRIER + USE MPL_MODULE, ONLY: MPL_BARRIER,MPL_ALL_MS_COMM USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX IMPLICIT NONE @@ -87,7 +87,7 @@ SUBROUTINE FTINV(ALLOCATOR,HFTINV,PREEL_COMPLEX,PREEL_REAL,KFIELD) IF (LSYNC_TRANS) THEN CALL GSTATS(440,0) - CALL MPL_BARRIER(CDSTRING='') + CALL MPL_BARRIER(MPL_ALL_MS_COMM,CDSTRING='') CALL GSTATS(440,1) ENDIF CALL GSTATS(423,0) @@ -97,7 +97,7 @@ SUBROUTINE FTINV(ALLOCATOR,HFTINV,PREEL_COMPLEX,PREEL_REAL,KFIELD) IF (LSYNC_TRANS) THEN CALL GSTATS(443,0) - CALL MPL_BARRIER(CDSTRING='') + CALL MPL_BARRIER(MPL_ALL_MS_COMM,CDSTRING='') CALL GSTATS(443,1) ENDIF CALL GSTATS(423,1) diff --git a/src/trans/gpu/internal/ledir_mod.F90 b/src/trans/gpu/internal/ledir_mod.F90 index 98c932562..4399304e2 100755 --- a/src/trans/gpu/internal/ledir_mod.F90 +++ b/src/trans/gpu/internal/ledir_mod.F90 @@ -102,7 +102,7 @@ SUBROUTINE LEDIR(ALLOCATOR,ZINPS,ZINPA,ZINPS0,ZINPA0,ZOUT,ZOUT0,POA1,KF_FS) USE TPM_DISTR, ONLY: D_NUMP, D_MYMS, D_OFFSETS_GEMM1, D_OFFSETS_GEMM2 USE HICBLAS_MOD, ONLY: HIP_GEMM_BATCHED, HIP_DGEMM_BATCHED_OVERLOAD, & & HIP_DGEMM_GROUPED_OVERLOAD, HIP_SGEMM_GROUPED_OVERLOAD - USE MPL_MODULE, ONLY: MPL_BARRIER + USE MPL_MODULE, ONLY: MPL_BARRIER,MPL_ALL_MS_COMM USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_INT @@ -164,7 +164,7 @@ SUBROUTINE LEDIR(ALLOCATOR,ZINPS,ZINPA,ZINPS0,ZINPA0,ZOUT,ZOUT0,POA1,KF_FS) !$ACC WAIT(1) #endif CALL GSTATS(430,0) - CALL MPL_BARRIER(CDSTRING='') + CALL MPL_BARRIER(MPL_ALL_MS_COMM,CDSTRING='') CALL GSTATS(430,1) ENDIF CALL GSTATS(414,0) @@ -235,7 +235,7 @@ SUBROUTINE LEDIR(ALLOCATOR,ZINPS,ZINPA,ZINPS0,ZINPA0,ZOUT,ZOUT0,POA1,KF_FS) !$ACC WAIT(1) #endif CALL GSTATS(434,0) - CALL MPL_BARRIER(CDSTRING='') + CALL MPL_BARRIER(MPL_ALL_MS_COMM,CDSTRING='') CALL GSTATS(434,1) ENDIF CALL GSTATS(414,1) @@ -278,7 +278,7 @@ SUBROUTINE LEDIR(ALLOCATOR,ZINPS,ZINPA,ZINPS0,ZINPA0,ZOUT,ZOUT0,POA1,KF_FS) !$ACC WAIT(1) #endif CALL GSTATS(430,0) - CALL MPL_BARRIER(CDSTRING='') + CALL MPL_BARRIER(MPL_ALL_MS_COMM,CDSTRING='') CALL GSTATS(430,1) ENDIF CALL GSTATS(414,0) @@ -350,7 +350,7 @@ SUBROUTINE LEDIR(ALLOCATOR,ZINPS,ZINPA,ZINPS0,ZINPA0,ZOUT,ZOUT0,POA1,KF_FS) !$ACC WAIT(1) #endif CALL GSTATS(434,0) - CALL MPL_BARRIER(CDSTRING='') + CALL MPL_BARRIER(MPL_ALL_MS_COMM,CDSTRING='') CALL GSTATS(434,1) ENDIF CALL GSTATS(414,1) diff --git a/src/trans/gpu/internal/leinv_mod.F90 b/src/trans/gpu/internal/leinv_mod.F90 index ab8d133ac..3e3397bde 100755 --- a/src/trans/gpu/internal/leinv_mod.F90 +++ b/src/trans/gpu/internal/leinv_mod.F90 @@ -102,7 +102,7 @@ SUBROUTINE LEINV(ALLOCATOR,PIA,ZINP,ZINP0,ZOUTS,ZOUTA,ZOUTS0,ZOUTA0,KF_LEG) USE HICBLAS_MOD, ONLY: HIP_GEMM_BATCHED, HIP_DGEMM_BATCHED_OVERLOAD, & & HIP_DGEMM_GROUPED_OVERLOAD, HIP_SGEMM_GROUPED_OVERLOAD USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_INT - USE MPL_MODULE, ONLY: MPL_BARRIER + USE MPL_MODULE, ONLY: MPL_BARRIER,MPL_ALL_MS_COMM USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX #ifdef TRANS_SINGLE #define HIP_GEMM HIP_SGEMM_GROUPED_OVERLOAD @@ -219,7 +219,7 @@ SUBROUTINE LEINV(ALLOCATOR,PIA,ZINP,ZINP0,ZOUTS,ZOUTA,ZOUTS0,ZOUTA0,KF_LEG) !$ACC WAIT(1) #endif CALL GSTATS(440,0) - CALL MPL_BARRIER(CDSTRING='') + CALL MPL_BARRIER(MPL_ALL_MS_COMM,CDSTRING='') CALL GSTATS(440,1) ENDIF CALL GSTATS(424,0) @@ -291,7 +291,7 @@ SUBROUTINE LEINV(ALLOCATOR,PIA,ZINP,ZINP0,ZOUTS,ZOUTA,ZOUTS0,ZOUTA0,KF_LEG) !$ACC WAIT(1) #endif CALL GSTATS(444,0) - CALL MPL_BARRIER(CDSTRING='') + CALL MPL_BARRIER(MPL_ALL_MS_COMM,CDSTRING='') CALL GSTATS(444,1) ENDIF CALL GSTATS(424,1) @@ -356,7 +356,7 @@ SUBROUTINE LEINV(ALLOCATOR,PIA,ZINP,ZINP0,ZOUTS,ZOUTA,ZOUTS0,ZOUTA0,KF_LEG) !$ACC WAIT(1) #endif CALL GSTATS(440,0) - CALL MPL_BARRIER(CDSTRING='') + CALL MPL_BARRIER(MPL_ALL_MS_COMM,CDSTRING='') CALL GSTATS(440,1) ENDIF CALL GSTATS(424,0) @@ -424,7 +424,7 @@ SUBROUTINE LEINV(ALLOCATOR,PIA,ZINP,ZINP0,ZOUTS,ZOUTA,ZOUTS0,ZOUTA0,KF_LEG) !$ACC WAIT(1) #endif CALL GSTATS(444,0) - CALL MPL_BARRIER(CDSTRING='') + CALL MPL_BARRIER(MPL_ALL_MS_COMM,CDSTRING='') CALL GSTATS(444,1) ENDIF CALL GSTATS(424,1) diff --git a/src/trans/gpu/internal/ltdir_mod.F90 b/src/trans/gpu/internal/ltdir_mod.F90 index 918c887a9..8a3976165 100755 --- a/src/trans/gpu/internal/ltdir_mod.F90 +++ b/src/trans/gpu/internal/ltdir_mod.F90 @@ -72,7 +72,7 @@ SUBROUTINE LTDIR(ALLOCATOR,HLTDIR,ZINPS,ZINPA,ZINPS0,ZINPA0,KF_FS,KF_UV,KF_SCALA USE UVTVD_MOD, ONLY: UVTVD USE UPDSP_MOD, ONLY: UPDSP USE UPDSPB_MOD, ONLY: UPDSPB - USE MPL_MODULE, ONLY: MPL_BARRIER + USE MPL_MODULE, ONLY: MPL_BARRIER,MPL_ALL_MS_COMM USE TPM_GEN, ONLY: LSYNC_TRANS USE TPM_TRANS, ONLY: NF_SC2, NF_SC3A, NF_SC3B USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX @@ -263,7 +263,7 @@ SUBROUTINE LTDIR(ALLOCATOR,HLTDIR,ZINPS,ZINPA,ZINPS0,ZINPA0,KF_FS,KF_UV,KF_SCALA IF (LSYNC_TRANS) THEN CALL GSTATS(430,0) - CALL MPL_BARRIER(CDSTRING='') + CALL MPL_BARRIER(MPL_ALL_MS_COMM,CDSTRING='') CALL GSTATS(430,1) ENDIF CALL GSTATS(412,0) @@ -278,7 +278,7 @@ SUBROUTINE LTDIR(ALLOCATOR,HLTDIR,ZINPS,ZINPA,ZINPS0,ZINPA0,KF_FS,KF_UV,KF_SCALA #endif IF (LSYNC_TRANS) THEN CALL GSTATS(432,0) - CALL MPL_BARRIER(CDSTRING='') + CALL MPL_BARRIER(MPL_ALL_MS_COMM,CDSTRING='') CALL GSTATS(432,1) ENDIF CALL GSTATS(412,1) diff --git a/src/trans/gpu/internal/ltinv_mod.F90 b/src/trans/gpu/internal/ltinv_mod.F90 index 54c1a19b1..4fbfcf437 100755 --- a/src/trans/gpu/internal/ltinv_mod.F90 +++ b/src/trans/gpu/internal/ltinv_mod.F90 @@ -114,7 +114,7 @@ SUBROUTINE LTINV(ALLOCATOR,HLTINV,KF_UV,KF_SCALARS,& USE LEINV_MOD, ONLY: LEINV_STRIDES, LEINV USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS USE TPM_FIELDS, ONLY: F,ZEPSNM - USE MPL_MODULE, ONLY: MPL_BARRIER + USE MPL_MODULE, ONLY: MPL_BARRIER,MPL_ALL_MS_COMM USE TPM_GEN, ONLY: LSYNC_TRANS USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX USE ISO_C_BINDING, ONLY: C_SIZE_T, C_LOC @@ -313,7 +313,7 @@ SUBROUTINE LTINV(ALLOCATOR,HLTINV,KF_UV,KF_SCALARS,& IF (LSYNC_TRANS) THEN CALL GSTATS(440,0) - CALL MPL_BARRIER(CDSTRING='') + CALL MPL_BARRIER(MPL_ALL_MS_COMM,CDSTRING='') CALL GSTATS(440,1) ENDIF CALL GSTATS(422,0) @@ -328,7 +328,7 @@ SUBROUTINE LTINV(ALLOCATOR,HLTINV,KF_UV,KF_SCALARS,& #endif IF (LSYNC_TRANS) THEN CALL GSTATS(442,0) - CALL MPL_BARRIER(CDSTRING='') + CALL MPL_BARRIER(MPL_ALL_MS_COMM,CDSTRING='') CALL GSTATS(442,1) ENDIF CALL GSTATS(422,1) diff --git a/src/trans/gpu/internal/trltom_mod.F90 b/src/trans/gpu/internal/trltom_mod.F90 index 2c689763b..ab3556138 100755 --- a/src/trans/gpu/internal/trltom_mod.F90 +++ b/src/trans/gpu/internal/trltom_mod.F90 @@ -177,7 +177,7 @@ SUBROUTINE TRLTOM(ALLOCATOR,HTRLTOM,PFBUF_IN,PFBUF,KF_FS) IF (LSYNC_TRANS) THEN CALL GSTATS(430,0) - CALL MPL_BARRIER(CDSTRING='') + CALL MPL_BARRIER(MPL_ALL_MS_COMM,CDSTRING='') CALL GSTATS(430,1) ENDIF CALL GSTATS(411,0) @@ -210,7 +210,7 @@ SUBROUTINE TRLTOM(ALLOCATOR,HTRLTOM,PFBUF_IN,PFBUF,KF_FS) #endif IF (LSYNC_TRANS) THEN CALL GSTATS(431,0) - CALL MPL_BARRIER(CDSTRING='') + CALL MPL_BARRIER(MPL_ALL_MS_COMM,CDSTRING='') CALL GSTATS(431,1) ENDIF CALL GSTATS(411,1) diff --git a/src/trans/gpu/internal/trmtol_mod.F90 b/src/trans/gpu/internal/trmtol_mod.F90 index 98420900e..4fc67d556 100755 --- a/src/trans/gpu/internal/trmtol_mod.F90 +++ b/src/trans/gpu/internal/trmtol_mod.F90 @@ -172,7 +172,7 @@ SUBROUTINE TRMTOL(ALLOCATOR,HTRMTOL,PFBUF_IN,PFBUF,KF_LEG) IF (LSYNC_TRANS) THEN CALL GSTATS(440,0) - CALL MPL_BARRIER(CDSTRING='') + CALL MPL_BARRIER(MPL_ALL_MS_COMM,CDSTRING='') CALL GSTATS(440,1) ENDIF CALL GSTATS(421,0) @@ -207,7 +207,7 @@ SUBROUTINE TRMTOL(ALLOCATOR,HTRMTOL,PFBUF_IN,PFBUF,KF_LEG) #endif IF (LSYNC_TRANS) THEN CALL GSTATS(441,0) - CALL MPL_BARRIER(CDSTRING='') + CALL MPL_BARRIER(MPL_ALL_MS_COMM,CDSTRING='') CALL GSTATS(441,1) ENDIF CALL GSTATS(421,1) From bc375f08d3e39c13b7a74858a673cba67ef58c71 Mon Sep 17 00:00:00 2001 From: Lukas Mosimann Date: Tue, 3 Sep 2024 01:04:06 -0700 Subject: [PATCH 24/86] fix various corner cases --- src/trans/gpu/internal/dir_trans_ctl_mod.F90 | 12 +++++++----- src/trans/gpu/internal/inv_trans_ctl_mod.F90 | 14 ++++++++------ src/trans/gpu/internal/trgtol_mod.F90 | 10 ++++++---- src/trans/gpu/internal/trltog_mod.F90 | 12 ++++++------ 4 files changed, 27 insertions(+), 21 deletions(-) diff --git a/src/trans/gpu/internal/dir_trans_ctl_mod.F90 b/src/trans/gpu/internal/dir_trans_ctl_mod.F90 index ea2b4d873..af873d76d 100755 --- a/src/trans/gpu/internal/dir_trans_ctl_mod.F90 +++ b/src/trans/gpu/internal/dir_trans_ctl_mod.F90 @@ -145,11 +145,13 @@ SUBROUTINE DIR_TRANS_CTL(KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS,KF_UV,KF_SCALARS,& ! Prepare everything ALLOCATOR = MAKE_BUFFERED_ALLOCATOR() HTRGTOL = PREPARE_TRGTOL(ALLOCATOR,KF_GP,KF_FS) - HFTDIR = PREPARE_FTDIR() - HTRLTOM_PACK = PREPARE_TRLTOM_PACK(ALLOCATOR, KF_FS) - HTRLTOM = PREPARE_TRLTOM(ALLOCATOR, KF_FS) - HTRLTOM_UNPACK = PREPARE_TRLTOM_UNPACK(ALLOCATOR, KF_FS) - HLTDIR = PREPARE_LTDIR(ALLOCATOR, KF_FS, KF_UV) + IF (KF_FS > 0) THEN + HFTDIR = PREPARE_FTDIR() + HTRLTOM_PACK = PREPARE_TRLTOM_PACK(ALLOCATOR, KF_FS) + HTRLTOM = PREPARE_TRLTOM(ALLOCATOR, KF_FS) + HTRLTOM_UNPACK = PREPARE_TRLTOM_UNPACK(ALLOCATOR, KF_FS) + HLTDIR = PREPARE_LTDIR(ALLOCATOR, KF_FS, KF_UV) + ENDIF CALL INSTANTIATE_ALLOCATOR(ALLOCATOR, GROWING_ALLOCATION) diff --git a/src/trans/gpu/internal/inv_trans_ctl_mod.F90 b/src/trans/gpu/internal/inv_trans_ctl_mod.F90 index 3029b3f68..c928feb70 100644 --- a/src/trans/gpu/internal/inv_trans_ctl_mod.F90 +++ b/src/trans/gpu/internal/inv_trans_ctl_mod.F90 @@ -196,12 +196,14 @@ SUBROUTINE INV_TRANS_CTL(KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS,KF_OUT_LT,& IF (IF_FOURIER /= KF_FS) CALL ABORT_TRANS('Size mismatch: Wrong computation KF_FS') ALLOCATOR = MAKE_BUFFERED_ALLOCATOR() - HLTINV = PREPARE_LTINV(ALLOCATOR,KF_UV,KF_SCALARS,LVORGP,LDIVGP,LSCDERS) - HTRMTOL_PACK = PREPARE_TRMTOL_PACK(ALLOCATOR,IF_LEG) - HTRMTOL = PREPARE_TRMTOL(ALLOCATOR,IF_LEG) - HTRMTOL_UNPACK = PREPARE_TRMTOL_UNPACK(ALLOCATOR,IF_FOURIER) - HFSC = PREPARE_FSC(ALLOCATOR) - HFTINV = PREPARE_FTINV(ALLOCATOR) + IF (KF_FS > 0) THEN + HLTINV = PREPARE_LTINV(ALLOCATOR,KF_UV,KF_SCALARS,LVORGP,LDIVGP,LSCDERS) + HTRMTOL_PACK = PREPARE_TRMTOL_PACK(ALLOCATOR,IF_LEG) + HTRMTOL = PREPARE_TRMTOL(ALLOCATOR,IF_LEG) + HTRMTOL_UNPACK = PREPARE_TRMTOL_UNPACK(ALLOCATOR,IF_FOURIER) + HFSC = PREPARE_FSC(ALLOCATOR) + HFTINV = PREPARE_FTINV(ALLOCATOR) + ENDIF HTRLTOG = PREPARE_TRLTOG(ALLOCATOR,IF_FOURIER,KF_GP) CALL INSTANTIATE_ALLOCATOR(ALLOCATOR, GROWING_ALLOCATION) diff --git a/src/trans/gpu/internal/trgtol_mod.F90 b/src/trans/gpu/internal/trgtol_mod.F90 index 7e173bc95..ba30c301a 100755 --- a/src/trans/gpu/internal/trgtol_mod.F90 +++ b/src/trans/gpu/internal/trgtol_mod.F90 @@ -331,7 +331,8 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, #ifdef OMPGPU #endif #ifdef ACCGPU - !$ACC DATA COPYIN(IRECV_BUFR_TO_OUT,PGP_INDICES) PRESENT(PREEL_REAL) ASYNC(1) + !$ACC DATA COPYIN(IRECV_BUFR_TO_OUT) PRESENT(PREEL_REAL) IF (KF_FS > 0) ASYNC(1) + !$ACC DATA COPYIN(PGP_INDICES) ASYNC(1) #endif CALL GSTATS(1805,1) @@ -581,7 +582,7 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, #endif #else !! this is safe-but-slow fallback for running without GPU-aware MPI - !$ACC UPDATE HOST(ZCOMBUFS) + !$ACC UPDATE HOST(ZCOMBUFS) IF(ISEND_COUNT > 0) #endif ! Receive loop......................................................... DO INR=1,IRECV_COUNTS @@ -711,7 +712,7 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, #endif #else !! this is safe-but-slow fallback for running without GPU-aware MPI - !$ACC UPDATE DEVICE(ZCOMBUFR) + !$ACC UPDATE DEVICE(ZCOMBUFR) IF(IRECV_COUNTS > 0) #endif IF (LSYNC_TRANS) THEN CALL GSTATS(431,0) @@ -751,7 +752,8 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, #endif #ifdef ACCGPU !$ACC END DATA ! ZCOMBUFR - !$ACC END DATA ! IRECV_BUFR_TO_OUT,PGPINDICES + !$ACC END DATA ! IRECV_BUFR_TO_OUT + !$ACC END DATA ! PGPINDICES !$ACC END DATA !ZCOMBUFS (present) !$ACC END DATA !PGP3B !$ACC END DATA !PGP3A diff --git a/src/trans/gpu/internal/trltog_mod.F90 b/src/trans/gpu/internal/trltog_mod.F90 index f60aa55cc..4bb9cdd22 100755 --- a/src/trans/gpu/internal/trltog_mod.F90 +++ b/src/trans/gpu/internal/trltog_mod.F90 @@ -488,7 +488,7 @@ SUBROUTINE TRLTOG(ALLOCATOR,HTRLTOG,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, #ifdef OMPGPU #endif #ifdef ACCGPU - !$ACC DATA COPYIN(IIN_TO_SEND_BUFR,IGP_OFFSETS) ASYNC(1) + !$ACC DATA COPYIN(IGP_OFFSETS) ASYNC(1) #endif ACC_POINTERS_CNT = 0 @@ -523,7 +523,7 @@ SUBROUTINE TRLTOG(ALLOCATOR,HTRLTOG,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, !$ACC DATA IF(PRESENT(PGP3B)) PRESENT(PGP3B) ASYNC(1) ! Present until self contribution and packing are done - !$ACC DATA PRESENT(PREEL_REAL) + !$ACC DATA COPYIN(IIN_TO_SEND_BUFR) PRESENT(PREEL_REAL) IF(KF_FS > 0) #endif #ifdef OMPGPU #endif @@ -655,7 +655,7 @@ SUBROUTINE TRLTOG(ALLOCATOR,HTRLTOG,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, #ifdef OMPGPU #endif #ifdef ACCGPU - !$ACC DATA PRESENT(ZCOMBUFS) + !$ACC DATA PRESENT(ZCOMBUFS) IF(ISEND_COUNTS > 0) #endif CALL GSTATS(1605,0) DO INS=1,ISEND_COUNTS @@ -707,7 +707,7 @@ SUBROUTINE TRLTOG(ALLOCATOR,HTRLTOG,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, #endif #else !! this is safe-but-slow fallback for running without GPU-aware MPI - !$ACC UPDATE HOST(ZCOMBUFS) + !$ACC UPDATE HOST(ZCOMBUFS) IF(ISEND_COUNTS > 0) #endif DO INR=1,IRECV_COUNTS IR=IR+1 @@ -750,7 +750,7 @@ SUBROUTINE TRLTOG(ALLOCATOR,HTRLTOG,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, #endif #else !! this is safe-but-slow fallback for running without GPU-aware MPI - !$ACC UPDATE DEVICE(ZCOMBUFR) + !$ACC UPDATE DEVICE(ZCOMBUFR) IF(IRECV_COUNTS > 0) #endif IF (LSYNC_TRANS) THEN @@ -763,7 +763,7 @@ SUBROUTINE TRLTOG(ALLOCATOR,HTRLTOG,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, #ifdef OMPGPU #endif #ifdef ACCGPU - !$ACC DATA PRESENT(ZCOMBUFR) + !$ACC DATA PRESENT(ZCOMBUFR) IF(IRECV_COUNTS > 0) #endif CALL GSTATS(805,1) From 0703b5aefcf2793d33d6795bf0b46f05a37b7f10 Mon Sep 17 00:00:00 2001 From: Lukas Mosimann Date: Tue, 3 Sep 2024 01:13:58 -0700 Subject: [PATCH 25/86] add missing asyncs --- src/trans/gpu/internal/trgtol_mod.F90 | 2 +- src/trans/gpu/internal/trltog_mod.F90 | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/trans/gpu/internal/trgtol_mod.F90 b/src/trans/gpu/internal/trgtol_mod.F90 index ba30c301a..e9375fb77 100755 --- a/src/trans/gpu/internal/trgtol_mod.F90 +++ b/src/trans/gpu/internal/trgtol_mod.F90 @@ -569,7 +569,7 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, #ifdef OMPGPU #endif #ifdef ACCGPU - !$ACC DATA IF(IRECV_COUNTS > 0) PRESENT(ZCOMBUFR) + !$ACC DATA IF(IRECV_COUNTS > 0) PRESENT(ZCOMBUFR) ASYNC(1) #endif IR=0 diff --git a/src/trans/gpu/internal/trltog_mod.F90 b/src/trans/gpu/internal/trltog_mod.F90 index 4bb9cdd22..1c0a1990e 100755 --- a/src/trans/gpu/internal/trltog_mod.F90 +++ b/src/trans/gpu/internal/trltog_mod.F90 @@ -523,7 +523,7 @@ SUBROUTINE TRLTOG(ALLOCATOR,HTRLTOG,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, !$ACC DATA IF(PRESENT(PGP3B)) PRESENT(PGP3B) ASYNC(1) ! Present until self contribution and packing are done - !$ACC DATA COPYIN(IIN_TO_SEND_BUFR) PRESENT(PREEL_REAL) IF(KF_FS > 0) + !$ACC DATA COPYIN(IIN_TO_SEND_BUFR) PRESENT(PREEL_REAL) IF(KF_FS > 0) ASYNC(1) #endif #ifdef OMPGPU #endif @@ -655,7 +655,7 @@ SUBROUTINE TRLTOG(ALLOCATOR,HTRLTOG,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, #ifdef OMPGPU #endif #ifdef ACCGPU - !$ACC DATA PRESENT(ZCOMBUFS) IF(ISEND_COUNTS > 0) + !$ACC DATA PRESENT(ZCOMBUFS) IF(ISEND_COUNTS > 0) ASYNC(1) #endif CALL GSTATS(1605,0) DO INS=1,ISEND_COUNTS @@ -763,7 +763,7 @@ SUBROUTINE TRLTOG(ALLOCATOR,HTRLTOG,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, #ifdef OMPGPU #endif #ifdef ACCGPU - !$ACC DATA PRESENT(ZCOMBUFR) IF(IRECV_COUNTS > 0) + !$ACC DATA PRESENT(ZCOMBUFR) IF(IRECV_COUNTS > 0) ASYNC(1) #endif CALL GSTATS(805,1) From 3605044f4e6e1ebbb79350f8e725dbc81682d3ce Mon Sep 17 00:00:00 2001 From: Sam Hatfield Date: Thu, 12 Sep 2024 11:57:56 +0100 Subject: [PATCH 26/86] Fix calculation of zstepavg We collect `iters+2` statistics, but use `iters` to calculate the average. We should use `iters+2` to calculate the average. --- src/programs/ectrans-benchmark.F90 | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/programs/ectrans-benchmark.F90 b/src/programs/ectrans-benchmark.F90 index b1d393d2d..962e5a0d9 100644 --- a/src/programs/ectrans-benchmark.F90 +++ b/src/programs/ectrans-benchmark.F90 @@ -876,16 +876,16 @@ program ectrans_benchmark call mpl_allreduce(ztstepmin2, 'min', ldreprod=.false.) endif -ztstepavg = (ztstepavg/real(nproc,jprb))/real(iters,jprd) +ztstepavg = (ztstepavg/real(nproc,jprb))/real(iters+2,jprd) ztloop = ztloop/real(nproc,jprd) ztstep(:) = ztstep(:)/real(nproc,jprd) ztstepmed = get_median(ztstep) -ztstepavg1 = (ztstepavg1/real(nproc,jprb))/real(iters,jprd) +ztstepavg1 = (ztstepavg1/real(nproc,jprb))/real(iters+2,jprd) ztstep1(:) = ztstep1(:)/real(nproc,jprd) ztstepmed1 = get_median(ztstep1) -ztstepavg2 = (ztstepavg2/real(nproc,jprb))/real(iters,jprd) +ztstepavg2 = (ztstepavg2/real(nproc,jprb))/real(iters+2,jprd) ztstep2(:) = ztstep2(:)/real(nproc,jprd) ztstepmed2 = get_median(ztstep2) From 280d6c2c156a1231b49f1886e800710d9f758adf Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Thu, 29 Aug 2024 08:16:37 +0000 Subject: [PATCH 27/86] cleaner link of fiat with ectrans cpu --- src/trans/cpu/CMakeLists.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/trans/cpu/CMakeLists.txt b/src/trans/cpu/CMakeLists.txt index d0d137222..b06599d32 100644 --- a/src/trans/cpu/CMakeLists.txt +++ b/src/trans/cpu/CMakeLists.txt @@ -225,7 +225,7 @@ foreach( prec dp sp ) PUBLIC_INCLUDES $ $ $ - PUBLIC_LIBS ectrans_common ectrans_${prec}_includes + PUBLIC_LIBS fiat ectrans_common ectrans_${prec}_includes ) ectrans_target_fortran_module_directory( @@ -233,7 +233,6 @@ foreach( prec dp sp ) MODULE_DIRECTORY ${CMAKE_BINARY_DIR}/module/ectrans_${prec} INSTALL_DIRECTORY module/ectrans_${prec} ) - target_link_libraries( ectrans_${prec} PUBLIC fiat) set( FFTW_LINK PRIVATE ) if( LAPACK_LIBRARIES MATCHES "mkl" AND NOT FFTW_LIBRARIES MATCHES "mkl" ) From b794b0cd8880234083ca6205ad646a04640d94c1 Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Wed, 28 Aug 2024 12:21:12 +0000 Subject: [PATCH 28/86] Move common files parallel to cpu/gpu --- src/trans/CMakeLists.txt | 2 + src/trans/common/CMakeLists.txt | 75 +++++++++++++++++ .../{cpu => common}/external/get_current.F90 | 0 .../{cpu => common}/external/setup_trans0.F90 | 1 - .../internal/abort_trans_mod.F90 | 0 .../{cpu => common}/internal/cpledn_mod.F90 | 0 .../internal}/ectrans_blas_mod.F90 | 0 .../internal/ectrans_version_mod.F90.in | 0 .../internal/eq_regions_mod.F90 | 0 .../internal/field_split_mod.F90 | 14 ++-- .../{cpu => common}/internal/gawl_mod.F90 | 0 .../internal/myrecvset_mod.F90 | 0 .../internal/mysendset_mod.F90 | 0 .../{cpu => common}/internal/pe2set_mod.F90 | 0 .../{cpu => common}/internal/set2pe_mod.F90 | 0 .../{cpu => common}/internal/sugaw_mod.F90 | 0 .../internal/sump_trans0_mod.F90 | 0 .../internal/sump_trans_preleg_mod.F90 | 0 .../{cpu => common}/internal/sumplat_mod.F90 | 0 .../{cpu => common}/internal/sumplatb_mod.F90 | 0 .../internal/sumplatbeq_mod.F90 | 0 .../{cpu => common}/internal/sumplatf_mod.F90 | 0 .../{cpu => common}/internal/supol_mod.F90 | 0 .../{cpu => common}/internal/supolf_mod.F90 | 0 .../{cpu => common}/internal/sustaonl_mod.F90 | 0 .../{cpu => common}/internal/suwavedi_mod.F90 | 0 .../internal/tpm_constants.F90 | 0 .../{cpu => common}/internal/tpm_ctl.F90 | 3 +- .../{cpu => common}/internal/tpm_dim.F90 | 9 ++- .../{cpu => common}/internal/tpm_distr.F90 | 27 +++++++ .../{cpu => common}/internal/tpm_gen.F90 | 0 .../{cpu => common}/internal/tpm_geometry.F90 | 7 ++ .../{cpu => common}/internal/tpm_pol.F90 | 2 +- .../{cpu => common}/sharedmem/sharedmem.c | 0 .../sharedmem/sharedmem_mod.F90 | 0 src/trans/cpu/CMakeLists.txt | 81 ++----------------- src/trans/cpu/external/setup_trans.F90 | 2 +- src/trans/cpu/sedrenames.txt | 1 - src/trans/gpu/external/setup_trans.F90 | 2 +- 39 files changed, 137 insertions(+), 89 deletions(-) create mode 100644 src/trans/common/CMakeLists.txt rename src/trans/{cpu => common}/external/get_current.F90 (100%) rename src/trans/{cpu => common}/external/setup_trans0.F90 (99%) rename src/trans/{cpu => common}/internal/abort_trans_mod.F90 (100%) rename src/trans/{cpu => common}/internal/cpledn_mod.F90 (100%) rename src/trans/{cpu/algor => common/internal}/ectrans_blas_mod.F90 (100%) rename src/trans/{cpu => common}/internal/ectrans_version_mod.F90.in (100%) rename src/trans/{cpu => common}/internal/eq_regions_mod.F90 (100%) rename src/trans/{cpu => common}/internal/field_split_mod.F90 (91%) mode change 100644 => 100755 rename src/trans/{cpu => common}/internal/gawl_mod.F90 (100%) rename src/trans/{cpu => common}/internal/myrecvset_mod.F90 (100%) rename src/trans/{cpu => common}/internal/mysendset_mod.F90 (100%) rename src/trans/{cpu => common}/internal/pe2set_mod.F90 (100%) rename src/trans/{cpu => common}/internal/set2pe_mod.F90 (100%) rename src/trans/{cpu => common}/internal/sugaw_mod.F90 (100%) rename src/trans/{cpu => common}/internal/sump_trans0_mod.F90 (100%) rename src/trans/{cpu => common}/internal/sump_trans_preleg_mod.F90 (100%) rename src/trans/{cpu => common}/internal/sumplat_mod.F90 (100%) rename src/trans/{cpu => common}/internal/sumplatb_mod.F90 (100%) rename src/trans/{cpu => common}/internal/sumplatbeq_mod.F90 (100%) rename src/trans/{cpu => common}/internal/sumplatf_mod.F90 (100%) rename src/trans/{cpu => common}/internal/supol_mod.F90 (100%) rename src/trans/{cpu => common}/internal/supolf_mod.F90 (100%) rename src/trans/{cpu => common}/internal/sustaonl_mod.F90 (100%) rename src/trans/{cpu => common}/internal/suwavedi_mod.F90 (100%) rename src/trans/{cpu => common}/internal/tpm_constants.F90 (100%) rename src/trans/{cpu => common}/internal/tpm_ctl.F90 (89%) mode change 100644 => 100755 rename src/trans/{cpu => common}/internal/tpm_dim.F90 (82%) mode change 100644 => 100755 rename src/trans/{cpu => common}/internal/tpm_distr.F90 (85%) mode change 100644 => 100755 rename src/trans/{cpu => common}/internal/tpm_gen.F90 (100%) rename src/trans/{cpu => common}/internal/tpm_geometry.F90 (79%) rename src/trans/{cpu => common}/internal/tpm_pol.F90 (98%) mode change 100644 => 100755 rename src/trans/{cpu => common}/sharedmem/sharedmem.c (100%) rename src/trans/{cpu => common}/sharedmem/sharedmem_mod.F90 (100%) diff --git a/src/trans/CMakeLists.txt b/src/trans/CMakeLists.txt index 87db12306..c5c266758 100644 --- a/src/trans/CMakeLists.txt +++ b/src/trans/CMakeLists.txt @@ -6,6 +6,8 @@ # granted to it by virtue of its status as an intergovernmental organisation # nor does it submit to any jurisdiction. +add_subdirectory( common ) + if( HAVE_CPU) add_subdirectory( cpu ) endif() diff --git a/src/trans/common/CMakeLists.txt b/src/trans/common/CMakeLists.txt new file mode 100644 index 000000000..4ccf6d898 --- /dev/null +++ b/src/trans/common/CMakeLists.txt @@ -0,0 +1,75 @@ +# (C) Copyright 2020- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +# Preprocess module file containing version information +configure_file( internal/ectrans_version_mod.F90.in internal/ectrans_version_mod.F90 ) + +## Sources which are precision independent can go into a common library +list( APPEND ectrans_common_src + sharedmem/sharedmem_mod.F90 + sharedmem/sharedmem.c + internal/ectrans_blas_mod.F90 + internal/abort_trans_mod.F90 + internal/cpledn_mod.F90 + internal/field_split_mod.F90 + internal/gawl_mod.F90 + internal/sugaw_mod.F90 + internal/supol_mod.F90 + internal/supolf_mod.F90 + internal/tpm_constants.F90 + internal/tpm_ctl.F90 + internal/tpm_dim.F90 + internal/tpm_gen.F90 + internal/tpm_geometry.F90 + internal/tpm_pol.F90 + internal/tpm_distr.F90 + internal/pe2set_mod.F90 + internal/set2pe_mod.F90 + internal/eq_regions_mod.F90 + internal/sump_trans0_mod.F90 + internal/sustaonl_mod.F90 + internal/sumplat_mod.F90 + internal/sumplatb_mod.F90 + internal/sumplatbeq_mod.F90 + internal/sumplatf_mod.F90 + internal/mysendset_mod.F90 + internal/myrecvset_mod.F90 + internal/suwavedi_mod.F90 + internal/sump_trans_preleg_mod.F90 + external/get_current.F90 + external/setup_trans0.F90 + ${CMAKE_CURRENT_BINARY_DIR}/internal/ectrans_version_mod.F90 +) +list( APPEND ectrans_common_includes + ectrans/get_current.h + ectrans/setup_trans0.h +) + +ecbuild_add_library( + TARGET ectrans_common + LINKER_LANGUAGE Fortran + SOURCES ${ectrans_common_src} + PUBLIC_LIBS fiat + PRIVATE_LIBS ${LAPACK_LIBRARIES} + PUBLIC_INCLUDES $ + $ + $ + $ +) +ectrans_target_fortran_module_directory( + TARGET ectrans_common + MODULE_DIRECTORY ${CMAKE_BINARY_DIR}/module/ectrans + INSTALL_DIRECTORY module/ectrans +) + +if( HAVE_OMP ) + ecbuild_debug("target_link_libraries( ectrans_common PRIVATE OpenMP::OpenMP_Fortran )") + target_link_libraries( ectrans_common PRIVATE OpenMP::OpenMP_Fortran ) +endif() + +set( ectrans_common_includes ${ectrans_common_includes} PARENT_SCOPE ) \ No newline at end of file diff --git a/src/trans/cpu/external/get_current.F90 b/src/trans/common/external/get_current.F90 similarity index 100% rename from src/trans/cpu/external/get_current.F90 rename to src/trans/common/external/get_current.F90 diff --git a/src/trans/cpu/external/setup_trans0.F90 b/src/trans/common/external/setup_trans0.F90 similarity index 99% rename from src/trans/cpu/external/setup_trans0.F90 rename to src/trans/common/external/setup_trans0.F90 index 1861b2dce..cdd581f1d 100644 --- a/src/trans/cpu/external/setup_trans0.F90 +++ b/src/trans/common/external/setup_trans0.F90 @@ -145,7 +145,6 @@ SUBROUTINE SETUP_TRANS0(KOUT,KERR,KPRINTLEV,KMAX_RESOL,KPROMATR,& WRITE(NOUT,'(A)') WRITE(NOUT,'(A)') "ecTrans at version: " // ECTRANS_VERSION_STR() WRITE(NOUT,'(A)') "commit: " // ECTRANS_GIT_SHA1() -WRITE(NOUT,'(A)') "CPU version" WRITE(NOUT,'(A)') LLP1 = NPRINTLEV>0 diff --git a/src/trans/cpu/internal/abort_trans_mod.F90 b/src/trans/common/internal/abort_trans_mod.F90 similarity index 100% rename from src/trans/cpu/internal/abort_trans_mod.F90 rename to src/trans/common/internal/abort_trans_mod.F90 diff --git a/src/trans/cpu/internal/cpledn_mod.F90 b/src/trans/common/internal/cpledn_mod.F90 similarity index 100% rename from src/trans/cpu/internal/cpledn_mod.F90 rename to src/trans/common/internal/cpledn_mod.F90 diff --git a/src/trans/cpu/algor/ectrans_blas_mod.F90 b/src/trans/common/internal/ectrans_blas_mod.F90 similarity index 100% rename from src/trans/cpu/algor/ectrans_blas_mod.F90 rename to src/trans/common/internal/ectrans_blas_mod.F90 diff --git a/src/trans/cpu/internal/ectrans_version_mod.F90.in b/src/trans/common/internal/ectrans_version_mod.F90.in similarity index 100% rename from src/trans/cpu/internal/ectrans_version_mod.F90.in rename to src/trans/common/internal/ectrans_version_mod.F90.in diff --git a/src/trans/cpu/internal/eq_regions_mod.F90 b/src/trans/common/internal/eq_regions_mod.F90 similarity index 100% rename from src/trans/cpu/internal/eq_regions_mod.F90 rename to src/trans/common/internal/eq_regions_mod.F90 diff --git a/src/trans/cpu/internal/field_split_mod.F90 b/src/trans/common/internal/field_split_mod.F90 old mode 100644 new mode 100755 similarity index 91% rename from src/trans/cpu/internal/field_split_mod.F90 rename to src/trans/common/internal/field_split_mod.F90 index 41773df10..d5dbf8589 --- a/src/trans/cpu/internal/field_split_mod.F90 +++ b/src/trans/common/internal/field_split_mod.F90 @@ -11,8 +11,8 @@ MODULE FIELD_SPLIT_MOD CONTAINS SUBROUTINE FIELD_SPLIT(KBLK,KF_GP,KKF_UV_G,KVSETUV,KVSETSC,& - & KSTUV_G,KENUV_G,KF_UV_G,KSTSC_G,KENSC_G,KF_SCALARS_G,& - & KSTUV,KENUV,KF_UV,KSTSC,KENSC,KF_SCALARS) + & KSTUV_G,KENUV_G,KF_UV_G,KSTSC_G,KENSC_G,KF_SCALARS_G,& + & KSTUV,KENUV,KF_UV,KSTSC,KENSC,KF_SCALARS) !**** *FIELD_SPLIT* - Split fields @@ -58,18 +58,16 @@ SUBROUTINE FIELD_SPLIT(KBLK,KF_GP,KKF_UV_G,KVSETUV,KVSETSC,& ! Original : 01-01-03 ! ------------------------------------------------------------------ -USE PARKIND1 ,ONLY : JPIM - -USE TPM_GEN ,ONLY : NPROMATR -!USE TPM_TRANS -USE TPM_DISTR ,ONLY : MYSETV, NPRTRV +USE EC_PARKIND,ONLY: JPIM +USE TPM_GEN, ONLY: NPROMATR +USE TPM_DISTR, ONLY: MYSETV, NPRTRV ! IMPLICIT NONE ! Declaration of arguments -INTEGER(KIND=JPIM),INTENT(IN) :: KBLK,KF_GP,KKF_UV_G +INTEGER(KIND=JPIM), INTENT(IN) :: KBLK,KF_GP,KKF_UV_G INTEGER(KIND=JPIM), INTENT(IN) :: KVSETUV(:),KVSETSC(:) INTEGER(KIND=JPIM), INTENT(OUT) :: KSTUV_G,KENUV_G,KF_UV_G,KSTSC_G,KENSC_G,KF_SCALARS_G INTEGER(KIND=JPIM), INTENT(OUT) :: KSTUV,KENUV,KF_UV,KSTSC,KENSC,KF_SCALARS diff --git a/src/trans/cpu/internal/gawl_mod.F90 b/src/trans/common/internal/gawl_mod.F90 similarity index 100% rename from src/trans/cpu/internal/gawl_mod.F90 rename to src/trans/common/internal/gawl_mod.F90 diff --git a/src/trans/cpu/internal/myrecvset_mod.F90 b/src/trans/common/internal/myrecvset_mod.F90 similarity index 100% rename from src/trans/cpu/internal/myrecvset_mod.F90 rename to src/trans/common/internal/myrecvset_mod.F90 diff --git a/src/trans/cpu/internal/mysendset_mod.F90 b/src/trans/common/internal/mysendset_mod.F90 similarity index 100% rename from src/trans/cpu/internal/mysendset_mod.F90 rename to src/trans/common/internal/mysendset_mod.F90 diff --git a/src/trans/cpu/internal/pe2set_mod.F90 b/src/trans/common/internal/pe2set_mod.F90 similarity index 100% rename from src/trans/cpu/internal/pe2set_mod.F90 rename to src/trans/common/internal/pe2set_mod.F90 diff --git a/src/trans/cpu/internal/set2pe_mod.F90 b/src/trans/common/internal/set2pe_mod.F90 similarity index 100% rename from src/trans/cpu/internal/set2pe_mod.F90 rename to src/trans/common/internal/set2pe_mod.F90 diff --git a/src/trans/cpu/internal/sugaw_mod.F90 b/src/trans/common/internal/sugaw_mod.F90 similarity index 100% rename from src/trans/cpu/internal/sugaw_mod.F90 rename to src/trans/common/internal/sugaw_mod.F90 diff --git a/src/trans/cpu/internal/sump_trans0_mod.F90 b/src/trans/common/internal/sump_trans0_mod.F90 similarity index 100% rename from src/trans/cpu/internal/sump_trans0_mod.F90 rename to src/trans/common/internal/sump_trans0_mod.F90 diff --git a/src/trans/cpu/internal/sump_trans_preleg_mod.F90 b/src/trans/common/internal/sump_trans_preleg_mod.F90 similarity index 100% rename from src/trans/cpu/internal/sump_trans_preleg_mod.F90 rename to src/trans/common/internal/sump_trans_preleg_mod.F90 diff --git a/src/trans/cpu/internal/sumplat_mod.F90 b/src/trans/common/internal/sumplat_mod.F90 similarity index 100% rename from src/trans/cpu/internal/sumplat_mod.F90 rename to src/trans/common/internal/sumplat_mod.F90 diff --git a/src/trans/cpu/internal/sumplatb_mod.F90 b/src/trans/common/internal/sumplatb_mod.F90 similarity index 100% rename from src/trans/cpu/internal/sumplatb_mod.F90 rename to src/trans/common/internal/sumplatb_mod.F90 diff --git a/src/trans/cpu/internal/sumplatbeq_mod.F90 b/src/trans/common/internal/sumplatbeq_mod.F90 similarity index 100% rename from src/trans/cpu/internal/sumplatbeq_mod.F90 rename to src/trans/common/internal/sumplatbeq_mod.F90 diff --git a/src/trans/cpu/internal/sumplatf_mod.F90 b/src/trans/common/internal/sumplatf_mod.F90 similarity index 100% rename from src/trans/cpu/internal/sumplatf_mod.F90 rename to src/trans/common/internal/sumplatf_mod.F90 diff --git a/src/trans/cpu/internal/supol_mod.F90 b/src/trans/common/internal/supol_mod.F90 similarity index 100% rename from src/trans/cpu/internal/supol_mod.F90 rename to src/trans/common/internal/supol_mod.F90 diff --git a/src/trans/cpu/internal/supolf_mod.F90 b/src/trans/common/internal/supolf_mod.F90 similarity index 100% rename from src/trans/cpu/internal/supolf_mod.F90 rename to src/trans/common/internal/supolf_mod.F90 diff --git a/src/trans/cpu/internal/sustaonl_mod.F90 b/src/trans/common/internal/sustaonl_mod.F90 similarity index 100% rename from src/trans/cpu/internal/sustaonl_mod.F90 rename to src/trans/common/internal/sustaonl_mod.F90 diff --git a/src/trans/cpu/internal/suwavedi_mod.F90 b/src/trans/common/internal/suwavedi_mod.F90 similarity index 100% rename from src/trans/cpu/internal/suwavedi_mod.F90 rename to src/trans/common/internal/suwavedi_mod.F90 diff --git a/src/trans/cpu/internal/tpm_constants.F90 b/src/trans/common/internal/tpm_constants.F90 similarity index 100% rename from src/trans/cpu/internal/tpm_constants.F90 rename to src/trans/common/internal/tpm_constants.F90 diff --git a/src/trans/cpu/internal/tpm_ctl.F90 b/src/trans/common/internal/tpm_ctl.F90 old mode 100644 new mode 100755 similarity index 89% rename from src/trans/cpu/internal/tpm_ctl.F90 rename to src/trans/common/internal/tpm_ctl.F90 index b7be06f50..6f218ab82 --- a/src/trans/cpu/internal/tpm_ctl.F90 +++ b/src/trans/common/internal/tpm_ctl.F90 @@ -10,8 +10,7 @@ MODULE TPM_CTL -USE, INTRINSIC :: iso_c_binding, ONLY: C_PTR, C_NULL_PTR -USE SHAREDMEM_MOD ,ONLY : SHAREDMEM +USE SHAREDMEM_MOD, ONLY: SHAREDMEM IMPLICIT NONE SAVE diff --git a/src/trans/cpu/internal/tpm_dim.F90 b/src/trans/common/internal/tpm_dim.F90 old mode 100644 new mode 100755 similarity index 82% rename from src/trans/cpu/internal/tpm_dim.F90 rename to src/trans/common/internal/tpm_dim.F90 index 3f3ca3c42..236d08d0d --- a/src/trans/cpu/internal/tpm_dim.F90 +++ b/src/trans/common/internal/tpm_dim.F90 @@ -1,5 +1,6 @@ ! (C) Copyright 2000- ECMWF. ! (C) Copyright 2000- Meteo-France. +! (C) Copyright 2022- NVIDIA. ! ! This software is licensed under the terms of the Apache Licence Version 2.0 ! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. @@ -12,7 +13,7 @@ MODULE TPM_DIM ! Module for dimensions. -USE EC_PARKIND ,ONLY : JPIM +USE EC_PARKIND, ONLY: JPIM IMPLICIT NONE @@ -48,4 +49,10 @@ MODULE TPM_DIM TYPE(DIM_TYPE),ALLOCATABLE,TARGET :: DIM_RESOL(:) TYPE(DIM_TYPE),POINTER :: R +! flat copies of above +INTEGER(KIND=JPIM) :: R_NSMAX ! Truncation order +INTEGER(KIND=JPIM) :: R_NTMAX ! Truncation order for tendencies +INTEGER(KIND=JPIM) :: R_NDGNH ! Number of rows in northern hemisphere +INTEGER(KIND=JPIM) :: R_NDGL ! Number of rows of latitudes + END MODULE TPM_DIM diff --git a/src/trans/cpu/internal/tpm_distr.F90 b/src/trans/common/internal/tpm_distr.F90 old mode 100644 new mode 100755 similarity index 85% rename from src/trans/cpu/internal/tpm_distr.F90 rename to src/trans/common/internal/tpm_distr.F90 index ce0064f6a..6a151192f --- a/src/trans/cpu/internal/tpm_distr.F90 +++ b/src/trans/common/internal/tpm_distr.F90 @@ -129,6 +129,7 @@ MODULE TPM_DISTR INTEGER(KIND=JPIM) ,ALLOCATABLE :: MSTABF(:) ! (1:NPRTRW+1) ! Size of FOUBUF_IN, FOUBUF, except for the fields (i.e. this will be multiplied by 2 * KFIELD) INTEGER(KIND=JPIM) :: NLENGT0B +INTEGER(KIND=JPIM) :: NLENGT1B ! (only used in GPU code path) ! GRIDPOINT SPACE @@ -170,10 +171,36 @@ MODULE TPM_DISTR REAL(KIND=JPRD) ,ALLOCATABLE :: RWEIGHT(:) ! Weight per grid-point (if weighted distribution) INTEGER(KIND=JPIM) ,ALLOCATABLE :: NPROCA_GP(:) ! Number of grid-points per a-set +INTEGER(KIND=JPIM), ALLOCATABLE :: OFFSETS_GEMM1(:), OFFSETS_GEMM2(:) + END TYPE DISTR_TYPE TYPE(DISTR_TYPE),ALLOCATABLE,TARGET :: DISTR_RESOL(:) TYPE(DISTR_TYPE),POINTER :: D +!flat versions of the above +INTEGER(KIND=JPIM) :: D_NUMP ! No. of spectral waves handled by this processor +INTEGER(KIND=JPIM) ,ALLOCATABLE :: D_MYMS(:) ! Wave numbers handled by this PE +INTEGER(KIND=JPIM) ,ALLOCATABLE :: D_NSTAGT0B(:) ! Start adresses for segments within buffer + ! (according to processors to whom data + ! is going to be sent) +INTEGER(KIND=JPIM) ,ALLOCATABLE :: D_NSTAGT1B(:) +INTEGER(KIND=JPIM) ,ALLOCATABLE :: D_NPROCL(:) ! Process responsible for each lat. (F.S) +INTEGER(KIND=JPIM) ,ALLOCATABLE :: D_NPNTGTB1(:,:) +INTEGER(KIND=JPIM) ,ALLOCATABLE :: D_NASM0(:) ! Address in a spectral array of (m, n=m) +INTEGER(KIND=JPIM) ,ALLOCATABLE :: D_NSTAGTF(:) ! Offset for specific latitude in +INTEGER(KIND=JPIM) :: D_NDGL_FS ! Number of rows of latitudes for which this process is + ! performing Fourier Space calculations +INTEGER(KIND=JPIM) ,ALLOCATABLE :: D_MSTABF(:) +INTEGER(KIND=JPIM) ,ALLOCATABLE :: D_NPNTGTB0(:,:) +INTEGER(KIND=JPIM) ,ALLOCATABLE :: D_NPROCM(:) ! Process that does the calc. for certain +INTEGER(KIND=JPIM) ,ALLOCATABLE :: D_NPTRLS(:) ! Pointer to first lat. (F.S) + + +! The offsets in the input and output arrays to the gemms. +! (1) are the offsets in the "inputs" of dirtrans ("outputs" invtrans) +! (2) are the offsets in the "outputs" of invtrans ("inputs" dirtrans) +INTEGER(KIND=JPIM), POINTER :: D_OFFSETS_GEMM1(:), D_OFFSETS_GEMM2(:) + END MODULE TPM_DISTR diff --git a/src/trans/cpu/internal/tpm_gen.F90 b/src/trans/common/internal/tpm_gen.F90 similarity index 100% rename from src/trans/cpu/internal/tpm_gen.F90 rename to src/trans/common/internal/tpm_gen.F90 diff --git a/src/trans/cpu/internal/tpm_geometry.F90 b/src/trans/common/internal/tpm_geometry.F90 similarity index 79% rename from src/trans/cpu/internal/tpm_geometry.F90 rename to src/trans/common/internal/tpm_geometry.F90 index 48454a371..1ff1a1be9 100644 --- a/src/trans/cpu/internal/tpm_geometry.F90 +++ b/src/trans/common/internal/tpm_geometry.F90 @@ -34,4 +34,11 @@ MODULE TPM_GEOMETRY TYPE(GEOM_TYPE),ALLOCATABLE,TARGET :: GEOM_RESOL(:) TYPE(GEOM_TYPE),POINTER :: G +!flat copies of the above +INTEGER(KIND=JPIM),ALLOCATABLE :: G_NDGLU(:) ! NUMBER OF HEMISPERIC LATITUDES +INTEGER(KIND=JPIM),ALLOCATABLE :: G_NMEN(:) ! ASSOCIATED CUT-OFF WAVE NUMBER +INTEGER(KIND=JPIM) :: G_NMEN_MAX +INTEGER(KIND=JPIM),ALLOCATABLE :: G_NLOEN(:) ! NUMBER OF POINTS ON A PARALLEL +INTEGER(KIND=JPIM) :: G_NLOEN_MAX + END MODULE TPM_GEOMETRY diff --git a/src/trans/cpu/internal/tpm_pol.F90 b/src/trans/common/internal/tpm_pol.F90 old mode 100644 new mode 100755 similarity index 98% rename from src/trans/cpu/internal/tpm_pol.F90 rename to src/trans/common/internal/tpm_pol.F90 index f563d9609..b1f7ed222 --- a/src/trans/cpu/internal/tpm_pol.F90 +++ b/src/trans/common/internal/tpm_pol.F90 @@ -15,7 +15,7 @@ MODULE TPM_POL ! R. El Khatib 17-Feb-2016 Optional allocation/computation of DDC/DDD/DDE ! since they are (big and) not used in supolf. -USE EC_PARKIND ,ONLY : JPRD, JPIM +USE EC_PARKIND, ONLY: JPRD, JPIM IMPLICIT NONE diff --git a/src/trans/cpu/sharedmem/sharedmem.c b/src/trans/common/sharedmem/sharedmem.c similarity index 100% rename from src/trans/cpu/sharedmem/sharedmem.c rename to src/trans/common/sharedmem/sharedmem.c diff --git a/src/trans/cpu/sharedmem/sharedmem_mod.F90 b/src/trans/common/sharedmem/sharedmem_mod.F90 similarity index 100% rename from src/trans/cpu/sharedmem/sharedmem_mod.F90 rename to src/trans/common/sharedmem/sharedmem_mod.F90 diff --git a/src/trans/cpu/CMakeLists.txt b/src/trans/cpu/CMakeLists.txt index b06599d32..46b5cccce 100644 --- a/src/trans/cpu/CMakeLists.txt +++ b/src/trans/cpu/CMakeLists.txt @@ -6,9 +6,6 @@ # granted to it by virtue of its status as an intergovernmental organisation # nor does it submit to any jurisdiction. -# Preprocess module file containing version information -configure_file( internal/ectrans_version_mod.F90.in internal/ectrans_version_mod.F90 ) - ## Apply workarounds for some known compilers if(CMAKE_Fortran_COMPILER_ID MATCHES "Cray") @@ -32,66 +29,6 @@ if(CMAKE_Fortran_COMPILER_ID MATCHES "Intel") endif() endif() -## Sources which are precision independent can go into a common library -list( APPEND ectrans_common_src - algor/ectrans_blas_mod.F90 - sharedmem/sharedmem_mod.F90 - sharedmem/sharedmem.c - internal/abort_trans_mod.F90 - internal/cpledn_mod.F90 - internal/gawl_mod.F90 - internal/sugaw_mod.F90 - internal/supol_mod.F90 - internal/supolf_mod.F90 - internal/tpm_constants.F90 - internal/tpm_ctl.F90 - internal/tpm_dim.F90 - internal/tpm_gen.F90 - internal/tpm_geometry.F90 - internal/tpm_pol.F90 - internal/tpm_distr.F90 - internal/pe2set_mod.F90 - internal/set2pe_mod.F90 - internal/eq_regions_mod.F90 - internal/sump_trans0_mod.F90 - internal/sustaonl_mod.F90 - internal/sumplat_mod.F90 - internal/sumplatb_mod.F90 - internal/sumplatbeq_mod.F90 - internal/sumplatf_mod.F90 - internal/mysendset_mod.F90 - internal/myrecvset_mod.F90 - internal/suwavedi_mod.F90 - internal/sump_trans_preleg_mod.F90 - external/get_current.F90 - external/setup_trans0.F90 - ${CMAKE_CURRENT_BINARY_DIR}/internal/ectrans_version_mod.F90 -) -list( APPEND ectrans_common_includes - ectrans/get_current.h - ectrans/setup_trans0.h -) - -ecbuild_add_library( - TARGET ectrans_common - LINKER_LANGUAGE Fortran - SOURCES ${ectrans_common_src} - PUBLIC_LIBS fiat - PRIVATE_LIBS ${LAPACK_LIBRARIES} -) -ectrans_target_fortran_module_directory( - TARGET ectrans_common - MODULE_DIRECTORY ${CMAKE_BINARY_DIR}/module/ectrans - INSTALL_DIRECTORY module/ectrans -) - -if( HAVE_OMP ) - ecbuild_debug("target_link_libraries( trans_${prec} PRIVATE OpenMP::OpenMP_Fortran )") - target_link_libraries( ectrans_common PRIVATE OpenMP::OpenMP_Fortran ) -endif() - - - function(generate_file) set (options) set (oneValueArgs INPUT OUTPUT BACKEND) @@ -197,16 +134,14 @@ function(generate_backend_sources) set(outfiles) foreach(file_i ${files}) - if(NOT (${file_i} IN_LIST ectrans_common_src)) - get_filename_component(outfile_name ${file_i} NAME) - get_filename_component(outfile_name_we ${file_i} NAME_WE) - get_filename_component(outfile_ext ${file_i} EXT) - get_filename_component(outfile_dir ${file_i} DIRECTORY) - set(outfile "${destination}/${file_i}") - ecbuild_debug("Generate ${outfile}") - generate_file(BACKEND ${backend} INPUT ${CMAKE_CURRENT_SOURCE_DIR}/${file_i} OUTPUT ${outfile}) - list(APPEND outfiles ${outfile}) - endif() + get_filename_component(outfile_name ${file_i} NAME) + get_filename_component(outfile_name_we ${file_i} NAME_WE) + get_filename_component(outfile_ext ${file_i} EXT) + get_filename_component(outfile_dir ${file_i} DIRECTORY) + set(outfile "${destination}/${file_i}") + ecbuild_debug("Generate ${outfile}") + generate_file(BACKEND ${backend} INPUT ${CMAKE_CURRENT_SOURCE_DIR}/${file_i} OUTPUT ${outfile}) + list(APPEND outfiles ${outfile}) endforeach(file_i) set(${_PAR_OUTPUT} ${outfiles} PARENT_SCOPE) endfunction(generate_backend_sources) diff --git a/src/trans/cpu/external/setup_trans.F90 b/src/trans/cpu/external/setup_trans.F90 index 2149b0ad2..887eb20d5 100644 --- a/src/trans/cpu/external/setup_trans.F90 +++ b/src/trans/cpu/external/setup_trans.F90 @@ -169,7 +169,7 @@ SUBROUTINE SETUP_TRANS(KSMAX,KDGL,KDLON,KLOEN,LDSPLIT,PSTRET,& ENDIF LLP1 = NPRINTLEV>0 LLP2 = NPRINTLEV>1 -IF(LLP1) WRITE(NOUT,*) '=== ENTER ROUTINE SETUP_TRANS ===' +IF(LLP1) WRITE(NOUT,*) '=== ENTER ROUTINE SETUP_TRANS (CPU) ===' ! Allocate resolution dependent structures IF(.NOT. ALLOCATED(DIM_RESOL)) THEN diff --git a/src/trans/cpu/sedrenames.txt b/src/trans/cpu/sedrenames.txt index 66e63f274..e46b72e68 100644 --- a/src/trans/cpu/sedrenames.txt +++ b/src/trans/cpu/sedrenames.txt @@ -23,7 +23,6 @@ s/DIST_SPEC( *($|\(| |\*))/DIST_SPEC_VARIANTDESIGNATOR\1/g s/ectrans_mod/ectrans_mod_VARIANTDESIGNATOR/g s/FFTB_PLAN/FFTB_PLAN_VARIANTDESIGNATOR/g s/FFTB_TYPE/FFTB_TYPE_VARIANTDESIGNATOR/g -s/FIELD_SPLIT_MOD/FIELD_SPLIT_MOD_VARIANTDESIGNATOR/g s/FOURIER_IN_MOD/FOURIER_IN_MOD_VARIANTDESIGNATOR/g s/FOURIER_INAD_MOD/FOURIER_INAD_MOD_VARIANTDESIGNATOR/g s/FOURIER_OUT_MOD/FOURIER_OUT_MOD_VARIANTDESIGNATOR/g diff --git a/src/trans/gpu/external/setup_trans.F90 b/src/trans/gpu/external/setup_trans.F90 index b5d2ff732..bbaa3ed44 100755 --- a/src/trans/gpu/external/setup_trans.F90 +++ b/src/trans/gpu/external/setup_trans.F90 @@ -196,7 +196,7 @@ SUBROUTINE SETUP_TRANS(KSMAX,KDGL,KDLON,KLOEN,LDSPLIT,PSTRET,& ENDIF LLP1 = NPRINTLEV>0 LLP2 = NPRINTLEV>1 -IF(LLP1) WRITE(NOUT,*) '=== ENTER ROUTINE SETUP_TRANS ===' +IF(LLP1) WRITE(NOUT,*) '=== ENTER ROUTINE SETUP_TRANS (GPU) ===' ! Allocate resolution dependent structures IF(.NOT. ALLOCATED(DIM_RESOL)) THEN From 5d2c939be91d82142768c1916f077ed8c26dc06c Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Wed, 28 Aug 2024 14:59:09 +0000 Subject: [PATCH 29/86] Add test for setup_trans0 only relying on the ectrans_common library --- tests/CMakeLists.txt | 32 ++++++--- tests/trans/test_adjoint.F90 | 2 +- tests/trans/test_setup_trans0.F90 | 108 ++++++++++++++++++++++++++++++ 3 files changed, 133 insertions(+), 9 deletions(-) create mode 100644 tests/trans/test_setup_trans0.F90 diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index bae435088..6fa0e061e 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -45,6 +45,22 @@ if( HAVE_TESTS ) set( parkind parkind_sp ) endif() + ecbuild_add_executable( + TARGET ectrans_test_setup_trans0 + SOURCES trans/test_setup_trans0.F90 + LIBS ectrans_common + NOINSTALL) + set( ntasks 0 ) + if( HAVE_MPI ) + list( APPEND ntasks 1 2 ) + endif() + foreach( mpi ${ntasks} ) + ecbuild_add_test( TARGET ectrans_test_setup_trans0_mpi${mpi} + COMMAND ectrans_test_setup_trans0 + MPI ${mpi} + ) + endforeach() + ecbuild_add_test(TARGET ectrans_test_adjoint SOURCES trans/test_adjoint.F90 LIBS ${trans} ${parkind} @@ -54,17 +70,17 @@ if( HAVE_TESTS ) target_link_libraries( ectrans_test_adjoint OpenMP::OpenMP_Fortran ) endif() - set( ntasks 0 ) - set( nthreads 1 ) - if( HAVE_MPI ) - list( APPEND ntasks 1 2 ) - endif() - if( HAVE_OMP ) - list( APPEND nthreads 4 8 ) - endif() foreach( prec dp sp ) if( TARGET ectrans-benchmark-cpu-${prec} ) + set( ntasks 0 ) + set( nthreads 1 ) + if( HAVE_MPI ) + list( APPEND ntasks 1 2 ) + endif() + if( HAVE_OMP ) + list( APPEND nthreads 4 8 ) + endif() foreach( mpi ${ntasks} ) foreach( omp ${nthreads} ) set( t 47 ) diff --git a/tests/trans/test_adjoint.F90 b/tests/trans/test_adjoint.F90 index fae52c508..9dc14a5a7 100644 --- a/tests/trans/test_adjoint.F90 +++ b/tests/trans/test_adjoint.F90 @@ -56,7 +56,7 @@ PROGRAM TEST_ADJOINT #include "gath_grid.h" #include "gstats_setup.intfb.h" -! ======== PARAMTERS WHICH MAY BE MODIFIED, ORIGINALLY COMING FROM NAMELSIT =========== +! ======== PARAMETERS WHICH MAY BE MODIFIED, ORIGINALLY COMING FROM NAMELIST =========== NDGL = 32 NFLEVG = 9 NPROMA = 8 diff --git a/tests/trans/test_setup_trans0.F90 b/tests/trans/test_setup_trans0.F90 new file mode 100644 index 000000000..089398452 --- /dev/null +++ b/tests/trans/test_setup_trans0.F90 @@ -0,0 +1,108 @@ +! (C) Copyright 2005- ECMWF. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +PROGRAM TEST_SETUP_TRANS0 +USE EC_PARKIND ,ONLY : JPIM +USE MPL_MODULE ,ONLY : MPL_INIT, MPL_END, MPL_BARRIER, MPL_MYRANK, MPL_NPROC +USE ABORT_TRANS_MOD, ONLY : ABORT_TRANS +USE YOMHOOK, ONLY : JPHOOK, DR_HOOK + +IMPLICIT NONE + +INTEGER(KIND=JPIM) :: NPROC,NPRGPNS,NPRGPEW,NPRTRW,NPRTRV +INTEGER(KIND=JPIM) :: NOUT, MYPROC +REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +CHARACTER*6 CLNAME +LOGICAL :: LUSE_MPI + +#include "setup_trans0.h" + +LUSE_MPI = detect_mpirun() + +IF(LUSE_MPI) THEN + CALL MPL_INIT + MYPROC = MPL_MYRANK() + NPROC = MPL_NPROC() + NOUT = 20 + WRITE(CLNAME,'(A,I2.2)') 'OUT.',MYPROC + OPEN(NOUT,FILE=CLNAME) +ELSE + NOUT = 6 + MYPROC = 1 + NPROC = 1 +ENDIF + +CALL DR_HOOK('PROGRAM', 0, ZHOOK_HANDLE) + +! ====================================================================================== +! NPROC must match NPRGPNS * NPRGPEW +NPRTRV = 1 +NPRTRW = NPROC / NPRTRV +NPRGPEW = 1 +NPRGPNS = NPROC +! ====================================================================================== + +IF (MYPROC == 1) WRITE(NOUT,*) ' LUSE_MPI= ',LUSE_MPI + +IF(NPROC /= NPRTRW*NPRTRV) THEN + PRINT *,'NPRGPNS,NPRGPEW,NPRTRW,NPRTRV ',NPRGPNS,NPRGPEW,NPRTRW,NPRTRV + CALL ABORT_TRANS('NPRGPNS*NPRGPEW /= NPRTRW*NPRTRV') +ENDIF + +CALL SETUP_TRANS0(KOUT=NOUT,KERR=0,KPRINTLEV=2, & + & KMAX_RESOL=1,& + & LDEQ_REGIONS=.TRUE., & + & KPRGPNS=NPRGPNS, KPRGPEW=NPRGPEW, KPRTRW=NPRTRW,& + & LDMPOFF=.NOT.LUSE_MPI) + +CALL DR_HOOK('PROGRAM', 1, ZHOOK_HANDLE) + +IF(LUSE_MPI) THEN + CALL MPL_BARRIER() + CALL MPL_END +ENDIF + +CONTAINS + +function detect_mpirun() result(lmpi_required) + use ec_env_mod, only : ec_putenv + logical :: lmpi_required + integer :: ilen + integer, parameter :: nvars = 4 + character(len=32), dimension(nvars) :: cmpirun_detect + character(len=4) :: clenv + integer :: ivar + + ! Environment variables that are set when mpirun, srun, aprun, ... are used + cmpirun_detect(1) = 'OMPI_COMM_WORLD_SIZE' ! openmpi + cmpirun_detect(2) = 'ALPS_APP_PE' ! cray pe + cmpirun_detect(3) = 'PMI_SIZE' ! intel + cmpirun_detect(4) = 'SLURM_NTASKS' ! slurm + + lmpi_required = .false. + do ivar = 1, nvars + call get_environment_variable(name=trim(cmpirun_detect(ivar)), length=ilen) + if (ilen > 0) then + lmpi_required = .true. + exit ! break + endif + enddo + + call get_environment_variable(name="ECTRANS_USE_MPI", value=clenv, length=ilen ) + if (ilen > 0) then + lmpi_required = .true. + if( trim(clenv) == "0" .or. trim(clenv) == "OFF" .or. trim(CLENV) == "off" .or. trim(clenv) == "F" ) then + lmpi_required = .false. + endif + call ec_putenv("DR_HOOK_ASSERT_MPI_INITIALIZED=0", overwrite=.true.) + endif +end function + + +END PROGRAM From 8c66c21eafaf4454d102aef1bf469f5d94db7d19 Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Wed, 28 Aug 2024 13:38:37 +0000 Subject: [PATCH 30/86] GPU library uses common --- src/trans/gpu/CMakeLists.txt | 6 +- src/trans/gpu/external/get_current.F90 | 67 --- src/trans/gpu/external/gpnorm_trans.F90 | 8 +- src/trans/gpu/external/gpnorm_trans_gpu.F90 | 8 +- src/trans/gpu/external/setup_trans0.F90 | 302 ------------ src/trans/gpu/internal/abort_trans_mod.F90 | 39 -- src/trans/gpu/internal/cpledn_mod.F90 | 134 ------ .../gpu/internal/ectrans_version_mod.F90.in | 47 -- src/trans/gpu/internal/eq_regions_mod.F90 | 433 ----------------- src/trans/gpu/internal/field_split_mod.F90 | 138 ------ src/trans/gpu/internal/gawl_mod.F90 | 117 ----- src/trans/gpu/internal/myrecvset_mod.F90 | 83 ---- src/trans/gpu/internal/mysendset_mod.F90 | 80 --- src/trans/gpu/internal/pe2set_mod.F90 | 119 ----- src/trans/gpu/internal/set2pe_mod.F90 | 130 ----- src/trans/gpu/internal/sugaw_mod.F90 | 429 ----------------- src/trans/gpu/internal/sump_trans0_mod.F90 | 111 ----- src/trans/gpu/internal/sump_trans_mod.F90 | 31 +- .../gpu/internal/sump_trans_preleg_mod.F90 | 146 ------ src/trans/gpu/internal/sumplat_mod.F90 | 254 ---------- src/trans/gpu/internal/sumplatb_mod.F90 | 224 --------- src/trans/gpu/internal/sumplatbeq_mod.F90 | 288 ----------- src/trans/gpu/internal/sumplatf_mod.F90 | 148 ------ src/trans/gpu/internal/supol_mod.F90 | 172 ------- src/trans/gpu/internal/supolf_mod.F90 | 283 ----------- src/trans/gpu/internal/sustaonl_mod.F90 | 454 ------------------ src/trans/gpu/internal/suwavedi_mod.F90 | 186 ------- src/trans/gpu/internal/tpm_constants.F90 | 20 - src/trans/gpu/internal/tpm_ctl.F90 | 41 -- src/trans/gpu/internal/tpm_dim.F90 | 58 --- src/trans/gpu/internal/tpm_distr.F90 | 195 -------- src/trans/gpu/internal/tpm_gen.F90 | 45 -- src/trans/gpu/internal/tpm_geometry.F90 | 45 -- src/trans/gpu/internal/tpm_pol.F90 | 120 ----- src/trans/gpu/sharedmem/sharedmem.c | 28 -- src/trans/gpu/sharedmem/sharedmem_mod.F90 | 314 ------------ 36 files changed, 26 insertions(+), 5277 deletions(-) delete mode 100755 src/trans/gpu/external/get_current.F90 delete mode 100755 src/trans/gpu/external/setup_trans0.F90 delete mode 100755 src/trans/gpu/internal/abort_trans_mod.F90 delete mode 100755 src/trans/gpu/internal/cpledn_mod.F90 delete mode 100644 src/trans/gpu/internal/ectrans_version_mod.F90.in delete mode 100755 src/trans/gpu/internal/eq_regions_mod.F90 delete mode 100755 src/trans/gpu/internal/field_split_mod.F90 delete mode 100755 src/trans/gpu/internal/gawl_mod.F90 delete mode 100755 src/trans/gpu/internal/myrecvset_mod.F90 delete mode 100755 src/trans/gpu/internal/mysendset_mod.F90 delete mode 100755 src/trans/gpu/internal/pe2set_mod.F90 delete mode 100755 src/trans/gpu/internal/set2pe_mod.F90 delete mode 100755 src/trans/gpu/internal/sugaw_mod.F90 delete mode 100755 src/trans/gpu/internal/sump_trans0_mod.F90 delete mode 100755 src/trans/gpu/internal/sump_trans_preleg_mod.F90 delete mode 100755 src/trans/gpu/internal/sumplat_mod.F90 delete mode 100755 src/trans/gpu/internal/sumplatb_mod.F90 delete mode 100755 src/trans/gpu/internal/sumplatbeq_mod.F90 delete mode 100755 src/trans/gpu/internal/sumplatf_mod.F90 delete mode 100755 src/trans/gpu/internal/supol_mod.F90 delete mode 100755 src/trans/gpu/internal/supolf_mod.F90 delete mode 100755 src/trans/gpu/internal/sustaonl_mod.F90 delete mode 100755 src/trans/gpu/internal/suwavedi_mod.F90 delete mode 100755 src/trans/gpu/internal/tpm_constants.F90 delete mode 100755 src/trans/gpu/internal/tpm_ctl.F90 delete mode 100755 src/trans/gpu/internal/tpm_dim.F90 delete mode 100755 src/trans/gpu/internal/tpm_distr.F90 delete mode 100755 src/trans/gpu/internal/tpm_gen.F90 delete mode 100755 src/trans/gpu/internal/tpm_geometry.F90 delete mode 100755 src/trans/gpu/internal/tpm_pol.F90 delete mode 100644 src/trans/gpu/sharedmem/sharedmem.c delete mode 100644 src/trans/gpu/sharedmem/sharedmem_mod.F90 diff --git a/src/trans/gpu/CMakeLists.txt b/src/trans/gpu/CMakeLists.txt index c417da5eb..94fd94a23 100644 --- a/src/trans/gpu/CMakeLists.txt +++ b/src/trans/gpu/CMakeLists.txt @@ -6,18 +6,13 @@ # granted to it by virtue of its status as an intergovernmental organisation # nor does it submit to any jurisdiction. -# Preprocess module file containing version information -configure_file( internal/ectrans_version_mod.F90.in internal/ectrans_version_mod.F90 ) - ## Assemble sources ecbuild_list_add_pattern( LIST trans_src GLOB - sharedmem/* algor/* internal/* external/* - ${CMAKE_CURRENT_BINARY_DIR}/internal/ectrans_version_mod.F90 QUIET ) @@ -69,6 +64,7 @@ foreach( prec dp sp ) $ PUBLIC_LIBS parkind_${prec} fiat + ectrans_common PRIVATE_LIBS ${ECTRANS_GPU_HIP_LIBRARIES} ${LAPACK_LIBRARIES} # we still have symbols in some files $<${HAVE_ACC}:OpenACC::OpenACC_Fortran> diff --git a/src/trans/gpu/external/get_current.F90 b/src/trans/gpu/external/get_current.F90 deleted file mode 100755 index 71a5ee154..000000000 --- a/src/trans/gpu/external/get_current.F90 +++ /dev/null @@ -1,67 +0,0 @@ -! (C) Copyright 2012- Meteo-France. -! (C) Copyright 2012- ECMWF. -! -! This software is licensed under the terms of the Apache Licence Version 2.0 -! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -! In applying this licence, ECMWF does not waive the privileges and immunities -! granted to it by virtue of its status as an intergovernmental organisation -! nor does it submit to any jurisdiction. -! - -SUBROUTINE GET_CURRENT(KRESOL,LDLAM) - -!**** *GET_CURRENT* - Extract current information from the transform package - -! Purpose. -! -------- -! Interface routine for extracting current information from the T.P. - -!** Interface. -! ---------- -! CALL GET_CURRENT(...) - -! Explicit arguments : (all optional) -! -------------------- -! KRESOL - Current resolution -! LDLAM - .T. if the corresponding resolution is LAM, .F. if it is global - -! Method. -! ------- - -! Externals. None -! ---------- - -! Author. -! ------- -! Ryad El Khatib *Meteo-France* - -! Modifications. -! -------------- -! Original : 24-Aug-2012 - -! ------------------------------------------------------------------ - -USE PARKIND1, ONLY: JPIM - -!ifndef INTERFACE - -USE TPM_GEN, ONLY: NCUR_RESOL -USE TPM_GEOMETRY, ONLY: G - -!endif INTERFACE - -IMPLICIT NONE - -INTEGER(KIND=JPIM) ,OPTIONAL,INTENT(OUT) :: KRESOL -LOGICAL ,OPTIONAL,INTENT(OUT) :: LDLAM - -!ifndef INTERFACE - -! Get current resolution -IF (PRESENT(KRESOL)) KRESOL= NCUR_RESOL -IF (PRESENT(LDLAM)) LDLAM = G%LAM - - -!endif INTERFACE - -END SUBROUTINE GET_CURRENT diff --git a/src/trans/gpu/external/gpnorm_trans.F90 b/src/trans/gpu/external/gpnorm_trans.F90 index e61d070c1..d92b92b0a 100755 --- a/src/trans/gpu/external/gpnorm_trans.F90 +++ b/src/trans/gpu/external/gpnorm_trans.F90 @@ -395,17 +395,17 @@ SUBROUTINE GPNORM_TRANS(PGP,KFIELDS,KPROMA,PAVE,PMIN,PMAX,LDAVE_ONLY,KRESOL) ENDDO IF(.NOT.LDAVE_ONLY)THEN IND=IND+1 - ZMING(JF)=MIN(ZMING(JF),ZRCV(IND)) + ZMING(JF)=MIN(ZMING(JF),REAL(ZRCV(IND),JPRBT)) IND=IND+1 - ZMAXG(JF)=MAX(ZMAXG(JF),ZRCV(IND)) + ZMAXG(JF)=MAX(ZMAXG(JF),REAL(ZRCV(IND),JPRBT)) ENDIF ENDDO IF(LDAVE_ONLY)THEN DO JF=1,KFIELDS IND=IND+1 - ZMING(JF)=MIN(ZMING(JF),ZRCV(IND)) + ZMING(JF)=MIN(ZMING(JF),REAL(ZRCV(IND),JPRBT)) IND=IND+1 - ZMAXG(JF)=MAX(ZMAXG(JF),ZRCV(IND)) + ZMAXG(JF)=MAX(ZMAXG(JF),REAL(ZRCV(IND),JPRBT)) ENDDO ENDIF DEALLOCATE(ZRCV) diff --git a/src/trans/gpu/external/gpnorm_trans_gpu.F90 b/src/trans/gpu/external/gpnorm_trans_gpu.F90 index 61bf243c4..26801cc47 100755 --- a/src/trans/gpu/external/gpnorm_trans_gpu.F90 +++ b/src/trans/gpu/external/gpnorm_trans_gpu.F90 @@ -459,17 +459,17 @@ SUBROUTINE GPNORM_TRANS_GPU(PGP,KFIELDS,KPROMA,PAVE,PMIN,PMAX,LDAVE_ONLY,KRESOL) ENDDO IF(.NOT.LDAVE_ONLY)THEN IND=IND+1 - ZMING(JF)=MIN(ZMING(JF),ZRCV(IND)) + ZMING(JF)=MIN(ZMING(JF),REAL(ZRCV(IND),JPRBT)) IND=IND+1 - ZMAXG(JF)=MAX(ZMAXG(JF),ZRCV(IND)) + ZMAXG(JF)=MAX(ZMAXG(JF),REAL(ZRCV(IND),JPRBT)) ENDIF ENDDO IF(LDAVE_ONLY)THEN DO JF=1,KFIELDS IND=IND+1 - ZMING(JF)=MIN(ZMING(JF),ZRCV(IND)) + ZMING(JF)=MIN(ZMING(JF),REAL(ZRCV(IND),JPRBT)) IND=IND+1 - ZMAXG(JF)=MAX(ZMAXG(JF),ZRCV(IND)) + ZMAXG(JF)=MAX(ZMAXG(JF),REAL(ZRCV(IND),JPRBT)) ENDDO ENDIF DEALLOCATE(ZRCV) diff --git a/src/trans/gpu/external/setup_trans0.F90 b/src/trans/gpu/external/setup_trans0.F90 deleted file mode 100755 index 20540c369..000000000 --- a/src/trans/gpu/external/setup_trans0.F90 +++ /dev/null @@ -1,302 +0,0 @@ -! (C) Copyright 2000- ECMWF. -! (C) Copyright 2000- Meteo-France. -! -! This software is licensed under the terms of the Apache Licence Version 2.0 -! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -! In applying this licence, ECMWF does not waive the privileges and immunities -! granted to it by virtue of its status as an intergovernmental organisation -! nor does it submit to any jurisdiction. -! - -SUBROUTINE SETUP_TRANS0(KOUT,KERR,KPRINTLEV,KMAX_RESOL,KPROMATR,& -& KPRGPNS,KPRGPEW,KPRTRW,KCOMBFLEN,& -& LDMPOFF,LDSYNC_TRANS,KTRANS_SYNC_LEVEL,& -& LDEQ_REGIONS,K_REGIONS_NS,K_REGIONS_EW,K_REGIONS,& -& PRAD,LDALLOPERM,KOPT_MEMORY_TR) - -!**** *SETUP_TRANS0* - General setup routine for transform package - -! Purpose. -! -------- -! Resolution independent part of setup of transform package -! Has to be called BEFORE SETUP_TRANS - -!** Interface. -! ---------- -! CALL SETUP_TRANS0(...) - -! Explicit arguments : All arguments are optional, [..] default value -! ------------------- -! KOUT - Unit number for listing output [6] -! KERR - Unit number for error messages [0] -! KPRINTLEV - level of output to KOUT, 0->no output,1->normal,2->debug [0] -! KMAX_RESOL - maximum number of different resolutions for this run [1] -! KPRGPNS - splitting level in N-S direction in grid-point space [1] -! KPRGPEW - splitting level in E-W direction in grid-point space [1] -! KPRTRW - splitting level in wave direction in spectral space [1] -! KCOMBFLEN - Size of communication buffer [1800000 (*8bytes) ] (deprecated) -! LDMPOFF - switch off message passing [false] -! LDSYNC_TRANS - switch to activate barriers in trmtol trltom [false] -! KTRANS_SYNC_LEVEL - use of synchronization/blocking [0] -! LDEQ_REGIONS - true if new eq_regions partitioning [false] -! K_REGIONS - Number of regions (1D or 2D partitioning) -! K_REGIONS_NS - Maximum number of NS partitions -! K_REGIONS_EW - Maximum number of EW partitions -! PRAD - Radius of the planet -! LDALLOPERM - Allocate certain arrays permanently -! KOPT_MEMORY_TR - memory strategy (stack vs heap) in gripoint transpositions - -! The total number of (MPI)-processors has to be equal to KPRGPNS*KPRGPEW - -! Method. -! ------- - -! Externals. SUMP_TRANS0 - initial setup routine -! ---------- - -! Author. -! ------- -! Mats Hamrud *ECMWF* - -! Modifications. -! -------------- -! Original : 00-03-03 -! R. El Khatib 03-01-24 LDMPOFF -! G. Mozdzynski 2006-09-13 LDEQ_REGIONS -! N. Wedi 2009-11-30 add radius -! R. El Khatib 09-Sep-2020 NSTACK_MEMORY_TR - -! ------------------------------------------------------------------ - -USE PARKIND1, ONLY: JPIM, JPRB, JPRD - -!ifndef INTERFACE - -USE TPM_GEN, ONLY: NERR, NOUT, LMPOFF, LSYNC_TRANS, NTRANS_SYNC_LEVEL, MSETUP0, & - & NMAX_RESOL, NPRINTLEV, NPROMATR, LALLOPERM -USE TPM_DISTR, ONLY: LEQ_REGIONS, NPRGPEW, NPRGPNS, NPRTRW, NPRTRV, MYSETV -USE TPM_CONSTANTS, ONLY: RA -USE MPL_MODULE, ONLY: MPL_MYRANK -USE SUMP_TRANS0_MOD, ONLY: SUMP_TRANS0 -USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS -USE EQ_REGIONS_MOD, ONLY: N_REGIONS, N_REGIONS_EW, N_REGIONS_NS -USE ECTRANS_VERSION_MOD, ONLY: ECTRANS_VERSION_STR, ECTRANS_GIT_SHA1 -USE EC_ENV_MOD, ONLY: EC_GETENV -#ifdef _OPENACC -USE OPENACC, ONLY: ACC_DEVICE_KIND, ACC_GET_DEVICE_TYPE, ACC_GET_NUM_DEVICES, & - & ACC_SET_DEVICE_NUM, ACC_GET_DEVICE_NUM, ACC_INIT -#endif - -!endif INTERFACE - -IMPLICIT NONE - -INTEGER(KIND=JPIM) ,OPTIONAL,INTENT(IN) :: KOUT,KERR,KPRINTLEV,KMAX_RESOL,KPROMATR -INTEGER(KIND=JPIM) ,OPTIONAL,INTENT(IN) :: KPRGPNS,KPRGPEW,KPRTRW,KCOMBFLEN -LOGICAL ,OPTIONAL,INTENT(IN) :: LDMPOFF -LOGICAL ,OPTIONAL,INTENT(IN) :: LDSYNC_TRANS -INTEGER(KIND=JPIM) ,OPTIONAL,INTENT(IN) :: KTRANS_SYNC_LEVEL -LOGICAL ,OPTIONAL,INTENT(IN) :: LDEQ_REGIONS -LOGICAL ,OPTIONAL,INTENT(IN) :: LDALLOPERM -REAL(KIND=JPRD) ,OPTIONAL,INTENT(IN) :: PRAD -INTEGER(KIND=JPIM) ,OPTIONAL,INTENT(IN) :: KOPT_MEMORY_TR -INTEGER(KIND=JPIM) ,OPTIONAL,INTENT(OUT) :: K_REGIONS(:) -INTEGER(KIND=JPIM) ,OPTIONAL,INTENT(OUT) :: K_REGIONS_NS -INTEGER(KIND=JPIM) ,OPTIONAL,INTENT(OUT) :: K_REGIONS_EW - -INTEGER(KIND=JPIM) :: MYPROC -INTEGER :: IDEVICE_NUM, IPROC_PERNODE -#ifdef _OPENACC -INTEGER(ACC_DEVICE_KIND) :: IDEVTYPE, IDEVICE_TYPE -#endif -INTEGER :: NUMDEVS, IERROR, MYGPU -CHARACTER(LEN=2) :: CL_NPROC_PERNODE - -!ifndef INTERFACE - -LOGICAL :: LLP1,LLP2 - -! ------------------------------------------------------------------ - -IF( LDMPOFF ) THEN - MYPROC = 1 -ELSE - MYPROC = MPL_MYRANK() -ENDIF - - -!!CALL GSTATS_LABEL_IFS() -#ifdef _OPENACC -IDEVTYPE=ACC_GET_DEVICE_TYPE() -NUMDEVS = ACC_GET_NUM_DEVICES(IDEVTYPE) -MYGPU = MOD(MYPROC-1,NUMDEVS) -CALL ACC_SET_DEVICE_NUM(MYGPU, IDEVTYPE) -MYGPU = ACC_GET_DEVICE_NUM(IDEVTYPE) -WRITE(NOUT,*) 'MYPROC:',MYPROC, 'GPU:', MYGPU, 'of ', NUMDEVS -#endif - -CL_NPROC_PERNODE=' ' -CALL EC_GETENV('NPROC_PERNODE',CL_NPROC_PERNODE) -IF( CL_NPROC_PERNODE /= ' ')THEN - READ(CL_NPROC_PERNODE,*) IPROC_PERNODE - IDEVICE_NUM=MOD(MYPROC-1,IPROC_PERNODE) - WRITE(0,'("TRANSFORM TEST: MYPROC=",I8," CL_NPROC_PERNODE=",A," IPROC_PERNODE=",I2,& - & " IDEVICE_NUM=",I2)') MYPROC,CL_NPROC_PERNODE,IPROC_PERNODE,IDEVICE_NUM - IDEVICE_TYPE=0 - !!CALL ACC_SET_DEVICE_NUM(IDEVICE_NUM,ACC_DEVICE_NVIDIA) - CALL ACC_SET_DEVICE_NUM(IDEVICE_NUM,IDEVTYPE) - !!CALL ACC_INIT(ACC_DEVICE_NVIDIA) - CALL ACC_INIT(IDEVTYPE) - !$OMP PARALLEL - !!CALL ACC_SET_DEVICE_NUM(IDEVICE_NUM,ACC_DEVICE_NVIDIA) - CALL ACC_SET_DEVICE_NUM(IDEVICE_NUM,IDEVTYPE) - !!CALL ACC_INIT(ACC_DEVICE_NVIDIA) - CALL ACC_INIT(IDEVTYPE) -!$OMP END PARALLEL -ENDIF - -IF(MSETUP0 /= 0) THEN -!gr CALL ABORT_TRANS('SETUP_TRANS0: SETUP_TRANS0 MAY ONLY BE CALLED ONCE') -ENDIF - -! Default values - -NOUT = 6 -NERR = 0 -NPRINTLEV = 0 -NMAX_RESOL = 1 -NPRGPNS = 1 -NPRGPEW = 1 -NPRTRW = 1 -N_REGIONS_NS=1 -N_REGIONS_EW=1 -NPROMATR = 0 -LMPOFF = .FALSE. -LSYNC_TRANS=.FALSE. -NTRANS_SYNC_LEVEL=0 -LEQ_REGIONS=.FALSE. -RA=6371229._JPRB -LALLOPERM=.FALSE. - -! Optional arguments - -IF(PRESENT(KOUT)) THEN - NOUT = KOUT -ENDIF -IF(PRESENT(KERR)) THEN - NERR = KERR -ENDIF -IF(PRESENT(KPRINTLEV)) THEN - NPRINTLEV = KPRINTLEV -ENDIF - -! Print ecTrans version information -WRITE(NOUT,'(A)') -WRITE(NOUT,'(A)') "ecTrans at version: " // ECTRANS_VERSION_STR() -WRITE(NOUT,'(A)') "commit: " // ECTRANS_GIT_SHA1() -WRITE(NOUT,'(A)') -WRITE(NOUT,'(A)') "GPU version, with following compile-time options : " -#ifdef ACCGPU - WRITE(NOUT,'(A)') " - OpenACC-based offload" -#else - WRITE(NOUT,'(A)') " - OpenMP-based offload" -#endif -#ifdef USE_GPU_AWARE_MPI - WRITE(NOUT,'(A)') " - GPU-aware MPI" -#endif -#ifdef USE_GRAPHS_GEMM - WRITE(NOUT,'(A)') " - graph-based GEMM scheduling" -#endif -#ifdef USE_CUTLASS - WRITE(NOUT,'(A)') " - Cutlass-based GEMM operations" -#endif -#ifdef USE_CUTLASS_3XTF32 - WRITE(NOUT,'(A)') " - tensor-core usage for 32b Cutlass operations" -#endif -WRITE(NOUT,'(A)') - -LLP1 = NPRINTLEV>0 -LLP2 = NPRINTLEV>1 -IF(LLP1) WRITE(NOUT,*) '=== ENTER ROUTINE SETUP_TRANS0 ===' - -IF(PRESENT(KMAX_RESOL))THEN - NMAX_RESOL = KMAX_RESOL -ENDIF -IF(PRESENT(KPROMATR))THEN - IF(MOD(KPROMATR,2) /= 0) THEN - CALL ABORT_TRANS('SETUP_TRANS0: KPROMATR HAS TO BE MULTIPLE OF 2') - ENDIF - NPROMATR = KPROMATR -ENDIF -IF(PRESENT(KPRGPNS)) THEN - NPRGPNS = KPRGPNS -ENDIF -IF(PRESENT(KPRGPEW)) THEN - NPRGPEW = KPRGPEW -ENDIF -IF(PRESENT(KPRTRW)) THEN - NPRTRW = KPRTRW -ENDIF -IF(PRESENT(KCOMBFLEN)) THEN - WRITE(NOUT,'(A)') - WRITE(NOUT,'(A)') '*** WARNING ***' - WRITE(NOUT,'(A)') 'KCOMBFLEN argument passed to SETUP_TRANS0 is deprecated' - WRITE(NOUT,'(A)') -ENDIF -IF(PRESENT(LDMPOFF)) THEN - LMPOFF = LDMPOFF -ENDIF -IF(PRESENT(LDSYNC_TRANS)) THEN - LSYNC_TRANS = LDSYNC_TRANS -ENDIF -IF(PRESENT(KTRANS_SYNC_LEVEL)) THEN - NTRANS_SYNC_LEVEL = KTRANS_SYNC_LEVEL -ENDIF -IF(PRESENT(LDEQ_REGIONS)) THEN - LEQ_REGIONS = LDEQ_REGIONS -ENDIF -IF(PRESENT(KOPT_MEMORY_TR)) THEN - WRITE(NOUT,'(A)') - WRITE(NOUT,'(A)') '*** WARNING ***' - WRITE(NOUT,'(A)') 'KOPT_MEMORY_TR argument passed to SETUP_TRANS0 will be ignored' - WRITE(NOUT,'(A)') 'This option only applies to the CPU version of ecTrans' - WRITE(NOUT,'(A)') -ENDIF - -! Initial setup -CALL SUMP_TRANS0 - -IF(PRESENT(K_REGIONS_NS)) THEN - K_REGIONS_NS = N_REGIONS_NS -ENDIF - -IF(PRESENT(K_REGIONS_EW)) THEN - K_REGIONS_EW = N_REGIONS_EW -ENDIF - -IF(PRESENT(K_REGIONS)) THEN - IF(UBOUND(K_REGIONS,1) < N_REGIONS_NS) THEN - CALL ABORT_TRANS('SETUP_TRANS0: K_REGIONS TOO SMALL') - ELSE - K_REGIONS(1:N_REGIONS_NS)=N_REGIONS(1:N_REGIONS_NS) - ENDIF -ENDIF - -IF(PRESENT(PRAD)) THEN - RA=PRAD -ENDIF - -IF(PRESENT(LDALLOPERM)) THEN - LALLOPERM=LDALLOPERM -ENDIF - -! Setup level 0 complete -MSETUP0 = 1 - -! ------------------------------------------------------------------ - -!endif INTERFACE - -END SUBROUTINE SETUP_TRANS0 - - diff --git a/src/trans/gpu/internal/abort_trans_mod.F90 b/src/trans/gpu/internal/abort_trans_mod.F90 deleted file mode 100755 index 2fe9e7830..000000000 --- a/src/trans/gpu/internal/abort_trans_mod.F90 +++ /dev/null @@ -1,39 +0,0 @@ -! (C) Copyright 2000- ECMWF. -! (C) Copyright 2000- Meteo-France. -! -! This software is licensed under the terms of the Apache Licence Version 2.0 -! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -! In applying this licence, ECMWF does not waive the privileges and immunities -! granted to it by virtue of its status as an intergovernmental organisation -! nor does it submit to any jurisdiction. -! - -MODULE ABORT_TRANS_MOD -CONTAINS -SUBROUTINE ABORT_TRANS(CDTEXT) - -USE TPM_GEN, ONLY: NOUT,NERR -USE TPM_DISTR, ONLY: NPROC,MYPROC -USE MPL_MODULE, ONLY: MPL_ABORT -USE SDL_MOD, ONLY: SDL_TRACEBACK, SDL_SRLABORT - -IMPLICIT NONE - - -CHARACTER(LEN=*),INTENT(IN) :: CDTEXT - -WRITE(NOUT,'(1X,A)') 'ABORT_TRANS CALLED' - -WRITE(NOUT,'(1X,A)') CDTEXT -WRITE(NERR,'(1X,A,1X,I3,1X,A)') 'ABORT! ',MYPROC,CDTEXT -CLOSE(NOUT) -IF (NPROC > 1) THEN - CALL MPL_ABORT(CDTEXT) -ELSE - CALL SDL_TRACEBACK - CALL FLUSH(0) - CALL SDL_SRLABORT -ENDIF - -END SUBROUTINE ABORT_TRANS -END MODULE ABORT_TRANS_MOD diff --git a/src/trans/gpu/internal/cpledn_mod.F90 b/src/trans/gpu/internal/cpledn_mod.F90 deleted file mode 100755 index e0f054782..000000000 --- a/src/trans/gpu/internal/cpledn_mod.F90 +++ /dev/null @@ -1,134 +0,0 @@ -! (C) Copyright 1987- ECMWF. -! (C) Copyright 1987- Meteo-France. -! -! This software is licensed under the terms of the Apache Licence Version 2.0 -! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -! In applying this licence, ECMWF does not waive the privileges and immunities -! granted to it by virtue of its status as an intergovernmental organisation -! nor does it submit to any jurisdiction. -! - -MODULE CPLEDN_MOD -CONTAINS -SUBROUTINE CPLEDN(KN,KODD,PFN,PX,KFLAG,PW,PXN,PXMOD) - -!**** *CPLEDN* - Routine to perform a single Newton iteration step to find -! the zero of the ordinary Legendre polynomial of degree N - -! Purpose. -! -------- - -!** Interface. -! ---------- -! *CALL* *CPLEDN(KN,KDBLE,PX,KFLAG,PW,PXN,PXMOD)* - -! Explicit arguments : -! -------------------- -! KN : Degree of the Legendre polynomial (in) -! KODD : odd or even number of latitudes (in) -! PFN : Fourier coefficients of series expansion (in) -! for the ordinary Legendre polynomials -! PX : abcissa where the computations are performed (in) -! KFLAG : When KFLAG.EQ.1 computes the weights (in) -! PW : Weight of the quadrature at PXN (out) -! PXN : new abscissa (Newton iteration) (out) -! PXMOD : PXN-PX (out) - -! Implicit arguments : -! -------------------- -! None - -! Method. -! ------- -! See documentation - -! Externals. -! ---------- -! None - -! Reference. -! ---------- -! ECMWF Research Department documentation of the IFS - -! Author. -! ------- -! Mats Hamrud and Philippe Courtier *ECMWF* - -! Modifications. -! -------------- -! Original : 87-10-15 -! Michel Rochas, 90-08-30 (Lobatto+cleaning) -! K. Yessad (Sep 2008): cleaning, improve comments. -! Nils Wedi + Mats Hamrud, 2009-02-05 revised following Swarztrauber, 2002 -! F. Vana 05-Mar-2015 Support for single precision -! ------------------------------------------------------------------ - -USE EC_PARKIND, ONLY: JPRD, JPIM - -! ------------------------------------------------------------------ - -IMPLICIT NONE - -INTEGER(KIND=JPIM),INTENT(IN) :: KN -INTEGER(KIND=JPIM),INTENT(IN) :: KODD -REAL(KIND=JPRD),INTENT(IN) :: PFN(0:KN/2) -REAL(KIND=JPRD),INTENT(IN) :: PX -INTEGER(KIND=JPIM),INTENT(IN) :: KFLAG -REAL(KIND=JPRD),INTENT(OUT) :: PW -REAL(KIND=JPRD),INTENT(INOUT) :: PXN -REAL(KIND=JPRD),INTENT(OUT) :: PXMOD - -! ------------------------------------------------------------------ - -REAL(KIND=JPRD) :: ZDLX,ZDLK,ZDLLDN,ZDLXN,ZDLMOD - -INTEGER(KIND=JPIM), PARAMETER :: JPKD=KIND(PX) - -INTEGER(KIND=JPIM) :: JN, IK - -! ----------------------------------------------------------------- - -!* 1. NEWTON ITERATION STEP. -! ---------------------- - -ZDLX = PX - -ZDLK = 0.0_JPRD -IF( KODD==0 ) ZDLK=0.5_JPRD*PFN(0) -ZDLXN = 0.0_JPRD -ZDLLDN = 0.0_JPRD -IK=1 - -IF(KFLAG == 0)THEN - DO JN=2-KODD,KN,2 - ! normalised ordinary Legendre polynomial == \overbar{P_n}^0 - ZDLK = ZDLK + PFN(IK)*COS(REAL(JN,JPKD)*ZDLX) - ! normalised derivative == d/d\theta(\overbar{P_n}^0) - ZDLLDN = ZDLLDN - PFN(IK)*REAL(JN,JPKD)*SIN(REAL(JN,JPKD)*ZDLX) - IK=IK+1 - ENDDO - ! Newton method - ZDLMOD = -ZDLK/ZDLLDN - ZDLXN = ZDLX+ZDLMOD - PXN = ZDLXN - PXMOD = ZDLMOD -ENDIF - -! ------------------------------------------------------------------ - -!* 2. Computes weight. -! ---------------- - -IF(KFLAG == 1)THEN - DO JN=2-KODD,KN,2 - ! normalised derivative - ZDLLDN = ZDLLDN - PFN(IK)*REAL(JN,JPKD)*SIN(REAL(JN,JPKD)*ZDLX) - IK=IK+1 - ENDDO - PW = REAL(2*KN+1,JPKD)/ZDLLDN**2 -ENDIF - -! ------------------------------------------------------------------ - -END SUBROUTINE CPLEDN -END MODULE CPLEDN_MOD diff --git a/src/trans/gpu/internal/ectrans_version_mod.F90.in b/src/trans/gpu/internal/ectrans_version_mod.F90.in deleted file mode 100644 index 88cae2da2..000000000 --- a/src/trans/gpu/internal/ectrans_version_mod.F90.in +++ /dev/null @@ -1,47 +0,0 @@ -! (C) Copyright 2023- ECMWF. -! -! This software is licensed under the terms of the Apache Licence Version 2.0 -! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -! In applying this licence, ECMWF does not waive the privileges and immunities -! granted to it by virtue of its status as an intergovernmental organisation -! nor does it submit to any jurisdiction. -! - -MODULE ECTRANS_VERSION_MOD - -IMPLICIT NONE - -CONTAINS - - FUNCTION ECTRANS_VERSION_STR() - - !**** *ECTRANS_VERSION_STR* - Return ecTrans version as a string - - CHARACTER(LEN=LEN("@ectrans_VERSION_STR@")) :: ECTRANS_VERSION_STR - - ECTRANS_VERSION_STR = "@ectrans_VERSION_STR@" - END FUNCTION ECTRANS_VERSION_STR - - FUNCTION ECTRANS_VERSION_INT() - - !**** *ECTRANS_VERSION_INT* - Return ecTrans version as an integer - - USE PARKIND1 ,ONLY : JPIM - - INTEGER(KIND=JPIM) :: ECTRANS_VERSION_INT - - ECTRANS_VERSION_INT = 10000_JPIM * @ectrans_VERSION_MAJOR@ & - & + 100_JPIM * @ectrans_VERSION_MINOR@ & - & + 10_JPIM * @ectrans_VERSION_PATCH@ - END FUNCTION ECTRANS_VERSION_INT - - FUNCTION ECTRANS_GIT_SHA1() - - !**** *ECTRANS_GIT_SHA1* - Return the SHA-1 hash of the latest Git commit - - CHARACTER(LEN=LEN("@ectrans_GIT_SHA1@")) :: ECTRANS_GIT_SHA1 - - ECTRANS_GIT_SHA1 = "@ectrans_GIT_SHA1@" - END FUNCTION ECTRANS_GIT_SHA1 - -END MODULE ECTRANS_VERSION_MOD diff --git a/src/trans/gpu/internal/eq_regions_mod.F90 b/src/trans/gpu/internal/eq_regions_mod.F90 deleted file mode 100755 index 78e2af964..000000000 --- a/src/trans/gpu/internal/eq_regions_mod.F90 +++ /dev/null @@ -1,433 +0,0 @@ -! (C) Copyright 2006- ECMWF. -! (C) Copyright 2006- Meteo-France. -! -! This software is licensed under the terms of the Apache Licence Version 2.0 -! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -! In applying this licence, ECMWF does not waive the privileges and immunities -! granted to it by virtue of its status as an intergovernmental organisation -! nor does it submit to any jurisdiction. -! - -MODULE eq_regions_mod -! -! Purpose. -! -------- -! eq_regions_mod provides the code to perform a high level -! partitioning of the surface of a sphere into regions of -! equal area and small diameter. -! the type. -! -! Background. -! ----------- -! This Fortran version of eq_regions is a much cut down version of the -! "Recursive Zonal Equal Area (EQ) Sphere Partitioning Toolbox" of the -! same name developed by Paul Leopardi at the University of New South Wales. -! This version has been coded specifically for the case of partitioning the -! surface of a sphere or S^dim (where dim=2) as denoted in the original code. -! Only a subset of the original eq_regions package has been coded to determine -! the high level distribution of regions on a sphere, as the detailed -! distribution of grid points to each region is left to IFS software. -! This is required to take into account the spatial distribution of grid -! points in an IFS gaussian grid and provide an optimal (i.e. exact) -! distribution of grid points over regions. -! -! The following copyright notice for the eq_regions package is included from -! the original MatLab release. -! -! +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -! + Release 1.10 2005-06-26 + -! + + -! + Copyright (c) 2004, 2005, University of New South Wales + -! + + -! + Permission is hereby granted, free of charge, to any person obtaining + -! + a copy of this software and associated documentation files (the + -! + "Software"), to deal in the Software without restriction, including + -! + without limitation the rights to use, copy, modify, merge, publish, + -! + distribute, sublicense, and/or sell copies of the Software, and to + -! + permit persons to whom the Software is furnished to do so, subject to + -! + the following conditions: + -! + + -! + The above copyright notice and this permission notice shall be included + -! + in all copies or substantial portions of the Software. + -! + + -! + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + -! + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + -! + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + -! + IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + -! + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + -! + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + -! + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + -! + + -! +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -! -! Author. -! ------- -! George Mozdzynski *ECMWF* -! -! Modifications. -! -------------- -! Original : 2006-04-15 -! -!-------------------------------------------------------------------------------- -! -USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT - -IMPLICIT NONE - -SAVE - -PRIVATE - -PUBLIC eq_regions,l_regions_debug,n_regions_ns,n_regions_ew,n_regions,my_region_ns,my_region_ew -PUBLIC eq_regions_t, eq_regions_save, eq_regions_load, eq_regions_free - -real(kind=JPRBT) :: pi - -type eq_regions_t -logical :: l_regions_debug=.false. -integer(kind=jpim) :: n_regions_ns -integer(kind=jpim) :: n_regions_ew -integer(kind=jpim) :: my_region_ns -integer(kind=jpim) :: my_region_ew -integer(kind=jpim),pointer :: n_regions(:) => null () -end type eq_regions_t - -logical :: l_regions_debug=.false. -integer(kind=jpim) :: n_regions_ns -integer(kind=jpim) :: n_regions_ew -integer(kind=jpim) :: my_region_ns -integer(kind=jpim) :: my_region_ew -integer(kind=jpim),pointer :: n_regions(:) => null () - -CONTAINS - -subroutine eq_regions_save (yder) -type (eq_regions_t), intent (inout) :: yder - -yder%l_regions_debug = l_regions_debug -yder%n_regions_ns = n_regions_ns -yder%n_regions_ew = n_regions_ew -yder%my_region_ns = my_region_ns -yder%my_region_ew = my_region_ew -yder%n_regions => n_regions - -nullify (n_regions) - -end subroutine - -subroutine eq_regions_load (yder) -type (eq_regions_t), intent (inout) :: yder - -l_regions_debug = yder%l_regions_debug -n_regions_ns = yder%n_regions_ns -n_regions_ew = yder%n_regions_ew -my_region_ns = yder%my_region_ns -my_region_ew = yder%my_region_ew -n_regions => yder%n_regions - -nullify (yder%n_regions) - -end subroutine - -subroutine eq_regions_free (yder) -type (eq_regions_t), intent (inout) :: yder - -if (associated (yder%n_regions)) then - deallocate (yder%n_regions) - nullify (yder%n_regions) -endif - -end subroutine - -subroutine eq_regions(N) -! -! eq_regions uses the zonal equal area sphere partitioning algorithm to partition -! the surface of a sphere into N regions of equal area and small diameter. -! -IMPLICIT NONE -integer(kind=jpim),intent(in) :: N -integer(kind=jpim) :: n_collars,j -real(kind=JPRBT),allocatable :: r_regions(:) -real(kind=JPRBT) :: c_polar - -pi=2.0_JPRBT*asin(1.0_JPRBT) - -n_regions(:)=0 - -if( N == 1 )then - - ! - ! We have only one region, which must be the whole sphere. - ! - n_regions(1)=1 - n_regions_ns=1 - -else - - ! - ! Given N, determine c_polar - ! the colatitude of the North polar spherical cap. - ! - c_polar = polar_colat(N) - ! - ! Given N, determine the ideal angle for spherical collars. - ! Based on N, this ideal angle, and c_polar, - ! determine n_collars, the number of collars between the polar caps. - ! - n_collars = num_collars(N,c_polar,ideal_collar_angle(N)) - n_regions_ns=n_collars+2 - ! - ! Given N, c_polar and n_collars, determine r_regions, - ! a list of the ideal real number of regions in each collar, - ! plus the polar caps. - ! The number of elements is n_collars+2. - ! r_regions[1] is 1. - ! r_regions[n_collars+2] is 1. - ! The sum of r_regions is N. - allocate(r_regions(n_collars+2)) - call ideal_region_list(N,c_polar,n_collars,r_regions) - ! - ! Given N and r_regions, determine n_regions, a list of the natural number - ! of regions in each collar and the polar caps. - ! This list is as close as possible to r_regions. - ! The number of elements is n_collars+2. - ! n_regions[1] is 1. - ! n_regions[n_collars+2] is 1. - ! The sum of n_regions is N. - ! - call round_to_naturals(N,n_collars,r_regions) - deallocate(r_regions) - if( N /= sum(n_regions(:)) )then - write(*,'("eq_regions: N=",I10," sum(n_regions(:))=",I10)')N,sum(n_regions(:)) - call abor1('eq_regions: N /= sum(n_regions)') - endif - -endif - -if( l_regions_debug )then - write(*,'("eq_regions: N=",I6," n_regions_ns=",I4)') N,n_regions_ns - do j=1,n_regions_ns - write(*,'("eq_regions: n_regions(",I4,")=",I4)') j,n_regions(j) - enddo -endif -n_regions_ew=maxval(n_regions(:)) - -return -end subroutine eq_regions - -function num_collars(N,c_polar,a_ideal) result(num_c) -! -!NUM_COLLARS The number of collars between the polar caps -! -! Given N, an ideal angle, and c_polar, -! determine n_collars, the number of collars between the polar caps. -! -IMPLICIT NONE -integer(kind=jpim),intent(in) :: N -real(kind=JPRBT),intent(in) :: a_ideal,c_polar -integer(kind=jpim) :: num_c -logical enough -enough = (N > 2) .and. (a_ideal > 0) -if( enough )then - num_c = max(1,nint((pi-2.*c_polar)/a_ideal)) -else - num_c = 0 -endif -return -end function num_collars - -subroutine ideal_region_list(N,c_polar,n_collars,r_regions) -! -!IDEAL_REGION_LIST The ideal real number of regions in each zone -! -! List the ideal real number of regions in each collar, plus the polar caps. -! -! Given N, c_polar and n_collars, determine r_regions, a list of the ideal real -! number of regions in each collar, plus the polar caps. -! The number of elements is n_collars+2. -! r_regions[1] is 1. -! r_regions[n_collars+2] is 1. -! The sum of r_regions is N. -! -IMPLICIT NONE -integer(kind=jpim),intent(in) :: N,n_collars -real(kind=JPRBT),intent(in) :: c_polar -real(kind=JPRBT),intent(out) :: r_regions(n_collars+2) -integer(kind=jpim) :: collar_n -real(kind=JPRBT) :: ideal_region_area,ideal_collar_area -real(kind=JPRBT) :: a_fitting -r_regions(:)=0.0_JPRBT -r_regions(1) = 1.0_JPRBT -if( n_collars > 0 )then - ! - ! Based on n_collars and c_polar, determine a_fitting, - ! the collar angle such that n_collars collars fit between the polar caps. - ! - a_fitting = (pi-2.0_JPRBT*c_polar)/float(n_collars) - ideal_region_area = area_of_ideal_region(N) - do collar_n=1,n_collars - ideal_collar_area = area_of_collar(c_polar+(collar_n-1)*a_fitting, & - & c_polar+collar_n*a_fitting) - r_regions(1+collar_n) = ideal_collar_area / ideal_region_area - enddo -endif -r_regions(2+n_collars) = 1. -return -end subroutine ideal_region_list - -function ideal_collar_angle(N) result(ideal) -! -! IDEAL_COLLAR_ANGLE The ideal angle for spherical collars of an EQ partition -! -! IDEAL_COLLAR_ANGLE(N) sets ANGLE to the ideal angle for the -! spherical collars of an EQ partition of the unit sphere S^2 into N regions. -! -IMPLICIT NONE -integer(kind=jpim),intent(in) :: N -real(kind=JPRBT) :: ideal -ideal = area_of_ideal_region(N)**(0.5_JPRBT) -return -end function ideal_collar_angle - -subroutine round_to_naturals(N,n_collars,r_regions) -! -! ROUND_TO_NATURALS Round off a given list of numbers of regions -! -! Given N and r_regions, determine n_regions, a list of the natural number -! of regions in each collar and the polar caps. -! This list is as close as possible to r_regions, using rounding. -! The number of elements is n_collars+2. -! n_regions[1] is 1. -! n_regions[n_collars+2] is 1. -! The sum of n_regions is N. -! -IMPLICIT NONE -integer(kind=jpim),intent(in) :: N,n_collars -real(kind=JPRBT),intent(in) :: r_regions(n_collars+2) -integer(kind=jpim) :: zone_n -real(kind=JPRBT) :: discrepancy -n_regions(1:n_collars+2) = r_regions(:) -discrepancy = 0.0_JPRBT -do zone_n = 1,n_collars+2 - n_regions(zone_n) = nint(r_regions(zone_n)+discrepancy); - discrepancy = discrepancy+r_regions(zone_n)-float(n_regions(zone_n)); -enddo -return -end subroutine round_to_naturals - -function polar_colat(N) result(polar_c) -! -! Given N, determine the colatitude of the North polar spherical cap. -! -IMPLICIT NONE -integer(kind=jpim),intent(in) :: N -real(kind=JPRBT) :: area -real(kind=JPRBT) :: polar_c -if( N == 1 ) polar_c=pi -if( N == 2 ) polar_c=pi/2.0_JPRBT -if( N > 2 )then - area=area_of_ideal_region(N) - polar_c=sradius_of_cap(area) -endif -return -end function polar_colat - -function area_of_ideal_region(N) result(area) -! -! AREA_OF_IDEAL_REGION(N) sets AREA to be the area of one of N equal -! area regions on S^2, that is 1/N times AREA_OF_SPHERE. -! -IMPLICIT NONE -integer(kind=jpim),intent(in) :: N -real(kind=JPRBT) :: area_of_sphere -real(kind=JPRBT) :: area -area_of_sphere = (2.0_JPRBT*pi**1.5_JPRBT/gamma(1.5_JPRBT)) -area = area_of_sphere/float(N) -return -end function area_of_ideal_region - -function sradius_of_cap(area) result(sradius) -! -! SRADIUS_OF_CAP(AREA) returns the spherical radius of -! an S^2 spherical cap of area AREA. -! -IMPLICIT NONE -real(kind=JPRBT),intent(in) :: area -real(kind=JPRBT) :: sradius -sradius = 2.0_JPRBT*asin(sqrt(area/pi)/2.0_JPRBT) -return -end function sradius_of_cap - -function area_of_collar(a_top, a_bot) result(area) -! -! AREA_OF_COLLAR Area of spherical collar -! -! AREA_OF_COLLAR(A_TOP, A_BOT) sets AREA to be the area of an S^2 spherical -! collar specified by A_TOP, A_BOT, where A_TOP is top (smaller) spherical radius, -! A_BOT is bottom (larger) spherical radius. -! -IMPLICIT NONE -real(kind=JPRBT),intent(in) :: a_top,a_bot -real(kind=JPRBT) area -area = area_of_cap(a_bot) - area_of_cap(a_top) -return -end function area_of_collar - -function area_of_cap(s_cap) result(area) -! -! AREA_OF_CAP Area of spherical cap -! -! AREA_OF_CAP(S_CAP) sets AREA to be the area of an S^2 spherical -! cap of spherical radius S_CAP. -! -real(kind=JPRBT),intent(in) :: s_cap -real(kind=JPRBT) area -area = 4.0_JPRBT*pi * sin(s_cap/2.0_JPRBT)**2 -return -end function area_of_cap - -function gamma(x) result(gamma_res) -! -IMPLICIT NONE -real(kind=JPRBT),intent(in) :: x -real(kind=JPRBT) :: gamma_res -real(kind=JPRBT) :: p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,p10,p11,p12,p13 -real(kind=JPRBT) :: w,y -integer(kind=jpim) :: k,n -parameter (& -& p0 = 0.999999999999999990e+00_JPRBT,& -& p1 = -0.422784335098466784e+00_JPRBT,& -& p2 = -0.233093736421782878e+00_JPRBT,& -& p3 = 0.191091101387638410e+00_JPRBT,& -& p4 = -0.024552490005641278e+00_JPRBT,& -& p5 = -0.017645244547851414e+00_JPRBT,& -& p6 = 0.008023273027855346e+00_JPRBT) -parameter (& -& p7 = -0.000804329819255744e+00_JPRBT,& -& p8 = -0.000360837876648255e+00_JPRBT,& -& p9 = 0.000145596568617526e+00_JPRBT,& -& p10 = -0.000017545539395205e+00_JPRBT,& -& p11 = -0.000002591225267689e+00_JPRBT,& -& p12 = 0.000001337767384067e+00_JPRBT,& -& p13 = -0.000000199542863674e+00_JPRBT) -n = nint(x - 2) -w = x - (n + 2) -y = ((((((((((((p13 * w + p12) * w + p11) * w + p10) *& -& w + p9) * w + p8) * w + p7) * w + p6) * w + p5) *& -& w + p4) * w + p3) * w + p2) * w + p1) * w + p0 -if (n .gt. 0) then - w = x - 1 - do k = 2, n - w = w * (x - k) - end do -else - w = 1 - do k = 0, -n - 1 - y = y * (x + k) - end do -end if -gamma_res = w / y -return -end function gamma - -END MODULE eq_regions_mod diff --git a/src/trans/gpu/internal/field_split_mod.F90 b/src/trans/gpu/internal/field_split_mod.F90 deleted file mode 100755 index cb9664782..000000000 --- a/src/trans/gpu/internal/field_split_mod.F90 +++ /dev/null @@ -1,138 +0,0 @@ -! (C) Copyright 2001- ECMWF. -! (C) Copyright 2001- Meteo-France. -! -! This software is licensed under the terms of the Apache Licence Version 2.0 -! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -! In applying this licence, ECMWF does not waive the privileges and immunities -! granted to it by virtue of its status as an intergovernmental organisation -! nor does it submit to any jurisdiction. -! - -MODULE FIELD_SPLIT_MOD -CONTAINS -SUBROUTINE FIELD_SPLIT(KBLK,KF_GP,KKF_UV_G,KVSETUV,KVSETSC,& - & KSTUV_G,KENUV_G,KF_UV_G,KSTSC_G,KENSC_G,KF_SCALARS_G,& - & KSTUV,KENUV,KF_UV,KSTSC,KENSC,KF_SCALARS) - -!**** *FIELD_SPLIT* - Split fields - -! Purpose. -! -------- -! Split fields - -!** Interface. -! ---------- -! CALL FIELD_SPLIT(...) - -! Explicit arguments : -! -------------------- -! KBLK - block number -! KF_GP - total number of output gridpoint fields -! KKF_UV_G - global number of spectral u-v fields -! KVSETUV - IVSETUV from SHUFFLE -! KVSETSC - IVSETUV from SHUFFLE - -! All the following output arguments are quantities for THIS packet. -! KSTUV_G - -! KENUV_G - -! KF_UV_G - -! KSTSC_G - -! KENSC_G - -! KF_SCALARS_G - -! KSTUV - -! KENUV - -! KF_UV - -! KSTSC - -! KENSC - -! KF_SCALARS - - -! Externals. NONE -! ---------- - -! Author. -! ------- -! Mats Hamrud *ECMWF* - -! Modifications. -! -------------- -! Original : 01-01-03 - -! ------------------------------------------------------------------ -USE PARKIND1, ONLY: JPIM -USE TPM_GEN, ONLY: NPROMATR -USE TPM_DISTR, ONLY: MYSETV, NPRTRV -! - -IMPLICIT NONE - -! Declaration of arguments - -INTEGER(KIND=JPIM), INTENT(IN) :: KBLK,KF_GP,KKF_UV_G -INTEGER(KIND=JPIM), INTENT(IN) :: KVSETUV(:),KVSETSC(:) -INTEGER(KIND=JPIM), INTENT(OUT) :: KSTUV_G,KENUV_G,KF_UV_G,KSTSC_G,KENSC_G,KF_SCALARS_G -INTEGER(KIND=JPIM), INTENT(OUT) :: KSTUV,KENUV,KF_UV,KSTSC,KENSC,KF_SCALARS - -! Local variables - -INTEGER(KIND=JPIM) :: ISTF,IENF,J - -! ------------------------------------------------------------------ - -ISTF = (KBLK-1)*NPROMATR+1 -IENF = MIN(KBLK*NPROMATR,KF_GP) - -KSTUV_G = (KBLK-1)*NPROMATR/2+1 -KENUV_G = MIN(KBLK*NPROMATR/2,KKF_UV_G) -IF(ISTF > 2*KKF_UV_G) KSTUV_G = KENUV_G+1 -KF_UV_G = KENUV_G-KSTUV_G+1 -KSTSC_G = MAX(ISTF-2*KKF_UV_G,1) -KENSC_G = MAX(IENF-2*KKF_UV_G,0) -KF_SCALARS_G = KENSC_G-KSTSC_G+1 - -! Spectral fields distributed over fields - -IF(NPRTRV > 1) THEN - KF_UV = 0 - KSTUV = 1 - DO J=1,KSTUV_G-1 - IF(KVSETUV(J) == MYSETV) THEN - KSTUV = KSTUV+1 - ENDIF - ENDDO - KENUV = KSTUV-1 - DO J=KSTUV_G,KENUV_G - IF(KVSETUV(J) == MYSETV) THEN - KF_UV = KF_UV+1 - KENUV = KENUV+1 - ENDIF - ENDDO - KF_SCALARS = 0 - KSTSC = 1 - DO J=1,KSTSC_G-1 - IF(KVSETSC(J) == MYSETV) THEN - KSTSC =KSTSC+1 - ENDIF - ENDDO - KENSC = KSTSC-1 - DO J=KSTSC_G,KENSC_G - IF(KVSETSC(J) == MYSETV) THEN - KF_SCALARS = KF_SCALARS+1 - KENSC = KENSC+1 - ENDIF - ENDDO -ELSE - - ! Spectral fields not distributed over fields - - KF_UV = KF_UV_G - KSTUV = KSTUV_G - KENUV = KENUV_G - KF_SCALARS = KF_SCALARS_G - KSTSC = KSTSC_G - KENSC = KENSC_G -ENDIF - -! ------------------------------------------------------------------ - -END SUBROUTINE FIELD_SPLIT -END MODULE FIELD_SPLIT_MOD diff --git a/src/trans/gpu/internal/gawl_mod.F90 b/src/trans/gpu/internal/gawl_mod.F90 deleted file mode 100755 index a236638f4..000000000 --- a/src/trans/gpu/internal/gawl_mod.F90 +++ /dev/null @@ -1,117 +0,0 @@ -! (C) Copyright 1992- ECMWF. -! (C) Copyright 1992- Meteo-France. -! -! This software is licensed under the terms of the Apache Licence Version 2.0 -! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -! In applying this licence, ECMWF does not waive the privileges and immunities -! granted to it by virtue of its status as an intergovernmental organisation -! nor does it submit to any jurisdiction. -! - -MODULE GAWL_MOD -CONTAINS -SUBROUTINE GAWL(PFN,PL,PW,PEPS,KN,KITER,PMOD) - -!**** *GAWL * - Routine to perform the Newton loop - -! Purpose. -! -------- -! Find 0 of Legendre polynomial with Newton loop -!** Interface. -! ---------- -! *CALL* *GAWL(PFN,PL,PW,PEPS,KN,KITER,PMOD) - -! Explicit arguments : -! -------------------- -! PFN Fourier coefficients of series expansion -! for the ordinary Legendre polynomials (in) -! PL Gaussian latitude (inout) -! PW Gaussian weight (out) -! PEPS 0 of the machine (in) -! KN Truncation (in) -! KITER Number of iterations (out) -! PMOD Last modification (inout) - -! Implicit arguments : -! -------------------- -! None - -! Method. -! ------- -! Newton Loop. - -! Externals. -! ---------- -! CPLEDN - -! Reference. -! ---------- - -! ARPEGE Documentation vol.2, ch3. - -! Author. -! ------- -! Philippe Courtier *ECMWF* - -! Modifications. -! -------------- -! Original : 92-12-18 -! K. Yessad (Sep 2008): cleaning, improve comments. -! Nils Wedi + Mats Hamrud, 2009-02-05 revised following Swarztrauber, 2002 -! F. Vana 05-Mar-2015 Support for single precision -! ------------------------------------------------------------------ - -USE EC_PARKIND, ONLY: JPRD, JPIM -USE CPLEDN_MOD, ONLY: CPLEDN - -! ------------------------------------------------------------------ - -IMPLICIT NONE - -INTEGER(KIND=JPIM),INTENT(IN) :: KN -REAL(KIND=JPRD),INTENT(IN) :: PFN(0:KN/2) -REAL(KIND=JPRD),INTENT(INOUT) :: PL -REAL(KIND=JPRD),INTENT(OUT) :: PW -REAL(KIND=JPRD),INTENT(IN) :: PEPS -INTEGER(KIND=JPIM),INTENT(OUT) :: KITER -REAL(KIND=JPRD),INTENT(INOUT) :: PMOD - -! ------------------------------------------------------------------ - - -INTEGER(KIND=JPIM) :: IFLAG, ITEMAX, JTER, IODD -REAL(KIND=JPRD) :: ZW, ZX, ZXN - -! ------------------------------------------------------------------ - -!* 1. Initialization. -! --------------- - -ITEMAX = 20 -ZX = PL -IFLAG = 0 -IODD=MOD(KN,2) - -! ------------------------------------------------------------------ - -!* 2. Newton iteration. -! ----------------- - -DO JTER=1,ITEMAX+1 - KITER = JTER - CALL CPLEDN(KN,IODD,PFN,ZX,IFLAG,ZW,ZXN,PMOD) - ZX = ZXN - - IF(IFLAG == 1) EXIT - IF(ABS(PMOD) <= PEPS*1000._JPRD) IFLAG = 1 -ENDDO - -PL = ZXN -PW = ZW - -! ------------------------------------------------------------------ - -END SUBROUTINE GAWL -END MODULE GAWL_MOD - - diff --git a/src/trans/gpu/internal/myrecvset_mod.F90 b/src/trans/gpu/internal/myrecvset_mod.F90 deleted file mode 100755 index fd01109c5..000000000 --- a/src/trans/gpu/internal/myrecvset_mod.F90 +++ /dev/null @@ -1,83 +0,0 @@ -! (C) Copyright 2000- ECMWF. -! (C) Copyright 2000- Meteo-France. -! -! This software is licensed under the terms of the Apache Licence Version 2.0 -! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -! In applying this licence, ECMWF does not waive the privileges and immunities -! granted to it by virtue of its status as an intergovernmental organisation -! nor does it submit to any jurisdiction. -! - -MODULE MYRECVSET_MOD -CONTAINS -FUNCTION MYRECVSET(KSETS,KMYSET,KSET) - - -!**** *MYRECVSET* RETURNS SET NUMBER TO SEND TO - -! Purpose. -! -------- -! - -!** Interface. -! ---------- -! ISENDSET = MYRECVSET(KSETS,KMYSET,KSET) - -! Explicit arguments : -! -------------------- -! input: KSETS - -! Implicit arguments : NONE -! -------------------- -! Method. -! ------- - -! - -! Externals. -! ---------- -! NONE - -! Reference. -! ---------- -! ECMWF Research Department documentation of the IFS - -! Author. -! ------- -! Mats Hamrud *ECMWF* - -! Modifications. -! -------------- -! Original : 00-02-03 - -! ------------------------------------------------------------------ - -USE PARKIND1, ONLY: JPIM -USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS -! - -IMPLICIT NONE -INTEGER(KIND=JPIM) :: MYRECVSET -INTEGER(KIND=JPIM),INTENT(IN) :: KSETS,KMYSET,KSET - - -! ------------------------------------------------------------------ - -!* 1. Check input argument for validity -! --------------------------------- - -IF(KSETS < 1 .OR. KMYSET > KSETS .OR. KSET > KSETS-1) THEN - - CALL ABORT_TRANS(' MYRECVSET: INVALID ARGUMENT ') - -ELSE - -!* 2. Compute output parameters -! ------------------------- - - MYRECVSET = MOD(-KSET-1+KMYSET+KSETS,KSETS)+1 - -ENDIF - -END FUNCTION MYRECVSET -END MODULE MYRECVSET_MOD diff --git a/src/trans/gpu/internal/mysendset_mod.F90 b/src/trans/gpu/internal/mysendset_mod.F90 deleted file mode 100755 index 59be163b6..000000000 --- a/src/trans/gpu/internal/mysendset_mod.F90 +++ /dev/null @@ -1,80 +0,0 @@ -! (C) Copyright 2000- ECMWF. -! (C) Copyright 2000- Meteo-France. -! -! This software is licensed under the terms of the Apache Licence Version 2.0 -! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -! In applying this licence, ECMWF does not waive the privileges and immunities -! granted to it by virtue of its status as an intergovernmental organisation -! nor does it submit to any jurisdiction. -! - -MODULE MYSENDSET_MOD -CONTAINS -FUNCTION MYSENDSET(KSETS,KMYSET,KSET) - - -!**** *MYSENDSET* RETURNS SET NUMBER TO SEND TO - -! Purpose. -! -------- -! - -!** Interface. -! ---------- -! ISENDSET = MYSENDSET(KSETS,KMYSET,KSET) - -! Explicit arguments : -! -------------------- -! input: KSETS - -! Implicit arguments : NONE -! -------------------- -! Method. -! ------- - -! Externals. -! ---------- -! NONE - -! Reference. -! ---------- -! ECMWF Research Department documentation of the IFS - -! Author. -! ------- -! Mats Hamrud *ECMWF* - -! Modifications. -! -------------- -! Original : 00-02-03 - -! ------------------------------------------------------------------ - -USE PARKIND1, ONLY: JPIM -USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS - -IMPLICIT NONE -INTEGER(KIND=JPIM) :: MYSENDSET -INTEGER(KIND=JPIM),INTENT(IN) :: KSETS,KMYSET,KSET - - -! ------------------------------------------------------------------ - -!* 1. Check input argument for validity -! --------------------------------- - -IF(KSETS < 1 .OR. KMYSET > KSETS .OR. KSET > KSETS-1) THEN - - CALL ABORT_TRANS(' MYSENDSET: INVALID ARGUMENT ') - -ELSE - -!* 2. Compute output parameters -! ------------------------- - - MYSENDSET = MOD(KMYSET+KSET-1,KSETS)+1 - -ENDIF - -END FUNCTION MYSENDSET -END MODULE MYSENDSET_MOD diff --git a/src/trans/gpu/internal/pe2set_mod.F90 b/src/trans/gpu/internal/pe2set_mod.F90 deleted file mode 100755 index c430b7506..000000000 --- a/src/trans/gpu/internal/pe2set_mod.F90 +++ /dev/null @@ -1,119 +0,0 @@ -! (C) Copyright 1998- ECMWF. -! (C) Copyright 1998- Meteo-France. -! -! This software is licensed under the terms of the Apache Licence Version 2.0 -! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -! In applying this licence, ECMWF does not waive the privileges and immunities -! granted to it by virtue of its status as an intergovernmental organisation -! nor does it submit to any jurisdiction. -! - -MODULE PE2SET_MOD -CONTAINS -SUBROUTINE PE2SET(KPE,KPRGPNS,KPRGPEW,KPRTRW,KPRTRV) - -!**** *PE2SET* - Convert from PE number to set numbers - -! Purpose. -! -------- -! Convert from PE number to set numbers in both -! grid-point space and spectral space - -!** Interface. -! ---------- -! *CALL* *PE2SET(KPE,KPRGPNS,KPRGPEW,KPRTRW,KPRTRV) - -! Explicit arguments : -! -------------------- -! input: KPE - integer processor number -! in the range 1 .. NPROC -! output: KPRGPNS - integer A set number in grid space -! in the range 1 .. NPRGPNS -! KPRGPEW - integer B set number in grid space -! in the range 1 .. NPRGPEW -! KPRTRW - integer A set number in spectral space -! in the range 1 .. NPRTRW -! KPRTRV - integer B set number in spectral space -! in the range 1 .. NPRTRV - -! Implicit arguments : YOMMP parameters -! NPRGPNS,NPRGPEW,NPRTRW,NPRTRV,NPROC - -! -------------------- -! Method. -! ------- - -! PE allocation order is row oriented (e.g. NPRGPNS or NPRTRW = 4): - -! 1 2 3 4 -! 5 6 7 8 -! 9 10 11 12 -! 13 14 15 16 -! . . . . - -! Externals. -! ---------- -! NONE - -! Reference. -! ---------- -! ECMWF Research Department documentation of the IFS - -! Author. -! ------- -! David Dent *ECMWF* - -! Modifications. -! -------------- -! Original : 98-08-19 -! Revision : 98-10-13 row ordering -! ------------------------------------------------------------------ - -USE PARKIND1, ONLY: JPIM, JPRB -USE TPM_DISTR, ONLY: LEQ_REGIONS, NPRGPEW, NPROC, NPRTRV -USE EQ_REGIONS_MOD, ONLY: N_REGIONS, N_REGIONS_NS -USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS -! - -IMPLICIT NONE -INTEGER(KIND=JPIM),INTENT(IN) :: KPE -INTEGER(KIND=JPIM),INTENT(OUT) :: KPRGPNS,KPRGPEW,KPRTRW,KPRTRV - -INTEGER(KIND=JPIM) :: IPE,JA -! ------------------------------------------------------------------ - -!* 1. Check input argument for validity -! --------------------------------- - -IF(KPE <= 0.OR.KPE > NPROC) THEN - WRITE(*,'(A,2I8)') ' PE2SET INVALID ARGUMENT ',KPE,NPROC - CALL ABORT_TRANS(' PE2SET INVALID ARGUMENT ') - -ELSE - -!* 2. Compute output parameters -! ------------------------- - - IF( LEQ_REGIONS )THEN - KPRGPNS=1 - IPE=KPE - DO JA=1,N_REGIONS_NS - IF( IPE > N_REGIONS(JA) )THEN - IPE=IPE-N_REGIONS(JA) - KPRGPNS=KPRGPNS+1 - CYCLE - ENDIF - KPRGPEW=IPE - EXIT - ENDDO - ELSE - KPRGPEW=MOD(KPE-1,NPRGPEW)+1 - KPRGPNS=(KPE-1)/NPRGPEW+1 - ENDIF - KPRTRV =MOD(KPE-1,NPRTRV)+1 - KPRTRW =(KPE-1)/NPRTRV+1 - -ENDIF - -END SUBROUTINE PE2SET -END MODULE PE2SET_MOD diff --git a/src/trans/gpu/internal/set2pe_mod.F90 b/src/trans/gpu/internal/set2pe_mod.F90 deleted file mode 100755 index 380a27698..000000000 --- a/src/trans/gpu/internal/set2pe_mod.F90 +++ /dev/null @@ -1,130 +0,0 @@ -! (C) Copyright 1998- ECMWF. -! (C) Copyright 1998- Meteo-France. -! -! This software is licensed under the terms of the Apache Licence Version 2.0 -! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -! In applying this licence, ECMWF does not waive the privileges and immunities -! granted to it by virtue of its status as an intergovernmental organisation -! nor does it submit to any jurisdiction. -! - -MODULE SET2PE_MOD -CONTAINS -SUBROUTINE SET2PE(KPE,KPRGPNS,KPRGPEW,KPRTRW,KPRTRV) - - -!**** *SET2PE* - Convert from set numbers to PE number - -! Purpose. -! -------- -! Convert from set numbers in either grid-point space or spectral space -! to PE number - -!** Interface. -! ---------- -! *CALL* *SET2PE(KPRGPNS,KPRGPEW,KPRTRW,KPRTRV,KPE) - -! Explicit arguments : -! -------------------- - -! input : KPRGPNS - integer A set number in grid space -! in the range 1 .. NPRGPNS -! KPRGPEW - integer B set number in grid space -! in the range 1 .. NPRGPEW -! KPRTRW - integer A set number in spectral space -! in the range 1 .. NPRTRW -! KPRTRV - integer B set number in spectral space -! in the range 1 .. NPRTRV -! output: KPE - integer processor number -! in the range 1 .. NPROC - -! Normally, one pair of input set numbers will be set to zero -! SET2PE will compute KPE from the first pair if they are valid numbers. -! else from the other pair, - -! Implicit arguments : YOMMP parameters -! NPRGPNS,NPRGPEW,NPRTRW,NPRTRV,NPROC - -! -------------------- -! Method. -! ------- - -! Externals. -! ---------- -! NONE - -! Reference. -! ---------- -! ECMWF Research Department documentation of the IFS - -! Author. -! ------- -! David Dent *ECMWF* - -! Modifications. -! -------------- -! Original : 98-08-19 -! ------------------------------------------------------------------ - - -USE PARKIND1, ONLY: JPIM -USE TPM_DISTR, ONLY: LEQ_REGIONS, NPRGPEW, NPRGPNS, NPRTRV, NPRTRW -USE EQ_REGIONS_MOD , ONLY: N_REGIONS, N_REGIONS_NS -USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS -! - -IMPLICIT NONE -INTEGER(KIND=JPIM),INTENT(IN) :: KPRGPNS,KPRGPEW,KPRTRW,KPRTRV -INTEGER(KIND=JPIM),INTENT(OUT) :: KPE - -INTEGER(KIND=JPIM) :: IPE,JA -! ------------------------------------------------------------------ - -!* 1. Choose from input parameters -! ---------------------------- - -IF(KPRGPNS > 0.AND.KPRGPEW > 0) THEN - - IF( LEQ_REGIONS )THEN - IF( KPRGPNS > N_REGIONS_NS )THEN - WRITE(*,'(A,2I8)') ' SET2PE INVALID ARGUMENT ',KPRGPNS,N_REGIONS_NS - CALL ABOR1(' SET2PE INVALID ARGUMENT ') - ENDIF - IF( KPRGPEW > N_REGIONS(KPRGPNS) )THEN - WRITE(*,'(A,2I8)') ' SET2PE INVALID ARGUMENT ',KPRGPEW,N_REGIONS(KPRGPNS) - CALL ABOR1(' SET2PE INVALID ARGUMENT ') - ENDIF - KPE=0 - DO JA=1,KPRGPNS-1 - KPE=KPE+N_REGIONS(JA) - ENDDO - KPE=KPE+KPRGPEW - ELSE - IF(KPRGPNS <= NPRGPNS.AND.KPRGPEW <= NPRGPEW) THEN - -!* 2. Grid-space set values supplied -! ------------------------------ - - KPE=(KPRGPNS-1)*NPRGPEW + KPRGPEW - ELSE - WRITE(*,'(A,2I8)') ' SET2PE INVALID ARGUMENT ',KPRGPNS,KPRGPEW - CALL ABORT_TRANS(' SET2PE INVALID ARGUMENT ') - ENDIF - ENDIF - -ELSE - -!* 3. Spectral space set values supplied -! ---------------------------------- - - IF(KPRTRW <= NPRTRW.AND.KPRTRV <= NPRTRV) THEN - KPE=(KPRTRW-1)*NPRTRV + KPRTRV - ELSE - WRITE(*,'(A,2I8)') ' SET2PE INVALID ARGUMENT ',KPRTRW,KPRTRV - CALL ABORT_TRANS(' SET2PE INVALID ARGUMENT ') - ENDIF - -ENDIF - -END SUBROUTINE SET2PE -END MODULE SET2PE_MOD diff --git a/src/trans/gpu/internal/sugaw_mod.F90 b/src/trans/gpu/internal/sugaw_mod.F90 deleted file mode 100755 index c5752fc97..000000000 --- a/src/trans/gpu/internal/sugaw_mod.F90 +++ /dev/null @@ -1,429 +0,0 @@ -! (C) Copyright 1987- ECMWF. -! (C) Copyright 1987- Meteo-France. -! -! This software is licensed under the terms of the Apache Licence Version 2.0 -! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -! In applying this licence, ECMWF does not waive the privileges and immunities -! granted to it by virtue of its status as an intergovernmental organisation -! nor does it submit to any jurisdiction. -! - -MODULE SUGAW_MOD -CONTAINS -SUBROUTINE SUGAW(KDGL,KM,KN,PL,PW,PANM,PFN) - -USE PARKIND1, ONLY: JPRD, JPIM -USE PARKIND2, ONLY: JPRH -USE TPM_CONSTANTS, ONLY: RA -USE TPM_GEN, ONLY: NOUT -USE GAWL_MOD, ONLY: GAWL -USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS -USE SUPOLF_MOD, ONLY: SUPOLF -USE TPM_POL, ONLY: DDI - -!**** *SUGAW * - Routine to initialize the Gaussian -! abcissa and the associated weights - -! Purpose. -! -------- -! Initialize arrays PL, and PW (quadrature abscissas and weights) -!** Interface. -! ---------- -! *CALL* *SUGAW(KN,PFN,PL,PW) * - -! Explicit arguments : -! -------------------- -! INPUT: -! KDGL : Number of Gauss abscissas -! KM : Polynomial order m -! KN : Polynomial degree n -! PFN : Fourier coefficients of series expansion for -! the ordinary Legendre polynomials -! OUTPUT: -! PL (KN) : abscissas of Gauss -! PW (KN) : Weights of the Gaussian integration - -! PL (i) is the abscissa i starting from the northern pole, it is -! the cosine of the colatitude of the corresponding row of the collocation -! grid. - -! Implicit arguments : -! -------------------- -! None - -! Method. -! ------- -! See documentation - -! Externals. -! ---------- - -! Reference. -! ---------- - -! S.L. Belousov, Tables of normalized associated Legendre Polynomials, Pergamon Press (1962) -! P.N. Swarztrauber, On computing the points and weights for Gauss-Legendre quadrature, -! SIAM J. Sci. Comput. Vol. 24 (3) pp. 945-954 (2002) - -! Author. -! ------- -! Mats Hamrud and Philippe Courtier *ECMWF* - -! Modifications. -! -------------- -! Original : 87-10-15 -! Michel Rochas : 90-08-30 -! Philippe Courtier : 92-12-19 Multitasking -! Ryad El Khatib : 94-04-20 Remove unused comdecks pardim and yomdim -! Mats Hamrud : 94-08-12 Printing level -! K. Yessad (Sep 2008): cleaning, improve comments. -! Nils Wedi + Mats Hamrud, 2009-02-05 revised following Swarztrauber, 2002 -! F. Vana 05-Mar-2015 Support for single precision -! ------------------------------------------------------------------ - -IMPLICIT NONE - -INTEGER(KIND=JPIM),INTENT(IN) :: KDGL -INTEGER(KIND=JPIM),INTENT(IN) :: KM -INTEGER(KIND=JPIM),INTENT(IN) :: KN - -REAL(KIND=JPRD) ,INTENT(IN) :: PANM - -REAL(KIND=JPRD),INTENT(OUT) :: PW(KDGL) -REAL(KIND=JPRD),INTENT(OUT) :: PL(KDGL) - -REAL(KIND=JPRD) ,OPTIONAL, INTENT(IN) :: PFN(0:KDGL,0:KDGL) - -! ------------------------------------------------------------------ - -REAL(KIND=JPRD) :: ZLI(KDGL),ZT(KDGL),ZFN(0:KDGL/2),ZL(KDGL) -REAL(KIND=JPRD) :: ZREG(KDGL),ZMOD(KDGL),ZM(KDGL),ZRR(KDGL) -INTEGER(KIND=JPIM) :: ITER(KDGL) - -INTEGER(KIND=JPIM) :: IALLOW, INS2, ISYM, JGL, IK, IODD, I, IMAX - -REAL(KIND=JPRD) :: Z, ZEPS, Z0, ZPI - -! computations in extended precision for alternative root finding -! which also works for associated polynomials (m>0) -REAL(KIND=JPRH) :: ZLK, ZLK1, ZLLDN, ZANM -REAL(KIND=JPRH) :: ZTHETA, ZTHETA0, ZX, ZX0, ZDX0, ZH, ZPIH, ZS0 -REAL(KIND=JPRH) :: ZK1, ZK2, ZK3, ZK4 -REAL(KIND=JPRH) :: ZF1, ZF2, ZF3 -REAL(KIND=JPRH) :: FP, FQ, FP1, FQ1 -REAL(KIND=JPRH) :: X, ZXOLD, ZBIG, ZEPSH - -INTEGER(KIND=JPIM) :: ISTEPMAX - -LOGICAL :: LLP2, LLREF, LLOLD - -REAL(KIND=JPRD) :: ZDDPOL(0:KN) - -INTEGER(KIND=JPIM), PARAMETER :: JPKD=KIND(ZLK) - -FP(X) = 1._JPRH-X**2 -FQ(X) = REAL(KN*(KN+1),JPRH)-REAL(KM**2,JPRH)/(1._JPRH-X**2) -FP1(X) = -2._JPRH*X -FQ1(X) = -2._JPRH*X*REAL(KM**2,JPRH)/SQRT(1._JPRH-X**2) - -! ------------------------------------------------------------------ -! ------------------------------------------------------------------ -!* 1. Initialization + root + weight computation -! ------------------------------------------ - -LLP2 = .FALSE. -INS2 = KDGL/2 - -LLOLD=( KM == 0 .AND. KN == KDGL ).AND.PRESENT(PFN) - - -CALL GSTATS(1650,0) - -ZEPS = EPSILON(Z) -ZEPSH = EPSILON(X) - -ZBIG = SQRT(HUGE(X)) - -!* 1.1 Find the roots of the ordinary -! Legendre polynomial of degree KN using an analytical first guess -! and then refine to machine precision via Newton's method -! in double precision following Swarztrauber (2002) - -! Nils Comment: in principle the else case could also be used for this but -! this is slightly more accurate and consistent with the past - -IF( LLOLD ) THEN - - ZPI = 2.0_JPRD*ASIN(1.0_JPRD) - IODD=MOD(KDGL,2) - IK=IODD - DO JGL=IODD,KDGL,2 - ZFN(IK)=PFN(KDGL,JGL) - IK=IK+1 - ENDDO - - DO JGL=1,INS2 - Z = REAL(4*JGL-1,JPRD)*ZPI/REAL(4*KN+2,JPRD) - ! analytic initial guess for cos(theta) (same quality as RK below) - ! ZX = 1._JPRD-REAL(KN-1,JPRD)/REAL(8*KN*KN*KN,JPRD)-(1._JPRD/REAL(384*KN*KN*KN*KN))*(39._JPRD-28._JPRD/SIN(Z)**2) - ! PL(JGL) = ACOS(ZX*COS(Z)) - ZL(JGL) = Z+1.0_JPRD/(TAN(Z)*REAL(8*KN**2,JPRD)) - ZREG(JGL) = COS(Z) - ZLI(JGL) = COS(ZL(JGL)) - ENDDO - - ! refine PL here via Newton's method - - !$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(JGL) - DO JGL=INS2,1,-1 - CALL GAWL(ZFN,ZL(JGL),PW(JGL),ZEPS,KN,ITER(JGL),ZMOD(JGL)) - ENDDO - !$OMP END PARALLEL DO - - ! convert to physical latitude space PMU - !DIR$ IVDEP - !OCL NOVREC - DO JGL=1,INS2 - PL(JGL) = COS(ZL(JGL)) - ENDDO - -ELSE - -!* 1.2 Find the roots of the associated -! Legendre polynomial of degree KN and the associated Gaussian weights -! using a Runge-Kutta 4 integration of the Pruefer transformed Sturm-Liouville problem -! (Tygert (J. Comput. Phys. 2008) and Glaser et al., SIAM J. SCI. COMPUT. Vol. 29 (4) 1420-1438) -! - - ISTEPMAX=10 - - ZANM = REAL(PANM, JPKD) - ZPIH = 2.0_JPRH*ASIN(1.0_JPRH) - - ZX0 = 0._JPRH - Z0 = 0._JPRD - - ! first guess starting point - IF( MOD(KN-KM,2) == 0 ) THEN - ! even, extremum at X == 0 - ZTHETA0 = 0._JPRH - ZH = -0.5_JPRH*ZPIH/REAL(ISTEPMAX,JPRH) - ELSE - ! odd, root at X == 0 - ZTHETA0 = 0.5_JPRH*ZPIH - ZX0 = 0._JPRH - ZH = -ZPIH/REAL(ISTEPMAX,JPRH) - ENDIF - - ZX = ZX0 - ZTHETA = ZTHETA0 - - ZF1 = SQRT(FQ(ZX)/FP(ZX)) - ZF2 = FQ1(ZX)/FQ(ZX) - ZF3 = FP1(ZX)/FP(ZX) - - ! Formula (81) in Tygert - ZDX0=-1._JPRH/(ZF1 + 0.25_JPRH*(ZF2 + ZF3)*SIN(2._JPRH*ZTHETA)) - - ! loop over all roots - LLREF=.TRUE. - DO JGL=INS2,1,-1 - - ! runge-kutta - DGL:DO IK=1,ISTEPMAX - - ZK1 = ZDX0 - ZTHETA = ZTHETA + 0.5_JPRH*ZH - - ZX = ZX0 + 0.5_JPRH*ZH*ZK1 - - ZF1 = SQRT(FQ(ZX)/FP(ZX)) - ZF2 = FQ1(ZX)/FQ(ZX) - ZF3 = FP1(ZX)/FP(ZX) - - ZK2 = -1._JPRH/(ZF1 + 0.25_JPRH*(ZF2 + ZF3)*SIN(2._JPRH*ZTHETA)) - ZX = ZX0 + 0.5_JPRH*ZH*ZK2 - - ZF1 = SQRT(FQ(ZX)/FP(ZX)) - ZF2 = FQ1(ZX)/FQ(ZX) - ZF3 = FP1(ZX)/FP(ZX) - - ZK3 = -1._JPRH/(ZF1 + 0.25_JPRH*(ZF2 + ZF3)*SIN(2._JPRH*ZTHETA)) - ZTHETA = ZTHETA + 0.5_JPRH*ZH - ZX = ZX0 + ZH*ZK3 - - ZF1 = SQRT(FQ(ZX)/FP(ZX)) - ZF2 = FQ1(ZX)/FQ(ZX) - ZF3 = FP1(ZX)/FP(ZX) - - ZK4 = -1._JPRH/(ZF1 + 0.25_JPRH*(ZF2 + ZF3)*SIN(2._JPRH*ZTHETA)) - ZX = ZX0 + (1._JPRH/6._JPRH)*ZH*(ZK1+2._JPRH*ZK2+2._JPRH*ZK3+ZK4) - ZXOLD = ZX0 - - ZX0 = ZX - - IF( .NOT.ZX==ZX ) THEN - WRITE(NOUT,*) 'invoke overflow ...ZX ',KM, KN, JGL - ZX = ZXOLD - ZX0 = ZXOLD - EXIT DGL - ENDIF - - ZF1 = SQRT(FQ(ZX)/FP(ZX)) - ZF2 = FQ1(ZX)/FQ(ZX) - ZF3 = FP1(ZX)/FP(ZX) - - ZDX0 = -1._JPRH/(ZF1 + 0.25_JPRH*(ZF2 + ZF3)*SIN(2._JPRH*ZTHETA)) - - ENDDO DGL - -! ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -! Everything from here until <> is to refine the -! root and compute the starting point for the next root search -! ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - - ! should not happen, but does if loss of accuracy in supolf occurs (useful for debugging) - IF( JGL < INS2 ) LLREF = PW(JGL+1).GT.ZEPSH - - IF( LLREF ) THEN - - ! chosen for speed/accuracy compromise - IMAX=3 - LOOP: DO I=1,IMAX - ! supol fast - ZS0 = ACOS(ZX0) - CALL SUPOLF(KM,KN,REAL(ZX0,JPRD),ZDDPOL) - ZLK=REAL(ZDDPOL(KN),JPKD) - ZLK1= REAL(ZDDPOL(KN-1),JPKD) - ZLLDN= -(ZANM*ZLK1-DDI(KN)*COS(ZS0)*ZLK)/SIN(ZS0) - - IF( ABS(ZLLDN) > ZEPSH ) THEN - ! single Newton refinement in theta - ZS0 = ZS0 - ZLK/ZLLDN - ZX = COS(ZS0) - ELSE - ! do nothing - ZX = ZX0 - ENDIF - - IF( ABS(ZX-ZX0) > 1000._JPRD*ZEPS ) THEN - ZX0 = ZX - ELSE - EXIT LOOP - ENDIF - ENDDO LOOP - - ! recompute for accuracy weights - CALL SUPOLF(KM,KN,REAL(ZX,JPRD),ZDDPOL) - ! option f in Schwarztrauber to compute the weights - ZS0 = ACOS(ZX) - ZLK=REAL(ZDDPOL(KN),JPKD) - ZLK1= REAL(ZDDPOL(KN-1),JPKD) - ZLLDN= -(ZANM*ZLK1-DDI(KN)*COS(ZS0)*ZLK)/SIN(ZS0) - - PW(JGL) = REAL(REAL(2*KN+1,JPRH)/ZLLDN**2,JPRD) - - ! catch overflow, should never happen - IF( .NOT.(PW(JGL)==PW(JGL)) ) THEN - WRITE(NOUT,*) 'invoke overflow ...PW ',KM, KN, JGL - PW(JGL) = 0.0_JPRD - ENDIF - - ELSE - ! should never happen ... - WRITE(NOUT,*) 'Refinement not possible ... PW set to 0',KM, KN, JGL - PW(JGL) = 0.0_JPRD - ENDIF - - ZX0 = ZX - PL(JGL) = REAL(ZX0,JPRD) - - ! catch overflow, should never happen - IF( .NOT.(PW(JGL)==PW(JGL)) ) THEN - WRITE(NOUT,*) 'invoke overflow ...PW ',KM, KN, JGL - PW(JGL) = 0.0_JPRD - ENDIF - -! ++++++++++++++++++++++++++++++++++++++++++++++++ -! <<<< END REFINEMENT >>>> -! ++++++++++++++++++++++++++++++++++++++++++++++++ - - ZF1 = SQRT(FQ(ZX0)/FP(ZX0)) - ZF2 = FQ1(ZX0)/FQ(ZX0) - ZF3 = FP1(ZX0)/FP(ZX0) - - ! continue to next root with refined ZX,ZR as initial condition - ZH = -ZPIH/REAL(ISTEPMAX,JPRH) - ZTHETA = 0.5_JPRH*ZPIH - ZDX0 = -1._JPRH/(ZF1 + 0.25_JPRH*(ZF2 + ZF3)*SIN(2._JPRH*ZTHETA)) - ENDDO - -ENDIF - -CALL GSTATS(1650,1) -! ------------------------------------------------------------------ - -!DIR$ IVDEP -!OCL NOVREC -DO JGL=1,KDGL/2 - ISYM = KDGL-JGL+1 - PL(ISYM) = -PL(JGL) - PW(ISYM) = PW(JGL) -ENDDO - -! ------------------------------------------------------------------ - -!* 3. Diagnostics. -! ------------ - -IF( LLOLD ) THEN - - IF(LLP2)THEN - DO JGL=1,INS2 - ZM(JGL) = (ACOS(PL(JGL))-ACOS(ZLI(JGL)))*RA - ZRR(JGL) = (ACOS(PL(JGL))-ACOS(ZREG(JGL)))*RA - ZT(JGL) = ACOS(PL(JGL))*180._JPRD/ZPI - ENDDO - ENDIF - - IALLOW = 20 - DO JGL=1,INS2 - - IF(LLP2)THEN - WRITE(UNIT=NOUT,FMT=& - &'('' M ='',I4,'' ROW ='',I4,'' ITERATIONS='',I4,'' ROOT='',F30.20,& - &'' WEIGHT='',F30.20,'' MODIF :'',E8.2)')KM,JGL,ITER(JGL),PL(JGL)& - &,PW(JGL),PL(JGL)-ZLI(JGL) - WRITE(UNIT=NOUT,FMT=& - &'(10X,'' LAST INC. : '',E8.2,'' MODIF IN M : '',F10.3,& - &'' FROM THE REGULAR GRID : '',F10.3,'' COLAT '',F10.3)')& - &ZMOD(JGL),ZM(JGL),ZRR(JGL),ZT(JGL) - ENDIF - - IF(ITER(JGL) > IALLOW)THEN - WRITE(UNIT=NOUT,FMT='('' CONVERGENCE FAILED IN SUGAW '')') - WRITE(UNIT=NOUT,FMT='('' ALLOWED : '',I4,''& - &NECESSARY : '',& - &I4)')IALLOW,ITER(JGL) - CALL ABORT_TRANS(' FAILURE IN SUGAW ') - ENDIF - - ENDDO - -ELSE - - IF(LLP2)THEN - DO JGL=1,INS2 - WRITE(UNIT=NOUT,FMT=& - &'('' M ='',I4,'' ROW ='',I4,'' ITERATIONS='',I4,'' ROOT='',F30.20,& - &'' WEIGHT='',F30.20,'' COLAT '',F10.3)')KM,JGL,0,PL(JGL),PW(JGL),& - & ACOS(PL(JGL))*180._JPRD/ZPIH - ENDDO - ENDIF - -ENDIF - -! ------------------------------------------------------------------ - -END SUBROUTINE SUGAW -END MODULE SUGAW_MOD diff --git a/src/trans/gpu/internal/sump_trans0_mod.F90 b/src/trans/gpu/internal/sump_trans0_mod.F90 deleted file mode 100755 index aa8b94926..000000000 --- a/src/trans/gpu/internal/sump_trans0_mod.F90 +++ /dev/null @@ -1,111 +0,0 @@ -! (C) Copyright 2000- ECMWF. -! (C) Copyright 2000- Meteo-France. -! -! This software is licensed under the terms of the Apache Licence Version 2.0 -! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -! In applying this licence, ECMWF does not waive the privileges and immunities -! granted to it by virtue of its status as an intergovernmental organisation -! nor does it submit to any jurisdiction. -! - -MODULE SUMP_TRANS0_MOD -CONTAINS -SUBROUTINE SUMP_TRANS0 - -! Set up distributed environment for the transform package (part 0) - -USE PARKIND1, ONLY: JPIM -USE MPL_MODULE, ONLY: MPL_GROUPS_CREATE, MPL_MYRANK, MPL_NPROC -USE TPM_GEN, ONLY: NOUT, LMPOFF, NPRINTLEV -USE TPM_DISTR, ONLY: LEQ_REGIONS, MTAGDISTGP, MTAGDISTSP, MTAGGL, MTAGLETR, MTAGLG, MTAGLM, & - & MTAGML, MTAGPART, MYSETV, MYSETW, NPRCIDS, NPRGPEW, NPRGPNS, NPRTRNS, & - & NPRTRV, NPRTRW, MYPROC, NPROC -USE EQ_REGIONS_MOD, ONLY: EQ_REGIONS, MY_REGION_EW, MY_REGION_NS, N_REGIONS, N_REGIONS_EW, & - & N_REGIONS_NS -USE PE2SET_MOD, ONLY: PE2SET -USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS - -IMPLICIT NONE - -LOGICAL :: LLP1,LLP2 -INTEGER(KIND=JPIM) :: IPROC,JJ - -! ------------------------------------------------------------------ - -LLP1 = NPRINTLEV>0 -LLP2 = NPRINTLEV>1 -IF(LLP1) WRITE(NOUT,*) '=== ENTER ROUTINE SUMP_TRANS0 ===' - - -NPROC = NPRGPNS*NPRGPEW -NPRTRNS = NPRTRW -IF(MOD(NPROC,NPRTRW) /= 0 .OR. NPRTRW > NPROC) THEN - CALL ABORT_TRANS('SUMP_TRANS0: NPROC INCONSISTENT WITH NPRTRW') -ENDIF -NPRTRV = NPROC/NPRTRW -IF(LLP1) WRITE(NOUT,*)'NPROC =',NPROC,' NPRGPNS=',NPRGPNS,' NPRGPEW=',& - & NPRGPEW,' NPRTRW=',NPRTRW,' NPRTRV=',NPRTRV - -IF(NPROC > 1 ) THEN - IPROC = MPL_NPROC() - IF(IPROC /= NPROC) THEN - WRITE(NOUT,*) 'SUMP_TRANS0: NPROC=',NPROC,' BUT MPL_NPROC RETURNS',& - & IPROC - CALL ABORT_TRANS('SUMP_TRANS0: NPROC INCONSISTENT WITH MPL_NPROC') - ENDIF - MYPROC = MPL_MYRANK() -ELSE - MYPROC = 1 -ENDIF - -IF (MYPROC > NPROC) THEN - CALL ABORT_TRANS('SUMP_TRANS0: INCONSISTENCY IN NUMBER OF PROCESSORS USED') -ENDIF - -IF( LEQ_REGIONS )THEN - ALLOCATE(N_REGIONS(NPROC+2)) - N_REGIONS(:)=0 - CALL EQ_REGIONS(NPROC) -ELSE - N_REGIONS_NS=NPRGPNS - ALLOCATE(N_REGIONS(N_REGIONS_NS)) - N_REGIONS(:)=NPRGPEW - N_REGIONS_EW=NPRGPEW -ENDIF -CALL PE2SET(MYPROC,MY_REGION_NS,MY_REGION_EW,MYSETW,MYSETV) -IF(LLP1) WRITE(NOUT,*)'MYPROC=',MYPROC,'MY_REGION_NS =',MY_REGION_NS,& - & ' MY_REGION_EW=',MY_REGION_EW,' MYSETW=',MYSETW,' MYSETV=',MYSETV - - -ALLOCATE(NPRCIDS(NPROC)) -IF(LLP2)WRITE(NOUT,9) 'NPRCIDS ',SIZE(NPRCIDS ),SHAPE(NPRCIDS ) -DO JJ=1,NPROC - NPRCIDS(JJ) = JJ -ENDDO - -! Message passing tags - -MTAGLETR = 18000 -MTAGML = 19000 -MTAGLG = 20000 -MTAGPART = 21000 -MTAGDISTSP = 22000 -MTAGGL = 23000 -MTAGLM = 24000 -MTAGDISTGP = 25000 - -! Create communicators for MPI groups - -IF (.NOT.LMPOFF) THEN - CALL MPL_GROUPS_CREATE(NPRTRW, NPRTRV) -ENDIF - -! Setup labels for timing package (gstats) - -! CF ifs/utility GSTATS_OUTPUT_IFS - -! ------------------------------------------------------------------ -9 FORMAT(1X,'ARRAY ',A10,' ALLOCATED ',8I8) - -END SUBROUTINE SUMP_TRANS0 -END MODULE SUMP_TRANS0_MOD diff --git a/src/trans/gpu/internal/sump_trans_mod.F90 b/src/trans/gpu/internal/sump_trans_mod.F90 index 074013786..a0f2260b4 100755 --- a/src/trans/gpu/internal/sump_trans_mod.F90 +++ b/src/trans/gpu/internal/sump_trans_mod.F90 @@ -19,17 +19,21 @@ SUBROUTINE SUMP_TRANS ! Modifications : ! P.Marguinaud : 11-Sep-2012 : Fix twice allocated pointer -USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT, JPRD -USE TPM_GEN, ONLY: NOUT, NPRINTLEV -USE TPM_DIM, ONLY: R -USE TPM_GEOMETRY, ONLY: G -USE TPM_DISTR, ONLY: D, LEQ_REGIONS, MYSETW, NPRTRNS, NPRTRW, NPROC, MYPROC -USE SUMPLATF_MOD, ONLY: SUMPLATF -USE SUMPLAT_MOD, ONLY: SUMPLAT -USE SUSTAONL_MOD, ONLY: SUSTAONL -USE MYSENDSET_MOD, ONLY: MYSENDSET -USE MYRECVSET_MOD, ONLY: MYRECVSET -USE EQ_REGIONS_MOD, ONLY: MY_REGION_NS, MY_REGION_EW, N_REGIONS, N_REGIONS_EW, N_REGIONS_NS +USE EC_PARKIND ,ONLY : JPIM ,JPRD + +USE TPM_GEN ,ONLY : NOUT, NPRINTLEV +USE TPM_DIM ,ONLY : R +USE TPM_GEOMETRY ,ONLY : G +USE TPM_DISTR ,ONLY : D, LEQ_REGIONS, MYSETW, NPRTRNS, NPRTRW, NPROC, MYPROC + +USE SUMPLATF_MOD ,ONLY : SUMPLATF +USE SUMPLAT_MOD ,ONLY : SUMPLAT +USE SUSTAONL_MOD ,ONLY : SUSTAONL +USE MYSENDSET_MOD ,ONLY : MYSENDSET +USE MYRECVSET_MOD ,ONLY : MYRECVSET +USE EQ_REGIONS_MOD ,ONLY : MY_REGION_NS, MY_REGION_EW, & + & N_REGIONS, N_REGIONS_EW, N_REGIONS_NS + ! IMPLICIT NONE @@ -39,9 +43,8 @@ SUBROUTINE SUMP_TRANS INTEGER(KIND=JPIM) :: IGPTOT,IMEDIAP,IRESTM,JA,JB,IOFF,OFFSET1,OFFSET2,KMLOC,KM INTEGER(KIND=JPIM),ALLOCATABLE :: IGPTOTL(:,:) -REAL(KIND=JPRBT),ALLOCATABLE :: ZDUM(:) -REAL(KIND=JPRBT) :: ZMEDIAP -REAL(KIND=JPRD) :: ZTIME0,ZTIME1,ZTIME2 +REAL(KIND=JPRD),ALLOCATABLE :: ZDUM(:) +REAL(KIND=JPRD) :: ZMEDIAP LOGICAL :: LLP1,LLP2 diff --git a/src/trans/gpu/internal/sump_trans_preleg_mod.F90 b/src/trans/gpu/internal/sump_trans_preleg_mod.F90 deleted file mode 100755 index 0b342f6db..000000000 --- a/src/trans/gpu/internal/sump_trans_preleg_mod.F90 +++ /dev/null @@ -1,146 +0,0 @@ -! (C) Copyright 2000- ECMWF. -! (C) Copyright 2000- Meteo-France. -! -! This software is licensed under the terms of the Apache Licence Version 2.0 -! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -! In applying this licence, ECMWF does not waive the privileges and immunities -! granted to it by virtue of its status as an intergovernmental organisation -! nor does it submit to any jurisdiction. -! - -MODULE SUMP_TRANS_PRELEG_MOD -CONTAINS -SUBROUTINE SUMP_TRANS_PRELEG - -! Set up distributed environment for the transform package (part 1) - -USE PARKIND1, ONLY: JPIM -USE TPM_GEN, ONLY: NOUT, NPRINTLEV -USE TPM_DIM, ONLY: R -USE TPM_DISTR, ONLY: D, NPRTRW, NPRTRV, MYSETW -USE SUWAVEDI_MOD, ONLY: SUWAVEDI -! - -IMPLICIT NONE - -INTEGER(KIND=JPIM) :: JW,JV,JJ,JM,JMLOC,ILATPP,IRESTL,IMLOC,IDT,INM,ILAST - -INTEGER(KIND=JPIM) :: IMYMS(R%NSMAX+1),INUMTPP(NPRTRW) -INTEGER(KIND=JPIM) :: IDUMI1,IDUMI2,IDUMI3 -INTEGER(KIND=JPIM) :: IDUM2(0:R%NSMAX), IDUM3(NPRTRW+1), IDUM4(R%NSMAX+1) - -LOGICAL :: LLP1,LLP2 - -! ------------------------------------------------------------------ - -IF(.NOT.D%LGRIDONLY) THEN - -LLP1 = NPRINTLEV>0 -LLP2 = NPRINTLEV>1 -IF(LLP1) WRITE(NOUT,*) '=== ENTER ROUTINE SUMP_TRANS_PRELEG ===' - -!* 1. Initialize partitioning of wave numbers to PEs ! -! ---------------------------------------------- - - ALLOCATE(D%NASM0(0:R%NSMAX)) - IF(LLP2)WRITE(NOUT,9) 'D%NASM0 ',SIZE(D%NASM0 ),SHAPE(D%NASM0 ) - ALLOCATE(D%NATM0(0:R%NTMAX)) - IF(LLP2)WRITE(NOUT,9) 'D%NATM0 ',SIZE(D%NATM0 ),SHAPE(D%NATM0 ) - ALLOCATE(D%NUMPP(NPRTRW)) - IF(LLP2)WRITE(NOUT,9) 'D%NUMPP ',SIZE(D%NUMPP ),SHAPE(D%NUMPP ) - ALLOCATE(D%NPOSSP(NPRTRW+1)) - IF(LLP2)WRITE(NOUT,9) 'D%NPOSSP',SIZE(D%NPOSSP ),SHAPE(D%NPOSSP ) - ALLOCATE(D%NPROCM(0:R%NSMAX)) - IF(LLP2)WRITE(NOUT,9) 'D%NPROCM',SIZE(D%NPROCM ),SHAPE(D%NPROCM ) - ALLOCATE(D%NPTRMS(NPRTRW)) - IF(LLP2)WRITE(NOUT,9) 'D%NPTRMS ',SIZE(D%NPTRMS ),SHAPE(D%NPTRMS ) - ALLOCATE(D%NALLMS(R%NSMAX+1)) - IF(LLP2)WRITE(NOUT,9) 'D%NALLMS ',SIZE(D%NALLMS ),SHAPE(D%NALLMS ) - ALLOCATE(D%NDIM0G(0:R%NSMAX)) - IF(LLP2)WRITE(NOUT,9) 'D%NDIM0G ',SIZE(D%NDIM0G ),SHAPE(D%NDIM0G ) - - CALL SUWAVEDI(R%NSMAX,R%NTMAX,NPRTRW,MYSETW,& - &D%NASM0,D%NSPOLEGL,D%NPROCM,D%NUMPP,& - &D%NSPEC,D%NSPEC2,D%NSPEC2MX,D%NPOSSP,IMYMS,& - &D%NPTRMS,D%NALLMS,D%NDIM0G) - CALL SUWAVEDI(R%NTMAX,R%NTMAX,NPRTRW,MYSETW,& - &KASM0=D%NATM0,KUMPP=INUMTPP,KSPEC2=D%NTPEC2) - - D%NUMP = D%NUMPP (MYSETW) - ALLOCATE(D%MYMS(D%NUMP)) - IF(LLP2)WRITE(NOUT,9) 'D%MYMS ',SIZE(D%MYMS ),SHAPE(D%MYMS ) - D%MYMS(:) = IMYMS(1:D%NUMP) - D%NUMTP = INUMTPP(MYSETW) - ALLOCATE(D%NLATLS(NPRTRW,NPRTRV)) - IF(LLP2)WRITE(NOUT,9) 'D%NLATLS',SIZE(D%NLATLS ),SHAPE(D%NLATLS ) - ALLOCATE(D%NLATLE(NPRTRW,NPRTRV)) - IF(LLP2)WRITE(NOUT,9) 'D%NLATLE',SIZE(D%NLATLE ),SHAPE(D%NLATLE ) - - D%NLATLS(:,:) = 999999 - D%NLATLE(:,:) = -1 - - ILATPP = R%NDGNH/NPRTRW - IRESTL = R%NDGNH-NPRTRW*ILATPP - DO JW=1,NPRTRW - IF (JW > IRESTL) THEN - D%NLATLS(JW,1) = IRESTL*(ILATPP+1)+(JW-IRESTL-1)*ILATPP+1 - D%NLATLE(JW,1) = D%NLATLS(JW,1)+ILATPP-1 - ELSE - D%NLATLS(JW,1) = (JW-1)*(ILATPP+1)+1 - D%NLATLE(JW,1) = D%NLATLS(JW,1)+ILATPP - ENDIF - ENDDO - ILAST=0 - DO JW=1,NPRTRW - ILATPP = (D%NLATLE(JW,1)-D%NLATLS(JW,1)+1)/NPRTRV - IRESTL = (D%NLATLE(JW,1)-D%NLATLS(JW,1)+1)-NPRTRV*ILATPP - DO JV=1,NPRTRV - IF (JV > IRESTL) THEN - D%NLATLS(JW,JV) = IRESTL*(ILATPP+1)+(JV-IRESTL-1)*ILATPP+1+ILAST - D%NLATLE(JW,JV) = D%NLATLS(JW,JV)+ILATPP-1 - ELSE - D%NLATLS(JW,JV) = (JV-1)*(ILATPP+1)+1+ILAST - D%NLATLE(JW,JV) = D%NLATLS(JW,JV)+ILATPP - ENDIF - ENDDO - ILAST=D%NLATLE(JW,NPRTRV) - ENDDO - - IF (LLP1) THEN - DO JW=1,NPRTRW - DO JV=1,NPRTRV - WRITE(NOUT,'(" JW=",I6," JV=",I6," D%NLATLS=",I6," D%NLATLE=",I6)')& - & JW,JV,D%NLATLS(JW,JV),D%NLATLE(JW,JV) - ENDDO - ENDDO - ENDIF - - ALLOCATE(D%NPMT(0:R%NSMAX)) - IF(LLP2)WRITE(NOUT,9) 'D%NPMT ',SIZE(D%NPMT ),SHAPE(D%NPMT ) - ALLOCATE(D%NPMS(0:R%NSMAX)) - IF(LLP2)WRITE(NOUT,9) 'D%NPMS ',SIZE(D%NPMS ),SHAPE(D%NPMS ) - ALLOCATE(D%NPMG(0:R%NSMAX)) - IF(LLP2)WRITE(NOUT,9) 'D%NPMG ',SIZE(D%NPMG ),SHAPE(D%NPMG ) - IDT = R%NTMAX-R%NSMAX - INM = 0 - DO JMLOC=1,D%NUMP - IMLOC = D%MYMS(JMLOC) - D%NPMT(IMLOC) = INM - D%NPMS(IMLOC) = INM+IDT - INM = INM+R%NTMAX+2-IMLOC - ENDDO - INM = 0 - DO JM=0,R%NSMAX - D%NPMG(JM) = INM - INM = INM+R%NTMAX+2-JM - ENDDO - - D%NLEI3D = (R%NLEI3-1)/NPRTRW+1 - -ENDIF - -! ------------------------------------------------------------------ -9 FORMAT(1X,'ARRAY ',A10,' ALLOCATED ',8I8) - -END SUBROUTINE SUMP_TRANS_PRELEG -END MODULE SUMP_TRANS_PRELEG_MOD diff --git a/src/trans/gpu/internal/sumplat_mod.F90 b/src/trans/gpu/internal/sumplat_mod.F90 deleted file mode 100755 index 4b444d333..000000000 --- a/src/trans/gpu/internal/sumplat_mod.F90 +++ /dev/null @@ -1,254 +0,0 @@ -! (C) Copyright 1995- ECMWF. -! (C) Copyright 1995- Meteo-France. -! -! This software is licensed under the terms of the Apache Licence Version 2.0 -! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -! In applying this licence, ECMWF does not waive the privileges and immunities -! granted to it by virtue of its status as an intergovernmental organisation -! nor does it submit to any jurisdiction. -! - -MODULE SUMPLAT_MOD -CONTAINS -SUBROUTINE SUMPLAT(KDGL,KPROC,KPROCA,KMYSETA,LDSPLIT,LDEQ_REGIONS,& - & KFRSTLAT,KLSTLAT,KFRSTLOFF,KPTRLAT,& - & KPTRFRSTLAT,KPTRLSTLAT,KPTRFLOFF,& - & PWEIGHT,LDWEIGHTED_DISTR,PMEDIAP,KPROCAGP,& - & KMEDIAP,KRESTM,LDSPLITLAT,KMYPROC,KLOEN) - -!**** *SUMPLAT * - Initialize gridpoint distrbution in N-S direction - -! Purpose. -! -------- - - -!** Interface. -! ---------- -! *CALL* *SUMPLAT * - -! Explicit arguments - input : -! -------------------- -! KDGL -last latitude -! KPROC -total number of processors -! KPROCA -number of processors in A direction -! KMYSETA -process number in A direction -! LDSPLIT -true for latitudes shared between sets -! LDEQ_REGIONS -true if eq_regions partitioning -! PWEIGHT -weight per grid-point if weighted distribution -! LDWEIGHTED_DISTR -true if weighted distribution - -! Explicit arguments - output: -! -------------------- -! PMEDIAP -mean weight per PE if weighted distribution -! KMEDIAP -mean number of grid points per PE -! KPROCAGP -number of grid points per A set -! KRESTM -number of PEs with one extra point -! KFRSTLAT -first latitude row on processor -! KLSTLAT -last latitude row on processor -! KFRSTLOFF -offset for first latitude in set -! KPTRLAT -pointer to start of latitude -! KPTRFRSTLAT-pointer to first latitude -! KPTRLSTLAT -pointer to last latitude -! KPTRFLOFF -offset for pointer to first latitude -! LDSPLITLAT -true for latitudes which are split - -! Implicit arguments : -! -------------------- - - -! Method. -! ------- -! See documentation - -! Externals. SUMPLATB and SUEMPLATB. -! ---------- - -! Reference. -! ---------- -! ECMWF Research Department documentation of the IFS - -! Author. -! ------- -! MPP Group *ECMWF* - -! Modifications. -! -------------- -! Original : 95-10-01 -! David Dent:97-06-02 parameters KFRSTLAT etc added -! JF. Estrade:97-11-13 Adaptation to ALADIN case -! J.Boutahar: 98-07-06 phasing with CY19 -! Modified 98-08-10 by K. YESSAD: removal of LRPOLE option + cleanings -! (correct computation of extrapolar latitudes for KPROCL). -! Modified 98-12-07 by K. YESSAD and C. FISCHER: cleaning. -! - merge old sumplat.F and suemplat.F -! - gather 'lelam' code and 'not lelam' code. -! - clean (useless duplication of variables, non doctor features). -! - remodularise according to lelam/not lelam -! -> lelam features in new routine suemplatb.F, -! not lelam features in new routine sumplatb.F -! ------------------------------------------------------------------ - -USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT -USE TPM_GEOMETRY, ONLY: G -USE TPM_DISTR, ONLY: MYPROC -USE SUMPLATB_MOD, ONLY: SUMPLATB -USE SUMPLATBEQ_MOD, ONLY: SUMPLATBEQ -USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS -! - -IMPLICIT NONE - - -! * DUMMY: -REAL(KIND=JPRBT),INTENT(OUT) :: PMEDIAP -INTEGER(KIND=JPIM),INTENT(OUT) :: KMEDIAP -INTEGER(KIND=JPIM),INTENT(OUT) :: KRESTM -INTEGER(KIND=JPIM),INTENT(IN) :: KDGL -INTEGER(KIND=JPIM),INTENT(IN) :: KPROC -INTEGER(KIND=JPIM),INTENT(IN) :: KPROCA -INTEGER(KIND=JPIM),INTENT(IN) :: KMYSETA -REAL(KIND=JPRBT),INTENT(IN) :: PWEIGHT(:) -LOGICAL,INTENT(INOUT) :: LDWEIGHTED_DISTR -INTEGER(KIND=JPIM),INTENT(OUT) :: KFRSTLAT(:) -INTEGER(KIND=JPIM),INTENT(OUT) :: KLSTLAT(:) -INTEGER(KIND=JPIM),INTENT(OUT) :: KFRSTLOFF -INTEGER(KIND=JPIM),INTENT(OUT) :: KPTRLAT(:) -INTEGER(KIND=JPIM),INTENT(OUT) :: KPTRFRSTLAT(:) -INTEGER(KIND=JPIM),INTENT(OUT) :: KPTRLSTLAT(:) -INTEGER(KIND=JPIM),INTENT(OUT) :: KPTRFLOFF -INTEGER(KIND=JPIM),INTENT(OUT) :: KPROCAGP(KPROCA) -LOGICAL,INTENT(IN) :: LDSPLIT -LOGICAL,INTENT(IN) :: LDEQ_REGIONS -LOGICAL,INTENT(OUT) :: LDSPLITLAT(:) -INTEGER(KIND=JPIM),INTENT(IN) :: KMYPROC -INTEGER(KIND=JPIM),INTENT(IN) :: KLOEN(KDGL) - -! * LOCAL: -! === END OF INTERFACE BLOCK === -INTEGER(KIND=JPIM) :: INDIC(KPROCA),ILAST(KPROCA) - -! LOCAL INTEGER SCALARS -INTEGER(KIND=JPIM) :: IPTRLATITUDE, JA, JGL - -LOGICAL :: LLFOURIER -LOGICAL :: LLDEBUG=.FALSE. - -! ----------------------------------------------------------------- - -!* 1. CODE DEPENDING ON 'LELAM': COMPUTATION OF -! KMEDIAP, KRESTM, INDIC, ILAST. -! ----------------------------------------- -INDIC(:)=0 -ILAST(:)=0 - -IF(LDWEIGHTED_DISTR.AND..NOT.LDEQ_REGIONS)THEN - CALL ABORT_TRANS ('SUMPLAT: LDWEIGHTED_DISTR=T AND LDEQ_REGIONS=F NOT SUPPORTED') -ENDIF - -IF( LDEQ_REGIONS )THEN - CALL SUMPLATBEQ(1,KDGL,KPROC,KPROCA,KLOEN,LDSPLIT,LDEQ_REGIONS,& - &PWEIGHT,LDWEIGHTED_DISTR,PMEDIAP,KPROCAGP,& - &KMEDIAP,KRESTM,INDIC,ILAST) -ELSE - LLFOURIER=.FALSE. - CALL SUMPLATB(1,KDGL,KPROCA,KLOEN,LDSPLIT,LLFOURIER,& - &KMEDIAP,KRESTM,INDIC,ILAST) -ENDIF - -! ----------------------------------------------------------------- - -!* 2. CODE NOT DEPENDING ON 'LELAM': COMPUTATION OF -! KFRSTLAT TO LDSPLITLAT. -! --------------------------------------------- - - -! * Computation of first and last latitude of processor sets -! ----------- in grid-point-space ----------------------- - -IF(KMYPROC==1.AND.LLDEBUG)THEN - WRITE(0,'("")') - WRITE(0,'("SUMPLAT_MOD:LDWEIGHTED_DISTR=",L1)')LDWEIGHTED_DISTR - WRITE(0,'("")') - DO JA=1,KPROCA - WRITE(0,'("SUMPLAT_MOD: JA=",I5," ILAST=",I5," INDIC=",I5)')& - &JA,ILAST(JA),INDIC(JA) - ENDDO - WRITE(0,'("")') - IF( LDEQ_REGIONS .AND. LDSPLIT )THEN - DO JA=1,KPROCA - WRITE(0,'("SUMPLAT_MOD: JA=",I5," KPROCAGP=",I12)')& - &JA,KPROCAGP(JA) - ENDDO - WRITE(0,'("")') - ENDIF -ENDIF - -KFRSTLAT(1) = 1 -KLSTLAT(KPROCA) = KDGL -DO JA=1,KPROCA-1 - IF ((.NOT. LDSPLIT) .OR. INDIC(JA) == 0) THEN - KFRSTLAT(JA+1) = ILAST(JA) + 1 - KLSTLAT(JA) = ILAST(JA) - ELSE - KFRSTLAT(JA+1) = INDIC(JA) - KLSTLAT(JA) = INDIC(JA) - ENDIF -ENDDO -KFRSTLOFF=KFRSTLAT(KMYSETA)-1 - -! * Initialise following data structures:- -! NPTRLAT (pointer to the start of each latitude) -! LSPLITLAT (TRUE if latitude is split over two A sets) -! NPTRFRSTLAT (pointer to the first latitude of each A set) -! NPTRLSTLAT (pointer to the last latitude of each A set) - -DO JGL=1,KDGL - KPTRLAT (JGL)=-999 - LDSPLITLAT(JGL)=.FALSE. -ENDDO -IPTRLATITUDE=0 -DO JA=1,KPROCA - DO JGL=KFRSTLAT(JA),KLSTLAT(JA) - IPTRLATITUDE=IPTRLATITUDE+1 - LDSPLITLAT(JGL)=.TRUE. - IF( KPTRLAT(JGL) == -999 )THEN - KPTRLAT(JGL)=IPTRLATITUDE - LDSPLITLAT(JGL)=.FALSE. - ENDIF - ENDDO -ENDDO -DO JA=1,KPROCA - IF( LDSPLITLAT(KFRSTLAT(JA)) .AND. JA /= 1)THEN - KPTRFRSTLAT(JA)=KPTRLAT(KFRSTLAT(JA))+1 - ELSE - KPTRFRSTLAT(JA)=KPTRLAT(KFRSTLAT(JA)) - ENDIF - IF( LDSPLITLAT(KLSTLAT(JA)) .AND. JA == KPROCA)THEN - KPTRLSTLAT(JA)=KPTRLAT(KLSTLAT(JA))+1 - ELSE - KPTRLSTLAT(JA)=KPTRLAT(KLSTLAT(JA)) - ENDIF -ENDDO -KPTRFLOFF=KPTRFRSTLAT(KMYSETA)-1 - -IF(KMYPROC==1.AND.LLDEBUG)THEN - DO JGL=1,KDGL - WRITE(0,'("SUMPLAT_MOD: JGL=",I5," KPTRLAT=",I5," LDSPLITLAT=",L4)')& - & JGL,KPTRLAT(JGL),LDSPLITLAT(JGL) - ENDDO - DO JA=1,KPROCA - WRITE(0,'("SUMPLAT_MOD: JA=",I5," KFRSTLAT=",I5," KLSTLAT=",I5,& - & " KPTRFRSTLAT=",I5," KPTRLSTLAT=",I5," KLSTLAT-KFRSTLAT=",I5,& - & " SUM(G%NLOEN(KFRSTLAT:KLSTLAT))=",I10)')& - & JA,KFRSTLAT(JA),KLSTLAT(JA),KPTRFRSTLAT(JA),KPTRLSTLAT(JA),& - & KLSTLAT(JA)-KFRSTLAT(JA),SUM(G%NLOEN(KFRSTLAT(JA):KLSTLAT(JA))) - ENDDO -ENDIF - -! ------------------------------------------------------------------ - -END SUBROUTINE SUMPLAT -END MODULE SUMPLAT_MOD - - - diff --git a/src/trans/gpu/internal/sumplatb_mod.F90 b/src/trans/gpu/internal/sumplatb_mod.F90 deleted file mode 100755 index ab2954f0c..000000000 --- a/src/trans/gpu/internal/sumplatb_mod.F90 +++ /dev/null @@ -1,224 +0,0 @@ -! (C) Copyright 1998- ECMWF. -! (C) Copyright 1998- Meteo-France. -! -! This software is licensed under the terms of the Apache Licence Version 2.0 -! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -! In applying this licence, ECMWF does not waive the privileges and immunities -! granted to it by virtue of its status as an intergovernmental organisation -! nor does it submit to any jurisdiction. -! - -MODULE SUMPLATB_MOD -CONTAINS -SUBROUTINE SUMPLATB(KDGSA,KDGL,KPROCA,KLOENG,LDSPLIT,LDFOURIER,& - & KMEDIAP,KRESTM,KINDIC,KLAST) - -!**** *SUMPLATB * - Routine to initialize parallel environment - -! Purpose. -! -------- - - -!** Interface. -! ---------- -! *CALL* *SUMPLATB * - -! Explicit arguments - input : -! -------------------- -! KDGSA -first latitude (grid-space) -! (may be different from NDGSAG) -! KDGL -last latitude -! KPROCA -number of processors in A direction -! KLOENG -actual number of longitudes per latitude. -! LDSPLIT -true for latitudes shared between sets -! LDFOURIER -true for fourier space partitioning - -! Explicit arguments - output: -! -------------------- -! KMEDIAP -mean number of grid points per PE -! KRESTM -number of PEs with one extra point -! KINDIC -intermediate quantity for 'sumplat' -! KLAST -intermediate quantity for 'sumplat' - -! Implicit arguments : -! -------------------- - - -! Method. -! ------- -! See documentation - -! Externals. NONE. -! ---------- - -! Reference. -! ---------- -! ECMWF Research Department documentation of the IFS - -! Author. -! ------- -! K. YESSAD (after old version of sumplat.F). - -! Modifications. -! -------------- -! Original : 98-12-07 -! G. Mozdzynski (August 2012): rewrite of fourier latitude distribution -! ------------------------------------------------------------------ - - -USE PARKIND_ECTRANS, ONLY: JPIM, JPIB, JPRBT -!USE TPM_DISTR - -IMPLICIT NONE - - -! * DUMMY: -INTEGER(KIND=JPIM),INTENT(IN) :: KDGSA -INTEGER(KIND=JPIM),INTENT(IN) :: KDGL -INTEGER(KIND=JPIM),INTENT(IN) :: KPROCA -INTEGER(KIND=JPIM),INTENT(IN) :: KLOENG(KDGSA:KDGL) -LOGICAL,INTENT(IN) :: LDSPLIT -LOGICAL,INTENT(IN) :: LDFOURIER -INTEGER(KIND=JPIM),INTENT(OUT) :: KMEDIAP -INTEGER(KIND=JPIM),INTENT(OUT) :: KRESTM -INTEGER(KIND=JPIM),INTENT(OUT) :: KINDIC(KPROCA) -INTEGER(KIND=JPIM),INTENT(OUT) :: KLAST(KPROCA) - -! * LOCAL: -INTEGER(KIND=JPIB) :: ICOST(KDGSA:KDGL) -INTEGER(KIND=JPIM) :: ILATS(KPROCA) - -! LOCAL INTEGER SCALARS -INTEGER(KIND=JPIM) :: ICOMP, IGL, JA, JGL, ILAST, IREST, IA -INTEGER(KIND=JPIM) :: ITOT_TOP, ITOT_BOT, IGL_TOP, IGL_BOT -INTEGER(KIND=JPIB) :: IMEDIA,ITOT -REAL(KIND=JPRBT) :: ZLG -LOGICAL :: LLDONE,LLSIMPLE - -! ----------------------------------------------------------------- - -!* 1. COMPUTATION OF KMEDIAP, KRESTM, KINDIC, KLAST. -! ---------------------------------------------- - -! * Computation of KMEDIAP and KRESTM. - -IF( LDFOURIER )THEN - -! DO JGL=1,KDGL -! ZLG=LOG(FLOAT(KLOENG(JGL))) -! ICOST(JGL)=KLOENG(JGL)*ZLG*SQRT(ZLG) -! ENDDO - - DO JGL=1,KDGL - ICOST(JGL)=KLOENG(JGL) - ENDDO - -ELSE - - DO JGL=1,KDGL - ICOST(JGL)=KLOENG(JGL) - ENDDO - -ENDIF - -IMEDIA = SUM(ICOST(KDGSA:KDGL)) -KMEDIAP = IMEDIA / KPROCA -KRESTM = IMEDIA - KMEDIAP * KPROCA -IF (KRESTM > 0) KMEDIAP = KMEDIAP + 1 - -! * Computation of intermediate quantities KINDIC and KLAST - -KINDIC(:)=0 -KLAST(:)=0 - -IF (LDSPLIT) THEN - - IREST = 0 - ILAST =0 - DO JA=1,KPROCA - IF (JA <= KRESTM .OR. KRESTM == 0) THEN - ICOMP = KMEDIAP - ELSE - ICOMP = KMEDIAP - 1 - ENDIF - ITOT = IREST - IGL = ILAST+1 - DO JGL=IGL,KDGL - ILAST = JGL - IF(ITOT+ICOST(JGL) < ICOMP) THEN - ITOT = ITOT+ICOST(JGL) - ELSEIF(ITOT+ICOST(JGL) == ICOMP) THEN - IREST = 0 - KLAST(JA) = JGL - KINDIC(JA) = 0 - EXIT - ELSE - IREST = ICOST(JGL) -(ICOMP-ITOT) - KLAST(JA) = JGL - KINDIC(JA) = JGL - EXIT - ENDIF - ENDDO - ENDDO - -ELSE - - ITOT_TOP=0 - ITOT_BOT=0 - IGL_TOP=1 - IGL_BOT=KDGL - DO JA=1,(KPROCA-1)/2+1 - IF( JA /= KPROCA/2+1 )THEN - LLDONE=.TRUE. - DO WHILE ( LLDONE ) - IF( ITOT_TOP+ICOST(IGL_TOP) < KMEDIAP )THEN - KLAST(JA)=IGL_TOP - ITOT_TOP=ITOT_TOP+ICOST(IGL_TOP) - IGL_TOP=IGL_TOP+1 - ELSE - ITOT_TOP=ITOT_TOP-KMEDIAP - LLDONE=.FALSE. - ENDIF - ENDDO - KLAST(KPROCA-JA+1)=IGL_BOT - LLDONE=.TRUE. - DO WHILE ( LLDONE ) - IF( ITOT_BOT+ICOST(IGL_BOT) < KMEDIAP )THEN - ITOT_BOT=ITOT_BOT+ICOST(IGL_BOT) - IGL_BOT=IGL_BOT-1 - ELSE - ITOT_BOT=ITOT_BOT-KMEDIAP - LLDONE=.FALSE. - ENDIF - ENDDO - ELSE - KLAST(JA)=IGL_BOT - ENDIF - ENDDO - - LLSIMPLE=.FALSE. - DO JA=1,KPROCA - IF( KLAST(JA)==0 )THEN - LLSIMPLE=.TRUE. - EXIT - ENDIF - ENDDO - IF( LLSIMPLE )THEN -! WRITE(0,'("SUMPLATB_MOD: REVERTING TO SIMPLE LATITUDE DISTRIBUTION")') - ILATS(:)=0 - IA=0 - DO JGL=1,KDGL - IA=IA+1 - ILATS(IA)=ILATS(IA)+1 - IF( IA==KPROCA ) IA=0 - ENDDO - KLAST(1)=ILATS(1) - DO JA=2,KPROCA - KLAST(JA)=KLAST(JA-1)+ILATS(JA) - ENDDO - ENDIF - -ENDIF - -END SUBROUTINE SUMPLATB -END MODULE SUMPLATB_MOD diff --git a/src/trans/gpu/internal/sumplatbeq_mod.F90 b/src/trans/gpu/internal/sumplatbeq_mod.F90 deleted file mode 100755 index 88703bce0..000000000 --- a/src/trans/gpu/internal/sumplatbeq_mod.F90 +++ /dev/null @@ -1,288 +0,0 @@ -! (C) Copyright 2006- ECMWF. -! (C) Copyright 2006- Meteo-France. -! -! This software is licensed under the terms of the Apache Licence Version 2.0 -! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -! In applying this licence, ECMWF does not waive the privileges and immunities -! granted to it by virtue of its status as an intergovernmental organisation -! nor does it submit to any jurisdiction. -! - -MODULE SUMPLATBEQ_MOD -CONTAINS -SUBROUTINE SUMPLATBEQ(KDGSA,KDGL,KPROC,KPROCA,KLOENG,LDSPLIT,LDEQ_REGIONS,& - &PWEIGHT,LDWEIGHTED_DISTR,PMEDIAP,KPROCAGP,& - &KMEDIAP,KRESTM,KINDIC,KLAST) - -!**** *SUMPLATBEQ * - Routine to initialize parallel environment -! (latitude partitioning for LEQ_REGIONS=T) - -! Purpose. -! -------- - - -!** Interface. -! ---------- -! *CALL* *SUMPLATBEQ * - -! Explicit arguments - input : -! -------------------- -! KDGSA -first latitude (grid-space) -! (may be different from NDGSAG) -! KDGL -last latitude -! KPROC -total number of processors -! KPROCA -number of processors in A direction -! KLOENG -actual number of longitudes per latitude. -! LDSPLIT -true for latitudes shared between sets -! LDEQ_REGIONS -true if eq_regions partitioning -! PWEIGHT -weight per grid-point if weighted distribution -! LDWEIGHTED_DISTR -true if weighted distribution - -! Explicit arguments - output: -! -------------------- -! PMEDIAP -mean weight per PE if weighted distribution -! KMEDIAP -mean number of grid points per PE -! KPROCAGP -number of grid points per A set -! KRESTM -number of PEs with one extra point -! KINDIC -intermediate quantity for 'sumplat' -! KLAST -intermediate quantity for 'sumplat' - -! Implicit arguments : -! -------------------- - - -! Method. -! ------- -! See documentation - -! Externals. NONE. -! ---------- - -! Reference. -! ---------- -! ECMWF Research Department documentation of the IFS - -! Author. -! ------- -! G. Mozdzynski - -! Modifications. -! -------------- -! Original : April 2006 -! ------------------------------------------------------------------ - - -USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT -USE TPM_DISTR, ONLY: MYPROC -USE EQ_REGIONS_MOD, ONLY: N_REGIONS -USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS -! - -IMPLICIT NONE - - -! * DUMMY: -INTEGER(KIND=JPIM),INTENT(IN) :: KDGSA -INTEGER(KIND=JPIM),INTENT(IN) :: KDGL -INTEGER(KIND=JPIM),INTENT(IN) :: KPROC -INTEGER(KIND=JPIM),INTENT(IN) :: KPROCA -INTEGER(KIND=JPIM),INTENT(IN) :: KLOENG(KDGSA:KDGL) -REAL(KIND=JPRBT), INTENT(IN) :: PWEIGHT(:) -LOGICAL,INTENT(IN) :: LDSPLIT -LOGICAL,INTENT(IN) :: LDEQ_REGIONS -LOGICAL,INTENT(INOUT) :: LDWEIGHTED_DISTR -REAL(KIND=JPRBT), INTENT(OUT) :: PMEDIAP -INTEGER(KIND=JPIM),INTENT(OUT) :: KMEDIAP -INTEGER(KIND=JPIM),INTENT(OUT) :: KRESTM -INTEGER(KIND=JPIM),INTENT(OUT) :: KINDIC(KPROCA) -INTEGER(KIND=JPIM),INTENT(OUT) :: KLAST(KPROCA) -INTEGER(KIND=JPIM),INTENT(OUT) :: KPROCAGP(KPROCA) - -! * LOCAL: - -! LOCAL INTEGER SCALARS -INTEGER(KIND=JPIM) :: ICOMP, IGL, IMAXI, IMEDIA, IMEDIAP, ITOT, JA, JB, IA, JGL,& - &ILAST,IREST,IPE,I2REGIONS,IGP -REAL(KIND=JPRBT) :: ZMEDIA, ZCOMP -LOGICAL :: LLDONE - -! ----------------------------------------------------------------- - -!* 1. COMPUTATION OF KMEDIAP, KRESTM, KINDIC, KLAST. -! ---------------------------------------------- -100 CONTINUE -! * Computation of KMEDIAP and KRESTM. - -IF (.NOT.LDWEIGHTED_DISTR) THEN - - IMEDIA = SUM(KLOENG(KDGSA:KDGL)) - KMEDIAP = IMEDIA / KPROC - - IF( KPROC > 1 )THEN -! test if KMEDIAP is too small and no more than 2 asets would be required -! for the first latitude - IF( LDSPLIT )THEN - I2REGIONS=N_REGIONS(1)+N_REGIONS(2) - IF( KMEDIAP < (KLOENG(KDGSA)-1)/I2REGIONS+1 )THEN - WRITE(0,'("SUMPLATBEQ: KMEDIAP=",I6," I2REGIONS=",I3," KLOENG(KDGSA)=",I4)')& - &KMEDIAP,I2REGIONS,KLOENG(KDGSA) - CALL ABORT_TRANS ('SUMPLATBEQ: NPROC TOO BIG FOR THIS RESOLUTION, LDSPLIT=T') - ENDIF - ELSE -! test for number asets too large for the number of latitudes - IF( KPROCA > KDGL )THEN - WRITE(0,'("SUMPLATBEQ: KMEDIAP=",I6," KPROCA=",I4," KDGL=",I4)')& - &KMEDIAP,KPROCA,KDGL - CALL ABORT_TRANS ('SUMPLATBEQ: NPROC TOO BIG FOR THIS RESOLUTION, LDSPLIT=F') - ENDIF - ENDIF - ENDIF - - KRESTM = IMEDIA - KMEDIAP * KPROC - IF (KRESTM > 0) KMEDIAP = KMEDIAP + 1 - -ELSE - - ZMEDIA = SUM(PWEIGHT(:)) - PMEDIAP = ZMEDIA / KPROC - -ENDIF - -! * Computation of intermediate quantities KINDIC and KLAST - -IF (LDSPLIT) THEN - - KPROCAGP(:)=0 - IREST = 0 - ILAST =0 - IPE=0 - ZCOMP=0 - IGP=0 - DO JA=1,KPROCA - ICOMP=0 - DO JB=1,N_REGIONS(JA) - IF( LDWEIGHTED_DISTR )THEN - DO WHILE ( ( JA == KPROCA .OR. ZCOMP < PMEDIAP ) .AND. IGP < SIZE(PWEIGHT) ) - IGP = IGP + 1 - ICOMP = ICOMP + 1 - ZCOMP = ZCOMP + PWEIGHT(IGP) - ENDDO - ZCOMP = ZCOMP - PMEDIAP - ELSE - IPE=IPE+1 - IF (IPE <= KRESTM .OR. KRESTM == 0) THEN - ICOMP = ICOMP + KMEDIAP - ELSE - ICOMP = ICOMP + (KMEDIAP-1) - ENDIF - ENDIF - ENDDO - KPROCAGP(JA)=ICOMP - ITOT = IREST - IGL = ILAST+1 - DO JGL=IGL,KDGL - ILAST = JGL - IF(ITOT+KLOENG(JGL) < ICOMP) THEN - ITOT = ITOT+KLOENG(JGL) - ELSEIF(ITOT+KLOENG(JGL) == ICOMP) THEN - IREST = 0 - KLAST(JA) = JGL - KINDIC(JA) = 0 - EXIT - ELSE - IREST = KLOENG(JGL) -(ICOMP-ITOT) - KLAST(JA) = JGL - KINDIC(JA) = JGL - EXIT - ENDIF - ENDDO - ENDDO - IF( LDWEIGHTED_DISTR )THEN - IF( KLAST(KPROCA) /= KDGL )THEN - DO JA=1,KPROCA - IF( MYPROC == 1 )THEN - WRITE(0,'("SUMPLATBEQ_MOD: JA=",I3," KLAST=",I3," KINDIC=",I3)')& - &JA,KLAST(JA),KINDIC(JA) - ENDIF - ENDDO - WRITE(0,'("SUMPLATBEQ: LWEIGHTED_DISTR=T FAILED TO PARTITION GRID, REVERTING TO ",& - & " LWEIGHTED_DISTR=F PARTITIONING")') - LDWEIGHTED_DISTR=.FALSE. - GOTO 100 - ENDIF - ENDIF - IF( SUM(KPROCAGP(:)) /= SUM(KLOENG(KDGSA:KDGL)) )THEN - IF( MYPROC == 1 )THEN - WRITE(0,'("SUM(KPROCAGP(:))=",I12)')SUM(KPROCAGP(:)) - WRITE(0,'("SUM(KLOENG(:))=",I12)')SUM(KLOENG(KDGSA:KDGL)) - ENDIF - CALL ABORT_TRANS ('SUMPLATBEQ: PROBLEM IN PARTITIONING ') - ENDIF - -ELSE - - IF( LDWEIGHTED_DISTR )THEN - CALL ABORT_TRANS ('SUMPLATBEQ: LSPLIT=F NOT SUPPORTED FOR WEIGHTED DISTRIBUTION ') - ENDIF - - KINDIC(:) = 0 - LLDONE=.FALSE. - IMEDIAP=KMEDIAP - IF( MYPROC == 1 )THEN - WRITE(0,'("SUMPLATBEQ: IMEDIAP=",I6)')IMEDIAP - ENDIF - DO WHILE(.NOT.LLDONE) -! loop until a satisfactory distribution can be found - IA=1 - IMAXI=IMEDIAP*N_REGIONS(IA) - DO JGL=1,KDGL - KLAST(IA)=JGL - IMAXI=IMAXI-KLOENG(JGL) - IF( IA == KPROCA .AND. JGL == KDGL )THEN - IF( MYPROC == 1 )THEN - WRITE(0,'("SUMPLATBEQ: EXIT 1")') - ENDIF - EXIT - ENDIF - IF( IA == KPROCA .AND. JGL < KDGL )THEN - IF( MYPROC == 1 )THEN - WRITE(0,'("SUMPLATBEQ: EXIT 2")') - ENDIF - KLAST(KPROCA)=KDGL - EXIT - ENDIF - IF( IA < KPROCA .AND. JGL == KDGL )THEN - DO JA=KPROCA,IA+1,-1 - KLAST(JA)=KDGL+JA-KPROCA - ENDDO - DO JA=KPROCA,2,-1 - IF( KLAST(JA) <= KLAST(JA-1) )THEN - KLAST(JA-1)=KLAST(JA)-1 - ENDIF - ENDDO - IF( MYPROC == 1 )THEN - WRITE(0,'("SUMPLATBEQ: EXIT 3")') - ENDIF - EXIT - ENDIF - IF( IMAXI <= 0 )THEN - IA=IA+1 - IMAXI=IMAXI+IMEDIAP*N_REGIONS(IA) - ENDIF - ENDDO - IF( KPROCA > 1 .AND. KLAST(KPROCA) == KLAST(KPROCA-1) )THEN - IMEDIAP=IMEDIAP-1 - IF( MYPROC == 1 )THEN - WRITE(0,'("SUMPLATBEQ: REDUCING IMEDIAP=",I6)')IMEDIAP - ENDIF - IF( IMEDIAP <= 0 )THEN - CALL ABORT_TRANS ('SUMPLATBEQ: PROBLEM PARTITIONING WITH LSPLIT=F, IMEDIAP <= 0') - ENDIF - ELSE - LLDONE=.TRUE. - ENDIF - ENDDO -ENDIF - -END SUBROUTINE SUMPLATBEQ -END MODULE SUMPLATBEQ_MOD diff --git a/src/trans/gpu/internal/sumplatf_mod.F90 b/src/trans/gpu/internal/sumplatf_mod.F90 deleted file mode 100755 index 80d657293..000000000 --- a/src/trans/gpu/internal/sumplatf_mod.F90 +++ /dev/null @@ -1,148 +0,0 @@ -! (C) Copyright 1995- ECMWF. -! (C) Copyright 1995- Meteo-France. -! -! This software is licensed under the terms of the Apache Licence Version 2.0 -! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -! In applying this licence, ECMWF does not waive the privileges and immunities -! granted to it by virtue of its status as an intergovernmental organisation -! nor does it submit to any jurisdiction. -! - -MODULE SUMPLATF_MOD -CONTAINS -SUBROUTINE SUMPLATF(KDGL,KPROCA,KMYSETA,& - &KULTPP,KPROCL,KPTRLS) - -!**** *SUMPLATF * - Initialize fourier space distibution in N-S direction - -! Purpose. -! -------- - - -!** Interface. -! ---------- -! *CALL* *SUMPLATF * - -! Explicit arguments - input : -! -------------------- -! KDGL -last latitude -! KPROCA -number of processors in A direction -! KMYSETA -process number in A direction - -! Explicit arguments - output: -! -------------------- - -! KULTPP -number of latitudes in process -! (in Fourier space) -! KPROCL -process responsible for latitude -! (in Fourier space) -! KPTRLS -pointer to first global latitude -! of process (in Fourier space) - -! Implicit arguments : -! -------------------- - - -! Method. -! ------- -! See documentation - -! Externals. SUMPLATB and SUEMPLATB. -! ---------- - -! Reference. -! ---------- -! ECMWF Research Department documentation of the IFS - -! Author. -! ------- -! MPP Group *ECMWF* - -! Modifications. -! -------------- -! Original : 95-10-01 -! David Dent:97-06-02 parameters KFRSTLAT etc added -! JF. Estrade:97-11-13 Adaptation to ALADIN case -! J.Boutahar: 98-07-06 phasing with CY19 -! Modified 98-08-10 by K. YESSAD: removal of LRPOLE option + cleanings -! (correct computation of extrapolar latitudes for KPROCL). -! Modified 98-12-07 by K. YESSAD and C. FISCHER: cleaning. -! - merge old sumplat.F and suemplat.F -! - gather 'lelam' code and 'not lelam' code. -! - clean (useless duplication of variables, non doctor features). -! - remodularise according to lelam/not lelam -! -> lelam features in new routine suemplatb.F, -! not lelam features in new routine sumplatb.F -! ------------------------------------------------------------------ - -USE PARKIND1, ONLY: JPIM -USE TPM_GEOMETRY, ONLY: G -USE SUMPLATB_MOD, ONLY: SUMPLATB -! - -IMPLICIT NONE - -! * DUMMY: -INTEGER(KIND=JPIM),INTENT(IN) :: KDGL -INTEGER(KIND=JPIM),INTENT(IN) :: KPROCA -INTEGER(KIND=JPIM),INTENT(IN) :: KMYSETA -INTEGER(KIND=JPIM),INTENT(OUT) :: KULTPP(:) -INTEGER(KIND=JPIM),INTENT(OUT) :: KPROCL(:) -INTEGER(KIND=JPIM),INTENT(OUT) :: KPTRLS(:) - -! * LOCAL: -INTEGER(KIND=JPIM) :: INDIC(KPROCA),ILAST(KPROCA) - -! LOCAL INTEGER SCALARS -INTEGER(KIND=JPIM) :: IA, ILAT, ISTART, IMEDIAP,IRESTM, JA, JLTLOC - -LOGICAL :: LLSPLIT,LLFOURIER - -! ----------------------------------------------------------------- - -!* 1. CODE DEPENDING ON 'LELAM': COMPUTATION OF -! KMEDIAP, KRESTM, INDIC, ILAST. -! ----------------------------------------- - -LLSPLIT = .FALSE. -LLFOURIER = .TRUE. - -CALL SUMPLATB(1,KDGL,KPROCA,G%NLOEN,LLSPLIT,LLFOURIER,& - &IMEDIAP,IRESTM,INDIC,ILAST) - -! ----------------------------------------------------------------- - -!* 2. CODE NOT DEPENDING ON 'LELAM': -! ------------------------------ - - - -! * Definitions related to distribution of latitudes along sets -! ------------ in fourier-space ----------------------------- -ISTART = 0 -KULTPP(1) = ILAST(1) -DO JA=1,KPROCA - IF(JA > 1) THEN - IF(ILAST(JA) /= 0) THEN - KULTPP(JA) = ILAST(JA)-ILAST(JA-1) - ELSE - KULTPP(JA) = 0 - ENDIF - ENDIF - DO JLTLOC=1,KULTPP(JA) - ILAT = ISTART + JLTLOC - KPROCL(ILAT) = JA - ENDDO - ISTART = ISTART + KULTPP(JA) -ENDDO - -! * Computes KPTRLS. - -IA = KPROCL(1) -KPTRLS(IA) = 1 -DO JA=IA+1,KPROCA - KPTRLS(JA) = KPTRLS(JA-1) + KULTPP(JA-1) -ENDDO - -END SUBROUTINE SUMPLATF -END MODULE SUMPLATF_MOD diff --git a/src/trans/gpu/internal/supol_mod.F90 b/src/trans/gpu/internal/supol_mod.F90 deleted file mode 100755 index df6540069..000000000 --- a/src/trans/gpu/internal/supol_mod.F90 +++ /dev/null @@ -1,172 +0,0 @@ -! (C) Copyright 1987- ECMWF. -! (C) Copyright 1987- Meteo-France. -! -! This software is licensed under the terms of the Apache Licence Version 2.0 -! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -! In applying this licence, ECMWF does not waive the privileges and immunities -! granted to it by virtue of its status as an intergovernmental organisation -! nor does it submit to any jurisdiction. -! - -MODULE SUPOL_MOD -CONTAINS -SUBROUTINE SUPOL(KNSMAX,PDDMU,PFN,PDDPOL) - -!**** *SUPOL * - Routine to compute the Legendre polynomials - -! Purpose. -! -------- -! For a given value of mu, computes the Legendre polynomials. - -!** Interface. -! ---------- -! *CALL* *SUPOL(...) - -! Explicit arguments : -! -------------------- -! KNSMAX : Truncation (triangular) [in] -! PDDMU : Abscissa at which the polynomials are computed (mu) [in] -! PFN : Fourier coefficients of series expansion -! for the ordinary Legendre polynomials [in] -! PDDPOL : Polynomials (the first index is m and the second n) [out] - -! Implicit arguments : None -! -------------------- - -! Method. -! ------- -! See documentation about spectral transforms -! (doc (IDTS) by K. Yessad, appendix 3, or doc (NTA30) by M. Rochas) - -! Externals. -! ---------- - -! Reference. -! ---------- -! ECMWF Research Department documentation of the IFS - -! Author. -! ------- -! Mats Hamrud and Philippe Courtier *ECMWF* - -! Modifications. -! -------------- -! Original : 87-10-15 -! K. YESSAD (MAY 1998): modification to avoid underflow. -! M.Hamrud 01-Oct-2003 CY28 Cleaning -! R. El Khatib 11-Apr-2007 Emulation of vectorized quadruple precision -! on NEC -! K. YESSAD (NOV 2008): make consistent arp/SUPOLA and tfl/SUPOL. -! Nils Wedi + Mats Hamrud, 2009-02-05 revised following Swarztrauber, 2002 -! R. El Khatib 30-Apr-2013 Open-MP parallelization -! F. Vana 05-Mar-2015 Support for single precision -! ------------------------------------------------------------------ - -USE EC_PARKIND, ONLY: JPRD, JPIM -USE TPM_POL, ONLY: DDI, DDA, DDH, DDE, DDC, DDD - -IMPLICIT NONE - -INTEGER(KIND=JPIM),INTENT(IN) :: KNSMAX -REAL(KIND=JPRD) ,INTENT(IN) :: PDDMU -REAL(KIND=JPRD) ,INTENT(IN) :: PFN(0:KNSMAX,0:KNSMAX) - -REAL(KIND=JPRD) ,INTENT(OUT) :: PDDPOL(0:KNSMAX,0:KNSMAX) - -REAL(KIND=JPRD) :: ZDLX,ZDLX1,ZDLSITA,ZDL1SITA,ZDLS,ZDLK,ZDLLDN - -INTEGER(KIND=JPIM) :: JM, JN, JK -REAL(KIND=JPRD) :: Z - -! ------------------------------------------------------------------ - -!* 1. First two columns. -! ------------------ - -ZDLX=PDDMU -ZDLX1=ACOS(ZDLX) -ZDLSITA=SQRT(1.0_JPRD-ZDLX*ZDLX) - -PDDPOL(0,0)=1._JPRD -ZDLLDN = 0.0_JPRD - -! IF WE ARE LESS THAN 1Meter FROM THE POLE, -IF(ABS(REAL(ZDLSITA,KIND(Z))) <= SQRT(EPSILON(Z)))THEN - ZDLX=1._JPRD - ZDLSITA=0._JPRD - ZDL1SITA=0._JPRD -ELSE - ZDL1SITA=1.0_JPRD/ZDLSITA -ENDIF - -!* ordinary Legendre polynomials from series expansion -! --------------------------------------------------- - -! even N -!$OMP PARALLEL DO PRIVATE(JN,ZDLK,ZDLLDN,JK) -DO JN=2,KNSMAX,2 - ZDLK = 0.5_JPRD*PFN(JN,0) - ZDLLDN = 0.0_JPRD - ! represented by only even k - DO JK=2,JN,2 - ! normalised ordinary Legendre polynomial == \overbar{P_n}^0 - ZDLK = ZDLK + PFN(JN,JK)*COS(DDI(JK)*ZDLX1) - ! normalised associated Legendre polynomial == \overbar{P_n}^1 - ZDLLDN = ZDLLDN + DDA(JN)*PFN(JN,JK)*DDI(JK)*SIN(DDI(JK)*ZDLX1) - ENDDO - PDDPOL(0,JN) = ZDLK - PDDPOL(1,JN) = ZDLLDN -ENDDO -!$OMP END PARALLEL DO -! odd N -!$OMP PARALLEL DO PRIVATE(JN,ZDLK,ZDLLDN,JK) -DO JN=1,KNSMAX,2 - ZDLK = 0.0_JPRD - ZDLLDN = 0.0_JPRD - ! represented by only odd k - DO JK=1,JN,2 - ! normalised ordinary Legendre polynomial == \overbar{P_n}^0 - ZDLK = ZDLK + PFN(JN,JK)*COS(DDI(JK)*ZDLX1) - ! normalised associated Legendre polynomial == \overbar{P_n}^1 - ZDLLDN = ZDLLDN + DDA(JN)*PFN(JN,JK)*DDI(JK)*SIN(DDI(JK)*ZDLX1) - ENDDO - PDDPOL(0,JN) = ZDLK - PDDPOL(1,JN) = ZDLLDN -ENDDO -!$OMP END PARALLEL DO - -! ------------------------------------------------------------------ - -!* 2. Diagonal (the terms 0,0 and 1,1 have already been computed) -! Belousov, equation (23) -! ----------------------------------------------------------- - -ZDLS=ZDL1SITA*TINY(ZDLS) - -#ifdef VPP -!OCL SCALAR -#endif -DO JN=2,KNSMAX - PDDPOL(JN,JN)=PDDPOL(JN-1,JN-1)*ZDLSITA*DDH(JN) - IF ( ABS(PDDPOL(JN,JN)) < ZDLS ) PDDPOL(JN,JN)=0.0_JPRD -ENDDO - -! ------------------------------------------------------------------ - -!* 3. General recurrence (Belousov, equation 17) -! ----------------------------------------- - -DO JN=3,KNSMAX -!DIR$ IVDEP -!OCL NOVREC - DO JM=2,JN-1 - PDDPOL(JM,JN)=DDC(JM,JN)*PDDPOL(JM-2,JN-2)& - &-DDD(JM,JN)*PDDPOL(JM-2,JN-1)*ZDLX & - &+DDE(JM,JN)*PDDPOL(JM ,JN-1)*ZDLX - ENDDO -ENDDO - -! ------------------------------------------------------------------ - -END SUBROUTINE SUPOL -END MODULE SUPOL_MOD diff --git a/src/trans/gpu/internal/supolf_mod.F90 b/src/trans/gpu/internal/supolf_mod.F90 deleted file mode 100755 index 9c374b95d..000000000 --- a/src/trans/gpu/internal/supolf_mod.F90 +++ /dev/null @@ -1,283 +0,0 @@ -! (C) Copyright 1987- ECMWF. -! (C) Copyright 1987- Meteo-France. -! -! This software is licensed under the terms of the Apache Licence Version 2.0 -! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -! In applying this licence, ECMWF does not waive the privileges and immunities -! granted to it by virtue of its status as an intergovernmental organisation -! nor does it submit to any jurisdiction. -! - -MODULE SUPOLF_MOD -CONTAINS -SUBROUTINE SUPOLF(KM,KNSMAX,DDMU,DDPOL,KCHEAP) - -!**** *SUPOL * - Routine to compute the Legendre polynomials - -! Purpose. -! -------- -! For a given value of mu and M, computes the Legendre -! polynomials upto KNSMAX - -!** Interface. -! ---------- -! *CALL* *SUPOLF(KM,KNSMAX,DDMU,DDPOL,KCHEAP) - -! Explicit arguments : -! -------------------- -! KM : zonal wavenumber M -! KNSMAX : Truncation (triangular) -! DDMU : Abscissa at which the polynomials are computed (mu) -! DDPOL : Polynomials (the first index is m and the second n) -! KCHEAP : odd/even saving switch - - -! Implicit arguments : None -! -------------------- - -! Method. -! ------- -! See documentation - -! Externals. -! ---------- - -! Reference. -! ---------- -! ECMWF Research Department documentation of the IFS - -! Author. -! ------- -! Nils Wedi + George Mozdzynski + Mats Hamrud - -! Modifications. -! -------------- -! Original : 87-10-15 -! K. YESSAD (MAY 1998): modification to avoid underflow. -! R. El Khatib 11-Apr-2007 Emulation of vectorized quadruple precision -! on NEC -! F. Vana 05-Mar-2015 Support for single precision -! ------------------------------------------------------------------ - -USE EC_PARKIND, ONLY: JPRD, JPIM -USE TPM_POL, ONLY: DFI, DFB, DFG, DFA, DFF - -IMPLICIT NONE - -INTEGER(KIND=JPIM),INTENT(IN) :: KM -INTEGER(KIND=JPIM),INTENT(IN) :: KNSMAX -REAL(KIND=JPRD) ,INTENT(IN) :: DDMU -REAL(KIND=JPRD) ,INTENT(OUT) :: DDPOL(0:KNSMAX) - -INTEGER(KIND=JPIM),INTENT(IN), OPTIONAL :: KCHEAP - -REAL(KIND=JPRD) :: DLX,DLX1,DLSITA,DLSITA2,DL1SITA,DLK,DL1, DLKM1, DLKM2 - -INTEGER(KIND=JPIM), PARAMETER :: JPKD=KIND(DLX) - -INTEGER(KIND=JPIM) :: JN, KKL, ICHEAP, IC, IEND -REAL(KIND=JPRD) :: DCL, DDL - -REAL(KIND=JPRD) :: ZFAC, ZLSITA, ZFAC0, ZFAC1, ZMULT, ZEPS - -INTEGER(KIND=JPIM) :: JCORR, ICORR3, ICORR(KNSMAX) -REAL(KIND=JPRD) :: ZSCALE, ZISCALE - -DCL(KKL)=SQRT((REAL(KKL-KM+1,JPKD)*REAL(KKL-KM+2,JPKD)* & - & REAL(KKL+KM+1,JPKD)*REAL(KKL+KM+2,JPKD))/(REAL(2*KKL+1,JPKD)*REAL(2*KKL+3,JPKD)*& - & REAL(2*KKL+3,JPKD)*REAL(2*KKL+5,JPKD))) -DDL(KKL)=(2.0_JPKD*REAL(KKL,JPKD)*REAL(KKL+1,JPKD)-2.0_JPKD*REAL(KM**2,JPKD)-1.0_JPKD)/ & - & (REAL(2*KKL-1,JPKD)*REAL(2*KKL+3,JPKD)) - -! ------------------------------------------------------------------ - -!* 1. First two columns. -! ------------------ - -ZEPS = EPSILON(ZSCALE) -ICORR3=0 - -ICHEAP=1 -IF( PRESENT(KCHEAP) ) THEN - ICHEAP = KCHEAP -ENDIF - -DLX=DDMU -DLX1=ACOS(DLX) -DLSITA2=1.0_JPRD-DLX*DLX -DLSITA=SQRT(DLSITA2) - -!* ordinary Legendre polynomials from series expansion -! --------------------------------------------------- - -! this is supol_fast just using single KM -IF( ABS(REAL(DLSITA,JPRD)) <= ZEPS ) THEN - DLX=1._JPRD - DLSITA=0._JPRD - DL1SITA=0._JPRD - DLSITA2=0._JPRD -ELSE - DL1SITA=1.0_JPRD/DLSITA -ENDIF - -DLKM2=1._JPRD -DLKM1=DLX - -IF( KM == 0 ) THEN - DDPOL(0)=DLKM2 - DDPOL(1)=DLKM1*DFB(1)/DFA(1) - DO JN=2,KNSMAX - DLK=DFF(JN)*DLX*DLKM1-DFG(JN)*DLKM2 - DL1=DFI(JN)*(DLKM1-DLX*DLK)*DL1SITA - DDPOL(JN)=DLK*DFB(JN)/DFA(JN) - DLKM2=DLKM1 - DLKM1=DLK - ENDDO -ELSEIF( KM == 1 ) THEN - DDPOL(0)=0 - DDPOL(1)=DLSITA*DFB(1) - DO JN=2,KNSMAX - DLK=DFF(JN)*DLX*DLKM1-DFG(JN)*DLKM2 - DL1=DFI(JN)*(DLKM1-DLX*DLK)*DL1SITA - DDPOL(JN)=DL1*DFB(JN) - DLKM2=DLKM1 - DLKM1=DLK - ENDDO -ELSE - -! ------------------------------------------------------------------ -!* KM >= 2 -! ------------------------------------------------------------------ - -! ZSCALE=1._JPRD/ZEPS - ! Maintaining the consistency with the CY41R1 reference - ZSCALE=1.0E+100_JPRD - ZISCALE=1.0E-100_JPRD - ! General case - !ZSCALE = 10._JPRD**( MAXEXPONENT(ZSCALE)/10) - !ZISCALE = 10._JPRD**(-MAXEXPONENT(ZSCALE)/10) - - IEND=KM/2 - ZLSITA=1._JPRD -! WRITE(*,*) 'SUPOLF: DLSITA2=',DLSITA2,' DDMU=',DDMU,' DLX=',DLX - DO JN=1,IEND - ZLSITA=ZLSITA*DLSITA2 - IF( ABS(ZLSITA) < ZISCALE ) THEN - ZLSITA=ZLSITA*ZSCALE - ICORR3=ICORR3+1 - ENDIF - ENDDO - IF( MOD(KM,2) == 1 ) ZLSITA=ZLSITA*DLSITA -! WRITE(*,*) 'SUPOLF: ZSCALE=',ZSCALE,' ICORR3=',ICORR3,' KM=',KM,' ZLSITA=',ZLSITA - - ZFAC0=1._JPRD - ZFAC=1._JPRD - DO JN=1,KM-1 - ZFAC=ZFAC*SQRT(REAL(2*JN-1,JPRD)) - ZFAC=ZFAC/SQRT(REAL(2*JN,JPRD)) - ENDDO - ZFAC=ZFAC*SQRT(REAL(2*KM-1,JPRD)) -! WRITE(*,*) 'SUPOLF: ZSCALE=',ZSCALE,' ICORR3=',ICORR3,' ZFAC=',ZFAC - - ZFAC1=1._JPRD - DO IC=0,MIN(KNSMAX-KM,3) - - ! (2m+i)! - ZFAC0 = ZFAC0 * REAL(2*KM+IC,JPRD) - - SELECT CASE (IC) - CASE (0) - ZMULT=ZFAC - CASE (1) - ZFAC=ZFAC*REAL(2*KM+IC,JPRD) - ZMULT=ZFAC*DLX - CASE (2) - ZMULT=0.5_JPRD*ZFAC*(REAL(2*KM+3,JPRD)*DLX*DLX-1._JPRD) - CASE (3) - ZFAC=ZFAC*REAL(2*KM+IC,JPRD) - ZMULT=(1._JPRD/6._JPRD)*DLX*ZFAC*(REAL(2*KM+5,JPRD)*DLX*DLX-3._JPRD) - END SELECT - - DDPOL(KM+IC) = ZLSITA*ZMULT*SQRT(2._JPRD*(REAL(KM+IC,JPRD)+0.5_JPRD)*ZFAC1/ZFAC0) - - ZFAC1=ZFAC1*REAL(IC+1,JPRD) - - ENDDO - - ICORR(:)=ICORR3 - IF( ICHEAP == 2 ) THEN - ! symmetric case - DO JN=KM+2,KNSMAX-2,2 - - IF( ABS(DDPOL(JN-2)) > ZSCALE ) THEN - DDPOL(JN-2)=DDPOL(JN-2)/ZSCALE - DDPOL(JN)=DDPOL(JN)/ZSCALE - ICORR(JN-2:KNSMAX)=ICORR(JN-2:KNSMAX)-1 - ENDIF - - DDPOL(JN+2)=((DLX*DLX-DDL(JN))*DDPOL(JN)-DCL(JN-2)*DDPOL(JN-2))/DCL(JN) - ENDDO - - DO JN=KM,KNSMAX,2 - DO JCORR=1,ICORR(JN) - DDPOL(JN)=DDPOL(JN)/ZSCALE - IF( DDPOL(JN) < ZEPS ) THEN - DDPOL(JN) = ZEPS - ENDIF - ENDDO - ENDDO - - ELSEIF( ICHEAP == 3 ) THEN - ! antisymmetric case - DO JN=KM+3,KNSMAX-2,2 - - IF( ABS(DDPOL(JN-2)) > ZSCALE ) THEN - DDPOL(JN-2)=DDPOL(JN-2)/ZSCALE - DDPOL(JN)=DDPOL(JN)/ZSCALE - ICORR(JN-2:KNSMAX)=ICORR(JN-2:KNSMAX)-1 - ENDIF - - DDPOL(JN+2)=((DLX*DLX-DDL(JN))*DDPOL(JN)-DCL(JN-2)*DDPOL(JN-2))/DCL(JN) - ENDDO - - DO JN=KM+1,KNSMAX,2 - DO JCORR=1,ICORR(JN) - DDPOL(JN)=DDPOL(JN)/ZSCALE - IF( DDPOL(JN) < ZEPS ) THEN - DDPOL(JN) = ZEPS - ENDIF - ENDDO - ENDDO - - ELSE - DO JN=KM+2,KNSMAX-2 - - IF( ABS(DDPOL(JN-2)) > ZSCALE ) THEN - DDPOL(JN-2)=DDPOL(JN-2)/ZSCALE - DDPOL(JN-1)=DDPOL(JN-1)/ZSCALE - DDPOL(JN)=DDPOL(JN)/ZSCALE - DDPOL(JN+1)=DDPOL(JN+1)/ZSCALE - ICORR(JN-2:KNSMAX)=ICORR(JN-2:KNSMAX)-1 - ENDIF - - DDPOL(JN+2)=((DLX*DLX-DDL(JN))*DDPOL(JN)-DCL(JN-2)*DDPOL(JN-2))/DCL(JN) - - ENDDO - - DO JN=KM,KNSMAX - DO JCORR=1,ICORR(JN) - DDPOL(JN)=DDPOL(JN)/ZSCALE - IF( DDPOL(JN) < ZEPS ) THEN - DDPOL(JN) = ZEPS - ENDIF - ENDDO - ENDDO - - ENDIF - -ENDIF - -! ------------------------------------------------------------------ - -END SUBROUTINE SUPOLF -END MODULE SUPOLF_MOD diff --git a/src/trans/gpu/internal/sustaonl_mod.F90 b/src/trans/gpu/internal/sustaonl_mod.F90 deleted file mode 100755 index b5e744c93..000000000 --- a/src/trans/gpu/internal/sustaonl_mod.F90 +++ /dev/null @@ -1,454 +0,0 @@ -! (C) Copyright 1995- ECMWF. -! (C) Copyright 1995- Meteo-France. -! -! This software is licensed under the terms of the Apache Licence Version 2.0 -! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -! In applying this licence, ECMWF does not waive the privileges and immunities -! granted to it by virtue of its status as an intergovernmental organisation -! nor does it submit to any jurisdiction. -! - -MODULE SUSTAONL_MOD -CONTAINS -SUBROUTINE SUSTAONL(KMEDIAP,KRESTM,LDWEIGHTED_DISTR,PWEIGHT,PMEDIAP,KPROCAGP) - -!**** *SUSTAONL * - Routine to initialize parallel environment - -! Purpose. -! -------- -! Initialize D%NSTA and D%NONL. -! Calculation of distribution of grid points to processors : -! Splitting of grid in B direction - -!** Interface. -! ---------- -! *CALL* *SUSTAONL * - -! Explicit arguments : -! -------------------- -! KMEDIAP - mean number of grid points per PE -! KRESTM - number of PEs with one extra point -! LDWEIGHTED_DISTR -true if weighted distribution -! PWEIGHT -weight per grid-point if weighted distribution -! PMEDIAP -mean weight per PE if weighted distribution -! KPROCAGP -number of grid points per A set - -! Implicit arguments : -! -------------------- - - -! Method. -! ------- -! See documentation - -! Externals. NONE. -! ---------- - -! Reference. -! ---------- -! ECMWF Research Department documentation of the IFS - -! Author. -! ------- -! MPP Group *ECMWF* - -! Modifications. -! -------------- -! Original : 95-10-01 -! Modified 98-08-10 by K. YESSAD: removal of LRPOLE option. -! - removal of LRPOLE in YOMCT0. -! - removal of code under LRPOLE. -! Modified 98-12-04 C. Fischer: merge with SUESTAONL (Aladin) -! R. El Khatib 05-Apr-2007 Enable back vectorization on NEC -! R. El Khatib 30-Apr-2013 Optimization -! R. El Khatib 26-Apr-2018 vectorization -! ------------------------------------------------------------------ - -USE PARKIND_ECTRANS, ONLY: JPIM, JPRB, JPRBT, JPRD -USE MPL_MODULE, ONLY: MPL_ALLGATHERV, MPL_RECV, MPL_SEND -USE TPM_GEN, ONLY: NOUT, NPRINTLEV -USE TPM_DIM, ONLY: R -USE TPM_GEOMETRY, ONLY: G -USE TPM_DISTR, ONLY: D, LEQ_REGIONS, MTAGPART, NPRCIDS, MYPROC, NPROC -USE SET2PE_MOD, ONLY: SET2PE -USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS -USE EQ_REGIONS_MOD, ONLY: MY_REGION_NS, MY_REGION_EW, N_REGIONS, N_REGIONS_EW, N_REGIONS_NS -! - -IMPLICIT NONE - -! DUMMY -INTEGER(KIND=JPIM),INTENT(IN) :: KMEDIAP -INTEGER(KIND=JPIM),INTENT(IN) :: KRESTM -REAL(KIND=JPRBT),INTENT(IN) :: PWEIGHT(:) -LOGICAL,INTENT(IN) :: LDWEIGHTED_DISTR -REAL(KIND=JPRBT),INTENT(IN) :: PMEDIAP -INTEGER(KIND=JPIM),INTENT(IN) :: KPROCAGP(:) - -! LOCAL - -INTEGER(KIND=JPIM) :: IXPTLAT(R%NDGL), ILSTPTLAT(R%NDGL),ISENDREQ(NPROC) -INTEGER(KIND=JPIM) :: ICHK(R%NDLON,R%NDGL), ICOMBUF(R%NDGL*N_REGIONS_EW*2) -INTEGER(KIND=JPIM) :: I1, I2, IBUFLEN, IDGLG, IDWIDE,& - &IGL, IGL1, IGL2, IGLOFF, IGPTA, & - &IGPTPRSETS, IGPTS, IGPTSP, ILEN, ILRECV, & - &ILSEND, INPLAT, INXLAT, IPOS, & - &IPROCB, IPTSRE, IRECV, IPE, & - &IREST, ISEND, ITAG, JA, JB, JGL, JL, JNPTSRE, & - &ILAT, ILON, ILOEN -INTEGER(KIND=JPIM),ALLOCATABLE :: ICOMBUFG(:) -REAL(KIND=JPRBT),ALLOCATABLE :: ZWEIGHT(:,:) -INTEGER(KIND=JPIM) :: JJ, ILENG(NPROC), IOFF(NPROC) -REAL(KIND=JPRD) :: ZTIME0,ZTIME1,ZTIME2 - -LOGICAL :: LLABORT -LOGICAL :: LLP1,LLP2 - -REAL(KIND=JPRBT) :: ZCOMP,ZPI,ZLON -REAL(KIND=JPRBT) :: ZDIVID(R%NDGL) -INTEGER(KIND=JPIM) :: ILATMD,ILATMD1 - -! ----------------------------------------------------------------- - -ZPI = 2.0_JPRBT*ASIN(1.0_JPRBT) - -IXPTLAT (:)=999999 -ILSTPTLAT(:)=999999 - -LLP1 = NPRINTLEV>0 -LLP2 = NPRINTLEV>1 - -IDWIDE = R%NDGL/2 -IBUFLEN = R%NDGL*N_REGIONS_EW*2 -IDGLG = R%NDGL - -I1 = MAX( 1,D%NFRSTLAT(MY_REGION_NS)-D%NFRSTLOFF) -I2 = MIN(IDGLG,D%NLSTLAT (MY_REGION_NS)-D%NFRSTLOFF) - -ILEN = D%NLSTLAT(MY_REGION_NS) - D%NFRSTLAT(MY_REGION_NS)+1 - -IGPTPRSETS = SUM(G%NLOEN(1:D%NFRSTLAT(MY_REGION_NS)-1)) - -IF (D%LSPLIT) THEN - IF( LEQ_REGIONS )THEN - IGPTA=0 - DO JA=1,MY_REGION_NS-1 - IGPTA = IGPTA + KPROCAGP(JA) - ENDDO - IGPTS = KPROCAGP(MY_REGION_NS) - ELSE - IF (MY_REGION_NS <= KRESTM.OR.KRESTM == 0) THEN - IGPTS = KMEDIAP - IGPTA = KMEDIAP*(MY_REGION_NS-1) - ELSE - IGPTS = KMEDIAP-1 - IGPTA = KMEDIAP*KRESTM+IGPTS*(MY_REGION_NS-1-KRESTM) - ENDIF - ENDIF -ELSE - IGPTA = IGPTPRSETS - IGPTS = SUM(G%NLOEN(D%NFRSTLAT(MY_REGION_NS):D%NLSTLAT(MY_REGION_NS))) -ENDIF - -IGPTSP = IGPTS/N_REGIONS(MY_REGION_NS) -IREST = IGPTS-N_REGIONS(MY_REGION_NS)*IGPTSP -IXPTLAT(1) = IGPTA-IGPTPRSETS+1 -ILSTPTLAT(1) = G%NLOEN(D%NFRSTLAT(MY_REGION_NS)) -INPLAT = G%NLOEN(D%NFRSTLAT(MY_REGION_NS))-IXPTLAT(1)+1 -DO JGL=2,ILEN - IXPTLAT(JGL) = 1 - ILSTPTLAT(JGL) = G%NLOEN(D%NFRSTLAT(MY_REGION_NS)+JGL-1) - INPLAT = INPLAT+G%NLOEN(D%NFRSTLAT(MY_REGION_NS)+JGL-1) -ENDDO -ILSTPTLAT(ILEN) = G%NLOEN(D%NLSTLAT(MY_REGION_NS))-INPLAT+IGPTS - -DO JB=1,N_REGIONS_EW - DO JGL=1,R%NDGL+N_REGIONS_NS-1 - D%NSTA(JGL,JB) = 0 - D%NONL(JGL,JB) = 0 - ENDDO -ENDDO - - -! grid point decomposition -! --------------------------------------- -IF( NPROC > 1 )THEN - DO JGL=1,ILEN - ZDIVID(JGL) = 360000.0_JPRBT/REAL(G%NLOEN(D%NFRSTLAT(MY_REGION_NS)+JGL-1),JPRBT) - ENDDO - IF( LDWEIGHTED_DISTR )THEN - ALLOCATE(ZWEIGHT(G%NLOEN(R%NDGL/2),R%NDGL)) - IGL=0 - DO JGL=1,R%NDGL - DO JL=1,G%NLOEN(JGL) - IGL=IGL+1 - ZWEIGHT(JL,JGL)=PWEIGHT(IGL) - ENDDO - ENDDO - ZCOMP=0 - IGPTS=0 - ENDIF - - DO JB=1,N_REGIONS(MY_REGION_NS) - - IF( .NOT.LDWEIGHTED_DISTR )THEN - - IF (JB <= IREST) THEN - IPTSRE = IGPTSP+1 - ELSE - IPTSRE = IGPTSP - ENDIF - DO JNPTSRE=1,IPTSRE - - ILATMD = 360000 !! 360*1000 - DO JGL=1,ILEN - IF (IXPTLAT(JGL) <= ILSTPTLAT(JGL)) THEN - ILATMD1 = NINT(REAL(IXPTLAT(JGL)-1,JPRB)*ZDIVID(JGL)) - IF(ILATMD1 < ILATMD) THEN - ILATMD = ILATMD1 - INXLAT = JGL - ENDIF - ENDIF - ENDDO - - IF (INXLAT >= I1 .AND. INXLAT <= I2) THEN - IGL=D%NPTRFLOFF+INXLAT - IF (D%NSTA(IGL,JB) == 0) THEN - D%NSTA(IGL,JB) = IXPTLAT(INXLAT) - ENDIF - D%NONL(IGL,JB) = D%NONL(IGL,JB)+1 - ENDIF - IXPTLAT(INXLAT) = IXPTLAT(INXLAT)+1 - ENDDO - - ELSE - - DO WHILE ( (JB < N_REGIONS(MY_REGION_NS) .AND. ZCOMP < PMEDIAP) & - & .OR. (JB == N_REGIONS(MY_REGION_NS) .AND. IGPTS < KPROCAGP(MY_REGION_NS)) ) - - IGPTS = IGPTS + 1 - ILATMD = 360000 !! 360*1000 - DO JGL=1,ILEN - IF (IXPTLAT(JGL) <= ILSTPTLAT(JGL)) THEN - ILATMD1 = NINT(REAL(IXPTLAT(JGL)-1,JPRB)*ZDIVID(JGL)) - IF(ILATMD1 < ILATMD) THEN - ILATMD = ILATMD1 - INXLAT = JGL - ENDIF - ENDIF - ENDDO - - IF (INXLAT >= I1 .AND. INXLAT <= I2) THEN - IGL=D%NPTRFLOFF+INXLAT - IF (D%NSTA(IGL,JB) == 0) THEN - D%NSTA(IGL,JB) = IXPTLAT(INXLAT) - ENDIF - D%NONL(IGL,JB) = D%NONL(IGL,JB)+1 - IF(IGL<1.OR.IGL>R%NDGL+N_REGIONS_NS-1)THEN - CALL ABORT_TRANS(' SUSTAONL: IGL<1.OR.IGL>R%NDGL+N_REGIONS_NS-1') - ENDIF - ILON=D%NSTA(IGL,JB)+D%NONL(IGL,JB)-1 - ILAT=D%NFRSTLAT(MY_REGION_NS)+INXLAT-1 - ILOEN=G%NLOEN(ILAT) - IF(ILON<1.OR.ILON>ILOEN)THEN - CALL ABORT_TRANS(' SUSTAONL: ILON<1.OR.ILON>ILOEN') - ENDIF - ZCOMP = ZCOMP + ZWEIGHT(ILON,ILAT) - ENDIF - IXPTLAT(INXLAT) = IXPTLAT(INXLAT)+1 - ENDDO - - ZCOMP = ZCOMP - PMEDIAP - - ENDIF - - ENDDO - - IF( LDWEIGHTED_DISTR )THEN - DEALLOCATE(ZWEIGHT) - ENDIF - - ! Exchange local partitioning info to produce global view - ! - - CALL GSTATS_BARRIER(795) - CALL GSTATS(814,0) - IF( LEQ_REGIONS )THEN - - ITAG = MTAGPART - IPOS = 0 - DO JGL=1,D%NLSTLAT(MY_REGION_NS)-D%NFRSTLAT(MY_REGION_NS)+1 - IPOS = IPOS+1 - ICOMBUF(IPOS) = D%NSTA(D%NPTRFLOFF+JGL,MY_REGION_EW) - IPOS = IPOS+1 - ICOMBUF(IPOS) = D%NONL(D%NPTRFLOFF+JGL,MY_REGION_EW) - ENDDO - IF( IPOS > IBUFLEN )THEN - CALL ABORT_TRANS(' SUSTAONL: SEND BUFFER TOO SMALL FOR GLOBAL INFO') - ENDIF - ILSEND = IPOS - - DO JA=1,N_REGIONS_NS - DO JB=1,N_REGIONS(JA) - CALL SET2PE(IRECV,JA,JB,0,0) - ILEN = (D%NLSTLAT(JA)-D%NFRSTLAT(JA)+1)*2 - ILENG(NPRCIDS(IRECV))=ILEN - ENDDO - ENDDO - IOFF(1)=0 - DO JJ=2,NPROC - IOFF(JJ)=IOFF(JJ-1)+ILENG(JJ-1) - ENDDO - ALLOCATE(ICOMBUFG(SUM(ILENG(:)))) - CALL MPL_ALLGATHERV(ICOMBUF(1:ILSEND),ICOMBUFG,ILENG,CDSTRING='SUSTAONL') - DO JA=1,N_REGIONS_NS - IGL1 = D%NFRSTLAT(JA) - IGL2 = D%NLSTLAT(JA) - DO JB=1,N_REGIONS(JA) - CALL SET2PE(IRECV,JA,JB,0,0) - IF(IRECV /= MYPROC) THEN - ILEN = (D%NLSTLAT(JA)-D%NFRSTLAT(JA)+1)*2 - IPOS = IOFF(NPRCIDS(IRECV)) - DO JGL=IGL1,IGL2 - IGL = D%NPTRFRSTLAT(JA)+JGL-IGL1 - IPOS = IPOS+1 - D%NSTA(IGL,JB) = ICOMBUFG(IPOS) - IPOS = IPOS+1 - D%NONL(IGL,JB) = ICOMBUFG(IPOS) - ENDDO - ENDIF - ENDDO - ENDDO - DEALLOCATE(ICOMBUFG) - - ELSE - - ITAG = MTAGPART - IPOS = 0 - DO JB=1,N_REGIONS(MY_REGION_NS) - DO JGL=1,D%NLSTLAT(MY_REGION_NS)-D%NFRSTLAT(MY_REGION_NS)+1 - IPOS = IPOS+1 - ICOMBUF(IPOS) = D%NSTA(D%NPTRFLOFF+JGL,JB) - IPOS = IPOS+1 - ICOMBUF(IPOS) = D%NONL(D%NPTRFLOFF+JGL,JB) - ENDDO - ENDDO - IF( IPOS > IBUFLEN )THEN - CALL ABORT_TRANS(' SUSTAONL: SEND BUFFER TOO SMALL FOR GLOBAL INFO') - ENDIF - ILSEND = IPOS - DO JA=1,N_REGIONS_NS - CALL SET2PE(ISEND,JA,MY_REGION_EW,0,0) - IF(ISEND /= MYPROC) THEN - CALL MPL_SEND(ICOMBUF(1:ILSEND),KDEST=NPRCIDS(ISEND),KTAG=ITAG, & - & CDSTRING='SUSTAONL:') - ENDIF - ENDDO - - DO JA=1,N_REGIONS_NS - CALL SET2PE(IRECV,JA,MY_REGION_EW,0,0) - IF(IRECV /= MYPROC) THEN - ILEN = (D%NLSTLAT(JA)-D%NFRSTLAT(JA)+1)*N_REGIONS(JA)*2 - CALL MPL_RECV(ICOMBUF(1:ILEN),KSOURCE=NPRCIDS(IRECV),KTAG=ITAG, & - & KOUNT=ILRECV,CDSTRING='SUSTAONL:') - IGL1 = D%NFRSTLAT(JA) - IGL2 = D%NLSTLAT(JA) - IPOS = 0 - DO JB=1,N_REGIONS(JA) - DO JGL=IGL1,IGL2 - IGL = D%NPTRFRSTLAT(JA)+JGL-IGL1 - IPOS = IPOS+1 - D%NSTA(IGL,JB) = ICOMBUF(IPOS) - IPOS = IPOS+1 - D%NONL(IGL,JB) = ICOMBUF(IPOS) - ENDDO - ENDDO - ENDIF - ENDDO - - ENDIF - CALL GSTATS(814,1) - CALL GSTATS_BARRIER2(795) -ELSE - DO JGL=1,R%NDGL - D%NSTA(JGL,1) = 1 - D%NONL(JGL,1) = G%NLOEN(JGL) - ENDDO -ENDIF - -! Confirm consistency of global partitioning, specifically testing for -! multiple assignments of same grid point and unassigned grid points - -LLABORT = .FALSE. -DO JGL=1,R%NDGL - DO JL=1,G%NLOEN(JGL) - ICHK(JL,JGL) = 1 - ENDDO -ENDDO -DO JA=1,N_REGIONS_NS - IGLOFF = D%NPTRFRSTLAT(JA) - DO JB=1,N_REGIONS(JA) - IGL1 = D%NFRSTLAT(JA) - IGL2 = D%NLSTLAT(JA) - DO JGL=IGL1,IGL2 - IGL = IGLOFF+JGL-IGL1 - DO JL=D%NSTA(IGL,JB),D%NSTA(IGL,JB)+D%NONL(IGL,JB)-1 - IF( ICHK(JL,JGL) /= 1 )THEN - WRITE(NOUT,'(" SUSTAONL : seta=",i4," setb=",i4,& - &" row=",I4," sta=",I4," INVALID GRID POINT")')& - &JA,JB,JGL,JL - WRITE(0,'(" SUSTAONL : seta=",i4," setb=",i4,& - &" ROW=",I4," sta=",I4," INVALID GRID POINT")')& - &JA,JB,JGL,JL - LLABORT = .TRUE. - ENDIF - ICHK(JL,JGL) = 2 - ENDDO - ENDDO - ENDDO -ENDDO -DO JGL=1,R%NDGL - DO JL=1,G%NLOEN(JGL) - IF( ICHK(JL,JGL) /= 2 )THEN - WRITE(NOUT,'(" SUSTAONL : row=",i4," sta=",i4,& - &" GRID POINT NOT ASSIGNED")') JGL,JL - LLABORT = .TRUE. - ENDIF - ENDDO -ENDDO -IF( LLABORT )THEN - WRITE(NOUT,'(" SUSTAONL : inconsistent partitioning")') - CALL ABORT_TRANS(' SUSTAONL: inconsistent partitioning') -ENDIF - - -IF (LLP1) THEN - WRITE(UNIT=NOUT,FMT='('' OUTPUT FROM ROUTINE SUSTAONL '')') - WRITE(UNIT=NOUT,FMT='('' '')') - WRITE(UNIT=NOUT,FMT='('' PARTITIONING INFORMATION '')') - WRITE(UNIT=NOUT,FMT='('' '')') - IPROCB = MIN(32,N_REGIONS_EW) - WRITE(UNIT=NOUT,FMT='(17X," SETB=",32(1X,I5))') (JB,JB=1,IPROCB) - DO JA=1,N_REGIONS_NS - IPROCB = MIN(32,N_REGIONS(JA)) - WRITE(UNIT=NOUT,FMT='('' '')') - IGLOFF = D%NPTRFRSTLAT(JA) - IGL1 = D%NFRSTLAT(JA) - IGL2 = D%NLSTLAT(JA) - DO JGL=IGL1,IGL2 - IGL=IGLOFF+JGL-IGL1 - WRITE(UNIT=NOUT,FMT='(" SETA=",I5," LAT=",I5," NSTA=",& - &32(1X,I5))') JA,JGL,(D%NSTA(IGL,JB),JB=1,IPROCB) - WRITE(UNIT=NOUT,FMT='(" SETA=",I5," LAT=",I5," D%NONL=",& - &32(1X,I5))') JA,JGL,(D%NONL(IGL,JB),JB=1,IPROCB) - ENDDO - WRITE(UNIT=NOUT,FMT='('' '')') - ENDDO - WRITE(UNIT=NOUT,FMT='('' '')') - WRITE(UNIT=NOUT,FMT='('' '')') -ENDIF - -! ------------------------------------------------------------------ - -END SUBROUTINE SUSTAONL -END MODULE SUSTAONL_MOD diff --git a/src/trans/gpu/internal/suwavedi_mod.F90 b/src/trans/gpu/internal/suwavedi_mod.F90 deleted file mode 100755 index 8f87010c6..000000000 --- a/src/trans/gpu/internal/suwavedi_mod.F90 +++ /dev/null @@ -1,186 +0,0 @@ -! (C) Copyright 1996- ECMWF. -! (C) Copyright 1996- Meteo-France. -! -! This software is licensed under the terms of the Apache Licence Version 2.0 -! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -! In applying this licence, ECMWF does not waive the privileges and immunities -! granted to it by virtue of its status as an intergovernmental organisation -! nor does it submit to any jurisdiction. -! - -MODULE SUWAVEDI_MOD -CONTAINS -SUBROUTINE SUWAVEDI(KSMAX,KTMAX,KPRTRW,KMYSETW,KASM0,KSPOLEGL,KPROCM,& - &KUMPP,KSPEC,KSPEC2,KSPEC2MX,KPOSSP,KMYMS,& - &KPTRMS,KALLMS,KDIM0G) - -!**** *SUWAVEDI * - Routine to initialize spectral wave distribution - -! Purpose. -! -------- -! Initialize arrays controlling spectral wave distribution - -!** Interface. -! ---------- -! *CALL* *SUWAVEDI * - -! Explicit arguments : -! -------------------- -! KSMAX - Spectral truncation limit (input) -! KTMAX - Overtruncation for KSMAX (input) -! KPRTRW - Number of processors in A-direction (input) -! KMYSETW - A-set for present processor (input) -! KASM0 - Offsets for spectral waves (output) -! KSPOLEGL - Local version of NSPOLEG (output) -! KPROCM - Where a certain spectral wave belongs (output) -! KUMPP - Number of spectral waves on this PE (output) -! KSPEC - Local version on NSPEC (output) -! KSPEC2 - Local version on NSPEC2 (output) -! KSPEC2MX - Maximum KSPEC2 across PEs (output) -! KPOSSP - Global spectral fields partitioning (output) -! KMYMS - This PEs spectral zonal wavenumbers (output) -! KPTRMS - Pointer to the first wave number of a given a-set (output) -! KALLMS - Wave numbers for all wave-set concatenated together -! to give all wave numbers in wave-set order (output) - -! Implicit arguments : NONE -! -------------------- - -! Method. -! ------- -! See documentation - -! Externals. NONE. -! ---------- - -! Reference. -! ---------- -! ECMWF Research Department documentation of the IFS - -! Author. -! ------- -! MPP Group *ECMWF* - -! Modifications. -! -------------- -! Original : 96-01-10 -! L.Isaksen: 96-02-02 - Calculation of KSPEC2MX added -! K.YESSAD : 97-02-18 - Add KTMAX, bug correction for KSPOLEGL. -! ------------------------------------------------------------------ - -USE EC_PARKIND, ONLY: JPIM - -IMPLICIT NONE - - -! DUMMY -INTEGER(KIND=JPIM),INTENT(IN) :: KSMAX -INTEGER(KIND=JPIM),INTENT(IN) :: KTMAX -INTEGER(KIND=JPIM),INTENT(IN) :: KPRTRW -INTEGER(KIND=JPIM),INTENT(IN) :: KMYSETW -INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KSPEC -INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KSPEC2 -INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KSPEC2MX -INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KSPOLEGL - -INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KASM0(0:KSMAX) -INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KPROCM(0:KSMAX) -INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KUMPP(KPRTRW) -INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KMYMS(KSMAX+1) -INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KPOSSP(KPRTRW+1) -INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KPTRMS(KPRTRW) -INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KALLMS(KSMAX+1) -INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KDIM0G(0:KSMAX) - -! LOCAL -INTEGER(KIND=JPIM) :: IK, IL, IND, IPOS, ISPEC2P, JA, JM,JMLOC,IM -INTEGER(KIND=JPIM) :: ISPOLEGL,ISPEC2MX,IASM0(0:KSMAX),IPROCM(0:KSMAX) -INTEGER(KIND=JPIM) :: IUMPP(KPRTRW),IMYMS(KSMAX+1),IPOSSP(KPRTRW+1) -INTEGER(KIND=JPIM) :: IPTRMS(KPRTRW),IALLMS(KSMAX+1),IDIM0G(0:KSMAX) -INTEGER(KIND=JPIM) :: ISPEC(KPRTRW),IC(KPRTRW) - - -! ----------------------------------------------------------------- - -!* 1. Initialize partitioning of wave numbers to PEs -! ---------------------------------------------- - -ISPEC(:) = 0 - -IUMPP(:) = 0 -IASM0(:) = -99 -ISPOLEGL = 0 - -IL = 1 -IND = 1 -IK = 0 -IPOS = 1 -DO JM=0,KSMAX - IK = IK + IND - IF (IK > KPRTRW) THEN - IK = KPRTRW - IND = -1 - ELSEIF (IK < 1) THEN - IK = 1 - IND = 1 - ENDIF - IPROCM(JM) = IK - ISPEC(IK) = ISPEC(IK)+KSMAX-JM+1 - IUMPP(IK) = IUMPP(IK)+1 - IF (IK == KMYSETW) THEN - ISPOLEGL = ISPOLEGL +KTMAX+1-JM+1 - IMYMS(IL) = JM - IASM0(JM) = IPOS - IPOS = IPOS+(KSMAX-JM+1)*2 - IL = IL+1 - ENDIF -ENDDO - -IPOSSP(1) = 1 -ISPEC2P = 2*ISPEC(1) -ISPEC2MX = ISPEC2P -IPTRMS(1) = 1 -DO JA=2,KPRTRW - IPOSSP(JA) = IPOSSP(JA-1)+ISPEC2P - ISPEC2P = 2*ISPEC(JA) - ISPEC2MX = MAX(ISPEC2MX,ISPEC2P) -! pointer to the first wave number of a given wave-set in NALLMS array - IPTRMS(JA) = IPTRMS(JA-1)+IUMPP(JA-1) -ENDDO -IPOSSP(KPRTRW+1) = IPOSSP(KPRTRW)+ISPEC2P - -! IALLMS : wave numbers for all wave-set concatenated together to give all -! wave numbers in wave-set order. -IC(:) = 0 -DO JM=0,KSMAX - IALLMS(IC(IPROCM(JM))+IPTRMS(IPROCM(JM))) = JM - IC(IPROCM(JM)) = IC(IPROCM(JM))+1 -ENDDO - -IPOS = 1 -DO JA=1,KPRTRW - DO JMLOC=1,IUMPP(JA) - IM = IALLMS(IPTRMS(JA)+JMLOC-1) - IDIM0G(IM) = IPOS - IPOS = IPOS+(KSMAX+1-IM)*2 - ENDDO -ENDDO - -IF(PRESENT(KSPEC)) KSPEC = ISPEC(KMYSETW) -IF(PRESENT(KSPEC2)) KSPEC2 = 2*ISPEC(KMYSETW) -IF(PRESENT(KSPEC2MX)) KSPEC2MX = ISPEC2MX -IF(PRESENT(KSPOLEGL)) KSPOLEGL = ISPOLEGL - -IF(PRESENT(KASM0)) KASM0(:) = IASM0(:) -IF(PRESENT(KPROCM)) KPROCM(:) = IPROCM(:) -IF(PRESENT(KUMPP)) KUMPP(:) = IUMPP(:) -IF(PRESENT(KMYMS)) KMYMS(:) = IMYMS(:) -IF(PRESENT(KPOSSP)) KPOSSP(:) = IPOSSP(:) -IF(PRESENT(KPTRMS)) KPTRMS(:) = IPTRMS(:) -IF(PRESENT(KALLMS)) KALLMS(:) = IALLMS(:) -IF(PRESENT(KDIM0G)) KDIM0G(:) = IDIM0G(:) - -END SUBROUTINE SUWAVEDI -END MODULE SUWAVEDI_MOD - - diff --git a/src/trans/gpu/internal/tpm_constants.F90 b/src/trans/gpu/internal/tpm_constants.F90 deleted file mode 100755 index 67ee2e571..000000000 --- a/src/trans/gpu/internal/tpm_constants.F90 +++ /dev/null @@ -1,20 +0,0 @@ -! (C) Copyright 2000- ECMWF. -! (C) Copyright 2000- Meteo-France. -! -! This software is licensed under the terms of the Apache Licence Version 2.0 -! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -! In applying this licence, ECMWF does not waive the privileges and immunities -! granted to it by virtue of its status as an intergovernmental organisation -! nor does it submit to any jurisdiction. -! - -MODULE TPM_CONSTANTS -USE PARKIND_ECTRANS, ONLY: JPRBT - -IMPLICIT NONE - -SAVE - -REAL(KIND=JPRBT) :: RA ! Radius of Earth - -END MODULE TPM_CONSTANTS diff --git a/src/trans/gpu/internal/tpm_ctl.F90 b/src/trans/gpu/internal/tpm_ctl.F90 deleted file mode 100755 index 6f218ab82..000000000 --- a/src/trans/gpu/internal/tpm_ctl.F90 +++ /dev/null @@ -1,41 +0,0 @@ -! (C) Copyright 2000- ECMWF. -! (C) Copyright 2000- Meteo-France. -! -! This software is licensed under the terms of the Apache Licence Version 2.0 -! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -! In applying this licence, ECMWF does not waive the privileges and immunities -! granted to it by virtue of its status as an intergovernmental organisation -! nor does it submit to any jurisdiction. -! - -MODULE TPM_CTL - -USE SHAREDMEM_MOD, ONLY: SHAREDMEM -IMPLICIT NONE - -SAVE - - -TYPE CTL_TYPE - -LOGICAL :: LREAD_LEGPOL = .FALSE. -LOGICAL :: LWRITE_LEGPOL = .FALSE. -CHARACTER(LEN=256) :: CLEGPOLFNAME='legpol_file' -CHARACTER(LEN=4) :: CIO_TYPE='file' -TYPE(SHAREDMEM) :: STORAGE - -END TYPE CTL_TYPE - - -TYPE(CTL_TYPE),ALLOCATABLE,TARGET :: CTL_RESOL(:) -TYPE(CTL_TYPE),POINTER :: C - - -END MODULE TPM_CTL - - - - - - - diff --git a/src/trans/gpu/internal/tpm_dim.F90 b/src/trans/gpu/internal/tpm_dim.F90 deleted file mode 100755 index 83f760913..000000000 --- a/src/trans/gpu/internal/tpm_dim.F90 +++ /dev/null @@ -1,58 +0,0 @@ -! (C) Copyright 2000- ECMWF. -! (C) Copyright 2000- Meteo-France. -! (C) Copyright 2022- NVIDIA. -! -! This software is licensed under the terms of the Apache Licence Version 2.0 -! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -! In applying this licence, ECMWF does not waive the privileges and immunities -! granted to it by virtue of its status as an intergovernmental organisation -! nor does it submit to any jurisdiction. -! - -MODULE TPM_DIM - -! Module for dimensions. - -USE PARKIND1, ONLY: JPIM - -IMPLICIT NONE - -SAVE - -TYPE DIM_TYPE -! SPECTRAL SPACE DIMENSIONS - - INTEGER(KIND=JPIM) :: NSMAX ! Truncation order - INTEGER(KIND=JPIM) :: NTMAX ! Truncation order for tendencies - INTEGER(KIND=JPIM) :: NSPOLEG ! Number of Legandre polynomials - INTEGER(KIND=JPIM) :: NSPEC_G ! Number of complex spectral coefficients (global) - INTEGER(KIND=JPIM) :: NSPEC2_G ! 2*NSPEC_G - -! COLLOCATION GRID DIMENSIONS - - INTEGER(KIND=JPIM) :: NDGL ! Number of rows of latitudes - INTEGER(KIND=JPIM) :: NDLON ! Maximum number of longitude points (near equator) - INTEGER(KIND=JPIM) :: NDGNH ! Number of rows in northern hemisphere - -! Legendre transform dimensions - INTEGER(KIND=JPIM) :: NLEI1 ! R%NSMAX+4+MOD(R%NSMAX+4+1,2) - INTEGER(KIND=JPIM) :: NLEI3 ! R%NDGNH+MOD(R%NDGNH+2,2) - INTEGER(KIND=JPIM) :: NLED3 ! R%NTMAX+2+MOD(R%NTMAX+3,2) - INTEGER(KIND=JPIM) :: NLED4 ! R%NTMAX+3+MOD(R%NTMAX+4,2) - -! Width of E'-zone - INTEGER(KIND=JPIM) :: NNOEXTZL ! Longitude direction - INTEGER(KIND=JPIM) :: NNOEXTZG ! Latitude direction - -END TYPE DIM_TYPE - -TYPE(DIM_TYPE),ALLOCATABLE,TARGET :: DIM_RESOL(:) -TYPE(DIM_TYPE),POINTER :: R - -! flat copies of above -INTEGER(KIND=JPIM) :: R_NSMAX ! Truncation order -INTEGER(KIND=JPIM) :: R_NTMAX ! Truncation order for tendencies -INTEGER(KIND=JPIM) :: R_NDGNH ! Number of rows in northern hemisphere -INTEGER(KIND=JPIM) :: R_NDGL ! Number of rows of latitudes - -END MODULE TPM_DIM diff --git a/src/trans/gpu/internal/tpm_distr.F90 b/src/trans/gpu/internal/tpm_distr.F90 deleted file mode 100755 index be744bf40..000000000 --- a/src/trans/gpu/internal/tpm_distr.F90 +++ /dev/null @@ -1,195 +0,0 @@ -! (C) Copyright 2000- ECMWF. -! (C) Copyright 2000- Meteo-France. -! (C) Copyright 2022- NVIDIA. -! -! This software is licensed under the terms of the Apache Licence Version 2.0 -! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -! In applying this licence, ECMWF does not waive the privileges and immunities -! granted to it by virtue of its status as an intergovernmental organisation -! nor does it submit to any jurisdiction. -! - -MODULE TPM_DISTR - -! Module for distributed memory environment. - -USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT - -IMPLICIT NONE - -SAVE - -!* Variables describing distributed memory parallelization - -INTEGER(KIND=JPIM) :: NPROC ! Number of processors (NPRGPNS*NPRGPEW) -INTEGER(KIND=JPIM) :: NPRGPNS ! No. of sets in N-S direction (grid-point space) -INTEGER(KIND=JPIM) :: NPRGPEW ! No. of sets in E-W direction (grid-point space) -INTEGER(KIND=JPIM) :: NPRTRW ! No. of sets in wave direction (spectral space) -INTEGER(KIND=JPIM) :: NPRTRV ! NPROC/NPRTRW -INTEGER(KIND=JPIM) :: NPRTRNS ! No. of sets in N-S direction (Fourier space) - ! (always equal to NPRTRW) -LOGICAL :: LEQ_REGIONS ! TRUE - Use new eq_regions partitioning - ! FALSE- Use old NPRGPNS x NPRGPEW partitioning -INTEGER(KIND=JPIM) :: MYPROC ! My processor number -INTEGER(KIND=JPIM) :: MYSETW ! My set number in wave direction (spectral space) -INTEGER(KIND=JPIM) :: MYSETV ! My set number in field direction(S.S and F.S) - -INTEGER(KIND=JPIM) :: MTAGLETR ! Tag -INTEGER(KIND=JPIM) :: MTAGML ! Tag -INTEGER(KIND=JPIM) :: MTAGLG ! Tag -INTEGER(KIND=JPIM) :: MTAGGL ! Tag -INTEGER(KIND=JPIM) :: MTAGPART ! Tag -INTEGER(KIND=JPIM) :: MTAGDISTSP ! Tag -INTEGER(KIND=JPIM) :: MTAGLM ! Tag -INTEGER(KIND=JPIM) :: MTAGDISTGP ! Tag - -INTEGER(KIND=JPIM) ,ALLOCATABLE :: NPRCIDS(:) ! Array containing the process ids - -TYPE DISTR_TYPE -LOGICAL :: LGRIDONLY ! TRUE - only grid space structures are available -LOGICAL :: LWEIGHTED_DISTR ! TRUE - weighted distribution -LOGICAL :: LSPLIT ! TRUE - latitudes are shared between a-sets -LOGICAL :: LCPNMONLY ! TRUE - Compute Legendre polynomials only, not FFTs - -! SPECTRAL SPACE - -INTEGER(KIND=JPIM) :: NUMP ! No. of spectral waves handled by this processor -INTEGER(KIND=JPIM) :: NSPEC ! No. of complex spectral coefficients (on this PE) -INTEGER(KIND=JPIM) :: NSPEC2 ! 2*NSPEC -INTEGER(KIND=JPIM) :: NSPEC2MX ! maximun NSPEC2 among all PEs -INTEGER(KIND=JPIM) :: NTPEC2 ! cf. NSPEC2 but for truncation NTMAX -INTEGER(KIND=JPIM) :: NUMTP ! cf. NUMP but for truncation NTMAX - -INTEGER(KIND=JPIM) :: NSPOLEGL ! No. of legendre polynomials on this PE -INTEGER(KIND=JPIM) :: NLEI3D ! (NLEI3-1)/NPRTRW+1 - -INTEGER(KIND=JPIM) ,ALLOCATABLE :: MYMS(:) ! Wave numbers handled by this PE -INTEGER(KIND=JPIM) ,ALLOCATABLE :: NUMPP(:) ! No. of wave numbers each wave set is - ! responsible for -INTEGER(KIND=JPIM) ,ALLOCATABLE :: NPOSSP(:) ! Not needed in transform? -INTEGER(KIND=JPIM) ,ALLOCATABLE :: NPROCM(:) ! Process that does the calc. for certain - ! wavenumber M -INTEGER(KIND=JPIM) ,ALLOCATABLE :: NDIM0G(:) ! Defines partitioning of global spectral - ! fields among PEs - -INTEGER(KIND=JPIM) ,ALLOCATABLE :: NASM0(:) ! Address in a spectral array of (m, n=m) -INTEGER(KIND=JPIM) ,ALLOCATABLE :: NATM0(:) ! Same as NASM0 but for NTMAX -INTEGER(KIND=JPIM) ,ALLOCATABLE :: NALLMS(:) ! Wave numbers for all a-set concatenated - ! together to give all wave numbers in a-set - ! order. Used when global spectral norms - ! have to be gathered. -INTEGER(KIND=JPIM) ,ALLOCATABLE :: NPTRMS(:) ! Pointer to the first wave number of a given - ! a-set in nallms array. - - -! Legendre polynomials - -INTEGER(KIND=JPIM) ,ALLOCATABLE :: NLATLS(:,:) ! First latitude for which each a-set,bset calcul. -INTEGER(KIND=JPIM) ,ALLOCATABLE :: NLATLE(:,:) ! Last latitude for which each a-set,bset calcul. - -INTEGER(KIND=JPIM) ,ALLOCATABLE :: NPMT(:) ! Adress for legendre polynomial for - ! given M (NTMAX) -INTEGER(KIND=JPIM) ,ALLOCATABLE :: NPMS(:) ! Adress for legendre polynomial for - ! given M (NSMAX) -INTEGER(KIND=JPIM) ,ALLOCATABLE :: NPMG(:) ! Global version of NPMS - -! FOURIER SPACE - -INTEGER(KIND=JPIM) :: NDGL_FS ! Number of rows of latitudes for which this process is - ! performing Fourier Space calculations - -INTEGER(KIND=JPIM) ,ALLOCATABLE :: NSTAGTF(:) ! Offset for specific latitude in - ! Fourier/gridpoint buffer -INTEGER(KIND=JPIM) :: NLENGTF ! Second dimension of Fourier/gridpoint buffer - ! (sum of (NLOEN+3) over local latitudes) - -INTEGER(KIND=JPIM) ,ALLOCATABLE :: NULTPP(:) ! No of lats. for each wave_set (F.S) -INTEGER(KIND=JPIM) ,ALLOCATABLE :: NPROCL(:) ! Process responsible for each lat. (F.S) -INTEGER(KIND=JPIM) ,ALLOCATABLE :: NPTRLS(:) ! Pointer to first lat. (F.S) - -! NSTAGT0B to NLENGT1B: help arrays for spectral to fourier space transposition -INTEGER(KIND=JPIM) ,ALLOCATABLE :: NSTAGT0B(:) ! Start adresses for segments within buffer - ! (according to processors to whom data - ! is going to be sent) -INTEGER(KIND=JPIM) ,ALLOCATABLE :: NSTAGT1B(:) -INTEGER(KIND=JPIM) ,ALLOCATABLE :: NPNTGTB0(:,:) -INTEGER(KIND=JPIM) ,ALLOCATABLE :: NPNTGTB1(:,:) -INTEGER(KIND=JPIM) ,ALLOCATABLE :: NLTSFTB(:) -INTEGER(KIND=JPIM) ,ALLOCATABLE :: NLTSGTB(:) -INTEGER(KIND=JPIM) ,ALLOCATABLE :: MSTABF(:) -INTEGER(KIND=JPIM) :: NLENGT0B ! dimension -INTEGER(KIND=JPIM) :: NLENGT1B ! dimension - -! GRIDPOINT SPACE - -INTEGER(KIND=JPIM) :: NDGL_GP ! D%NLSTLAT(MY_REGION_NS)-D%NFRSTLOFF -INTEGER(KIND=JPIM) ,ALLOCATABLE :: NFRSTLAT(:) ! First lat of each a-set -INTEGER(KIND=JPIM) ,ALLOCATABLE :: NLSTLAT(:) ! Last lat of each a-set -INTEGER(KIND=JPIM) :: NFRSTLOFF ! Offset for first lat of own a-set - ! i.e. NFRSTLOFF=NFRSTLAT(MYSETA)-1 -INTEGER(KIND=JPIM) ,ALLOCATABLE :: NPTRLAT(:) ! Pointer to start of latitude -INTEGER(KIND=JPIM) ,ALLOCATABLE :: NPTRFRSTLAT(:) ! Pointer to the first latitude of each - ! a-set in NSTA and NONL arrays -INTEGER(KIND=JPIM) ,ALLOCATABLE :: NPTRLSTLAT(:) ! Pointer to the last latitude of each - ! a-set in NSTA and NONL arrays -INTEGER(KIND=JPIM) :: NPTRFLOFF ! Offset for pointer to the first latitude of own a-set - ! NSTA and NONL arrays, i.e. NPTRFRSTLAT(MYSETA)-1 -LOGICAL ,ALLOCATABLE :: LSPLITLAT(:) ! True if latitude is split over 2 a-sets - -! NSTA(R%NDGL+NPRGPNS-1,NPRGPEW) : Position of first grid column -! for the latitudes on a processor. The information is -! available for all processors. The b-sets are distinguished -! by the last dimension of NSTA(). The latitude band for -! each a-set is addressed by NPTRFRSTLAT(JASET), -! NPTRLSTLAT(JASET), and NPTRFLOFF=NPTRFRSTLAT(MYSETA) on -! this processors a-set. Each split latitude has two entries -! in NSTA(,:) which necessitates the rather complex -! addressing of NSTA(,:) and the overdimensioning of NSTA by -! NPRGPNS. -! NONL(R%NDGL+NPRGPNS-1,NPRGPEW) : Number of grid columns for -! the latitudes on a processor. Similar to NSTA() in data -! structure. -INTEGER(KIND=JPIM) ,ALLOCATABLE :: NSTA(:,:) -INTEGER(KIND=JPIM) ,ALLOCATABLE :: NONL(:,:) - -INTEGER(KIND=JPIM) :: NGPTOT ! Total number of grid columns on this PE -INTEGER(KIND=JPIM) :: NGPTOTG ! Total number of grid columns on the Globe -INTEGER(KIND=JPIM) :: NGPTOTMX ! Maximum number of grid columns on any of the PEs -INTEGER(KIND=JPIM) ,ALLOCATABLE :: NGPTOTL(:,:) ! Number of grid columns on each PE. - -REAL(KIND=JPRBT) ,ALLOCATABLE :: RWEIGHT(:) ! Weight per grid-point (if weighted distribution) -INTEGER(KIND=JPIM) ,ALLOCATABLE :: NPROCA_GP(:) ! Number of grid-points per a-set - -INTEGER(KIND=JPIM), ALLOCATABLE :: OFFSETS_GEMM1(:), OFFSETS_GEMM2(:) - -END TYPE DISTR_TYPE - -!flat versions of the above -INTEGER(KIND=JPIM) :: D_NUMP ! No. of spectral waves handled by this processor -INTEGER(KIND=JPIM) ,ALLOCATABLE :: D_MYMS(:) ! Wave numbers handled by this PE -INTEGER(KIND=JPIM) ,ALLOCATABLE :: D_NSTAGT0B(:) ! Start adresses for segments within buffer - ! (according to processors to whom data - ! is going to be sent) -INTEGER(KIND=JPIM) ,ALLOCATABLE :: D_NSTAGT1B(:) -INTEGER(KIND=JPIM) ,ALLOCATABLE :: D_NPROCL(:) ! Process responsible for each lat. (F.S) -INTEGER(KIND=JPIM) ,ALLOCATABLE :: D_NPNTGTB1(:,:) -INTEGER(KIND=JPIM) ,ALLOCATABLE :: D_NASM0(:) ! Address in a spectral array of (m, n=m) -INTEGER(KIND=JPIM) ,ALLOCATABLE :: D_NSTAGTF(:) ! Offset for specific latitude in -INTEGER(KIND=JPIM) :: D_NDGL_FS ! Number of rows of latitudes for which this process is - ! performing Fourier Space calculations -INTEGER(KIND=JPIM) ,ALLOCATABLE :: D_MSTABF(:) -INTEGER(KIND=JPIM) ,ALLOCATABLE :: D_NPNTGTB0(:,:) -INTEGER(KIND=JPIM) ,ALLOCATABLE :: D_NPROCM(:) ! Process that does the calc. for certain -INTEGER(KIND=JPIM) ,ALLOCATABLE :: D_NPTRLS(:) ! Pointer to first lat. (F.S) - - -! The offsets in the input and output arrays to the gemms. -! (1) are the offsets in the "inputs" of dirtrans ("outputs" invtrans) -! (2) are the offsets in the "outputs" of invtrans ("inputs" dirtrans) -INTEGER(KIND=JPIM), POINTER :: D_OFFSETS_GEMM1(:), D_OFFSETS_GEMM2(:) - -TYPE(DISTR_TYPE),ALLOCATABLE,TARGET :: DISTR_RESOL(:) -TYPE(DISTR_TYPE),POINTER :: D - -END MODULE TPM_DISTR - diff --git a/src/trans/gpu/internal/tpm_gen.F90 b/src/trans/gpu/internal/tpm_gen.F90 deleted file mode 100755 index 2a8b42c8c..000000000 --- a/src/trans/gpu/internal/tpm_gen.F90 +++ /dev/null @@ -1,45 +0,0 @@ -! (C) Copyright 2000- ECMWF. -! (C) Copyright 2000- Meteo-France. -! -! This software is licensed under the terms of the Apache Licence Version 2.0 -! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -! In applying this licence, ECMWF does not waive the privileges and immunities -! granted to it by virtue of its status as an intergovernmental organisation -! nor does it submit to any jurisdiction. -! - -MODULE TPM_GEN - -! Module for general control variables. - -USE PARKIND_ECTRANS, ONLY: JPIM - -IMPLICIT NONE - -SAVE - -INTEGER(KIND=JPIM) :: NOUT ! Unit number for "standard" output -INTEGER(KIND=JPIM) :: NERR ! Unit number for error messages -INTEGER(KIND=JPIM) :: NPRINTLEV ! Printing level, 0=no print, 1=standard,2=debug - -INTEGER(KIND=JPIM) :: MSETUP0 = 0 ! Control of setup calls -INTEGER(KIND=JPIM) :: NMAX_RESOL = 0 ! Maximum allowed number of resolutions -INTEGER(KIND=JPIM) :: NCUR_RESOL = 0 ! Current resolution -INTEGER(KIND=JPIM) :: NDEF_RESOL = 0 ! Number of defined resolutions -INTEGER(KIND=JPIM) :: NPROMATR ! Packet size for transform (in no of fields) - ! NPROMATR=0 means do all fields together (dflt) - -LOGICAL :: LALLOPERM ! Allocate some shared data structures permanently -LOGICAL :: LMPOFF ! true: switch off message passing -LOGICAL :: LSYNC_TRANS ! true: activate barriers in trmtol and trltom - -! Use of synchronization/blocking in Transpose (some networks do get flooded) -! 0 = Post IRECVs up-front, use ISENDs, use WAITANY to recv data (default) -! 1 = Use ISENDs, use blocking RECVs, add barrier at the end of each cycle -! 2 = Use buffered SENDs, use blocking RECVs, add barrier at the end of each cycle -INTEGER(KIND=JPIM) :: NTRANS_SYNC_LEVEL = 0 - -LOGICAL, ALLOCATABLE :: LENABLED(:) ! true: the resolution is enabled (it has been - ! initialised and has not been released afterward) - -END MODULE TPM_GEN diff --git a/src/trans/gpu/internal/tpm_geometry.F90 b/src/trans/gpu/internal/tpm_geometry.F90 deleted file mode 100755 index 93f888911..000000000 --- a/src/trans/gpu/internal/tpm_geometry.F90 +++ /dev/null @@ -1,45 +0,0 @@ -! (C) Copyright 2000- ECMWF. -! (C) Copyright 2000- Meteo-France. -! (C) Copyright 2022- NVIDIA. -! -! This software is licensed under the terms of the Apache Licence Version 2.0 -! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -! In applying this licence, ECMWF does not waive the privileges and immunities -! granted to it by virtue of its status as an intergovernmental organisation -! nor does it submit to any jurisdiction. -! - -MODULE TPM_GEOMETRY - -! Module containing data describing Gaussian grid. - -USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT - -IMPLICIT NONE - -SAVE - -TYPE GEOM_TYPE -INTEGER(KIND=JPIM),ALLOCATABLE :: NLOEN(:) ! NUMBER OF POINTS ON A PARALLEL -INTEGER(KIND=JPIM),ALLOCATABLE :: NMEN(:) ! ASSOCIATED CUT-OFF WAVE NUMBER -INTEGER(KIND=JPIM),ALLOCATABLE :: NDGLU(:) ! NUMBER OF HEMISPERIC LATITUDES -! FOR A GIVEN WAVE NUMBER M - -LOGICAL :: LAM ! LAM geometry if T, Global geometry if F -LOGICAL :: LREDUCED_GRID ! Reduced Gaussian grid if T -! quadratic Gaussian grid otherwise. -REAL(KIND=JPRBT) :: RSTRET ! Stretching factor (for Legendre polynomials -! computed on stretched latitudes only) -END TYPE GEOM_TYPE - -!flat copies of the above -INTEGER(KIND=JPIM),ALLOCATABLE :: G_NDGLU(:) ! NUMBER OF HEMISPERIC LATITUDES -INTEGER(KIND=JPIM),ALLOCATABLE :: G_NMEN(:) ! ASSOCIATED CUT-OFF WAVE NUMBER -INTEGER(KIND=JPIM) :: G_NMEN_MAX -INTEGER(KIND=JPIM),ALLOCATABLE :: G_NLOEN(:) ! NUMBER OF POINTS ON A PARALLEL -INTEGER(KIND=JPIM) :: G_NLOEN_MAX - -TYPE(GEOM_TYPE),ALLOCATABLE,TARGET :: GEOM_RESOL(:) -TYPE(GEOM_TYPE),POINTER :: G - -END MODULE TPM_GEOMETRY diff --git a/src/trans/gpu/internal/tpm_pol.F90 b/src/trans/gpu/internal/tpm_pol.F90 deleted file mode 100755 index b1f7ed222..000000000 --- a/src/trans/gpu/internal/tpm_pol.F90 +++ /dev/null @@ -1,120 +0,0 @@ -! (C) Copyright 2000- ECMWF. -! (C) Copyright 2000- Meteo-France. -! -! This software is licensed under the terms of the Apache Licence Version 2.0 -! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -! In applying this licence, ECMWF does not waive the privileges and immunities -! granted to it by virtue of its status as an intergovernmental organisation -! nor does it submit to any jurisdiction. -! - -MODULE TPM_POL - -! MODIFICATIONS. -! -------------- -! R. El Khatib 17-Feb-2016 Optional allocation/computation of DDC/DDD/DDE -! since they are (big and) not used in supolf. - -USE EC_PARKIND, ONLY: JPRD, JPIM - -IMPLICIT NONE - -SAVE - -REAL(KIND=JPRD),ALLOCATABLE :: DDC(:,:), DDD(:,:), DDE(:,:) -REAL(KIND=JPRD),ALLOCATABLE :: DDA(:), DDI(:), DDH(:) - -REAL(KIND=JPRD),ALLOCATABLE :: DFA(:), DFB(:), DFF(:), DFG(:), DFI(:), DFH(:) - -CONTAINS -!====================================================================== -SUBROUTINE INI_POL(KNSMAX,LDFAST) - -INTEGER(KIND=JPIM), INTENT(IN) :: KNSMAX -LOGICAL, INTENT(IN), OPTIONAL :: LDFAST - -REAL(KIND=JPRD) :: DC,DD,DE -INTEGER(KIND=JPIM) :: KKN, KKM - -INTEGER(KIND=JPIM) :: JN, JM -LOGICAL :: LLFAST - -DC(KKN,KKM)=SQRT( (REAL(2*KKN+1,JPRD)*REAL(KKN+KKM-1,JPRD)& - &*REAL(KKN+KKM-3,JPRD))& - &/ (REAL(2*KKN-3,JPRD)*REAL(KKN+KKM,JPRD)& - &*REAL(KKN+KKM-2,JPRD)) ) -DD(KKN,KKM)=SQRT( (REAL(2*KKN+1,JPRD)*REAL(KKN+KKM-1,JPRD)& - &*REAL(KKN-KKM+1,JPRD))& - &/ (REAL(2*KKN-1,JPRD)*REAL(KKN+KKM,JPRD)& - &*REAL(KKN+KKM-2,JPRD)) ) -DE(KKN,KKM)=SQRT( (REAL(2*KKN+1,JPRD)*REAL(KKN-KKM,JPRD))& - &/ (REAL(2*KKN-1,JPRD)*REAL(KKN+KKM,JPRD)) ) - -IF (PRESENT(LDFAST)) THEN - LLFAST=LDFAST -ELSE - LLFAST=.FALSE. -ENDIF -IF (.NOT.LLFAST) ALLOCATE( DDC(0:KNSMAX,0:KNSMAX) ) -IF (.NOT.LLFAST) ALLOCATE( DDD(0:KNSMAX,0:KNSMAX) ) -IF (.NOT.LLFAST) ALLOCATE( DDE(0:KNSMAX,0:KNSMAX) ) - -ALLOCATE( DDA(0:KNSMAX) ) -ALLOCATE( DDI(0:KNSMAX) ) -ALLOCATE( DDH(0:KNSMAX) ) - -ALLOCATE( DFA(0:KNSMAX) ) -ALLOCATE( DFB(0:KNSMAX) ) -ALLOCATE( DFF(0:KNSMAX) ) -ALLOCATE( DFG(0:KNSMAX) ) -ALLOCATE( DFI(0:KNSMAX) ) -ALLOCATE( DFH(0:KNSMAX) ) - - -DO JN=1,KNSMAX - DFA(JN) = 1._JPRD/SQRT(REAL(JN*(JN+1),JPRD)) - DFB(JN) = SQRT(REAL(2*JN+1,JPRD)/REAL(JN*(JN+1),JPRD)) - DFF(JN) = REAL(2*JN-1,JPRD)/REAL(JN,JPRD) - DFG(JN) = REAL(JN-1,JPRD)/REAL(JN,JPRD) - DFI(JN) = REAL(JN,JPRD) - DFH(JN) = SQRT(REAL(2*JN+1,JPRD)/REAL(2*JN,JPRD)) -ENDDO - -IF (.NOT.LLFAST) THEN - DO JN=3,KNSMAX - DO JM=2,JN-1 - DDC(JM,JN) = DC(JN,JM) - DDD(JM,JN) = DD(JN,JM) - DDE(JM,JN) = DE(JN,JM) - ENDDO - ENDDO -ENDIF - -DO JN=1,KNSMAX - DDA(JN) = 1._JPRD/SQRT(REAL(JN*(JN+1),JPRD)) - DDI(JN) = REAL(JN,JPRD) - DDH(JN) = SQRT(REAL(2*JN+1,JPRD)/REAL(2*JN,JPRD)) -ENDDO - -END SUBROUTINE INI_POL - -SUBROUTINE END_POL - -IF (ALLOCATED (DDC) ) DEALLOCATE( DDC ) -IF (ALLOCATED (DDD) ) DEALLOCATE( DDD ) -IF (ALLOCATED (DDE) ) DEALLOCATE( DDE ) - -DEALLOCATE( DDA ) -DEALLOCATE( DDI ) -DEALLOCATE( DDH ) - -DEALLOCATE( DFA ) -DEALLOCATE( DFB ) -DEALLOCATE( DFF ) -DEALLOCATE( DFG ) -DEALLOCATE( DFI ) -DEALLOCATE( DFH ) - -END SUBROUTINE END_POL - -END MODULE TPM_POL diff --git a/src/trans/gpu/sharedmem/sharedmem.c b/src/trans/gpu/sharedmem/sharedmem.c deleted file mode 100644 index 29426cea7..000000000 --- a/src/trans/gpu/sharedmem/sharedmem.c +++ /dev/null @@ -1,28 +0,0 @@ -/* - * (C) Copyright 2015- ECMWF. - * - * This software is licensed under the terms of the Apache Licence Version 2.0 - * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. - * In applying this licence, ECMWF does not waive the privileges and immunities - * granted to it by virtue of its status as an intergovernmental organisation - * nor does it submit to any jurisdiction. - */ - - -#include - -void sharedmem_malloc_bytes (void** ptr, size_t bytes) -{ - *ptr = malloc(bytes); -} - -void sharedmem_free(void** ptr) -{ - free(*ptr); -} - -void sharedmem_advance_bytes (void** ptr, size_t bytes) -{ - char** char_ptr = (char**)ptr; - *char_ptr += bytes; -} diff --git a/src/trans/gpu/sharedmem/sharedmem_mod.F90 b/src/trans/gpu/sharedmem/sharedmem_mod.F90 deleted file mode 100644 index bb28a489b..000000000 --- a/src/trans/gpu/sharedmem/sharedmem_mod.F90 +++ /dev/null @@ -1,314 +0,0 @@ -! (C) Copyright 2015- ECMWF. -! -! This software is licensed under the terms of the Apache Licence Version 2.0 -! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -! In applying this licence, ECMWF does not waive the privileges and immunities -! granted to it by virtue of its status as an intergovernmental organisation -! nor does it submit to any jurisdiction. -! - -MODULE SHAREDMEM_MOD - -! Routines to allow use of shared memery segments in Fortran - - -! Willem Deconinck and Mats Hamrud *ECMWF* -! Original : July 2015 - - -USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_PTR, C_INT, C_NULL_PTR,C_SIZE_T - -#ifdef __NEC__ -#define C_SIZEOF(x) INT(KIND(x),C_SIZE_T) -#endif - -IMPLICIT NONE - -PRIVATE - -PUBLIC :: SHAREDMEM -PUBLIC :: SHAREDMEM_ALLOCATE -PUBLIC :: SHAREDMEM_MALLOC_BYTES -PUBLIC :: SHAREDMEM_CREATE -PUBLIC :: SHAREDMEM_ASSOCIATE -PUBLIC :: SHAREDMEM_ADVANCE -PUBLIC :: SHAREDMEM_DELETE - -TYPE, BIND(C) :: SHAREDMEM -! Memory buffer - TYPE(C_PTR), PRIVATE :: BEGIN=C_NULL_PTR - INTEGER(C_SIZE_T), PRIVATE :: SIZE=0 ! IN BYTES - TYPE(C_PTR), PRIVATE :: CPTR=C_NULL_PTR - INTEGER(C_SIZE_T), PRIVATE :: OFFSET=0 ! IN BYTES -END TYPE SHAREDMEM - - -INTERFACE SHAREDMEM_ASSOCIATE -! Associate fortran scalars/arrays with memory segment - MODULE PROCEDURE SHAREDMEM_ASSOCIATE0_INT32 - MODULE PROCEDURE SHAREDMEM_ASSOCIATE0_REAL32 - MODULE PROCEDURE SHAREDMEM_ASSOCIATE0_REAL64 - MODULE PROCEDURE SHAREDMEM_ASSOCIATE1_INT32 - MODULE PROCEDURE SHAREDMEM_ASSOCIATE1_REAL32 - MODULE PROCEDURE SHAREDMEM_ASSOCIATE1_REAL64 - MODULE PROCEDURE SHAREDMEM_ASSOCIATE2_INT32 - MODULE PROCEDURE SHAREDMEM_ASSOCIATE2_REAL32 - MODULE PROCEDURE SHAREDMEM_ASSOCIATE2_REAL64 -END INTERFACE - - -INTERFACE - -! EXTERNAL C FUNCTIONS USED IN THIS MODULE -! ---------------------------------------- - - SUBROUTINE SHAREDMEM_ADVANCE_BYTES(CPTR,BYTES) BIND(C) - USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_PTR, C_SIZE_T - TYPE(C_PTR) :: CPTR - INTEGER(C_SIZE_T), VALUE :: BYTES - END SUBROUTINE SHAREDMEM_ADVANCE_BYTES - - SUBROUTINE SHAREDMEM_MALLOC_BYTES(PTR,BYTES) BIND(C) - USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_PTR, C_SIZE_T - TYPE(C_PTR) :: PTR - INTEGER(C_SIZE_T), VALUE :: BYTES - END SUBROUTINE SHAREDMEM_MALLOC_BYTES - - SUBROUTINE SHAREDMEM_FREE(PTR) BIND(C) - USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_PTR - TYPE(C_PTR), INTENT(IN) :: PTR - END SUBROUTINE SHAREDMEM_FREE - -END INTERFACE - -CONTAINS -!========================================================================= -SUBROUTINE SHAREDMEM_CREATE(HANDLE,CPTR,BYTES) -! Create memory buffer object from c pointer -USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_PTR, C_SIZE_T, C_F_POINTER -TYPE(SHAREDMEM), INTENT(OUT) :: HANDLE -TYPE(C_PTR) , INTENT(IN) :: CPTR -INTEGER(C_SIZE_T), INTENT(IN) :: BYTES -!------------------------------------------------------------------------ -HANDLE%BEGIN = CPTR -HANDLE%SIZE = BYTES -HANDLE%CPTR = HANDLE%BEGIN -HANDLE%OFFSET = 0 -END SUBROUTINE SHAREDMEM_CREATE -!========================================================================= -SUBROUTINE SHAREDMEM_ALLOCATE(HANDLE,BYTES) -! Create memory buffer object from Fortran -USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_SIZE_T -TYPE(SHAREDMEM), INTENT(OUT) :: HANDLE -INTEGER(C_SIZE_T), INTENT(IN) :: BYTES -INTEGER(C_SIZE_T) :: SIZE -!------------------------------------------------------------------------ -SIZE = BYTES -CALL SHAREDMEM_MALLOC_BYTES(HANDLE%BEGIN,SIZE) -HANDLE%SIZE = BYTES -HANDLE%CPTR = HANDLE%BEGIN -HANDLE%OFFSET = 0 -END SUBROUTINE SHAREDMEM_ALLOCATE -!========================================================================= -SUBROUTINE SHAREDMEM_DELETE(HANDLE) -! Free memory buffer -TYPE(SHAREDMEM), INTENT(OUT) :: HANDLE -CALL SHAREDMEM_FREE(HANDLE%BEGIN) -END SUBROUTINE SHAREDMEM_DELETE -!========================================================================= - -! PRIVATE SUBROUTINES -! ------------------- - -SUBROUTINE SHAREDMEM_ASSOCIATE0_INT32(HANDLE,VALUE,ADVANCE) - USE, INTRINSIC :: ISO_C_BINDING - TYPE(SHAREDMEM), INTENT(INOUT) :: HANDLE - INTEGER(C_INT), INTENT(OUT) :: VALUE - LOGICAL, OPTIONAL, INTENT(IN) :: ADVANCE - INTEGER(C_INT), POINTER :: FPTR(:) - INTEGER(C_INT) :: K - - CALL C_F_POINTER ( HANDLE%CPTR , FPTR , (/1/) ) - VALUE = FPTR(1) - - IF( PRESENT(ADVANCE) ) THEN - IF( ADVANCE ) THEN - CALL SHAREDMEM_ADVANCE_BYTES(HANDLE%CPTR,C_SIZEOF(K)) - HANDLE%OFFSET = HANDLE%OFFSET+C_SIZEOF(K) - ENDIF - ENDIF - -END SUBROUTINE SHAREDMEM_ASSOCIATE0_INT32 - -SUBROUTINE SHAREDMEM_ASSOCIATE0_REAL32(HANDLE,VALUE,ADVANCE) - USE, INTRINSIC :: ISO_C_BINDING - TYPE(SHAREDMEM), INTENT(INOUT) :: HANDLE - REAL(C_FLOAT), INTENT(OUT) :: VALUE - LOGICAL, OPTIONAL, INTENT(IN) :: ADVANCE - REAL(C_FLOAT), POINTER :: FPTR(:) - REAL(C_FLOAT) :: R - - CALL C_F_POINTER ( HANDLE%CPTR , FPTR , (/1/) ) - VALUE = FPTR(1) - - IF( PRESENT(ADVANCE) ) THEN - IF( ADVANCE ) THEN - CALL SHAREDMEM_ADVANCE_BYTES(HANDLE%CPTR,C_SIZEOF(R)) - HANDLE%OFFSET = HANDLE%OFFSET+C_SIZEOF(R) - - ENDIF - ENDIF - -END SUBROUTINE SHAREDMEM_ASSOCIATE0_REAL32 - -SUBROUTINE SHAREDMEM_ASSOCIATE0_REAL64(HANDLE,VALUE,ADVANCE) - USE, INTRINSIC :: ISO_C_BINDING - TYPE(SHAREDMEM), INTENT(INOUT) :: HANDLE - REAL(C_DOUBLE), INTENT(OUT) :: VALUE - LOGICAL, OPTIONAL, INTENT(IN) :: ADVANCE - REAL(C_DOUBLE), POINTER :: FPTR(:) - REAL(C_DOUBLE) :: R - - CALL C_F_POINTER ( HANDLE%CPTR , FPTR , (/1/) ) - VALUE = FPTR(1) - - IF( PRESENT(ADVANCE) ) THEN - IF( ADVANCE ) THEN - CALL SHAREDMEM_ADVANCE_BYTES(HANDLE%CPTR,C_SIZEOF(R)) - HANDLE%OFFSET = HANDLE%OFFSET+C_SIZEOF(R) - - ENDIF - ENDIF - -END SUBROUTINE SHAREDMEM_ASSOCIATE0_REAL64 - -SUBROUTINE SHAREDMEM_ASSOCIATE1_INT32(HANDLE,SIZE,FPTR,ADVANCE) - USE, INTRINSIC :: ISO_C_BINDING - TYPE(SHAREDMEM), INTENT(INOUT) :: HANDLE - INTEGER(C_INT), INTENT(IN) :: SIZE - INTEGER(KIND=C_INT), POINTER, INTENT(INOUT) :: FPTR(:) - LOGICAL, OPTIONAL, INTENT(IN) :: ADVANCE - INTEGER(C_INT) :: K - - CALL C_F_POINTER ( HANDLE%CPTR , FPTR , (/SIZE/) ) - - IF( PRESENT(ADVANCE) ) THEN - IF( ADVANCE ) THEN - CALL SHAREDMEM_ADVANCE_BYTES(HANDLE%CPTR,SIZE*C_SIZEOF(K)) - HANDLE%OFFSET = HANDLE%OFFSET+SIZE*C_SIZEOF(K) - ENDIF - ENDIF - -END SUBROUTINE SHAREDMEM_ASSOCIATE1_INT32 - - -SUBROUTINE SHAREDMEM_ASSOCIATE1_REAL32(HANDLE,SIZE,FPTR,ADVANCE) - USE, INTRINSIC :: ISO_C_BINDING - TYPE(SHAREDMEM), INTENT(INOUT) :: HANDLE - INTEGER(C_INT), INTENT(IN) :: SIZE - REAL(C_FLOAT), POINTER, INTENT(INOUT) :: FPTR(:) - LOGICAL, OPTIONAL, INTENT(IN) :: ADVANCE - REAL(C_FLOAT) :: R - - CALL C_F_POINTER ( HANDLE%CPTR , FPTR , (/SIZE/) ) - - IF( PRESENT(ADVANCE) ) THEN - IF( ADVANCE ) THEN - CALL SHAREDMEM_ADVANCE_BYTES(HANDLE%CPTR,SIZE*C_SIZEOF(R)) - HANDLE%OFFSET = HANDLE%OFFSET+SIZE*C_SIZEOF(R) - ENDIF - ENDIF - -END SUBROUTINE SHAREDMEM_ASSOCIATE1_REAL32 - - -SUBROUTINE SHAREDMEM_ASSOCIATE1_REAL64(HANDLE,SIZE,FPTR,ADVANCE) - USE, INTRINSIC :: ISO_C_BINDING - TYPE(SHAREDMEM), INTENT(INOUT) :: HANDLE - INTEGER(C_INT), INTENT(IN) :: SIZE - REAL(C_DOUBLE), POINTER, INTENT(INOUT) :: FPTR(:) - LOGICAL, OPTIONAL, INTENT(IN) :: ADVANCE - REAL(C_DOUBLE) :: R - - CALL C_F_POINTER ( HANDLE%CPTR , FPTR , (/SIZE/) ) - - IF( PRESENT(ADVANCE) ) THEN - IF( ADVANCE ) THEN - CALL SHAREDMEM_ADVANCE_BYTES(HANDLE%CPTR,SIZE*C_SIZEOF(R)) - HANDLE%OFFSET = HANDLE%OFFSET+SIZE*C_SIZEOF(R) - ENDIF - ENDIF - -END SUBROUTINE SHAREDMEM_ASSOCIATE1_REAL64 - -SUBROUTINE SHAREDMEM_ASSOCIATE2_INT32(HANDLE,DIM1,DIM2,FPTR,ADVANCE) - USE, INTRINSIC :: ISO_C_BINDING - TYPE(SHAREDMEM), INTENT(INOUT) :: HANDLE - INTEGER(C_INT), INTENT(IN) :: DIM1,DIM2 - INTEGER(C_INT), POINTER, INTENT(INOUT) :: FPTR(:,:) - LOGICAL, OPTIONAL, INTENT(IN) :: ADVANCE - INTEGER(C_INT) :: K - - CALL C_F_POINTER ( HANDLE%CPTR , FPTR , (/DIM1,DIM2/) ) - - IF( PRESENT(ADVANCE) ) THEN - IF( ADVANCE ) THEN - CALL SHAREDMEM_ADVANCE_BYTES(HANDLE%CPTR,DIM1*DIM2*C_SIZEOF(K)) - HANDLE%OFFSET = HANDLE%OFFSET+DIM1*DIM2*C_SIZEOF(K) - ENDIF - ENDIF - -END SUBROUTINE SHAREDMEM_ASSOCIATE2_INT32 - - -SUBROUTINE SHAREDMEM_ASSOCIATE2_REAL32(HANDLE,DIM1,DIM2,FPTR,ADVANCE) - USE, INTRINSIC :: ISO_C_BINDING - TYPE(SHAREDMEM), INTENT(INOUT) :: HANDLE - INTEGER(C_INT), INTENT(IN) :: DIM1,DIM2 - REAL(C_FLOAT), POINTER, INTENT(INOUT) :: FPTR(:,:) - LOGICAL, OPTIONAL, INTENT(IN) :: ADVANCE - REAL(C_FLOAT) :: R - - CALL C_F_POINTER ( HANDLE%CPTR , FPTR , (/DIM1,DIM2/) ) - - IF( PRESENT(ADVANCE) ) THEN - IF( ADVANCE ) THEN - CALL SHAREDMEM_ADVANCE_BYTES(HANDLE%CPTR,DIM1*DIM2*C_SIZEOF(R)) - HANDLE%OFFSET = HANDLE%OFFSET+DIM1*DIM2*C_SIZEOF(R) - ENDIF - ENDIF - -END SUBROUTINE SHAREDMEM_ASSOCIATE2_REAL32 - - -SUBROUTINE SHAREDMEM_ASSOCIATE2_REAL64(HANDLE,DIM1,DIM2,FPTR,ADVANCE) - USE, INTRINSIC :: ISO_C_BINDING - TYPE(SHAREDMEM), INTENT(INOUT) :: HANDLE - INTEGER(C_INT), INTENT(IN) :: DIM1,DIM2 - REAL(C_DOUBLE), POINTER, INTENT(INOUT) :: FPTR(:,:) - LOGICAL, OPTIONAL, INTENT(IN) :: ADVANCE - REAL(C_DOUBLE) :: R - - CALL C_F_POINTER ( HANDLE%CPTR , FPTR , (/DIM1,DIM2/) ) - - IF( PRESENT(ADVANCE) ) THEN - IF( ADVANCE ) THEN - CALL SHAREDMEM_ADVANCE_BYTES(HANDLE%CPTR,DIM1*DIM2*C_SIZEOF(R)) - HANDLE%OFFSET = HANDLE%OFFSET+DIM1*DIM2*C_SIZEOF(R) - ENDIF - ENDIF - -END SUBROUTINE SHAREDMEM_ASSOCIATE2_REAL64 - -SUBROUTINE SHAREDMEM_ADVANCE(HANDLE,BYTES) - USE, INTRINSIC :: ISO_C_BINDING - TYPE(SHAREDMEM), INTENT(INOUT) :: HANDLE - INTEGER(C_INT), INTENT(IN) :: BYTES - INTEGER(C_SIZE_T) :: SIZE - SIZE = BYTES - CALL SHAREDMEM_ADVANCE_BYTES(HANDLE%CPTR,SIZE) - HANDLE%OFFSET = HANDLE%OFFSET+BYTES -END SUBROUTINE SHAREDMEM_ADVANCE - -!============================================================================ -END MODULE SHAREDMEM_MOD From e53ea30dd7b959b1e0f87f7b3a2bac0cb28f371c Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Wed, 28 Aug 2024 14:58:07 +0000 Subject: [PATCH 31/86] Create precision independent library for common to trans_gpu --- src/trans/gpu/CMakeLists.txt | 100 +++++++++++------ .../buffered_allocator_mod.F90 | 2 +- src/trans/gpu/{internal => algor}/ext_acc.F90 | 0 .../growing_allocator_mod.F90 | 0 src/trans/gpu/algor/hicblas_mod.F90 | 2 +- src/trans/gpu/external/sugawc.F90 | 102 ------------------ 6 files changed, 67 insertions(+), 139 deletions(-) rename src/trans/gpu/{internal => algor}/buffered_allocator_mod.F90 (99%) rename src/trans/gpu/{internal => algor}/ext_acc.F90 (100%) rename src/trans/gpu/{internal => algor}/growing_allocator_mod.F90 (100%) delete mode 100755 src/trans/gpu/external/sugawc.F90 diff --git a/src/trans/gpu/CMakeLists.txt b/src/trans/gpu/CMakeLists.txt index 94fd94a23..cdae51409 100644 --- a/src/trans/gpu/CMakeLists.txt +++ b/src/trans/gpu/CMakeLists.txt @@ -8,16 +8,74 @@ ## Assemble sources +list( APPEND trans_gpu_common_src + algor/ext_acc.F90 + algor/c_hipmemgetinfo.cpp + algor/buffered_allocator_mod.F90 + algor/device_mod.F90 + algor/growing_allocator_mod.F90 + algor/hicblas_mod.F90 +) +if( HAVE_HIP ) + set( GPU_RUNTIME "HIP" ) + ectrans_declare_hip_sources( SOURCES_GLOB + algor/*.hip.cpp + ) + list( APPEND trans_gpu_common_src + algor/hicblas_gemm.hip.cpp + algor/hicfft.hip.cpp + ) +elseif( HAVE_CUDA ) + set( GPU_RUNTIME "CUDA" ) + set( ECTRANS_GPU_HIP_LIBRARIES CUDA::cufft CUDA::cublas nvhpcwrapnvtx CUDA::cudart ) + list( APPEND trans_gpu_common_src + algor/hicblas_gemm.cuda.cu + algor/hicfft.cuda.cu + ) +else() + ecbuild_info("warn: HIP and CUDA not found") +endif() + + +set( GPU_LIBRARY_TYPE SHARED ) +if( HAVE_GPU_STATIC ) + set( GPU_LIBRARY_TYPE STATIC ) +endif() + +ecbuild_add_library( + TARGET trans_gpu_common + TYPE ${GPU_LIBRARY_TYPE} + SOURCES ${trans_gpu_common_src} + LINKER_LANGUAGE Fortran + PUBLIC_INCLUDES $ + $ + $ + $ + PUBLIC_LIBS fiat ectrans_common + PRIVATE_LIBS ${ECTRANS_GPU_HIP_LIBRARIES} + $<${HAVE_ACC}:OpenACC::OpenACC_Fortran> + $<${HAVE_CUTLASS}:nvidia::cutlass::cutlass> + PRIVATE_DEFINITIONS ${GPU_RUNTIME}GPU + $<${HAVE_CUTLASS}:USE_CUTLASS> + $<${HAVE_CUTLASS_3XTF32}:USE_CUTLASS_3XTF32> + $<${HAVE_GPU_GRAPHS_GEMM}:USE_GRAPHS_GEMM> +) + +ectrans_target_fortran_module_directory( + TARGET trans_gpu_common + MODULE_DIRECTORY ${PROJECT_BINARY_DIR}/module/ectrans + INSTALL_DIRECTORY module/ectrans +) + + + ecbuild_list_add_pattern( LIST trans_src GLOB - algor/* internal/* external/* QUIET ) -ecbuild_list_exclude_pattern( LIST trans_src REGEX dilatation_mod.F90 ) - #if( NOT ${CMAKE_BUILD_TYPE_CAPS} STREQUAL DEBUG ) set_source_files_properties( internal/ftinv_mod.F90 PROPERTIES COMPILE_OPTIONS "-O2" ) ecbuild_info("warn: special compile flags ftinv_mod.F90") @@ -25,37 +83,14 @@ ecbuild_list_exclude_pattern( LIST trans_src REGEX dilatation_mod.F90 ecbuild_info("warn: special compile flags ftdir_mod.F90") #endif() -# Filter source list according to available GPU runtime -if( HAVE_HIP ) - set( GPU_RUNTIME "HIP" ) - ectrans_declare_hip_sources( SOURCES_GLOB - sharedmem/*.hip.cpp - algor/*.hip.cpp - internal/*.hip.cpp - external/*.hip.cpp - ) - ecbuild_list_exclude_pattern( LIST trans_src REGEX \.cu$ ) - ecbuild_list_exclude_pattern( LIST trans_src REGEX cuda_device_mod.F90 ) -elseif( HAVE_CUDA ) - set( GPU_RUNTIME "CUDA" ) - set( ECTRANS_GPU_HIP_LIBRARIES CUDA::cufft CUDA::cublas nvhpcwrapnvtx CUDA::cudart ) - ecbuild_list_exclude_pattern( LIST trans_src REGEX \.hip\.cpp ) - ecbuild_list_exclude_pattern( LIST trans_src REGEX hip_device_mod.F90 ) -else() - ecbuild_info("warn: HIP and CUDA not found") -endif() - foreach( prec dp sp ) if( HAVE_${prec} ) - set( GPU_LIBRARY_TYPE SHARED ) - if( HAVE_GPU_STATIC ) - set( GPU_LIBRARY_TYPE STATIC ) - endif() - ecbuild_add_library( TARGET trans_gpu_${prec} TYPE ${GPU_LIBRARY_TYPE} SOURCES ${trans_src} + algor/seefmm_mix.F90 + algor/wts500_mod.F90 LINKER_LANGUAGE Fortran PUBLIC_INCLUDES $ $ @@ -65,19 +100,14 @@ foreach( prec dp sp ) PUBLIC_LIBS parkind_${prec} fiat ectrans_common - PRIVATE_LIBS ${ECTRANS_GPU_HIP_LIBRARIES} - ${LAPACK_LIBRARIES} # we still have symbols in some files + PRIVATE_LIBS trans_gpu_common + ${ECTRANS_GPU_HIP_LIBRARIES} $<${HAVE_ACC}:OpenACC::OpenACC_Fortran> $<${HAVE_OMP}:OpenMP::OpenMP_Fortran> $<${HAVE_MPI}:MPI::MPI_Fortran> - $<${HAVE_CUTLASS}:nvidia::cutlass::cutlass> PRIVATE_DEFINITIONS ${GPU_OFFLOAD}GPU - ${GPU_RUNTIME}GPU ECTRANS_HAVE_MPI=${ectrans_HAVE_MPI} $<${HAVE_GPU_AWARE_MPI}:USE_GPU_AWARE_MPI> - $<${HAVE_CUTLASS}:USE_CUTLASS> - $<${HAVE_CUTLASS_3XTF32}:USE_CUTLASS_3XTF32> - $<${HAVE_GPU_GRAPHS_GEMM}:USE_GRAPHS_GEMM> ) ectrans_target_fortran_module_directory( diff --git a/src/trans/gpu/internal/buffered_allocator_mod.F90 b/src/trans/gpu/algor/buffered_allocator_mod.F90 similarity index 99% rename from src/trans/gpu/internal/buffered_allocator_mod.F90 rename to src/trans/gpu/algor/buffered_allocator_mod.F90 index fa891b8d2..6f8ce3331 100644 --- a/src/trans/gpu/internal/buffered_allocator_mod.F90 +++ b/src/trans/gpu/algor/buffered_allocator_mod.F90 @@ -8,7 +8,7 @@ #define ALIGN(I, A) (((I)+(A)-1)/(A)*(A)) MODULE BUFFERED_ALLOCATOR_MOD - USE PARKIND_ECTRANS, ONLY: JPIM + USE EC_PARKIND, ONLY: JPIM USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS USE ISO_C_BINDING, ONLY: C_INT8_T, C_SIZE_T, C_LOC, C_F_POINTER USE GROWING_ALLOCATOR_MOD, ONLY: GROWING_ALLOCATION_TYPE diff --git a/src/trans/gpu/internal/ext_acc.F90 b/src/trans/gpu/algor/ext_acc.F90 similarity index 100% rename from src/trans/gpu/internal/ext_acc.F90 rename to src/trans/gpu/algor/ext_acc.F90 diff --git a/src/trans/gpu/internal/growing_allocator_mod.F90 b/src/trans/gpu/algor/growing_allocator_mod.F90 similarity index 100% rename from src/trans/gpu/internal/growing_allocator_mod.F90 rename to src/trans/gpu/algor/growing_allocator_mod.F90 diff --git a/src/trans/gpu/algor/hicblas_mod.F90 b/src/trans/gpu/algor/hicblas_mod.F90 index 8e900856f..4ff5a8fef 100644 --- a/src/trans/gpu/algor/hicblas_mod.F90 +++ b/src/trans/gpu/algor/hicblas_mod.F90 @@ -16,7 +16,7 @@ MODULE HICBLAS_MOD -USE PARKIND1, ONLY: JPIM, JPRM, JPRD +USE EC_PARKIND, ONLY: JPIM, JPRM, JPRD USE GROWING_ALLOCATOR_MOD, ONLY: GROWING_ALLOCATION_TYPE USE OPENACC_LIB, ONLY: ACC_GET_HIP_STREAM diff --git a/src/trans/gpu/external/sugawc.F90 b/src/trans/gpu/external/sugawc.F90 deleted file mode 100755 index 59e3ecb12..000000000 --- a/src/trans/gpu/external/sugawc.F90 +++ /dev/null @@ -1,102 +0,0 @@ -! (C) Copyright 2000- ECMWF. -! (C) Copyright 2000- Meteo-France. -! -! This software is licensed under the terms of the Apache Licence Version 2.0 -! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -! In applying this licence, ECMWF does not waive the privileges and immunities -! granted to it by virtue of its status as an intergovernmental organisation -! nor does it submit to any jurisdiction. -! - -SUBROUTINE SUGAWC(KDGLG,PMU,PW) - -!**** *SUGAWC* - Compute Gaussian latitudes and weights - -! Purpose. -! -------- -! Compute Gaussian latitudes and weights. - -!** Interface. -! ---------- -! CALL SUGAWC(...) - -! Explicit arguments : -! -------------------- -! INPUT: -! KDGLG - number of latitudes. - -! OUTPUT: -! PMU - sine of Gaussian latitudes. -! PW - Gaussian weights. - -! Method. -! ------- - -! Externals. SUGAW -! ---------- - -! Author. -! ------- -! K. Yessad, from SUGAWA and SULEG (trans) -! Original : May 2012 - -! Modifications. -! -------------- -! F. Vana 05-Mar-2015 Support for single precision - -! ------------------------------------------------------------------ - -USE EC_PARKIND, ONLY: JPRD, JPIM - -!ifndef INTERFACE - -USE SUGAW_MOD, ONLY: SUGAW - -!endif INTERFACE - -! ------------------------------------------------------------------ - -IMPLICIT NONE - -INTEGER(KIND=JPIM) ,INTENT(IN) :: KDGLG -REAL(KIND=JPRD) ,INTENT(OUT) :: PMU(:) -REAL(KIND=JPRD) ,INTENT(OUT) :: PW(:) - -!ifndef INTERFACE - -REAL(KIND=JPRD) :: ZANM -INTEGER(KIND=JPIM) :: ISTART,IODD,JN,JGL -REAL(KIND=JPRD) :: ZFN(0:KDGLG,0:KDGLG) -REAL(KIND=JPRD) :: ZFNN - -! ------------------------------------------------------------------ - -! * preliminary calculations to compute input quantities ZANM and ZFN -! (k.y.: coded after what I found in tfl/module/suleg_mod.F90). -ISTART=1 -! Belousov, Swarztrauber use ZFN(0,0)=SQRT(2._JPRD) -! IFS normalisation chosen to be 0.5*Integral(Pnm**2) = 1 -ZFN(0,0)=2._JPRD -DO JN=ISTART,KDGLG - ZFNN=ZFN(0,0) - DO JGL=1,JN - ZFNN=ZFNN*SQRT(1._JPRD-0.25_JPRD/REAL(JGL**2,JPRD)) - ENDDO - IODD=MOD(JN,2) - ZFN(JN,JN)=ZFNN - DO JGL=2,JN-IODD,2 - ZFN(JN,JN-JGL)=ZFN(JN,JN-JGL+2)*REAL((JGL-1)*(2*JN-JGL+2),JPRD)/REAL(JGL*(2*JN-JGL+1),JPRD) - ENDDO -ENDDO - -ZANM=SQRT(REAL(2*KDGLG+1,JPRD)*REAL(KDGLG**2,JPRD)/REAL(2*KDGLG-1,JPRD)) - -! * call to SUGAW (output: PW, PMU): -CALL SUGAW(KDGLG,0,KDGLG,PMU,PW,ZANM,ZFN) - -! ------------------------------------------------------------------ - -!endif INTERFACE - -END SUBROUTINE SUGAWC - From 172ed07b863ed0ac341c10c693296d2aff7758aa Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Wed, 28 Aug 2024 15:38:13 +0000 Subject: [PATCH 32/86] Move generate_backend_includes to parent scope --- src/trans/CMakeLists.txt | 87 ++++++++++++++++++++++++++++++ src/trans/cpu/CMakeLists.txt | 82 ---------------------------- src/trans/{cpu => }/sedrenames.txt | 0 3 files changed, 87 insertions(+), 82 deletions(-) rename src/trans/{cpu => }/sedrenames.txt (100%) diff --git a/src/trans/CMakeLists.txt b/src/trans/CMakeLists.txt index c5c266758..e238c2988 100644 --- a/src/trans/CMakeLists.txt +++ b/src/trans/CMakeLists.txt @@ -6,6 +6,93 @@ # granted to it by virtue of its status as an intergovernmental organisation # nor does it submit to any jurisdiction. +function(generate_file) + set (options) + set (oneValueArgs INPUT OUTPUT BACKEND) + set (multiValueArgs) + cmake_parse_arguments(_PAR "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + set(output ${_PAR_OUTPUT}) + set(input ${_PAR_INPUT}) + set(backend ${_PAR_BACKEND}) + set(sed_rules ${PROJECT_SOURCE_DIR}/src/trans/sedrenames.txt) + + set( JPRB_dp JPRD ) + set( jprb_dp jprd ) + set( JPRB_sp JPRM ) + set( jprb_sp jprm ) + + add_custom_command( + OUTPUT ${output} + COMMAND cat ${sed_rules} | + sed -e "s/VARIANTDESIGNATOR/${backend}/g" | + sed -e "s/TYPEDESIGNATOR_UPPER/${JPRB_${backend}}/g" | + sed -e "s/TYPEDESIGNATOR_LOWER/${jprb_${backend}}/g" | + sed -rf - ${input} > ${output} + DEPENDS ${input} ${sed_rules} + COMMENT "Generating ${output}" + VERBATIM + ) +endfunction(generate_file) + + +function(generate_backend_includes) + set (options) + set (oneValueArgs BACKEND TARGET DESTINATION INCLUDE_DIRECTORY) + set (multiValueArgs) + cmake_parse_arguments(_PAR "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + set(destination ${_PAR_DESTINATION} ) + set(backend ${_PAR_BACKEND}) + + file(MAKE_DIRECTORY ${destination}) + file(MAKE_DIRECTORY ${destination}/trans_${backend}) + + ecbuild_list_add_pattern( LIST absolute_files GLOB ectrans/*.h SOURCE_DIR ${_PAR_INCLUDE_DIRECTORY} QUIET ) + set( files ) + foreach(file_i ${absolute_files}) + file(RELATIVE_PATH file_i ${_PAR_INCLUDE_DIRECTORY} ${file_i}) + list(APPEND files ${file_i}) + endforeach() + set( outfiles ) + foreach(file_i ${files}) + get_filename_component(outfile_name ${file_i} NAME) + get_filename_component(outfile_name_we ${file_i} NAME_WE) + get_filename_component(outfile_ext ${file_i} EXT) + get_filename_component(outfile_dir ${file_i} DIRECTORY) + if (${file_i} IN_LIST ectrans_common_includes) + configure_file(${_PAR_INCLUDE_DIRECTORY}/${file_i} ${destination}/${outfile_name}) + else() + set(outfile "${destination}/${outfile_name_we}_${backend}${outfile_ext}") + ecbuild_debug("Generate ${outfile}") + generate_file(BACKEND ${backend} INPUT ${_PAR_INCLUDE_DIRECTORY}/${file_i} OUTPUT ${outfile}) + list(APPEND outfiles ${outfile}) + string(TOUPPER ${outfile_name_we} OUTFILE_NAME_WE ) + ecbuild_debug("Generate ${destination}/trans_${backend}/${outfile_name}") + file(WRITE ${destination}/trans_${backend}/${outfile_name} "! Automatically generated interface header for backward compatibility of generic symbols !\n") + file(APPEND ${destination}/trans_${backend}/${outfile_name} "#if defined(${outfile_name_we})\n") + file(APPEND ${destination}/trans_${backend}/${outfile_name} "#undef ${outfile_name_we}\n") + file(APPEND ${destination}/trans_${backend}/${outfile_name} "#endif\n") + file(APPEND ${destination}/trans_${backend}/${outfile_name} "#if defined(${OUTFILE_NAME_WE})\n") + file(APPEND ${destination}/trans_${backend}/${outfile_name} "#undef ${OUTFILE_NAME_WE}\n") + file(APPEND ${destination}/trans_${backend}/${outfile_name} "#endif\n") + file(APPEND ${destination}/trans_${backend}/${outfile_name} "#include \"${outfile_name_we}_${backend}${outfile_ext}\"\n") + file(APPEND ${destination}/trans_${backend}/${outfile_name} "#define ${outfile_name_we} ${OUTFILE_NAME_WE}_${backend}\n") + file(APPEND ${destination}/trans_${backend}/${outfile_name} "#define ${OUTFILE_NAME_WE} ${OUTFILE_NAME_WE}_${backend}\n") + endif() + endforeach(file_i) + + add_custom_target(${_PAR_TARGET}_generate DEPENDS ${outfiles}) + ecbuild_add_library(TARGET ${_PAR_TARGET} TYPE INTERFACE) + add_dependencies(${_PAR_TARGET} ${_PAR_TARGET}_generate) + target_include_directories(${_PAR_TARGET} INTERFACE $) +endfunction(generate_backend_includes) + + + + + + add_subdirectory( common ) if( HAVE_CPU) diff --git a/src/trans/cpu/CMakeLists.txt b/src/trans/cpu/CMakeLists.txt index 46b5cccce..ec1e1cb4d 100644 --- a/src/trans/cpu/CMakeLists.txt +++ b/src/trans/cpu/CMakeLists.txt @@ -29,88 +29,6 @@ if(CMAKE_Fortran_COMPILER_ID MATCHES "Intel") endif() endif() -function(generate_file) - set (options) - set (oneValueArgs INPUT OUTPUT BACKEND) - set (multiValueArgs) - cmake_parse_arguments(_PAR "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) - - set(output ${_PAR_OUTPUT}) - set(input ${_PAR_INPUT}) - set(backend ${_PAR_BACKEND}) - - set( JPRB_dp JPRD ) - set( jprb_dp jprd ) - set( JPRB_sp JPRM ) - set( jprb_sp jprm ) - - add_custom_command( - OUTPUT ${output} - COMMAND cat ${CMAKE_CURRENT_SOURCE_DIR}/sedrenames.txt | - sed -e "s/VARIANTDESIGNATOR/${backend}/g" | - sed -e "s/TYPEDESIGNATOR_UPPER/${JPRB_${backend}}/g" | - sed -e "s/TYPEDESIGNATOR_LOWER/${jprb_${backend}}/g" | - sed -rf - ${input} > ${output} - DEPENDS ${input} ${CMAKE_CURRENT_SOURCE_DIR}/sedrenames.txt - COMMENT "Generating ${output}" - VERBATIM - ) -endfunction(generate_file) - - -function(generate_backend_includes) - set (options) - set (oneValueArgs BACKEND TARGET DESTINATION INCLUDE_DIRECTORY) - set (multiValueArgs) - cmake_parse_arguments(_PAR "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) - - set(destination ${_PAR_DESTINATION} ) - set(backend ${_PAR_BACKEND}) - - file(MAKE_DIRECTORY ${destination}) - file(MAKE_DIRECTORY ${destination}/trans_${backend}) - - ecbuild_list_add_pattern( LIST absolute_files GLOB ectrans/*.h SOURCE_DIR ${_PAR_INCLUDE_DIRECTORY} QUIET ) - set( files ) - foreach(file_i ${absolute_files}) - file(RELATIVE_PATH file_i ${_PAR_INCLUDE_DIRECTORY} ${file_i}) - list(APPEND files ${file_i}) - endforeach() - set( outfiles ) - foreach(file_i ${files}) - get_filename_component(outfile_name ${file_i} NAME) - get_filename_component(outfile_name_we ${file_i} NAME_WE) - get_filename_component(outfile_ext ${file_i} EXT) - get_filename_component(outfile_dir ${file_i} DIRECTORY) - if (${file_i} IN_LIST ectrans_common_includes) - configure_file(${_PAR_INCLUDE_DIRECTORY}/${file_i} ${destination}/${outfile_name}) - else() - set(outfile "${destination}/${outfile_name_we}_${backend}${outfile_ext}") - ecbuild_debug("Generate ${outfile}") - generate_file(BACKEND ${backend} INPUT ${_PAR_INCLUDE_DIRECTORY}/${file_i} OUTPUT ${outfile}) - list(APPEND outfiles ${outfile}) - string(TOUPPER ${outfile_name_we} OUTFILE_NAME_WE ) - ecbuild_debug("Generate ${destination}/trans_${backend}/${outfile_name}") - file(WRITE ${destination}/trans_${backend}/${outfile_name} "! Automatically generated interface header for backward compatibility of generic symbols !\n") - file(APPEND ${destination}/trans_${backend}/${outfile_name} "#if defined(${outfile_name_we})\n") - file(APPEND ${destination}/trans_${backend}/${outfile_name} "#undef ${outfile_name_we}\n") - file(APPEND ${destination}/trans_${backend}/${outfile_name} "#endif\n") - file(APPEND ${destination}/trans_${backend}/${outfile_name} "#if defined(${OUTFILE_NAME_WE})\n") - file(APPEND ${destination}/trans_${backend}/${outfile_name} "#undef ${OUTFILE_NAME_WE}\n") - file(APPEND ${destination}/trans_${backend}/${outfile_name} "#endif\n") - file(APPEND ${destination}/trans_${backend}/${outfile_name} "#include \"${outfile_name_we}_${backend}${outfile_ext}\"\n") - file(APPEND ${destination}/trans_${backend}/${outfile_name} "#define ${outfile_name_we} ${OUTFILE_NAME_WE}_${backend}\n") - file(APPEND ${destination}/trans_${backend}/${outfile_name} "#define ${OUTFILE_NAME_WE} ${OUTFILE_NAME_WE}_${backend}\n") - endif() - endforeach(file_i) - - add_custom_target(${_PAR_TARGET}_generate DEPENDS ${outfiles}) - ecbuild_add_library(TARGET ${_PAR_TARGET} TYPE INTERFACE) - add_dependencies(${_PAR_TARGET} ${_PAR_TARGET}_generate) - target_include_directories(${_PAR_TARGET} INTERFACE $) -endfunction(generate_backend_includes) - - function(generate_backend_sources) set (options) diff --git a/src/trans/cpu/sedrenames.txt b/src/trans/sedrenames.txt similarity index 100% rename from src/trans/cpu/sedrenames.txt rename to src/trans/sedrenames.txt From 6fd75a4f51bf52de10978946de44b3505fd43ffe Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Wed, 28 Aug 2024 15:38:27 +0000 Subject: [PATCH 33/86] Generate backend includes for trans_gpu --- src/trans/gpu/CMakeLists.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/trans/gpu/CMakeLists.txt b/src/trans/gpu/CMakeLists.txt index cdae51409..d0f1499f1 100644 --- a/src/trans/gpu/CMakeLists.txt +++ b/src/trans/gpu/CMakeLists.txt @@ -83,8 +83,13 @@ ecbuild_list_add_pattern( LIST trans_src ecbuild_info("warn: special compile flags ftdir_mod.F90") #endif() +set( BUILD_INTERFACE_INCLUDE_DIR ${CMAKE_BINARY_DIR}/include/ectrans ) + foreach( prec dp sp ) if( HAVE_${prec} ) + + generate_backend_includes(BACKEND gpu_${prec} TARGET ectrans_gpu_${prec}_includes DESTINATION ${BUILD_INTERFACE_INCLUDE_DIR} INCLUDE_DIRECTORY ${PROJECT_SOURCE_DIR}/src/trans/include ) + ecbuild_add_library( TARGET trans_gpu_${prec} TYPE ${GPU_LIBRARY_TYPE} From 8302de9adf66aeca8f0bf65de67217626be4e3e0 Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Thu, 29 Aug 2024 13:08:43 +0000 Subject: [PATCH 34/86] Make ectrans_gpu_sp and ectrans_gpu_dp library independent of parkinds trans_gpu_sp and trans_gpu_dp are backwards compatible via typedefs --- cmake/ectrans_compile_options.cmake | 6 +- src/trans/CMakeLists.txt | 4 + src/trans/gpu/CMakeLists.txt | 119 ++++++++++++++++++--------- src/trans/gpu/internal/tpm_stats.F90 | 2 +- src/trans/sedrenames.txt | 10 +++ 5 files changed, 98 insertions(+), 43 deletions(-) diff --git a/cmake/ectrans_compile_options.cmake b/cmake/ectrans_compile_options.cmake index 37b4171ec..03cbf0972 100644 --- a/cmake/ectrans_compile_options.cmake +++ b/cmake/ectrans_compile_options.cmake @@ -44,8 +44,8 @@ if( NOT DEFINED ECTRANS_HAVE_CONTIGUOUS_ISSUE ) endif() macro( ectrans_add_compile_options ) - set( options ) - set( single_value_args FLAGS ) + set( options NOFAIL ) + set( single_value_args FLAGS) set( multi_value_args SOURCES ) cmake_parse_arguments( _PAR "${options}" "${single_value_args}" "${multi_value_args}" ${_FIRST_ARG} ${ARGN} ) if(_PAR_UNPARSED_ARGUMENTS) @@ -59,7 +59,7 @@ macro( ectrans_add_compile_options ) endif() foreach( _file ${_PAR_SOURCES} ) ecbuild_warn("Adding custom compile flags for file ${_file} : [${_PAR_FLAGS}]") - if( NOT EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${_file} ) + if( NOT EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${_file} AND NOT _PAR_NOFAIL) ecbuild_error("${_file} does not exist") endif() set_source_files_properties( ${_file} PROPERTIES COMPILE_FLAGS "${_PAR_FLAGS}" ) diff --git a/src/trans/CMakeLists.txt b/src/trans/CMakeLists.txt index e238c2988..4430652e9 100644 --- a/src/trans/CMakeLists.txt +++ b/src/trans/CMakeLists.txt @@ -21,6 +21,10 @@ function(generate_file) set( jprb_dp jprd ) set( JPRB_sp JPRM ) set( jprb_sp jprm ) + set( JPRB_gpu_dp JPRD ) + set( jprb_gpu_dp jprd ) + set( JPRB_gpu_sp JPRM ) + set( jprb_gpu_sp jprm ) add_custom_command( OUTPUT ${output} diff --git a/src/trans/gpu/CMakeLists.txt b/src/trans/gpu/CMakeLists.txt index d0f1499f1..1ce856ab1 100644 --- a/src/trans/gpu/CMakeLists.txt +++ b/src/trans/gpu/CMakeLists.txt @@ -15,6 +15,7 @@ list( APPEND trans_gpu_common_src algor/device_mod.F90 algor/growing_allocator_mod.F90 algor/hicblas_mod.F90 + internal/tpm_stats.F90 ) if( HAVE_HIP ) set( GPU_RUNTIME "HIP" ) @@ -43,7 +44,7 @@ if( HAVE_GPU_STATIC ) endif() ecbuild_add_library( - TARGET trans_gpu_common + TARGET ectrans_gpu_common TYPE ${GPU_LIBRARY_TYPE} SOURCES ${trans_gpu_common_src} LINKER_LANGUAGE Fortran @@ -62,51 +63,84 @@ ecbuild_add_library( ) ectrans_target_fortran_module_directory( - TARGET trans_gpu_common + TARGET ectrans_gpu_common MODULE_DIRECTORY ${PROJECT_BINARY_DIR}/module/ectrans INSTALL_DIRECTORY module/ectrans ) -ecbuild_list_add_pattern( LIST trans_src - GLOB - internal/* - external/* - QUIET - ) +function(generate_backend_sources) + set (options) + set (oneValueArgs BACKEND DESTINATION OUTPUT) + set (multiValueArgs) + + cmake_parse_arguments(_PAR "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + set(backend ${_PAR_BACKEND}) + set(destination ${_PAR_DESTINATION}) + file(MAKE_DIRECTORY ${destination}/algor) + file(MAKE_DIRECTORY ${destination}/internal) + file(MAKE_DIRECTORY ${destination}/external) + + ecbuild_list_add_pattern( LIST files + GLOB + internal/* + external/* + QUIET + ) + list( APPEND files + algor/seefmm_mix.F90 + algor/wts500_mod.F90 + ) + ecbuild_list_exclude_pattern( LIST files REGEX + parkind_ectrans.F90 + tpm_stats.F90 + ) + + set(outfiles) + foreach(file_i ${files}) + get_filename_component(outfile_name ${file_i} NAME) + get_filename_component(outfile_name_we ${file_i} NAME_WE) + get_filename_component(outfile_ext ${file_i} EXT) + get_filename_component(outfile_dir ${file_i} DIRECTORY) + set(outfile "${destination}/${file_i}") + ecbuild_debug("Generate ${outfile}") + generate_file(BACKEND ${backend} INPUT ${CMAKE_CURRENT_SOURCE_DIR}/${file_i} OUTPUT ${outfile}) + list(APPEND outfiles ${outfile}) + endforeach(file_i) + set(${_PAR_OUTPUT} ${outfiles} PARENT_SCOPE) +endfunction(generate_backend_sources) -#if( NOT ${CMAKE_BUILD_TYPE_CAPS} STREQUAL DEBUG ) - set_source_files_properties( internal/ftinv_mod.F90 PROPERTIES COMPILE_OPTIONS "-O2" ) - ecbuild_info("warn: special compile flags ftinv_mod.F90") - set_source_files_properties( internal/ftdir_mod.F90 PROPERTIES COMPILE_OPTIONS "-O2" ) - ecbuild_info("warn: special compile flags ftdir_mod.F90") -#endif() set( BUILD_INTERFACE_INCLUDE_DIR ${CMAKE_BINARY_DIR}/include/ectrans ) foreach( prec dp sp ) if( HAVE_${prec} ) + set(GENERATED_SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/generated/ectrans_gpu_${prec}) + generate_backend_includes(BACKEND gpu_${prec} TARGET ectrans_gpu_${prec}_includes DESTINATION ${BUILD_INTERFACE_INCLUDE_DIR} INCLUDE_DIRECTORY ${PROJECT_SOURCE_DIR}/src/trans/include ) + generate_backend_sources( BACKEND gpu_${prec} OUTPUT ectrans_gpu_${prec}_src DESTINATION ${GENERATED_SOURCE_DIR}) + + #if( NOT ${CMAKE_BUILD_TYPE_CAPS} STREQUAL DEBUG ) + set_source_files_properties( ${GENERATED_SOURCE_DIR}/internal/ftinv_mod.F90 PROPERTIES COMPILE_OPTIONS "-O2" ) + ecbuild_info("warn: special compile flags ftinv_mod.F90") + set_source_files_properties( ${GENERATED_SOURCE_DIR}/internal/ftdir_mod.F90 PROPERTIES COMPILE_OPTIONS "-O2" ) + ecbuild_info("warn: special compile flags ftdir_mod.F90") + #endif() + ecbuild_add_library( - TARGET trans_gpu_${prec} + TARGET ectrans_gpu_${prec} TYPE ${GPU_LIBRARY_TYPE} - SOURCES ${trans_src} - algor/seefmm_mix.F90 - algor/wts500_mod.F90 + SOURCES ${ectrans_gpu_${prec}_src} LINKER_LANGUAGE Fortran PUBLIC_INCLUDES $ $ - $ $ $ - PUBLIC_LIBS parkind_${prec} - fiat - ectrans_common - PRIVATE_LIBS trans_gpu_common - ${ECTRANS_GPU_HIP_LIBRARIES} + PUBLIC_LIBS ectrans_common ectrans_gpu_common ectrans_gpu_${prec}_includes + PRIVATE_LIBS ${ECTRANS_GPU_HIP_LIBRARIES} $<${HAVE_ACC}:OpenACC::OpenACC_Fortran> $<${HAVE_OMP}:OpenMP::OpenMP_Fortran> $<${HAVE_MPI}:MPI::MPI_Fortran> @@ -116,18 +150,18 @@ foreach( prec dp sp ) ) ectrans_target_fortran_module_directory( - TARGET trans_gpu_${prec} - MODULE_DIRECTORY ${PROJECT_BINARY_DIR}/module/trans_gpu_${prec} - INSTALL_DIRECTORY module/trans_gpu_${prec} + TARGET ectrans_gpu_${prec} + MODULE_DIRECTORY ${PROJECT_BINARY_DIR}/module/ectrans_gpu_${prec} + INSTALL_DIRECTORY module/ectrans_gpu_${prec} ) if( prec STREQUAL sp ) - target_compile_definitions( trans_gpu_${prec} PRIVATE TRANS_SINGLE PARKINDTRANS_SINGLE ) + target_compile_definitions( ectrans_gpu_${prec} PRIVATE TRANS_SINGLE PARKINDTRANS_SINGLE ) endif() if( HAVE_OMP AND CMAKE_Fortran_COMPILER_ID MATCHES Cray ) # Propagate flags as link options for downstream targets. Only required for Cray - target_link_options( trans_gpu_${prec} INTERFACE + target_link_options( ectrans_gpu_${prec} INTERFACE $<$:SHELL:${OpenMP_Fortran_FLAGS}> $<$:SHELL:${OpenMP_Fortran_FLAGS}> $<$:SHELL:${OpenMP_Fortran_FLAGS}> ) @@ -135,17 +169,24 @@ foreach( prec dp sp ) if( HAVE_ACC AND CMAKE_Fortran_COMPILER_ID MATCHES NVHPC ) # Propagate flags as link options for downstream targets. Only required for NVHPC - target_link_options( trans_gpu_${prec} INTERFACE - $<$:SHELL:${OpenACC_Fortran_FLAGS} -gpu=pinned> - $<$:SHELL:${OpenACC_Fortran_FLAGS} -gpu=pinned> - $<$:SHELL:${OpenACC_Fortran_FLAGS} -gpu=pinned> ) + target_link_options( ectrans_gpu_${prec} INTERFACE + $<$:SHELL:${OpenACC_Fortran_FLAGS}> + $<$:SHELL:${OpenACC_Fortran_FLAGS}> + $<$:SHELL:${OpenACC_Fortran_FLAGS}> ) endif() - ## Install trans_gpu_${prec} interface - file( GLOB trans_interface ${PROJECT_SOURCE_DIR}/src/trans/include/ectrans/* ) - install( - FILES ${trans_interface} - DESTINATION include/ectrans/trans_gpu_${prec} - ) + # This interface library is for backward compatibility, and provides the older includes + ecbuild_add_library( TARGET trans_gpu_${prec} TYPE INTERFACE ) + target_include_directories( trans_gpu_${prec} INTERFACE $ ) + target_include_directories( trans_gpu_${prec} INTERFACE $ ) + target_link_libraries( trans_gpu_${prec} INTERFACE fiat ectrans_gpu_${prec} parkind_${prec}) + + + # ## Install trans_gpu_${prec} interface + # file( GLOB trans_interface ${PROJECT_SOURCE_DIR}/src/trans/include/ectrans/* ) + # install( + # FILES ${trans_interface} + # DESTINATION include/ectrans/trans_gpu_${prec} + # ) endif() endforeach() diff --git a/src/trans/gpu/internal/tpm_stats.F90 b/src/trans/gpu/internal/tpm_stats.F90 index 492f8e505..1b11c6e8c 100644 --- a/src/trans/gpu/internal/tpm_stats.F90 +++ b/src/trans/gpu/internal/tpm_stats.F90 @@ -31,7 +31,7 @@ SUBROUTINE GSTATS_LABEL_NVTX(KNUM,CTYPE,CDESC) END SUBROUTINE SUBROUTINE GSTATS_NVTX(KNUM,KSWITCH) - USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT + USE EC_PARKIND, ONLY: JPIM #if defined(__NVCOMPILER) USE NVTX, ONLY: NVTXSTARTRANGE, NVTXENDRANGE #endif diff --git a/src/trans/sedrenames.txt b/src/trans/sedrenames.txt index e46b72e68..157c26d6c 100644 --- a/src/trans/sedrenames.txt +++ b/src/trans/sedrenames.txt @@ -47,6 +47,7 @@ s/GATH_GRID( *($|\(| |\*))/GATH_GRID_VARIANTDESIGNATOR\1/g s/GATH_SPEC_CONTROL_MOD/GATH_SPEC_CONTROL_MOD_VARIANTDESIGNATOR/g s/gath_spec( *($|\(| |\*))/gath_spec_VARIANTDESIGNATOR\1/g s/GATH_SPEC( *($|\(| |\*))/GATH_SPEC_VARIANTDESIGNATOR\1/g +s/GPNORM_TRANS_GPU( *($|\(| |\*))/GPNORM_TRANS_GPU_VARIANTDESIGNATOR\1/g s/GPNORM_TRANS_CTL_MOD/GPNORM_TRANS_CTL_MOD_VARIANTDESIGNATOR/g s/gpnorm_trans( *($|\(| |\*))/gpnorm_trans_VARIANTDESIGNATOR\1/g s/GPNORM_TRANS( *($|\(| |\*))/GPNORM_TRANS_VARIANTDESIGNATOR\1/g @@ -60,6 +61,8 @@ s/inv_trans( *($|\(| |\*))/inv_trans_VARIANTDESIGNATOR\1/g s/INV_TRANS( *($|\(| |\*))/INV_TRANS_VARIANTDESIGNATOR\1/g s/inv_transad( *($|\(| |\*))/inv_transad_VARIANTDESIGNATOR\1/g s/INV_TRANSAD/INV_TRANSAD_VARIANTDESIGNATOR/g +s/jprbt/TYPEDESIGNATOR_LOWER/g +s/JPRBT/TYPEDESIGNATOR_UPPER/g s/jprb/TYPEDESIGNATOR_LOWER/g s/JPRB/TYPEDESIGNATOR_UPPER/g s/JPRH/JPRD/g @@ -80,6 +83,8 @@ s/LTINVAD_MOD/LTINVAD_MOD_VARIANTDESIGNATOR/g s/parkind1/ec_parkind/g s/PARKIND1/EC_PARKIND/g s/PARKIND2/EC_PARKIND/g +s/parkind_ectrans/ec_parkind/g +s/PARKIND_ECTRANS/ec_parkind/g s/PRE_SULEG_MOD/PRE_SULEG_MOD_VARIANTDESIGNATOR/g s/PREPSNM_MOD/PREPSNM_MOD_VARIANTDESIGNATOR/g s/PRFI1_MOD/PRFI1_MOD_VARIANTDESIGNATOR/g @@ -107,9 +112,12 @@ s/SPNORMD_MOD/SPNORMD_MOD_VARIANTDESIGNATOR/g s/SPNSDE_MOD/SPNSDE_MOD_VARIANTDESIGNATOR/g s/SPNSDEAD_MOD/SPNSDEAD_MOD_VARIANTDESIGNATOR/g s/SUFFT_MOD/SUFFT_MOD_VARIANTDESIGNATOR/g +s/SUMP_TRANS_MOD/SUMP_TRANS_MOD_VARIANTDESIGNATOR/g s/SULEG_MOD/SULEG_MOD_VARIANTDESIGNATOR/g s/SUTRLE_MOD/SUTRLE_MOD_VARIANTDESIGNATOR/g +s/TPM_HICFFT/TPM_HICFFT_VARIANTDESIGNATOR/g s/TPM_FFTW/TPM_FFTW_VARIANTDESIGNATOR/g +s/TPM_FFT/TPM_FFT_VARIANTDESIGNATOR/g s/TPM_FIELDS/TPM_FIELDS_VARIANTDESIGNATOR/g s/TPM_FLT/TPM_FLT_VARIANTDESIGNATOR/g s/TPM_TRANS/TPM_TRANS_VARIANTDESIGNATOR/g @@ -124,6 +132,8 @@ s/TRGTOL_MOD/TRGTOL_MOD_VARIANTDESIGNATOR/g s/TRLTOG_MOD/TRLTOG_MOD_VARIANTDESIGNATOR/g s/TRLTOM_MOD/TRLTOM_MOD_VARIANTDESIGNATOR/g s/TRMTOL_MOD/TRMTOL_MOD_VARIANTDESIGNATOR/g +s/TRMTOL_PACK_UNPACK/TRMTOL_PACK_UNPACK_VARIANTDESIGNATOR/g +s/TRLTOM_PACK_UNPACK/TRLTOM_PACK_UNPACK_VARIANTDESIGNATOR/g s/UPDSP_MOD/UPDSP_MOD_VARIANTDESIGNATOR/g s/UPDSPAD_MOD/UPDSPAD_MOD_VARIANTDESIGNATOR/g s/UPDSPB_MOD/UPDSPB_MOD_VARIANTDESIGNATOR/g From d6ea6dda54f6deb8e0bf2ef0a7eeca747118b7ec Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Thu, 29 Aug 2024 15:15:05 +0000 Subject: [PATCH 35/86] one module directory --- src/trans/cpu/CMakeLists.txt | 4 ++-- src/trans/gpu/CMakeLists.txt | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/trans/cpu/CMakeLists.txt b/src/trans/cpu/CMakeLists.txt index ec1e1cb4d..05787dcbb 100644 --- a/src/trans/cpu/CMakeLists.txt +++ b/src/trans/cpu/CMakeLists.txt @@ -83,8 +83,8 @@ foreach( prec dp sp ) ectrans_target_fortran_module_directory( TARGET ectrans_${prec} - MODULE_DIRECTORY ${CMAKE_BINARY_DIR}/module/ectrans_${prec} - INSTALL_DIRECTORY module/ectrans_${prec} + MODULE_DIRECTORY ${CMAKE_BINARY_DIR}/module/ectrans + INSTALL_DIRECTORY module/ectrans ) set( FFTW_LINK PRIVATE ) diff --git a/src/trans/gpu/CMakeLists.txt b/src/trans/gpu/CMakeLists.txt index 1ce856ab1..2ec020d3f 100644 --- a/src/trans/gpu/CMakeLists.txt +++ b/src/trans/gpu/CMakeLists.txt @@ -151,8 +151,8 @@ foreach( prec dp sp ) ectrans_target_fortran_module_directory( TARGET ectrans_gpu_${prec} - MODULE_DIRECTORY ${PROJECT_BINARY_DIR}/module/ectrans_gpu_${prec} - INSTALL_DIRECTORY module/ectrans_gpu_${prec} + MODULE_DIRECTORY ${CMAKE_BINARY_DIR}/module/ectrans + INSTALL_DIRECTORY module/ectrans ) if( prec STREQUAL sp ) From a94e2717d2eadebd0e4b07a785d312903c15fecb Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Fri, 30 Aug 2024 15:19:34 +0000 Subject: [PATCH 36/86] Make external/ini_spec_dist common --- src/trans/common/CMakeLists.txt | 4 +- .../external/ini_spec_dist.F90 | 2 +- src/trans/cpu/external/ini_spec_dist.F90 | 96 ------------------- src/trans/include/ectrans/ini_spec_dist.h | 2 +- src/trans/sedrenames.txt | 2 - 5 files changed, 5 insertions(+), 101 deletions(-) rename src/trans/{gpu => common}/external/ini_spec_dist.F90 (99%) delete mode 100644 src/trans/cpu/external/ini_spec_dist.F90 diff --git a/src/trans/common/CMakeLists.txt b/src/trans/common/CMakeLists.txt index 4ccf6d898..ac7c21139 100644 --- a/src/trans/common/CMakeLists.txt +++ b/src/trans/common/CMakeLists.txt @@ -43,11 +43,13 @@ list( APPEND ectrans_common_src internal/sump_trans_preleg_mod.F90 external/get_current.F90 external/setup_trans0.F90 + external/ini_spec_dist.F90 ${CMAKE_CURRENT_BINARY_DIR}/internal/ectrans_version_mod.F90 ) list( APPEND ectrans_common_includes ectrans/get_current.h ectrans/setup_trans0.h + ectrans/ini_spec_dist.h ) ecbuild_add_library( @@ -72,4 +74,4 @@ if( HAVE_OMP ) target_link_libraries( ectrans_common PRIVATE OpenMP::OpenMP_Fortran ) endif() -set( ectrans_common_includes ${ectrans_common_includes} PARENT_SCOPE ) \ No newline at end of file +set( ectrans_common_includes ${ectrans_common_includes} PARENT_SCOPE ) diff --git a/src/trans/gpu/external/ini_spec_dist.F90 b/src/trans/common/external/ini_spec_dist.F90 similarity index 99% rename from src/trans/gpu/external/ini_spec_dist.F90 rename to src/trans/common/external/ini_spec_dist.F90 index cde0805f0..d19f8098b 100755 --- a/src/trans/gpu/external/ini_spec_dist.F90 +++ b/src/trans/common/external/ini_spec_dist.F90 @@ -54,7 +54,7 @@ SUBROUTINE INI_SPEC_DIST(KSMAX,KTMAX,KPRTRW,KMYSETW,KASM0,KSPOLEGL,KPROCM,& ! ------------------------------------------------------------------ -USE PARKIND1, ONLY: JPIM, JPRB +USE EC_PARKIND, ONLY: JPIM !ifndef INTERFACE diff --git a/src/trans/cpu/external/ini_spec_dist.F90 b/src/trans/cpu/external/ini_spec_dist.F90 deleted file mode 100644 index c3c820a04..000000000 --- a/src/trans/cpu/external/ini_spec_dist.F90 +++ /dev/null @@ -1,96 +0,0 @@ -! (C) Copyright 2000- ECMWF. -! (C) Copyright 2000- Meteo-France. -! -! This software is licensed under the terms of the Apache Licence Version 2.0 -! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -! In applying this licence, ECMWF does not waive the privileges and immunities -! granted to it by virtue of its status as an intergovernmental organisation -! nor does it submit to any jurisdiction. -! - -SUBROUTINE INI_SPEC_DIST(KSMAX,KTMAX,KPRTRW,KMYSETW,KASM0,KSPOLEGL,KPROCM,& - &KUMPP,KSPEC,KSPEC2,KSPEC2MX,KPOSSP,KMYMS,KPTRMS,KALLMS) - - -!**** *INI_SPEC_DIST* - Initialize spectral wave distribution - -! Purpose. -! -------- -! Initialize arrays controlling spectral wave distribution - -!** Interface. -! ---------- -! CALL INI_SPEC_DIST(...) - -! Explicit arguments : -! -------------------- -! KSMAX - spectral truncation required -! KTMAX - Overtruncation for KSMAX (input) -! KPRTRW - Number of processors in A-direction (input) -! KMYSETW - A-set for present processor (input) -! KASM0 - Offsets for spectral waves (output) -! KSPOLEGL - Local version of NSPOLEG (output) -! KPROCM - Where a certain spectral wave belongs (output) -! KUMPP - Number of spectral waves on this PE (output) -! KSPEC - Local version on NSPEC (output) -! KSPEC2 - Local version on NSPEC2 (output) -! KSPEC2MX - Maximum KSPEC2 across PEs (output) -! KPOSSP - Global spectral fields partitioning (output) -! KMYMS - This PEs spectral zonal wavenumbers (output) -! KPTRMS - Pointer to the first wave number of a given a-set (output) -! KALLMS - Wave numbers for all wave-set concatenated together -! to give all wave numbers in wave-set order (output) - -! Implicit arguments : NONE -! -------------------- - -! Method. -! ------- -! See documentation - -! Externals. SUWAVEDI -! ---------- -! Original : 00-03-03 - -! ------------------------------------------------------------------ - -USE PARKIND1 ,ONLY : JPIM ,JPRB -!ifndef INTERFACE -USE SUWAVEDI_MOD ,ONLY : SUWAVEDI -USE YOMHOOK ,ONLY : LHOOK, DR_HOOK, JPHOOK -!endif INTERFACE - -IMPLICIT NONE - -INTEGER(KIND=JPIM),INTENT(IN) :: KSMAX -INTEGER(KIND=JPIM),INTENT(IN) :: KTMAX -INTEGER(KIND=JPIM),INTENT(IN) :: KPRTRW -INTEGER(KIND=JPIM),INTENT(IN) :: KMYSETW -INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KSPEC -INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KSPEC2 -INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KSPEC2MX -INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KSPOLEGL - -INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KASM0(0:KSMAX) -INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KPROCM(0:KSMAX) -INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KUMPP(KPRTRW) -INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KPOSSP(KPRTRW+1) -INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KMYMS(KSMAX+1) -INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KPTRMS(KPRTRW) -INTEGER(KIND=JPIM),OPTIONAL,INTENT(OUT) :: KALLMS(KSMAX+1) - -!ifndef INTERFACE -REAL(KIND=JPHOOK) :: ZHOOK_HANDLE - -! ------------------------------------------------------------------ - -IF (LHOOK) CALL DR_HOOK('INI_SPEC_DIST',0,ZHOOK_HANDLE) - -CALL SUWAVEDI(KSMAX,KTMAX,KPRTRW,KMYSETW,KASM0,KSPOLEGL,KPROCM,& - &KUMPP,KSPEC,KSPEC2,KSPEC2MX,KPOSSP,KMYMS,KPTRMS,KALLMS) - -IF (LHOOK) CALL DR_HOOK('INI_SPEC_DIST',1,ZHOOK_HANDLE) - -!endif INTERFACE - -END SUBROUTINE INI_SPEC_DIST diff --git a/src/trans/include/ectrans/ini_spec_dist.h b/src/trans/include/ectrans/ini_spec_dist.h index d2e832ca3..7f17cf6a2 100644 --- a/src/trans/include/ectrans/ini_spec_dist.h +++ b/src/trans/include/ectrans/ini_spec_dist.h @@ -55,7 +55,7 @@ SUBROUTINE INI_SPEC_DIST(KSMAX,KTMAX,KPRTRW,KMYSETW,KASM0,KSPOLEGL,KPROCM,& ! ------------------------------------------------------------------ -USE PARKIND1 ,ONLY : JPIM ,JPRB +USE EC_PARKIND, ONLY: JPIM IMPLICIT NONE INTEGER(KIND=JPIM),INTENT(IN) :: KSMAX diff --git a/src/trans/sedrenames.txt b/src/trans/sedrenames.txt index 157c26d6c..d824c0b94 100644 --- a/src/trans/sedrenames.txt +++ b/src/trans/sedrenames.txt @@ -51,8 +51,6 @@ s/GPNORM_TRANS_GPU( *($|\(| |\*))/GPNORM_TRANS_GPU_VARIANTDESIGNATOR\1/g s/GPNORM_TRANS_CTL_MOD/GPNORM_TRANS_CTL_MOD_VARIANTDESIGNATOR/g s/gpnorm_trans( *($|\(| |\*))/gpnorm_trans_VARIANTDESIGNATOR\1/g s/GPNORM_TRANS( *($|\(| |\*))/GPNORM_TRANS_VARIANTDESIGNATOR\1/g -s/ini_spec_dist( *($|\(| |\*))/ini_spec_dist_VARIANTDESIGNATOR\1/g -s/INI_SPEC_DIST/INI_SPEC_DIST_VARIANTDESIGNATOR/g s/INIGPTR_MOD/INIGPTR_MOD_VARIANTDESIGNATOR/g s/INTERPOL_DECOMP_MOD/INTERPOL_DECOMP_MOD_VARIANTDESIGNATOR/g s/INV_TRANS_CTL_MOD/INV_TRANS_CTL_MOD_VARIANTDESIGNATOR/g From 6ca3d22769ac87a1fdc2e43ceed501654db0808f Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Mon, 2 Sep 2024 10:52:24 +0000 Subject: [PATCH 37/86] Bring back diagnostic prints when NPRINTLEV>0 --- src/trans/cpu/external/setup_trans.F90 | 13 ++++++++++-- src/trans/gpu/external/setup_trans.F90 | 28 +++++++++++++++++++++++++- 2 files changed, 38 insertions(+), 3 deletions(-) diff --git a/src/trans/cpu/external/setup_trans.F90 b/src/trans/cpu/external/setup_trans.F90 index 887eb20d5..bd394ddd1 100644 --- a/src/trans/cpu/external/setup_trans.F90 +++ b/src/trans/cpu/external/setup_trans.F90 @@ -98,7 +98,8 @@ SUBROUTINE SETUP_TRANS(KSMAX,KDGL,KDLON,KLOEN,LDSPLIT,PSTRET,& ! R. El Khatib 07-Mar-2016 Better flexibility for Legendre polynomials computation in stretched mode ! ------------------------------------------------------------------ -USE EC_PARKIND ,ONLY : JPIM ,JPRD +USE PARKIND1, ONLY: JPIM, JPRD, JPRB + ! only use of JPRB is for diagnostic print of backend precision USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_PTR, C_INT,C_ASSOCIATED,C_SIZE_T !ifndef INTERFACE @@ -169,7 +170,15 @@ SUBROUTINE SETUP_TRANS(KSMAX,KDGL,KDLON,KLOEN,LDSPLIT,PSTRET,& ENDIF LLP1 = NPRINTLEV>0 LLP2 = NPRINTLEV>1 -IF(LLP1) WRITE(NOUT,*) '=== ENTER ROUTINE SETUP_TRANS (CPU) ===' +IF(LLP1) WRITE(NOUT,*) '=== ENTER ROUTINE SETUP_TRANS ===' +IF(LLP1) THEN + IF (JPRB == JPRD) THEN + WRITE(NOUT,'(A)') "CPU double precision version" + ELSE + WRITE(NOUT,'(A)') "CPU single precision version" + ENDIF + WRITE(NOUT,'(A)') +ENDIF ! Allocate resolution dependent structures IF(.NOT. ALLOCATED(DIM_RESOL)) THEN diff --git a/src/trans/gpu/external/setup_trans.F90 b/src/trans/gpu/external/setup_trans.F90 index bbaa3ed44..bdb59be24 100755 --- a/src/trans/gpu/external/setup_trans.F90 +++ b/src/trans/gpu/external/setup_trans.F90 @@ -196,7 +196,33 @@ SUBROUTINE SETUP_TRANS(KSMAX,KDGL,KDLON,KLOEN,LDSPLIT,PSTRET,& ENDIF LLP1 = NPRINTLEV>0 LLP2 = NPRINTLEV>1 -IF(LLP1) WRITE(NOUT,*) '=== ENTER ROUTINE SETUP_TRANS (GPU) ===' +IF(LLP1) WRITE(NOUT,*) '=== ENTER ROUTINE SETUP_TRANS ===' +IF(LLP1) THEN + IF (JPRBT == JPRD) THEN + WRITE(NOUT,'(A)') "GPU double precision version, with following compile-time options : " + ELSE + WRITE(NOUT,'(A)') "GPU single precision version, with following compile-time options : " + ENDIF +#ifdef ACCGPU + WRITE(NOUT,'(A)') " - OpenACC-based offload" +#else + WRITE(NOUT,'(A)') " - OpenMP-based offload" +#endif +#ifdef USE_GPU_AWARE_MPI + WRITE(NOUT,'(A)') " - GPU-aware MPI" +#endif +#ifdef USE_GRAPHS_GEMM + WRITE(NOUT,'(A)') " - graph-based GEMM scheduling" +#endif +#ifdef USE_CUTLASS + WRITE(NOUT,'(A)') " - Cutlass-based GEMM operations" +#endif +#ifdef USE_3XTF32 + WRITE(NOUT,'(A)') " - tensor-core usage for 32b Cutlass operations" +#endif + WRITE(NOUT,'(A)') +ENDIF + ! Allocate resolution dependent structures IF(.NOT. ALLOCATED(DIM_RESOL)) THEN From 231bb02c337fcd3fb76f19652d447166debd002f Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Mon, 2 Sep 2024 15:07:32 +0000 Subject: [PATCH 38/86] Avoid future surprises with GPU defines for common and specific gpu libraries --- src/trans/gpu/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/trans/gpu/CMakeLists.txt b/src/trans/gpu/CMakeLists.txt index 2ec020d3f..a07ef97ec 100644 --- a/src/trans/gpu/CMakeLists.txt +++ b/src/trans/gpu/CMakeLists.txt @@ -56,7 +56,7 @@ ecbuild_add_library( PRIVATE_LIBS ${ECTRANS_GPU_HIP_LIBRARIES} $<${HAVE_ACC}:OpenACC::OpenACC_Fortran> $<${HAVE_CUTLASS}:nvidia::cutlass::cutlass> - PRIVATE_DEFINITIONS ${GPU_RUNTIME}GPU + PRIVATE_DEFINITIONS ${GPU_RUNTIME}GPU ${GPU_OFFLOAD}GPU $<${HAVE_CUTLASS}:USE_CUTLASS> $<${HAVE_CUTLASS_3XTF32}:USE_CUTLASS_3XTF32> $<${HAVE_GPU_GRAPHS_GEMM}:USE_GRAPHS_GEMM> @@ -144,7 +144,7 @@ foreach( prec dp sp ) $<${HAVE_ACC}:OpenACC::OpenACC_Fortran> $<${HAVE_OMP}:OpenMP::OpenMP_Fortran> $<${HAVE_MPI}:MPI::MPI_Fortran> - PRIVATE_DEFINITIONS ${GPU_OFFLOAD}GPU + PRIVATE_DEFINITIONS ${GPU_RUNTIME}GPU ${GPU_OFFLOAD}GPU ECTRANS_HAVE_MPI=${ectrans_HAVE_MPI} $<${HAVE_GPU_AWARE_MPI}:USE_GPU_AWARE_MPI> ) From 6557cbbfb11ece1e98cb963aaf607dff425ac448 Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Mon, 2 Sep 2024 15:08:15 +0000 Subject: [PATCH 39/86] Move tpm_hicfft.F90 to gpu_common --- src/trans/gpu/CMakeLists.txt | 2 ++ src/trans/gpu/internal/tpm_hicfft.F90 | 9 ++++----- src/trans/sedrenames.txt | 1 - 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/trans/gpu/CMakeLists.txt b/src/trans/gpu/CMakeLists.txt index a07ef97ec..37f21db0d 100644 --- a/src/trans/gpu/CMakeLists.txt +++ b/src/trans/gpu/CMakeLists.txt @@ -16,6 +16,7 @@ list( APPEND trans_gpu_common_src algor/growing_allocator_mod.F90 algor/hicblas_mod.F90 internal/tpm_stats.F90 + internal/tpm_hicfft.F90 ) if( HAVE_HIP ) set( GPU_RUNTIME "HIP" ) @@ -95,6 +96,7 @@ function(generate_backend_sources) ecbuild_list_exclude_pattern( LIST files REGEX parkind_ectrans.F90 tpm_stats.F90 + tpm_hicfft.F90 ) set(outfiles) diff --git a/src/trans/gpu/internal/tpm_hicfft.F90 b/src/trans/gpu/internal/tpm_hicfft.F90 index 696d2b0b6..9ae63df00 100755 --- a/src/trans/gpu/internal/tpm_hicfft.F90 +++ b/src/trans/gpu/internal/tpm_hicfft.F90 @@ -20,7 +20,6 @@ MODULE TPM_HICFFT ! HICFFT abstraction for CUDA and HIP August 2023 B. Reuter USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_INT, C_PTR, C_LOC, C_FLOAT, C_DOUBLE - USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT USE GROWING_ALLOCATOR_MOD, ONLY: GROWING_ALLOCATION_TYPE IMPLICIT NONE @@ -46,7 +45,7 @@ MODULE TPM_HICFFT SUBROUTINE EXECUTE_DIR_FFT_FLOAT(PREEL_REAL,PREEL_COMPLEX,KFIELD,LOENS,OFFSETS,ALLOC) - USE PARKIND_ECTRANS ,ONLY : JPIM + USE EC_PARKIND ,ONLY : JPIM IMPLICIT NONE @@ -78,7 +77,7 @@ SUBROUTINE EXECUTE_DIR_FFT_FLOAT_C(PREEL_REAL,PREEL_COMPLEX,KFIELD,LOENS,OFFSETS END SUBROUTINE EXECUTE_DIR_FFT_FLOAT SUBROUTINE EXECUTE_DIR_FFT_DOUBLE(PREEL_REAL,PREEL_COMPLEX,KFIELD,LOENS,OFFSETS,ALLOC) - USE PARKIND_ECTRANS ,ONLY : JPIM + USE EC_PARKIND ,ONLY : JPIM IMPLICIT NONE @@ -111,7 +110,7 @@ SUBROUTINE EXECUTE_DIR_FFT_DOUBLE_C(PREEL_REAL,PREEL_COMPLEX,KFIELD,LOENS,OFFSET END SUBROUTINE EXECUTE_DIR_FFT_DOUBLE SUBROUTINE EXECUTE_INV_FFT_FLOAT(PREEL_COMPLEX,PREEL_REAL,KFIELD,LOENS,OFFSETS,ALLOC) - USE PARKIND_ECTRANS ,ONLY : JPIM + USE EC_PARKIND ,ONLY : JPIM IMPLICIT NONE @@ -143,7 +142,7 @@ SUBROUTINE EXECUTE_INV_FFT_FLOAT_C(PREEL_COMPLEX,PREEL_REAL,KFIELD,LOENS,OFFSETS END SUBROUTINE SUBROUTINE EXECUTE_INV_FFT_DOUBLE(PREEL_COMPLEX,PREEL_REAL,KFIELD,LOENS,OFFSETS,ALLOC) - USE PARKIND_ECTRANS ,ONLY : JPIM + USE EC_PARKIND ,ONLY : JPIM IMPLICIT NONE diff --git a/src/trans/sedrenames.txt b/src/trans/sedrenames.txt index d824c0b94..e6af0c801 100644 --- a/src/trans/sedrenames.txt +++ b/src/trans/sedrenames.txt @@ -113,7 +113,6 @@ s/SUFFT_MOD/SUFFT_MOD_VARIANTDESIGNATOR/g s/SUMP_TRANS_MOD/SUMP_TRANS_MOD_VARIANTDESIGNATOR/g s/SULEG_MOD/SULEG_MOD_VARIANTDESIGNATOR/g s/SUTRLE_MOD/SUTRLE_MOD_VARIANTDESIGNATOR/g -s/TPM_HICFFT/TPM_HICFFT_VARIANTDESIGNATOR/g s/TPM_FFTW/TPM_FFTW_VARIANTDESIGNATOR/g s/TPM_FFT/TPM_FFT_VARIANTDESIGNATOR/g s/TPM_FIELDS/TPM_FIELDS_VARIANTDESIGNATOR/g From e09127c1606c446c62f79a540fb08d020b2adaba Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Wed, 11 Sep 2024 16:14:56 +0000 Subject: [PATCH 40/86] Move shuffle_mod.F90 to common --- src/trans/common/CMakeLists.txt | 1 + .../{gpu => common}/internal/shuffle_mod.F90 | 4 +- src/trans/cpu/internal/shuffle_mod.F90 | 135 ------------------ src/trans/sedrenames.txt | 1 - 4 files changed, 3 insertions(+), 138 deletions(-) rename src/trans/{gpu => common}/internal/shuffle_mod.F90 (98%) delete mode 100644 src/trans/cpu/internal/shuffle_mod.F90 diff --git a/src/trans/common/CMakeLists.txt b/src/trans/common/CMakeLists.txt index ac7c21139..aea3a9348 100644 --- a/src/trans/common/CMakeLists.txt +++ b/src/trans/common/CMakeLists.txt @@ -31,6 +31,7 @@ list( APPEND ectrans_common_src internal/pe2set_mod.F90 internal/set2pe_mod.F90 internal/eq_regions_mod.F90 + internal/shuffle_mod.F90 internal/sump_trans0_mod.F90 internal/sustaonl_mod.F90 internal/sumplat_mod.F90 diff --git a/src/trans/gpu/internal/shuffle_mod.F90 b/src/trans/common/internal/shuffle_mod.F90 similarity index 98% rename from src/trans/gpu/internal/shuffle_mod.F90 rename to src/trans/common/internal/shuffle_mod.F90 index 8ef2ee06c..b59c61d9b 100755 --- a/src/trans/gpu/internal/shuffle_mod.F90 +++ b/src/trans/common/internal/shuffle_mod.F90 @@ -55,8 +55,8 @@ SUBROUTINE SHUFFLE(KF_UV_G,KF_SCALARS_G,KSHFUV_G,KIVSETUV,KSHFSC_G,KIVSETSC,& ! Original : 01-01-03 ! ------------------------------------------------------------------ -USE PARKIND1, ONLY: JPIM -USE TPM_DISTR, ONLY: NPRTRV +USE EC_PARKIND, ONLY: JPIM +USE TPM_DISTR, ONLY: NPRTRV ! IMPLICIT NONE diff --git a/src/trans/cpu/internal/shuffle_mod.F90 b/src/trans/cpu/internal/shuffle_mod.F90 deleted file mode 100644 index 7c1ec14c0..000000000 --- a/src/trans/cpu/internal/shuffle_mod.F90 +++ /dev/null @@ -1,135 +0,0 @@ -! (C) Copyright 2001- ECMWF. -! (C) Copyright 2001- Meteo-France. -! -! This software is licensed under the terms of the Apache Licence Version 2.0 -! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -! In applying this licence, ECMWF does not waive the privileges and immunities -! granted to it by virtue of its status as an intergovernmental organisation -! nor does it submit to any jurisdiction. -! - -MODULE SHUFFLE_MOD -CONTAINS -SUBROUTINE SHUFFLE(KF_UV_G,KF_SCALARS_G,KSHFUV_G,KIVSETUV,KSHFSC_G,KIVSETSC,& - & KVSETUV,KVSETSC) - -!**** *SHUFFLE* - Re-shuffle fields for load balancing - -! Purpose. -! -------- -! Re-shuffle fields for load balancing if NPRTRV>1. Note that the -! relative order of the local spectral fields has to maintained. - -!** Interface. -! ---------- -! CALL SHUFFLE(...) - -! Explicit arguments : -! -------------------- -! KF_UV_G - global number of spectral u-v fields -! KF_SCALARS_G - global number of scalar spectral fields -! KSHFUV_G - reshuffling index for uv fields -! KIVSETUV - reshuffled KVSETUV -! KSHFSC_G - reshuffling index for scalar fields -! KIVSETSC - reshuffled KVSETSC -! KVSETUV(:) - indicating which 'b-set' in spectral space owns a -! vor/div field. Equivalant to NBSETLEV in the IFS. -! The length of KVSETUV should be the GLOBAL number -! of u/v fields which is the dimension of u and v releated -! fields in grid-point space. -! KVESETSC(:) - indicating which 'b-set' in spectral space owns a -! scalar field. As for KVSETUV this argument is required -! if the total number of processors is greater than -! the number of processors used for distribution in -! spectral wave space. - -! Externals. NONE -! ---------- - -! Author. -! ------- -! Mats Hamrud *ECMWF* - -! Modifications. -! -------------- -! Original : 01-01-03 - -! ------------------------------------------------------------------ -USE PARKIND1 ,ONLY : JPIM - -USE TPM_DISTR ,ONLY : NPRTRV -! - -IMPLICIT NONE - -! Declaration of arguments - -INTEGER(KIND=JPIM), INTENT(IN) :: KF_UV_G,KF_SCALARS_G -INTEGER(KIND=JPIM), INTENT(OUT) :: KSHFUV_G(:),KSHFSC_G(:) -INTEGER(KIND=JPIM), INTENT(OUT) :: KIVSETUV(:),KIVSETSC(:) -INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:) -INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC(:) - -INTEGER(KIND=JPIM) :: IHELP(MAX(KF_UV_G,KF_SCALARS_G),NPRTRV),IHELPC(NPRTRV) -INTEGER(KIND=JPIM) :: IDW,J - -! ------------------------------------------------------------------ - -IF(NPRTRV > 1) THEN - IHELP(:,:) = 0 - IHELPC(:) = 0 - DO J=1,KF_UV_G - IHELPC(KVSETUV(J)) = IHELPC(KVSETUV(J))+1 - IHELP(IHELPC(KVSETUV(J)),KVSETUV(J)) = J - ENDDO - IDW = KF_UV_G+1 - DO - DO J=NPRTRV,1,-1 - IF(IHELPC(J) > 0) THEN - IDW = IDW-1 - KSHFUV_G(IDW) = IHELP(IHELPC(J),J) - IHELPC(J) =IHELPC(J)-1 - ENDIF - ENDDO - IF(IDW == 1) EXIT - ENDDO - - IHELP(:,:) = 0 - IHELPC(:) = 0 - DO J=1,KF_SCALARS_G - IHELPC(KVSETSC(J)) = IHELPC(KVSETSC(J))+1 - IHELP(IHELPC(KVSETSC(J)),KVSETSC(J)) = J - ENDDO - IDW = KF_SCALARS_G+1 - DO - DO J=NPRTRV,1,-1 - IF(IHELPC(J) > 0) THEN - IDW = IDW-1 - KSHFSC_G(IDW) = IHELP(IHELPC(J),J) - IHELPC(J) =IHELPC(J)-1 - ENDIF - ENDDO - IF(IDW == 1) EXIT - ENDDO - - DO J=1,KF_UV_G - KIVSETUV(J) = KVSETUV(KSHFUV_G(J)) - ENDDO - DO J=1,KF_SCALARS_G - KIVSETSC(J) = KVSETSC(KSHFSC_G(J)) - ENDDO -ELSE - DO J=1,KF_UV_G - KSHFUV_G(J) = J - KIVSETUV(J) = 1 - ENDDO - DO J=1,KF_SCALARS_G - KSHFSC_G(J) = J - KIVSETSC(J) = 1 - ENDDO -ENDIF - -! ------------------------------------------------------------------ - -END SUBROUTINE SHUFFLE -END MODULE SHUFFLE_MOD diff --git a/src/trans/sedrenames.txt b/src/trans/sedrenames.txt index e6af0c801..f39bb20a1 100644 --- a/src/trans/sedrenames.txt +++ b/src/trans/sedrenames.txt @@ -101,7 +101,6 @@ s/SETUP_DIMS_MOD/SETUP_DIMS_MOD_VARIANTDESIGNATOR/g s/SETUP_GEOM_MOD/SETUP_GEOM_MOD_VARIANTDESIGNATOR/g s/SETUP_TRANS( *($|\(| |\*))/SETUP_TRANS_VARIANTDESIGNATOR\1/g s/setup_trans( *($|\(| |\*|\.h))/setup_trans_VARIANTDESIGNATOR\1/g -s/SHUFFLE_MOD/SHUFFLE_MOD_VARIANTDESIGNATOR/g s/specnorm/specnorm_VARIANTDESIGNATOR/g s/SPECNORM/SPECNORM_VARIANTDESIGNATOR/g s/SPNORM_CTL_MOD/SPNORM_CTL_MOD_VARIANTDESIGNATOR/g From 6339b26821458d51c8069977f5f636ddbea97f3f Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Thu, 12 Sep 2024 12:58:38 +0000 Subject: [PATCH 41/86] Fix typo --- src/trans/cpu/internal/set_resol_mod.F90 | 2 +- src/trans/gpu/internal/set_resol_mod.F90 | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/trans/cpu/internal/set_resol_mod.F90 b/src/trans/cpu/internal/set_resol_mod.F90 index 315799b98..d5ed9ef93 100644 --- a/src/trans/cpu/internal/set_resol_mod.F90 +++ b/src/trans/cpu/internal/set_resol_mod.F90 @@ -31,7 +31,7 @@ SUBROUTINE SET_RESOL(KRESOL,LDSETUP) INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL LOGICAL ,OPTIONAL, INTENT(IN) :: LDSETUP -! Local varaibles +! Local variables INTEGER(KIND=JPIM) :: IRESOL LOGICAL :: LLSETUP diff --git a/src/trans/gpu/internal/set_resol_mod.F90 b/src/trans/gpu/internal/set_resol_mod.F90 index f75583499..d197c5462 100755 --- a/src/trans/gpu/internal/set_resol_mod.F90 +++ b/src/trans/gpu/internal/set_resol_mod.F90 @@ -31,7 +31,7 @@ SUBROUTINE SET_RESOL(KRESOL,LDSETUP) INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KRESOL LOGICAL ,OPTIONAL, INTENT(IN) :: LDSETUP -! Local varaibles +! Local variables INTEGER(KIND=JPIM) :: IRESOL LOGICAL :: LLSETUP From 2b44f2a94eac74b5a088ab42782ab8beb235c937 Mon Sep 17 00:00:00 2001 From: Sam Hatfield Date: Thu, 12 Sep 2024 14:10:34 +0000 Subject: [PATCH 42/86] Have timing statistics only summarised for non-warm-up iterations --- src/programs/ectrans-benchmark.F90 | 73 ++++++++++++++---------------- 1 file changed, 34 insertions(+), 39 deletions(-) diff --git a/src/programs/ectrans-benchmark.F90 b/src/programs/ectrans-benchmark.F90 index 962e5a0d9..7701fd1f5 100644 --- a/src/programs/ectrans-benchmark.F90 +++ b/src/programs/ectrans-benchmark.F90 @@ -51,6 +51,9 @@ program ectrans_benchmark ! Number of points in top/bottom latitudes integer(kind=jpim), parameter :: min_octa_points = 20 +! Number of warm up steps (for which timing statistics should be ignored) +integer(kind=jpim), parameter :: n_warm_up = 2 + integer(kind=jpim) :: istack, getstackusage real(kind=jprd), dimension(1) :: zmaxerr(5), zerr(5) real(kind=jprd) :: zmaxerrg @@ -575,26 +578,16 @@ program ectrans_benchmark if (iters <= 0) call abor1('ectrans_benchmark:iters <= 0') -allocate(ztstep(iters+2)) -allocate(ztstep1(iters+2)) -allocate(ztstep2(iters+2)) - -ztstepavg = 0._jprd -ztstepmax = 0._jprd -ztstepmin = 9999999999999999._jprd -ztstepavg1 = 0._jprd -ztstepmax1 = 0._jprd -ztstepmin1 = 9999999999999999._jprd -ztstepavg2 = 0._jprd -ztstepmax2 = 0._jprd -ztstepmin2 = 9999999999999999._jprd +allocate(ztstep(iters+n_warm_up)) +allocate(ztstep1(iters+n_warm_up)) +allocate(ztstep2(iters+n_warm_up)) if (verbosity >= 1 .and. myproc == 1) then write(nout,'(a)') '======= Start of spectral transforms =======' write(nout,'(" ")') endif -ztloop = timef() + !=================================================================================================== ! Do spectral transform loop @@ -602,11 +595,15 @@ program ectrans_benchmark gstats_lstats = .false. -write(nout,'(a,i5,a)') 'Running for ', iters, ' iterations with 2 extra warm-up iterations' +write(nout,'(a,i5,a,i5,a)') 'Running for ', iters, ' iterations with', n_warm_up, & + & 'extra warm-up iterations' write(nout,'(" ")') -do jstep = 1, iters+2 - if (jstep == 3) gstats_lstats = .true. +do jstep = 1, iters+n_warm_up + if (jstep == n_warm_up + 1) then + gstats_lstats = .true. + ztloop = timef() + endif call gstats(3,0) ztstep(jstep) = timef() @@ -690,24 +687,8 @@ program ectrans_benchmark call gstats(5,1) ztstep2(jstep) = (timef() - ztstep2(jstep))/1000.0_jprd - !================================================================================================= - ! Calculate timings - !================================================================================================= - ztstep(jstep) = (timef() - ztstep(jstep))/1000.0_jprd - ztstepavg = ztstepavg + ztstep(jstep) - ztstepmin = min(ztstep(jstep), ztstepmin) - ztstepmax = max(ztstep(jstep), ztstepmax) - - ztstepavg1 = ztstepavg1 + ztstep1(jstep) - ztstepmin1 = min(ztstep1(jstep), ztstepmin1) - ztstepmax1 = max(ztstep1(jstep), ztstepmax1) - - ztstepavg2 = ztstepavg2 + ztstep2(jstep) - ztstepmin2 = min(ztstep2(jstep), ztstepmin2) - ztstepmax2 = max(ztstep2(jstep), ztstepmax2) - !================================================================================================= ! Print norms !================================================================================================= @@ -858,6 +839,20 @@ program ectrans_benchmark endif endif +!=================================================================================================== +! Calculate timings +!=================================================================================================== + +ztstepavg = sum(ztstep(n_warm_up+1:)) +ztstepmin = minval(ztstep(n_warm_up+1:)) +ztstepmax = maxval(ztstep(n_warm_up+1:)) +ztstepavg1 = sum(ztstep1(n_warm_up+1:)) +ztstepmin1 = minval(ztstep1(n_warm_up+1:)) +ztstepmax1 = maxval(ztstep1(n_warm_up+1:)) +ztstepavg2 = sum(ztstep2(n_warm_up+1:)) +ztstepmin2 = minval(ztstep2(n_warm_up+1:)) +ztstepmax2 = maxval(ztstep2(n_warm_up+1:)) + if (luse_mpi) then call mpl_allreduce(ztloop, 'sum', ldreprod=.false.) call mpl_allreduce(ztstep, 'sum', ldreprod=.false.) @@ -876,18 +871,18 @@ program ectrans_benchmark call mpl_allreduce(ztstepmin2, 'min', ldreprod=.false.) endif -ztstepavg = (ztstepavg/real(nproc,jprb))/real(iters+2,jprd) +ztstepavg = (ztstepavg/real(nproc,jprb))/real(iters,jprd) ztloop = ztloop/real(nproc,jprd) ztstep(:) = ztstep(:)/real(nproc,jprd) -ztstepmed = get_median(ztstep) +ztstepmed = get_median(ztstep(n_warm_up+1:)) -ztstepavg1 = (ztstepavg1/real(nproc,jprb))/real(iters+2,jprd) +ztstepavg1 = (ztstepavg1/real(nproc,jprb))/real(iters,jprd) ztstep1(:) = ztstep1(:)/real(nproc,jprd) -ztstepmed1 = get_median(ztstep1) +ztstepmed1 = get_median(ztstep1(n_warm_up+1:)) -ztstepavg2 = (ztstepavg2/real(nproc,jprb))/real(iters+2,jprd) +ztstepavg2 = (ztstepavg2/real(nproc,jprb))/real(iters,jprd) ztstep2(:) = ztstep2(:)/real(nproc,jprd) -ztstepmed2 = get_median(ztstep2) +ztstepmed2 = get_median(ztstep2(n_warm_up+1:)) write(nout,'(a)') '======= Start of time step stats =======' write(nout,'(" ")') From 1346320e6e78cd4913c9b970dcf3353ba793f191 Mon Sep 17 00:00:00 2001 From: Sam Hatfield Date: Thu, 12 Sep 2024 08:30:12 +0000 Subject: [PATCH 43/86] Remove unused import --- src/trans/gpu/internal/ledir_mod.F90 | 2 +- src/trans/gpu/internal/leinv_mod.F90 | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/trans/gpu/internal/ledir_mod.F90 b/src/trans/gpu/internal/ledir_mod.F90 index 4399304e2..7655ab632 100755 --- a/src/trans/gpu/internal/ledir_mod.F90 +++ b/src/trans/gpu/internal/ledir_mod.F90 @@ -100,7 +100,7 @@ SUBROUTINE LEDIR(ALLOCATOR,ZINPS,ZINPA,ZINPS0,ZINPA0,ZOUT,ZOUT0,POA1,KF_FS) USE TPM_GEOMETRY, ONLY: G_NDGLU USE TPM_FIELDS, ONLY: ZAA,ZAS,ZAA0,ZAS0,KMLOC0 USE TPM_DISTR, ONLY: D_NUMP, D_MYMS, D_OFFSETS_GEMM1, D_OFFSETS_GEMM2 - USE HICBLAS_MOD, ONLY: HIP_GEMM_BATCHED, HIP_DGEMM_BATCHED_OVERLOAD, & + USE HICBLAS_MOD, ONLY: HIP_DGEMM_BATCHED_OVERLOAD, & & HIP_DGEMM_GROUPED_OVERLOAD, HIP_SGEMM_GROUPED_OVERLOAD USE MPL_MODULE, ONLY: MPL_BARRIER,MPL_ALL_MS_COMM USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX diff --git a/src/trans/gpu/internal/leinv_mod.F90 b/src/trans/gpu/internal/leinv_mod.F90 index 3e3397bde..5f62d0a7a 100755 --- a/src/trans/gpu/internal/leinv_mod.F90 +++ b/src/trans/gpu/internal/leinv_mod.F90 @@ -99,7 +99,7 @@ SUBROUTINE LEINV(ALLOCATOR,PIA,ZINP,ZINP0,ZOUTS,ZOUTA,ZOUTS0,ZOUTA0,KF_LEG) USE TPM_GEOMETRY, ONLY: G_NDGLU USE TPM_FIELDS, ONLY: ZAA, ZAS, ZAA0, ZAS0, KMLOC0 USE TPM_DISTR, ONLY: D_NUMP, D_MYMS, MYPROC, D_OFFSETS_GEMM1, D_OFFSETS_GEMM2 - USE HICBLAS_MOD, ONLY: HIP_GEMM_BATCHED, HIP_DGEMM_BATCHED_OVERLOAD, & + USE HICBLAS_MOD, ONLY: HIP_DGEMM_BATCHED_OVERLOAD, & & HIP_DGEMM_GROUPED_OVERLOAD, HIP_SGEMM_GROUPED_OVERLOAD USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_INT USE MPL_MODULE, ONLY: MPL_BARRIER,MPL_ALL_MS_COMM From 9341927c06deed635580aa51fd89c83bfec4ada7 Mon Sep 17 00:00:00 2001 From: Sam Hatfield Date: Thu, 12 Sep 2024 08:31:38 +0000 Subject: [PATCH 44/86] Remove unused interface --- src/trans/gpu/algor/hicblas_mod.F90 | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/trans/gpu/algor/hicblas_mod.F90 b/src/trans/gpu/algor/hicblas_mod.F90 index 4ff5a8fef..1afcba10a 100644 --- a/src/trans/gpu/algor/hicblas_mod.F90 +++ b/src/trans/gpu/algor/hicblas_mod.F90 @@ -22,12 +22,6 @@ MODULE HICBLAS_MOD IMPLICIT NONE - INTERFACE HIP_GEMM_BATCHED - MODULE PROCEDURE HIP_DGEMM_BATCHED_OVERLOAD - MODULE PROCEDURE HIP_SGEMM_BATCHED_OVERLOAD - MODULE PROCEDURE HIP_DGEMM_GROUPED_OVERLOAD - MODULE PROCEDURE HIP_SGEMM_GROUPED_OVERLOAD - END INTERFACE HIP_GEMM_BATCHED ! ! Define the interfaces to HIP/CUDA C code via a common wrapper interface From 5e822f64082f865f261a183a5797682762740f8d Mon Sep 17 00:00:00 2001 From: Sam Hatfield Date: Thu, 12 Sep 2024 08:32:12 +0000 Subject: [PATCH 45/86] Rename blas routines for consistency --- src/trans/gpu/algor/hicblas_gemm.hip.cpp | 4 ++-- src/trans/gpu/algor/hicblas_mod.F90 | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/trans/gpu/algor/hicblas_gemm.hip.cpp b/src/trans/gpu/algor/hicblas_gemm.hip.cpp index 34a68c4e3..48e8ef2af 100644 --- a/src/trans/gpu/algor/hicblas_gemm.hip.cpp +++ b/src/trans/gpu/algor/hicblas_gemm.hip.cpp @@ -290,7 +290,7 @@ void hipblas_sgemm_wrapper (char transa, char transb, } -void blas_sgemm_wrapper_grouped(int blas_id, char transa, char transb, +void hipblas_sgemm_wrapper_grouped(int blas_id, char transa, char transb, int m, int *n, int *k, float alpha, const float *A, int lda, int *offsetsA, const float *B, int ldb, int *offsetsB, float beta, @@ -311,7 +311,7 @@ void blas_sgemm_wrapper_grouped(int blas_id, char transa, char transb, #endif } -void blas_dgemm_wrapper_grouped(int blas_id, char transa, char transb, +void hipblas_dgemm_wrapper_grouped(int blas_id, char transa, char transb, int m, int *n, int *k, double alpha, const double *A, int lda, int *offsetsA, const double *B, int ldb, int *offsetsB, double beta, diff --git a/src/trans/gpu/algor/hicblas_mod.F90 b/src/trans/gpu/algor/hicblas_mod.F90 index 1afcba10a..1b136cd80 100644 --- a/src/trans/gpu/algor/hicblas_mod.F90 +++ b/src/trans/gpu/algor/hicblas_mod.F90 @@ -172,7 +172,7 @@ SUBROUTINE HIP_DGEMM_GROUPED( & & BETA, & & C, LDC, OFFSETC, & & BATCHCOUNT, STREAM, ALLOC & -&) BIND(C, NAME='blas_dgemm_wrapper_grouped') +&) BIND(C, NAME='hipblas_dgemm_wrapper_grouped') USE ISO_C_BINDING, ONLY: C_CHAR, C_INT, C_DOUBLE, C_SIZE_T, C_PTR CHARACTER(1,C_CHAR), VALUE :: CTA, CTB INTEGER(C_INT), VALUE :: BLAS_ID, M, LDA, LDB, LDC, BATCHCOUNT @@ -191,7 +191,7 @@ SUBROUTINE HIP_SGEMM_GROUPED( & & BETA, & & C, LDC, OFFSETC, & & BATCHCOUNT, STREAM, ALLOC & -&) BIND(C, NAME='blas_sgemm_wrapper_grouped') +&) BIND(C, NAME='hipblas_sgemm_wrapper_grouped') USE ISO_C_BINDING, ONLY: C_CHAR, C_INT, C_FLOAT, C_SIZE_T, C_PTR CHARACTER(1,C_CHAR), VALUE :: CTA, CTB INTEGER(C_INT), VALUE :: BLAS_ID, M, LDA, LDB, LDC, BATCHCOUNT From 03085edb4acaf262fe76ed48455ae2b2320d55c9 Mon Sep 17 00:00:00 2001 From: Sam Hatfield Date: Thu, 12 Sep 2024 08:33:00 +0000 Subject: [PATCH 46/86] Remove unused finalize routines --- src/trans/gpu/algor/hicblas_gemm.hip.cpp | 27 ------------------------ src/trans/gpu/algor/hicblas_mod.F90 | 10 --------- 2 files changed, 37 deletions(-) diff --git a/src/trans/gpu/algor/hicblas_gemm.hip.cpp b/src/trans/gpu/algor/hicblas_gemm.hip.cpp index 48e8ef2af..9d6178bed 100644 --- a/src/trans/gpu/algor/hicblas_gemm.hip.cpp +++ b/src/trans/gpu/algor/hicblas_gemm.hip.cpp @@ -324,30 +324,3 @@ void hipblas_dgemm_wrapper_grouped(int blas_id, char transa, char transb, growing_allocator); } } - -extern "C" void hipblasSgemmBatched_finalize () -{ - -#ifdef FALSE - if (hip_alreadyAllocated_sgemm){ - - hipFree(Aarray_sgemm_hip); - hipFree(Barray_sgemm_hip); - hipFree(Carray_sgemm_hip); - - hipFree(d_Aarray_sgemm_hip); - hipFree(d_Barray_sgemm_hip); - hipFree(d_Carray_sgemm_hip); - - } -#endif - - if (hip_alreadyAllocated_sgemm_handle){ - hipblasDestroy(handle_hip_sgemm); - } - if (hip_alreadyAllocated_dgemm_handle){ - hipblasDestroy(handle_hip_dgemm); - } - -} - diff --git a/src/trans/gpu/algor/hicblas_mod.F90 b/src/trans/gpu/algor/hicblas_mod.F90 index 1b136cd80..a83ee1082 100644 --- a/src/trans/gpu/algor/hicblas_mod.F90 +++ b/src/trans/gpu/algor/hicblas_mod.F90 @@ -106,11 +106,6 @@ SUBROUTINE HIP_DGEMM_STRIDED_BATCHED(& END SUBROUTINE HIP_DGEMM_STRIDED_BATCHED END INTERFACE -INTERFACE - SUBROUTINE HIP_DGEMM_BATCHED_FINALIZE() BIND(C,NAME='hipblasDgemmBatched_finalize') - END SUBROUTINE HIP_DGEMM_BATCHED_FINALIZE -END INTERFACE - INTERFACE SUBROUTINE HIP_SGEMM_BATCHED( & & CTA, CTB, & @@ -157,11 +152,6 @@ SUBROUTINE HIP_SGEMM_STRIDED_BATCHED(& END SUBROUTINE HIP_SGEMM_STRIDED_BATCHED END INTERFACE -INTERFACE - SUBROUTINE HIP_SGEMM_BATCHED_FINALIZE() BIND(C,NAME='hipblasSgemmBatched_finalize') - END SUBROUTINE HIP_SGEMM_BATCHED_FINALIZE -END INTERFACE - INTERFACE SUBROUTINE HIP_DGEMM_GROUPED( & & BLAS_ID, CTA, CTB, & From 29787ca7e3d7f7c29b0ce0de95adfba59da98213 Mon Sep 17 00:00:00 2001 From: Sam Hatfield Date: Thu, 12 Sep 2024 08:34:35 +0000 Subject: [PATCH 47/86] Remove interfaces with no matching C function --- src/trans/gpu/algor/hicblas_mod.F90 | 86 ----------------------------- 1 file changed, 86 deletions(-) diff --git a/src/trans/gpu/algor/hicblas_mod.F90 b/src/trans/gpu/algor/hicblas_mod.F90 index a83ee1082..104916e04 100644 --- a/src/trans/gpu/algor/hicblas_mod.F90 +++ b/src/trans/gpu/algor/hicblas_mod.F90 @@ -8,8 +8,6 @@ ! #if defined CUDAGPU -#define hipblasSgemm 'cublasSgemm' -#define hipblasDgemm 'cublasDgemm' #define ACC_GET_HIP_STREAM ACC_GET_CUDA_STREAM #define OPENACC_LIB OPENACC #endif @@ -22,44 +20,6 @@ MODULE HICBLAS_MOD IMPLICIT NONE - -! -! Define the interfaces to HIP/CUDA C code via a common wrapper interface -! -interface hip_gemm -! -! void hipblasSgemm (char transa, char transb, int m, int n, -! int k, float alpha, const float *A, int lda, -! const float *B, int ldb, float beta, float *C, int ldc) -! -SUBROUTINE HIP_SGEMM(CTA, CTB, M, N, K,& -ALPHA, A, LDA, B, LDB, BETA, C, LDC) BIND(C,NAME='hipblasSgemm') -USE ISO_C_BINDING, ONLY: C_CHAR, C_INT, C_FLOAT -CHARACTER(1,C_CHAR),VALUE :: CTA, CTB -INTEGER(C_INT), VALUE :: M,N,K,LDA,LDB,LDC -REAL(C_FLOAT), VALUE :: ALPHA,BETA -REAL(C_FLOAT), DIMENSION(LDA,*) :: A -REAL(C_FLOAT), DIMENSION(LDB,*) :: B -REAL(C_FLOAT), DIMENSION(LDC,*) :: C -END SUBROUTINE HIP_SGEMM - -! -! void hipblasDgemm (char transa, char transb, int m, int n, -! int k, double alpha, const double *A, int lda, -! const double *B, int ldb, double beta, double *C, int ldc) -! -SUBROUTINE HIP_DGEMM(CTA, CTB, M, N, K,& -ALPHA, A, LDA, B, LDB, BETA, C, LDC) BIND(C,NAME='hipblasDgemm') -USE ISO_C_BINDING, ONLY: C_CHAR, C_INT, C_DOUBLE -CHARACTER(1,C_CHAR),VALUE :: CTA, CTB -INTEGER(C_INT), VALUE :: M,N,K,LDA,LDB,LDC -REAL(C_DOUBLE), VALUE :: ALPHA,BETA -REAL(C_DOUBLE), DIMENSION(LDA,*) :: A -REAL(C_DOUBLE), DIMENSION(LDB,*) :: B -REAL(C_DOUBLE), DIMENSION(LDC,*) :: C -END SUBROUTINE HIP_DGEMM -END INTERFACE - INTERFACE SUBROUTINE HIP_DGEMM_BATCHED( & & CTA, CTB, & @@ -83,29 +43,6 @@ SUBROUTINE HIP_DGEMM_BATCHED( & END SUBROUTINE HIP_DGEMM_BATCHED END INTERFACE -INTERFACE - SUBROUTINE HIP_DGEMM_STRIDED_BATCHED(& - & CTA, CTB, & - & M, N, K, & - & ALPHA, & - & A, LDA, TDA, & - & B, LDB, TDB, & - & BETA, & - & C, LDC, TDC, & - & BATCHCOUNT, STREAM & - &) BIND(C, NAME='hipblasDgemmStridedBatched_wrapper') - USE ISO_C_BINDING, ONLY: C_CHAR, C_INT, C_DOUBLE, C_SIZE_T - CHARACTER(1,C_CHAR), VALUE :: CTA, CTB - INTEGER(C_INT), VALUE :: M, N, K, LDA, LDB, LDC, BATCHCOUNT - INTEGER(C_INT), VALUE :: TDA,TDB,TDC - REAL(C_DOUBLE), VALUE :: ALPHA, BETA - REAL(C_DOUBLE), DIMENSION(LDA,*) :: A - REAL(C_DOUBLE), DIMENSION(LDB,*) :: B - REAL(C_DOUBLE), DIMENSION(LDC,*) :: C - INTEGER(KIND=C_SIZE_T) :: STREAM - END SUBROUTINE HIP_DGEMM_STRIDED_BATCHED -END INTERFACE - INTERFACE SUBROUTINE HIP_SGEMM_BATCHED( & & CTA, CTB, & @@ -129,29 +66,6 @@ SUBROUTINE HIP_SGEMM_BATCHED( & END SUBROUTINE HIP_SGEMM_BATCHED END INTERFACE -INTERFACE - SUBROUTINE HIP_SGEMM_STRIDED_BATCHED(& - & CTA, CTB, & - & M, N, K, & - & ALPHA, & - & A, LDA, TDA, & - & B, LDB, TDB, & - & BETA, & - & C, LDC, TDC, & - & BATCHCOUNT, STREAM & - &) BIND(C, NAME='hipblasSgemmStridedBatched_wrapper') - USE ISO_C_BINDING, ONLY: C_CHAR, C_INT, C_FLOAT, C_SIZE_T - CHARACTER(1,C_CHAR), VALUE :: CTA, CTB - INTEGER(C_INT), VALUE :: M, N, K, LDA, LDB, LDC, BATCHCOUNT - INTEGER(C_INT), VALUE :: TDA,TDB,TDC - REAL(C_FLOAT), VALUE :: ALPHA, BETA - REAL(C_FLOAT), DIMENSION(LDA,*) :: A - REAL(C_FLOAT), DIMENSION(LDB,*) :: B - REAL(C_FLOAT), DIMENSION(LDC,*) :: C - INTEGER(KIND=C_SIZE_T) :: STREAM - END SUBROUTINE HIP_SGEMM_STRIDED_BATCHED -END INTERFACE - INTERFACE SUBROUTINE HIP_DGEMM_GROUPED( & & BLAS_ID, CTA, CTB, & From 9b810a687a91e3035a626cf8bdc421bfd7dbf8d3 Mon Sep 17 00:00:00 2001 From: Sam Hatfield Date: Thu, 12 Sep 2024 08:43:43 +0000 Subject: [PATCH 48/86] Fix indentation --- src/trans/gpu/algor/hicblas_mod.F90 | 426 ++++++++++++++-------------- 1 file changed, 212 insertions(+), 214 deletions(-) diff --git a/src/trans/gpu/algor/hicblas_mod.F90 b/src/trans/gpu/algor/hicblas_mod.F90 index 104916e04..988e1b3ef 100644 --- a/src/trans/gpu/algor/hicblas_mod.F90 +++ b/src/trans/gpu/algor/hicblas_mod.F90 @@ -21,53 +21,51 @@ MODULE HICBLAS_MOD IMPLICIT NONE INTERFACE - SUBROUTINE HIP_DGEMM_BATCHED( & - & CTA, CTB, & - & M, N, K, & - & ALPHA, & - & A, LDA, TDA, & - & B, LDB, TDB, & - & BETA, & - & C, LDC, TDC, & - & BATCHCOUNT, STREAM, ALLOC & - &) BIND(C, NAME='hipblas_dgemm_wrapper') - USE ISO_C_BINDING, ONLY: C_CHAR, C_INT, C_DOUBLE, C_SIZE_T, C_PTR - CHARACTER(1,C_CHAR), VALUE :: CTA, CTB - INTEGER(C_INT), VALUE :: M, N, K, LDA, LDB, LDC, TDA, TDB, TDC, BATCHCOUNT - REAL(C_DOUBLE), VALUE :: ALPHA,BETA - REAL(C_DOUBLE), DIMENSION(LDA,*) :: A - REAL(C_DOUBLE), DIMENSION(LDB,*) :: B - REAL(C_DOUBLE), DIMENSION(LDC,*) :: C - INTEGER(KIND=C_SIZE_T) :: STREAM - TYPE(C_PTR), INTENT(IN), VALUE :: ALLOC - END SUBROUTINE HIP_DGEMM_BATCHED -END INTERFACE + SUBROUTINE HIP_DGEMM_BATCHED( & + & CTA, CTB, & + & M, N, K, & + & ALPHA, & + & A, LDA, TDA, & + & B, LDB, TDB, & + & BETA, & + & C, LDC, TDC, & + & BATCHCOUNT, STREAM, ALLOC & + &) BIND(C, NAME='hipblas_dgemm_wrapper') + USE ISO_C_BINDING, ONLY: C_CHAR, C_INT, C_DOUBLE, C_SIZE_T, C_PTR + CHARACTER(1,C_CHAR), VALUE :: CTA, CTB + INTEGER(C_INT), VALUE :: M, N, K, LDA, LDB, LDC, TDA, TDB, TDC, BATCHCOUNT + REAL(C_DOUBLE), VALUE :: ALPHA,BETA + REAL(C_DOUBLE), DIMENSION(LDA,*) :: A + REAL(C_DOUBLE), DIMENSION(LDB,*) :: B + REAL(C_DOUBLE), DIMENSION(LDC,*) :: C + INTEGER(KIND=C_SIZE_T) :: STREAM + TYPE(C_PTR), INTENT(IN), VALUE :: ALLOC + END SUBROUTINE HIP_DGEMM_BATCHED -INTERFACE - SUBROUTINE HIP_SGEMM_BATCHED( & - & CTA, CTB, & - & M, N, K, & - & ALPHA, & - & A, LDA, TDA, & - & B, LDB, TDB, & - & BETA, & - & C, LDC, TDC, & - & BATCHCOUNT, STREAM, ALLOC & - &) BIND(C, NAME='hipblas_sgemm_wrapper') - USE ISO_C_BINDING, ONLY: C_CHAR, C_INT, C_FLOAT, C_SIZE_T, C_PTR - CHARACTER(1,C_CHAR), VALUE :: CTA, CTB - INTEGER(C_INT), VALUE :: M, N, K, LDA, LDB, LDC, TDA, TDB, TDC, BATCHCOUNT - REAL(C_FLOAT), VALUE :: ALPHA, BETA - REAL(C_FLOAT), DIMENSION(LDA,*) :: A - REAL(C_FLOAT), DIMENSION(LDB,*) :: B - REAL(C_FLOAT), DIMENSION(LDC,*) :: C - INTEGER(KIND=C_SIZE_T) :: STREAM - TYPE(C_PTR), INTENT(IN), VALUE :: ALLOC - END SUBROUTINE HIP_SGEMM_BATCHED + SUBROUTINE HIP_SGEMM_BATCHED( & + & CTA, CTB, & + & M, N, K, & + & ALPHA, & + & A, LDA, TDA, & + & B, LDB, TDB, & + & BETA, & + & C, LDC, TDC, & + & BATCHCOUNT, STREAM, ALLOC & + &) BIND(C, NAME='hipblas_sgemm_wrapper') + USE ISO_C_BINDING, ONLY: C_CHAR, C_INT, C_FLOAT, C_SIZE_T, C_PTR + CHARACTER(1,C_CHAR), VALUE :: CTA, CTB + INTEGER(C_INT), VALUE :: M, N, K, LDA, LDB, LDC, TDA, TDB, TDC, BATCHCOUNT + REAL(C_FLOAT), VALUE :: ALPHA, BETA + REAL(C_FLOAT), DIMENSION(LDA,*) :: A + REAL(C_FLOAT), DIMENSION(LDB,*) :: B + REAL(C_FLOAT), DIMENSION(LDC,*) :: C + INTEGER(KIND=C_SIZE_T) :: STREAM + TYPE(C_PTR), INTENT(IN), VALUE :: ALLOC + END SUBROUTINE HIP_SGEMM_BATCHED END INTERFACE INTERFACE -SUBROUTINE HIP_DGEMM_GROUPED( & + SUBROUTINE HIP_DGEMM_GROUPED( & & BLAS_ID, CTA, CTB, & & M, N, K, & & ALPHA, & @@ -76,7 +74,7 @@ SUBROUTINE HIP_DGEMM_GROUPED( & & BETA, & & C, LDC, OFFSETC, & & BATCHCOUNT, STREAM, ALLOC & -&) BIND(C, NAME='hipblas_dgemm_wrapper_grouped') + &) BIND(C, NAME='hipblas_dgemm_wrapper_grouped') USE ISO_C_BINDING, ONLY: C_CHAR, C_INT, C_DOUBLE, C_SIZE_T, C_PTR CHARACTER(1,C_CHAR), VALUE :: CTA, CTB INTEGER(C_INT), VALUE :: BLAS_ID, M, LDA, LDB, LDC, BATCHCOUNT @@ -85,8 +83,9 @@ SUBROUTINE HIP_DGEMM_GROUPED( & REAL(C_DOUBLE) :: A(*), B(*), C(*) INTEGER(KIND=C_SIZE_T) :: STREAM TYPE(C_PTR), INTENT(IN), VALUE :: ALLOC -END SUBROUTINE HIP_DGEMM_GROUPED -SUBROUTINE HIP_SGEMM_GROUPED( & + END SUBROUTINE HIP_DGEMM_GROUPED + + SUBROUTINE HIP_SGEMM_GROUPED( & & BLAS_ID, CTA, CTB, & & M, N, K, & & ALPHA, & @@ -95,7 +94,7 @@ SUBROUTINE HIP_SGEMM_GROUPED( & & BETA, & & C, LDC, OFFSETC, & & BATCHCOUNT, STREAM, ALLOC & -&) BIND(C, NAME='hipblas_sgemm_wrapper_grouped') + &) BIND(C, NAME='hipblas_sgemm_wrapper_grouped') USE ISO_C_BINDING, ONLY: C_CHAR, C_INT, C_FLOAT, C_SIZE_T, C_PTR CHARACTER(1,C_CHAR), VALUE :: CTA, CTB INTEGER(C_INT), VALUE :: BLAS_ID, M, LDA, LDB, LDC, BATCHCOUNT @@ -104,202 +103,201 @@ SUBROUTINE HIP_SGEMM_GROUPED( & REAL(C_FLOAT) :: A(*), B(*), C(*) INTEGER(KIND=C_SIZE_T) :: STREAM TYPE(C_PTR), INTENT(IN), VALUE :: ALLOC -END SUBROUTINE HIP_SGEMM_GROUPED + END SUBROUTINE HIP_SGEMM_GROUPED END INTERFACE CONTAINS SUBROUTINE HIP_DGEMM_BATCHED_OVERLOAD( & - & TRANSA, TRANSB, & - & M, N, K, & - & ALPHA, & - & AARRAY, LDA, STRIDEA, & - & BARRAY, LDB, STRIDEB, & - & BETA, & - & CARRAY, LDC, STRIDEC, & - & BATCHCOUNT, STREAM, ALLOC) - USE ISO_C_BINDING, ONLY: C_CHAR, C_INT, C_LONG, C_LOC - CHARACTER(1,C_CHAR), VALUE :: TRANSA, TRANSB - INTEGER(KIND=JPIM) :: M - INTEGER(KIND=JPIM) :: N - INTEGER(KIND=JPIM) :: K - REAL(KIND=JPRD) :: ALPHA - REAL(KIND=JPRD), DIMENSION(:) :: AARRAY - INTEGER(KIND=JPIM) :: LDA - INTEGER(KIND=JPIM) :: STRIDEA - REAL(KIND=JPRD), DIMENSION(:,:) :: BARRAY - INTEGER(KIND=JPIM) :: LDB - INTEGER(KIND=JPIM) :: STRIDEB - REAL(KIND=JPRD) :: BETA - REAL(KIND=JPRD), DIMENSION(:) :: CARRAY - INTEGER(KIND=JPIM) :: LDC - INTEGER(KIND=JPIM) :: STRIDEC - INTEGER(KIND=JPIM) :: BATCHCOUNT - INTEGER(KIND=C_INT) :: STREAM - TYPE(GROWING_ALLOCATION_TYPE), INTENT(IN) :: ALLOC + & TRANSA, TRANSB, & + & M, N, K, & + & ALPHA, & + & AARRAY, LDA, STRIDEA, & + & BARRAY, LDB, STRIDEB, & + & BETA, & + & CARRAY, LDC, STRIDEC, & + & BATCHCOUNT, STREAM, ALLOC) + USE ISO_C_BINDING, ONLY: C_CHAR, C_INT, C_LONG, C_LOC + CHARACTER(1,C_CHAR), VALUE :: TRANSA, TRANSB + INTEGER(KIND=JPIM) :: M + INTEGER(KIND=JPIM) :: N + INTEGER(KIND=JPIM) :: K + REAL(KIND=JPRD) :: ALPHA + REAL(KIND=JPRD), DIMENSION(:) :: AARRAY + INTEGER(KIND=JPIM) :: LDA + INTEGER(KIND=JPIM) :: STRIDEA + REAL(KIND=JPRD), DIMENSION(:,:) :: BARRAY + INTEGER(KIND=JPIM) :: LDB + INTEGER(KIND=JPIM) :: STRIDEB + REAL(KIND=JPRD) :: BETA + REAL(KIND=JPRD), DIMENSION(:) :: CARRAY + INTEGER(KIND=JPIM) :: LDC + INTEGER(KIND=JPIM) :: STRIDEC + INTEGER(KIND=JPIM) :: BATCHCOUNT + INTEGER(KIND=C_INT) :: STREAM + TYPE(GROWING_ALLOCATION_TYPE), INTENT(IN) :: ALLOC - INTEGER(KIND=C_LONG) :: HIP_STREAM + INTEGER(KIND=C_LONG) :: HIP_STREAM - HIP_STREAM = INT(ACC_GET_HIP_STREAM(STREAM), C_LONG) + HIP_STREAM = INT(ACC_GET_HIP_STREAM(STREAM), C_LONG) #if defined(_CRAYFTN) - !$ACC HOST_DATA USE_DEVICE(AARRAY,BARRAY,CARRAY) + !$ACC HOST_DATA USE_DEVICE(AARRAY,BARRAY,CARRAY) #endif - CALL HIP_DGEMM_BATCHED( & - & TRANSA, TRANSB, & - & M, N, K, & - & ALPHA, & - & AARRAY, LDA, STRIDEA, & - & BARRAY, LDB, STRIDEB, & - & BETA, & - & CARRAY, LDC, STRIDEC, & - & BATCHCOUNT, HIP_STREAM, C_LOC(ALLOC)) + CALL HIP_DGEMM_BATCHED( & + & TRANSA, TRANSB, & + & M, N, K, & + & ALPHA, & + & AARRAY, LDA, STRIDEA, & + & BARRAY, LDB, STRIDEB, & + & BETA, & + & CARRAY, LDC, STRIDEC, & + & BATCHCOUNT, HIP_STREAM, C_LOC(ALLOC)) #if defined(_CRAYFTN) - !$ACC END HOST_DATA + !$ACC END HOST_DATA #endif - END SUBROUTINE HIP_DGEMM_BATCHED_OVERLOAD - - SUBROUTINE HIP_SGEMM_BATCHED_OVERLOAD( & - & TRANSA, TRANSB, & - & M, N, K, & - & ALPHA, & - & AARRAY, LDA, STRIDEA, & - & BARRAY, LDB, STRIDEB, & - & BETA, & - & CARRAY, LDC, STRIDEC, & - & BATCHCOUNT, STREAM, ALLOC) - USE ISO_C_BINDING, ONLY: C_CHAR, C_INT, C_LONG, C_LOC - CHARACTER(1,C_CHAR), VALUE :: TRANSA, TRANSB - INTEGER(KIND=JPIM) :: M - INTEGER(KIND=JPIM) :: N - INTEGER(KIND=JPIM) :: K - REAL(KIND=JPRM) :: ALPHA - REAL(KIND=JPRM), DIMENSION(:) :: AARRAY - INTEGER(KIND=JPIM) :: LDA - INTEGER(KIND=JPIM) :: STRIDEA - REAL(KIND=JPRM), DIMENSION(*) :: BARRAY - INTEGER(KIND=JPIM) :: LDB - INTEGER(KIND=JPIM) :: STRIDEB - REAL(KIND=JPRM) :: BETA - REAL(KIND=JPRM), DIMENSION(:) :: CARRAY - INTEGER(KIND=JPIM) :: LDC - INTEGER(KIND=JPIM) :: STRIDEC - INTEGER(KIND=JPIM) :: BATCHCOUNT - INTEGER(KIND=C_INT) :: STREAM - TYPE(GROWING_ALLOCATION_TYPE), INTENT(IN) :: ALLOC +END SUBROUTINE HIP_DGEMM_BATCHED_OVERLOAD - INTEGER(KIND=C_LONG) :: HIP_STREAM +SUBROUTINE HIP_SGEMM_BATCHED_OVERLOAD( & + & TRANSA, TRANSB, & + & M, N, K, & + & ALPHA, & + & AARRAY, LDA, STRIDEA, & + & BARRAY, LDB, STRIDEB, & + & BETA, & + & CARRAY, LDC, STRIDEC, & + & BATCHCOUNT, STREAM, ALLOC) + USE ISO_C_BINDING, ONLY: C_CHAR, C_INT, C_LONG, C_LOC + CHARACTER(1,C_CHAR), VALUE :: TRANSA, TRANSB + INTEGER(KIND=JPIM) :: M + INTEGER(KIND=JPIM) :: N + INTEGER(KIND=JPIM) :: K + REAL(KIND=JPRM) :: ALPHA + REAL(KIND=JPRM), DIMENSION(:) :: AARRAY + INTEGER(KIND=JPIM) :: LDA + INTEGER(KIND=JPIM) :: STRIDEA + REAL(KIND=JPRM), DIMENSION(*) :: BARRAY + INTEGER(KIND=JPIM) :: LDB + INTEGER(KIND=JPIM) :: STRIDEB + REAL(KIND=JPRM) :: BETA + REAL(KIND=JPRM), DIMENSION(:) :: CARRAY + INTEGER(KIND=JPIM) :: LDC + INTEGER(KIND=JPIM) :: STRIDEC + INTEGER(KIND=JPIM) :: BATCHCOUNT + INTEGER(KIND=C_INT) :: STREAM + TYPE(GROWING_ALLOCATION_TYPE), INTENT(IN) :: ALLOC - HIP_STREAM = INT(ACC_GET_HIP_STREAM(STREAM), C_LONG) + INTEGER(KIND=C_LONG) :: HIP_STREAM - CALL HIP_SGEMM_BATCHED( & - & TRANSA, TRANSB, & - & M, N, K, & - & ALPHA, & - & AARRAY, LDA, STRIDEA, & - & BARRAY, LDB, STRIDEB, & - & BETA, & - & CARRAY, LDC, STRIDEC, & - & BATCHCOUNT, HIP_STREAM, C_LOC(ALLOC)) - END SUBROUTINE HIP_SGEMM_BATCHED_OVERLOAD + HIP_STREAM = INT(ACC_GET_HIP_STREAM(STREAM), C_LONG) + CALL HIP_SGEMM_BATCHED( & + & TRANSA, TRANSB, & + & M, N, K, & + & ALPHA, & + & AARRAY, LDA, STRIDEA, & + & BARRAY, LDB, STRIDEB, & + & BETA, & + & CARRAY, LDC, STRIDEC, & + & BATCHCOUNT, HIP_STREAM, C_LOC(ALLOC)) +END SUBROUTINE HIP_SGEMM_BATCHED_OVERLOAD - SUBROUTINE HIP_DGEMM_GROUPED_OVERLOAD( & - & BLAS_ID, TRANSA, TRANSB, & - & M, N, K, & - & ALPHA, & - & AARRAY, LDA, OFFSETA, & - & BARRAY, LDB, OFFSETB, & - & BETA, & - & CARRAY, LDC, OFFSETC, & - & BATCHCOUNT, STREAM, ALLOC) - USE ISO_C_BINDING, ONLY: C_INT, C_CHAR, C_LONG, C_LOC - INTEGER(KIND=C_INT), INTENT(IN) :: BLAS_ID - CHARACTER(1,C_CHAR), VALUE :: TRANSA, TRANSB - INTEGER(KIND=JPIM) :: M - INTEGER(KIND=JPIM) :: N(:) - INTEGER(KIND=JPIM) :: K(:) - REAL(KIND=JPRD) :: ALPHA - REAL(KIND=JPRD), DIMENSION(:) :: AARRAY - INTEGER(KIND=JPIM) :: LDA - INTEGER(KIND=JPIM) :: OFFSETA(:) - REAL(KIND=JPRD), DIMENSION(*) :: BARRAY - INTEGER(KIND=JPIM) :: LDB - INTEGER(KIND=JPIM) :: OFFSETB(:) - REAL(KIND=JPRD) :: BETA - REAL(KIND=JPRD), DIMENSION(:) :: CARRAY - INTEGER(KIND=JPIM) :: LDC - INTEGER(KIND=JPIM) :: OFFSETC(:) - INTEGER(KIND=JPIM) :: BATCHCOUNT - INTEGER(KIND=C_INT) :: STREAM - TYPE(GROWING_ALLOCATION_TYPE), INTENT(IN) :: ALLOC +SUBROUTINE HIP_DGEMM_GROUPED_OVERLOAD( & + & BLAS_ID, TRANSA, TRANSB, & + & M, N, K, & + & ALPHA, & + & AARRAY, LDA, OFFSETA, & + & BARRAY, LDB, OFFSETB, & + & BETA, & + & CARRAY, LDC, OFFSETC, & + & BATCHCOUNT, STREAM, ALLOC) + USE ISO_C_BINDING, ONLY: C_INT, C_CHAR, C_LONG, C_LOC + INTEGER(KIND=C_INT), INTENT(IN) :: BLAS_ID + CHARACTER(1,C_CHAR), VALUE :: TRANSA, TRANSB + INTEGER(KIND=JPIM) :: M + INTEGER(KIND=JPIM) :: N(:) + INTEGER(KIND=JPIM) :: K(:) + REAL(KIND=JPRD) :: ALPHA + REAL(KIND=JPRD), DIMENSION(:) :: AARRAY + INTEGER(KIND=JPIM) :: LDA + INTEGER(KIND=JPIM) :: OFFSETA(:) + REAL(KIND=JPRD), DIMENSION(*) :: BARRAY + INTEGER(KIND=JPIM) :: LDB + INTEGER(KIND=JPIM) :: OFFSETB(:) + REAL(KIND=JPRD) :: BETA + REAL(KIND=JPRD), DIMENSION(:) :: CARRAY + INTEGER(KIND=JPIM) :: LDC + INTEGER(KIND=JPIM) :: OFFSETC(:) + INTEGER(KIND=JPIM) :: BATCHCOUNT + INTEGER(KIND=C_INT) :: STREAM + TYPE(GROWING_ALLOCATION_TYPE), INTENT(IN) :: ALLOC - INTEGER(KIND=C_LONG) :: HIP_STREAM + INTEGER(KIND=C_LONG) :: HIP_STREAM - HIP_STREAM = INT(ACC_GET_HIP_STREAM(STREAM), C_LONG) + HIP_STREAM = INT(ACC_GET_HIP_STREAM(STREAM), C_LONG) - CALL HIP_DGEMM_GROUPED( & - & BLAS_ID, TRANSA, TRANSB, & - & M, N, K, & - & ALPHA, & - & AARRAY, LDA, OFFSETA, & - & BARRAY, LDB, OFFSETB, & - & BETA, & - & CARRAY, LDC, OFFSETC, & - & BATCHCOUNT, HIP_STREAM, C_LOC(ALLOC)) + CALL HIP_DGEMM_GROUPED( & + & BLAS_ID, TRANSA, TRANSB, & + & M, N, K, & + & ALPHA, & + & AARRAY, LDA, OFFSETA, & + & BARRAY, LDB, OFFSETB, & + & BETA, & + & CARRAY, LDC, OFFSETC, & + & BATCHCOUNT, HIP_STREAM, C_LOC(ALLOC)) - END SUBROUTINE HIP_DGEMM_GROUPED_OVERLOAD +END SUBROUTINE HIP_DGEMM_GROUPED_OVERLOAD - SUBROUTINE HIP_SGEMM_GROUPED_OVERLOAD(& - & BLAS_ID, TRANSA, TRANSB, & - & M, N, K, & - & ALPHA, & - & AARRAY, LDA, OFFSETA, & - & BARRAY, LDB, OFFSETB, & - & BETA, & - & CARRAY, LDC, OFFSETC, & - & BATCHCOUNT, STREAM, ALLOC) - USE ISO_C_BINDING, ONLY: C_INT, C_CHAR, C_LONG, C_LOC - INTEGER(KIND=C_INT), INTENT(IN) :: BLAS_ID - CHARACTER(1,C_CHAR), VALUE :: TRANSA, TRANSB - INTEGER(KIND=JPIM) :: M - INTEGER(KIND=JPIM) :: N(:) - INTEGER(KIND=JPIM) :: K(:) - REAL(KIND=JPRM) :: ALPHA - REAL(KIND=JPRM), DIMENSION(:) :: AARRAY - INTEGER(KIND=JPIM) :: LDA - INTEGER(KIND=JPIM) :: OFFSETA(:) - REAL(KIND=JPRM), DIMENSION(:,:,:) :: BARRAY - INTEGER(KIND=JPIM) :: LDB - INTEGER(KIND=JPIM) :: OFFSETB(:) - REAL(KIND=JPRM) :: BETA - REAL(KIND=JPRM), DIMENSION(:) :: CARRAY - INTEGER(KIND=JPIM) :: LDC - INTEGER(KIND=JPIM) :: OFFSETC(:) - INTEGER(KIND=JPIM) :: BATCHCOUNT - INTEGER(KIND=C_INT) :: STREAM - TYPE(GROWING_ALLOCATION_TYPE), INTENT(IN) :: ALLOC +SUBROUTINE HIP_SGEMM_GROUPED_OVERLOAD(& + & BLAS_ID, TRANSA, TRANSB, & + & M, N, K, & + & ALPHA, & + & AARRAY, LDA, OFFSETA, & + & BARRAY, LDB, OFFSETB, & + & BETA, & + & CARRAY, LDC, OFFSETC, & + & BATCHCOUNT, STREAM, ALLOC) + USE ISO_C_BINDING, ONLY: C_INT, C_CHAR, C_LONG, C_LOC + INTEGER(KIND=C_INT), INTENT(IN) :: BLAS_ID + CHARACTER(1,C_CHAR), VALUE :: TRANSA, TRANSB + INTEGER(KIND=JPIM) :: M + INTEGER(KIND=JPIM) :: N(:) + INTEGER(KIND=JPIM) :: K(:) + REAL(KIND=JPRM) :: ALPHA + REAL(KIND=JPRM), DIMENSION(:) :: AARRAY + INTEGER(KIND=JPIM) :: LDA + INTEGER(KIND=JPIM) :: OFFSETA(:) + REAL(KIND=JPRM), DIMENSION(:,:,:) :: BARRAY + INTEGER(KIND=JPIM) :: LDB + INTEGER(KIND=JPIM) :: OFFSETB(:) + REAL(KIND=JPRM) :: BETA + REAL(KIND=JPRM), DIMENSION(:) :: CARRAY + INTEGER(KIND=JPIM) :: LDC + INTEGER(KIND=JPIM) :: OFFSETC(:) + INTEGER(KIND=JPIM) :: BATCHCOUNT + INTEGER(KIND=C_INT) :: STREAM + TYPE(GROWING_ALLOCATION_TYPE), INTENT(IN) :: ALLOC - INTEGER(KIND=C_LONG) :: HIP_STREAM + INTEGER(KIND=C_LONG) :: HIP_STREAM - HIP_STREAM = INT(ACC_GET_HIP_STREAM(STREAM), C_LONG) + HIP_STREAM = INT(ACC_GET_HIP_STREAM(STREAM), C_LONG) #if defined(_CRAYFTN) - !$ACC HOST_DATA USE_DEVICE(AARRAY,BARRAY,CARRAY) + !$ACC HOST_DATA USE_DEVICE(AARRAY,BARRAY,CARRAY) #endif - CALL HIP_SGEMM_GROUPED( & - & BLAS_ID, TRANSA, TRANSB, & - & M, N, K, & - & ALPHA, & - & AARRAY, LDA, OFFSETA, & - & BARRAY, LDB, OFFSETB, & - & BETA, & - & CARRAY, LDC, OFFSETC, & - & BATCHCOUNT, HIP_STREAM, C_LOC(ALLOC)) + CALL HIP_SGEMM_GROUPED( & + & BLAS_ID, TRANSA, TRANSB, & + & M, N, K, & + & ALPHA, & + & AARRAY, LDA, OFFSETA, & + & BARRAY, LDB, OFFSETB, & + & BETA, & + & CARRAY, LDC, OFFSETC, & + & BATCHCOUNT, HIP_STREAM, C_LOC(ALLOC)) #if defined(_CRAYFTN) - !$ACC END HOST_DATA + !$ACC END HOST_DATA #endif - END SUBROUTINE HIP_SGEMM_GROUPED_OVERLOAD +END SUBROUTINE HIP_SGEMM_GROUPED_OVERLOAD END MODULE HICBLAS_MOD From 8aa61a413d1d9971fb63fb402c35f19c4a7d55ba Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Thu, 12 Sep 2024 13:32:06 +0000 Subject: [PATCH 49/86] Remove unused tpm_fft.F90 from gpu code path --- src/trans/cpu/internal/set_resol_mod.F90 | 4 +-- src/trans/cpu/internal/suleg_mod.F90 | 6 ++-- src/trans/gpu/external/setup_trans.F90 | 2 -- src/trans/gpu/external/trans_end.F90 | 5 ---- src/trans/gpu/internal/dealloc_resol_mod.F90 | 11 -------- src/trans/gpu/internal/set_resol_mod.F90 | 2 -- src/trans/gpu/internal/tpm_fft.F90 | 29 -------------------- 7 files changed, 5 insertions(+), 54 deletions(-) delete mode 100755 src/trans/gpu/internal/tpm_fft.F90 diff --git a/src/trans/cpu/internal/set_resol_mod.F90 b/src/trans/cpu/internal/set_resol_mod.F90 index d5ed9ef93..1862d41e0 100644 --- a/src/trans/cpu/internal/set_resol_mod.F90 +++ b/src/trans/cpu/internal/set_resol_mod.F90 @@ -19,8 +19,8 @@ SUBROUTINE SET_RESOL(KRESOL,LDSETUP) USE TPM_GEOMETRY ,ONLY : G, GEOM_RESOL USE TPM_FIELDS ,ONLY : F, FIELDS_RESOL USE TPM_FFTW ,ONLY : TW, FFTW_RESOL -USE TPM_FLT ,ONLY : S, FLT_RESOL -USE TPM_CTL ,ONLY : C, CTL_RESOL +USE TPM_FLT ,ONLY : S, FLT_RESOL +USE TPM_CTL ,ONLY : C, CTL_RESOL USE ABORT_TRANS_MOD ,ONLY : ABORT_TRANS ! diff --git a/src/trans/cpu/internal/suleg_mod.F90 b/src/trans/cpu/internal/suleg_mod.F90 index c51a3574d..67122f6d1 100644 --- a/src/trans/cpu/internal/suleg_mod.F90 +++ b/src/trans/cpu/internal/suleg_mod.F90 @@ -1003,9 +1003,9 @@ SUBROUTINE SULEG S%FA(IMLOC)%RPNMS(JGL,ILS-JI+1)=ZRCVBUFV((JGL-1)*ILS+JI,JSETV) ENDDO ENDDO - ENDIF - ENDDO - !$OMP END PARALLEL DO + ENDIF + ENDDO + !$OMP END PARALLEL DO CALL GSTATS(1251,1) IF( ALLOCATED(ZSNDBUFV) ) DEALLOCATE(ZSNDBUFV) diff --git a/src/trans/gpu/external/setup_trans.F90 b/src/trans/gpu/external/setup_trans.F90 index bdb59be24..74e8fb5b7 100755 --- a/src/trans/gpu/external/setup_trans.F90 +++ b/src/trans/gpu/external/setup_trans.F90 @@ -116,7 +116,6 @@ SUBROUTINE SETUP_TRANS(KSMAX,KDGL,KDLON,KLOEN,LDSPLIT,PSTRET,& & G_NLOEN_MAX USE TPM_FIELDS, ONLY: FIELDS_RESOL, F, F_RW, F_RN, F_RLAPIN, F_RACTHE, ZEPSNM, & & ZAA, ZAS, ZAA0, ZAS0, KMLOC0 -USE TPM_FFT, ONLY: T, FFT_RESOL USE TPM_FLT, ONLY: FLT_RESOL, S USE TPM_CTL, ONLY: CTL_RESOL, C USE SET_RESOL_MOD, ONLY: SET_RESOL @@ -231,7 +230,6 @@ SUBROUTINE SETUP_TRANS(KSMAX,KDGL,KDLON,KLOEN,LDSPLIT,PSTRET,& ALLOCATE(FIELDS_RESOL(NMAX_RESOL)) ALLOCATE(GEOM_RESOL(NMAX_RESOL)) ALLOCATE(DISTR_RESOL(NMAX_RESOL)) - ALLOCATE(FFT_RESOL(NMAX_RESOL)) ALLOCATE(FLT_RESOL(NMAX_RESOL)) ALLOCATE(CTL_RESOL(NMAX_RESOL)) GEOM_RESOL(:)%LAM=.FALSE. diff --git a/src/trans/gpu/external/trans_end.F90 b/src/trans/gpu/external/trans_end.F90 index 20a037629..d7081c94e 100755 --- a/src/trans/gpu/external/trans_end.F90 +++ b/src/trans/gpu/external/trans_end.F90 @@ -53,7 +53,6 @@ SUBROUTINE TRANS_END(CDMODE) & D_NPROCM, D_NPTRLS USE TPM_GEOMETRY, ONLY: G, GEOM_RESOL, G_NDGLU, G_NMEN, G_NMEN_MAX,G_NLOEN, G_NLOEN_MAX USE TPM_FIELDS, ONLY: F, FIELDS_RESOL, F_RW, ZEPSNM, ZAA, ZAS, ZAA0, ZAS0 -USE TPM_FFT, ONLY: T, FFT_RESOL USE TPM_CTL, ONLY: C, CTL_RESOL USE TPM_FLT, ONLY: S, FLT_RESOL USE TPM_TRANS, ONLY: FOUBUF, FOUBUF_IN @@ -110,10 +109,6 @@ SUBROUTINE TRANS_END(CDMODE) NULLIFY(D) IF( ALLOCATED(DISTR_RESOL) ) DEALLOCATE(DISTR_RESOL) - !TPM_FFT - NULLIFY(T) - IF( ALLOCATED(FFT_RESOL) ) DEALLOCATE(FFT_RESOL) - !TPM_FLT NULLIFY(S) IF( ALLOCATED(FLT_RESOL) ) DEALLOCATE(FLT_RESOL) diff --git a/src/trans/gpu/internal/dealloc_resol_mod.F90 b/src/trans/gpu/internal/dealloc_resol_mod.F90 index cdbb4eada..8c8726506 100755 --- a/src/trans/gpu/internal/dealloc_resol_mod.F90 +++ b/src/trans/gpu/internal/dealloc_resol_mod.F90 @@ -47,7 +47,6 @@ SUBROUTINE DEALLOC_RESOL(KRESOL) USE TPM_DISTR, ONLY: D, NPRTRV USE TPM_GEOMETRY, ONLY: G USE TPM_FIELDS, ONLY: F -USE TPM_FFT, ONLY: T USE TPM_FLT, ONLY: S USE TPM_CTL, ONLY: C USE SEEFMM_MIX, ONLY: FREE_SEEFMM @@ -135,16 +134,6 @@ SUBROUTINE DEALLOC_RESOL(KRESOL) IF(ALLOCATED(D%MSTABF)) DEALLOCATE(D%MSTABF) IF(ALLOCATED(D%NSTAGTF)) DEALLOCATE(D%NSTAGTF) - !TPM_FFT - IF (.NOT.D%LCPNMONLY) THEN - IF( ASSOCIATED(T) ) THEN - IF( ALLOCATED(T%TRIGS) ) DEALLOCATE(T%TRIGS) - IF( ALLOCATED(T%NFAX) ) DEALLOCATE(T%NFAX) -!! IF( ALLOCATED(T%LUSEFFT992)) DEALLOCATE(T%LUSEFFT992) - ENDIF - ENDIF - - !TPM_FIELDS IF(ALLOCATED(F%RMU)) DEALLOCATE(F%RMU) IF(ALLOCATED(F%RW)) DEALLOCATE(F%RW) diff --git a/src/trans/gpu/internal/set_resol_mod.F90 b/src/trans/gpu/internal/set_resol_mod.F90 index d197c5462..c7367bdd6 100755 --- a/src/trans/gpu/internal/set_resol_mod.F90 +++ b/src/trans/gpu/internal/set_resol_mod.F90 @@ -18,7 +18,6 @@ SUBROUTINE SET_RESOL(KRESOL,LDSETUP) USE TPM_DISTR, ONLY: D, DISTR_RESOL USE TPM_GEOMETRY, ONLY: G, GEOM_RESOL USE TPM_FIELDS, ONLY: F, FIELDS_RESOL -USE TPM_FFT, ONLY: T, FFT_RESOL USE TPM_FLT, ONLY: S, FLT_RESOL USE TPM_CTL, ONLY: C, CTL_RESOL USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS @@ -60,7 +59,6 @@ SUBROUTINE SET_RESOL(KRESOL,LDSETUP) F => FIELDS_RESOL(NCUR_RESOL) G => GEOM_RESOL(NCUR_RESOL) D => DISTR_RESOL(NCUR_RESOL) - T => FFT_RESOL(NCUR_RESOL) S => FLT_RESOL(NCUR_RESOL) C => CTL_RESOL(NCUR_RESOL) ENDIF diff --git a/src/trans/gpu/internal/tpm_fft.F90 b/src/trans/gpu/internal/tpm_fft.F90 deleted file mode 100755 index dece21b08..000000000 --- a/src/trans/gpu/internal/tpm_fft.F90 +++ /dev/null @@ -1,29 +0,0 @@ -! (C) Copyright 2000- ECMWF. -! (C) Copyright 2000- Meteo-France. -! -! This software is licensed under the terms of the Apache Licence Version 2.0 -! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -! In applying this licence, ECMWF does not waive the privileges and immunities -! granted to it by virtue of its status as an intergovernmental organisation -! nor does it submit to any jurisdiction. -! - -MODULE TPM_FFT -USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT - -! Module for Fourier transforms. - -IMPLICIT NONE - -SAVE - -TYPE FFT_TYPE - REAL(KIND=JPRBT) ,ALLOCATABLE :: TRIGS(:,:) ! list of trigonometric function values - INTEGER(KIND=JPIM),ALLOCATABLE :: NFAX(:,:) ! list of factors of truncation -END TYPE FFT_TYPE - -TYPE(FFT_TYPE),ALLOCATABLE,TARGET :: FFT_RESOL(:) -TYPE(FFT_TYPE),POINTER :: T - - -END MODULE TPM_FFT From c5c386b95cee9f181e6e010cce80bca2eb31e581 Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Thu, 12 Sep 2024 12:14:28 +0000 Subject: [PATCH 50/86] Remove unused variable FLT_TYPE::MAXCOLS --- src/trans/cpu/internal/tpm_flt.F90 | 2 -- src/trans/gpu/internal/tpm_flt.F90 | 5 ----- 2 files changed, 7 deletions(-) diff --git a/src/trans/cpu/internal/tpm_flt.F90 b/src/trans/cpu/internal/tpm_flt.F90 index ccb20fa12..77adae8ac 100644 --- a/src/trans/cpu/internal/tpm_flt.F90 +++ b/src/trans/cpu/internal/tpm_flt.F90 @@ -36,8 +36,6 @@ MODULE TPM_FLT INTEGER(KIND=JPIM) :: ISLD ! starting latitude dual ! Butterfly - -INTEGER(KIND=JPIM) :: MAXCOLS TYPE(BUTTERFLY_STRUCT) :: YBUT_STRUCT_S,YBUT_STRUCT_A END TYPE FLT_TYPE diff --git a/src/trans/gpu/internal/tpm_flt.F90 b/src/trans/gpu/internal/tpm_flt.F90 index 58e910667..019abcca0 100755 --- a/src/trans/gpu/internal/tpm_flt.F90 +++ b/src/trans/gpu/internal/tpm_flt.F90 @@ -33,11 +33,6 @@ MODULE TPM_FLT REAL(KIND=JPRBT) ,POINTER :: RPNMWI(:,:) ! special weights REAL(KIND=JPRBT) ,POINTER :: RPNMWO(:,:) ! special weights INTEGER(KIND=JPIM) :: ISLD ! starting latitude dual - -! Butterfly - -INTEGER(KIND=JPIM) :: MAXCOLS - END TYPE FLT_TYPE TYPE FLT_TYPE_WRAP From 51a372693bd7739d7d7f38c1933fecb18fec6bdd Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Thu, 12 Sep 2024 08:44:07 +0000 Subject: [PATCH 51/86] Remove unused flattened F_RN for GPU --- src/trans/gpu/external/setup_trans.F90 | 10 +++------- src/trans/gpu/internal/tpm_fields.F90 | 1 - src/trans/gpu/internal/vdtuv_mod.F90 | 6 +++--- 3 files changed, 6 insertions(+), 11 deletions(-) diff --git a/src/trans/gpu/external/setup_trans.F90 b/src/trans/gpu/external/setup_trans.F90 index 74e8fb5b7..afbaa0429 100755 --- a/src/trans/gpu/external/setup_trans.F90 +++ b/src/trans/gpu/external/setup_trans.F90 @@ -114,7 +114,7 @@ SUBROUTINE SETUP_TRANS(KSMAX,KDGL,KDLON,KLOEN,LDSPLIT,PSTRET,& & MYPROC, D_OFFSETS_GEMM1, D_OFFSETS_GEMM2 USE TPM_GEOMETRY, ONLY: G, GEOM_RESOL, G_NDGLU, G_NMEN, G_NMEN_MAX, G_NLOEN, & & G_NLOEN_MAX -USE TPM_FIELDS, ONLY: FIELDS_RESOL, F, F_RW, F_RN, F_RLAPIN, F_RACTHE, ZEPSNM, & +USE TPM_FIELDS, ONLY: FIELDS_RESOL, F, F_RW, F_RLAPIN, F_RACTHE, ZEPSNM, & & ZAA, ZAS, ZAA0, ZAS0, KMLOC0 USE TPM_FLT, ONLY: FLT_RESOL, S USE TPM_CTL, ONLY: CTL_RESOL, C @@ -574,7 +574,6 @@ SUBROUTINE SETUP_TRANS(KSMAX,KDGL,KDLON,KLOEN,LDSPLIT,PSTRET,& ALLOCATE(G_NLOEN(SIZE(G%NLOEN))) ALLOCATE(F_RW(SIZE(F%RW))) -ALLOCATE(F_RN(-1:SIZE(F%RN)-2)) ALLOCATE(F_RLAPIN(-1:SIZE(F%RLAPIN)-2)) ALLOCATE(F_RACTHE(SIZE(F%RACTHE))) @@ -686,21 +685,18 @@ SUBROUTINE SETUP_TRANS(KSMAX,KDGL,KDLON,KLOEN,LDSPLIT,PSTRET,& DO I=1,SIZE(F%RACTHE) F_RACTHE(I)=F%RACTHE(I) END DO -DO I=-1,SIZE(F%RN)-2 - F_RN(I)=F%RN(I) -END DO #ifdef ACCGPU !$ACC ENTER DATA COPYIN(R_NSMAX,R_NTMAX,R_NDGL,R_NDGNH,D_NSTAGT0B,D_NSTAGT1B,& !$ACC& D_NPNTGTB1,D_NPROCL,D_NUMP,D_NDGL_FS,D_MYMS,D_NASM0,D_NSTAGTF,D_MSTABF,& !$ACC& D_NPNTGTB0,D_NPROCM,D_NPTRLS,G_NDGLU,G_NMEN,G_NMEN_MAX,G_NLOEN,& -!$ACC& G_NLOEN_MAX,F_RW,F_RLAPIN,F_RN,F_RACTHE) +!$ACC& G_NLOEN_MAX,F_RW,F_RLAPIN,F_RACTHE) #endif #ifdef OMPGPU !$OMP TARGET ENTER DATA MAP(TO:R_NSMAX,R_NTMAX,R_NDGL,R_NDGNH,D_NSTAGT0B,D_NSTAGT1B) !$OMP TARGET ENTER DATA MAP(TO:D_NPNTGTB1,D_NPROCL,D_NUMP,D_NDGL_FS,D_MYMS,D_NASM0,D_NSTAGTF,D_MSTABF) !$OMP TARGET ENTER DATA MAP(TO:D_NPNTGTB0,D_NPROCM,D_NPTRLS,G_NDGLU,G_NMEN,G_NMEN_MAX,G_NLOEN) -!$OMP TARGET ENTER DATA MAP(TO:G_NLOEN_MAX,F_RW,F_RLAPIN,F_RN,F_RACTHE) +!$OMP TARGET ENTER DATA MAP(TO:G_NLOEN_MAX,F_RW,F_RLAPIN,F_RACTHE) #endif WRITE(NOUT,*) '===GPU arrays successfully allocated' diff --git a/src/trans/gpu/internal/tpm_fields.F90 b/src/trans/gpu/internal/tpm_fields.F90 index f7ecd425a..4b063f7d4 100755 --- a/src/trans/gpu/internal/tpm_fields.F90 +++ b/src/trans/gpu/internal/tpm_fields.F90 @@ -36,7 +36,6 @@ MODULE TPM_FIELDS !flat copies of the above REAL(KIND=JPRBT) ,ALLOCATABLE :: F_RW(:) ! Weights of the Gaussian quadrature -REAL(KIND=JPRBT) ,ALLOCATABLE :: F_RN(:) ! n (to avoid integer to real conversion) REAL(KIND=JPRBT) ,ALLOCATABLE :: F_RLAPIN(:) ! eigen-values of the inverse Laplace operator REAL(KIND=JPRBT) ,ALLOCATABLE :: F_RACTHE(:) ! eigen-values of the inverse Laplace operator diff --git a/src/trans/gpu/internal/vdtuv_mod.F90 b/src/trans/gpu/internal/vdtuv_mod.F90 index f008914d3..7cdb33ce9 100755 --- a/src/trans/gpu/internal/vdtuv_mod.F90 +++ b/src/trans/gpu/internal/vdtuv_mod.F90 @@ -15,7 +15,7 @@ SUBROUTINE VDTUV(KFIELD,PEPSNM,PVOR,PDIV,PU,PV) USE PARKIND_ECTRANS, ONLY: JPIM, JPRB, JPRBT USE TPM_DIM, ONLY: R, R_NTMAX -USE TPM_FIELDS, ONLY: F, F_RLAPIN, F_RN +USE TPM_FIELDS, ONLY: F, F_RLAPIN USE TPM_DISTR, ONLY: D, D_NUMP, D_MYMS !**** *VDTUV* - Compute U,V in spectral space @@ -87,14 +87,14 @@ SUBROUTINE VDTUV(KFIELD,PEPSNM,PVOR,PDIV,PU,PV) #ifdef ACCGPU !$ACC DATA & -!$ACC& PRESENT(R_NTMAX,D_MYMS,D_NUMP,F_RLAPIN,F_RN) & +!$ACC& PRESENT(R_NTMAX,D_MYMS,D_NUMP,F_RLAPIN) & !$ACC& PRESENT(PEPSNM, PVOR, PDIV) & !$ACC& PRESENT(PU, PV) #endif #ifdef OMPGPU !$OMP TARGET DATA & !$OMP& MAP (PRESENT,ALLOC:ZEPSNM, ZN, ZLAPIN) & -!$OMP& MAP (TO:R_NSMAX, D_MYMS,D_NUMP,F_RLAPIN,F_RN) & +!$OMP& MAP (TO:R_NSMAX, D_MYMS,D_NUMP,F_RLAPIN) & !$OMP& MAP(PRESENT,ALLOC:ZEPSNM, PVOR, PDIV) & !$OMP& MAP(PRESENT,ALLOC:PU, PV) #endif From 71a00c1707978e9ae17e7593fdd6f97fc43d6e95 Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Thu, 12 Sep 2024 07:10:00 +0000 Subject: [PATCH 52/86] Make F%RW (gaussian weights) double precision --- src/trans/cpu/internal/gpnorm_trans_ctl_mod.F90 | 4 ++-- src/trans/cpu/internal/ledir_mod.F90 | 6 +++--- src/trans/cpu/internal/ledirad_mod.F90 | 4 ++-- src/trans/cpu/internal/tpm_fields.F90 | 2 +- src/trans/gpu/external/trans_inq.F90 | 4 ++-- src/trans/gpu/internal/tpm_fields.F90 | 4 ++-- src/trans/gpu/internal/trltom_pack_unpack.F90 | 8 ++++---- 7 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/trans/cpu/internal/gpnorm_trans_ctl_mod.F90 b/src/trans/cpu/internal/gpnorm_trans_ctl_mod.F90 index cd4f43346..249afb20c 100644 --- a/src/trans/cpu/internal/gpnorm_trans_ctl_mod.F90 +++ b/src/trans/cpu/internal/gpnorm_trans_ctl_mod.F90 @@ -80,7 +80,7 @@ SUBROUTINE GPNORM_TRANS_CTL(PGP,KFIELDS,KPROMA,PAVE,PMIN,PMAX,LDAVE_ONLY,PW) INTEGER(KIND=JPIM),INTENT(IN) :: KFIELDS INTEGER(KIND=JPIM),INTENT(IN) :: KPROMA LOGICAL ,INTENT(IN) :: LDAVE_ONLY -REAL(KIND=JPRB) ,INTENT(IN) :: PW(R%NDGL) +REAL(KIND=JPRD) ,INTENT(IN) :: PW(R%NDGL) !ifndef INTERFACE @@ -216,7 +216,7 @@ SUBROUTINE GPNORM_TRANS_CTL(PGP,KFIELDS,KPROMA,PAVE,PMIN,PMAX,LDAVE_ONLY,PW) DO JGL=IBEG,IEND IGL = D%NPTRLS(MYSETW) + JGL - 1 DO JF=1,IF_FS - ZAVE(JF,JGL)=ZAVE(JF,JGL)*PW(IGL)/G%NLOEN(IGL) + ZAVE(JF,JGL)=ZAVE(JF,JGL)*REAL(PW(IGL),JPRB)/G%NLOEN(IGL) ENDDO ENDDO diff --git a/src/trans/cpu/internal/ledir_mod.F90 b/src/trans/cpu/internal/ledir_mod.F90 index 9c0e4e009..16cd783c5 100644 --- a/src/trans/cpu/internal/ledir_mod.F90 +++ b/src/trans/cpu/internal/ledir_mod.F90 @@ -75,7 +75,7 @@ SUBROUTINE LEDIR(KM,KMLOC,KFC,KIFC,KSL,KDGLU,KLED2,PAIA,PSIA,POA1,PW) INTEGER(KIND=JPIM), INTENT(IN) :: KDGLU INTEGER(KIND=JPIM), INTENT(IN) :: KLED2 -REAL(KIND=JPRB), INTENT(IN) :: PW(KDGLU+KSL-1) +REAL(KIND=JPRD), INTENT(IN) :: PW(KDGLU+KSL-1) REAL(KIND=JPRB), INTENT(IN) :: PSIA(:,:), PAIA(:,:) REAL(KIND=JPRB), INTENT(OUT) :: POA1(:,:) @@ -189,7 +189,7 @@ SUBROUTINE LEDIR(KM,KMLOC,KFC,KIFC,KSL,KDGLU,KLED2,PAIA,PSIA,POA1,PW) DO JK=1,KFC,ISKIP IFLD=IFLD+1 DO J=1,KDGLU - ZB(J,IFLD)=PSIA(JK,ISL+J-1)*PW(ISL+J-1) + ZB(J,IFLD)=PSIA(JK,ISL+J-1)*REAL(PW(ISL+J-1),JPRB) ENDDO ENDDO @@ -213,7 +213,7 @@ SUBROUTINE LEDIR(KM,KMLOC,KFC,KIFC,KSL,KDGLU,KLED2,PAIA,PSIA,POA1,PW) DO JK=1,KFC,ISKIP IFLD=IFLD+1 DO J=1,KDGLU - ZB_D(J,IFLD)=PSIA(JK,ISL+J-1)*PW(ISL+J-1) + ZB_D(J,IFLD)=PSIA(JK,ISL+J-1)*REAL(PW(ISL+J-1),JPRB) ENDDO ENDDO DO I3=1,I1 diff --git a/src/trans/cpu/internal/ledirad_mod.F90 b/src/trans/cpu/internal/ledirad_mod.F90 index 96decfe87..de43a1435 100644 --- a/src/trans/cpu/internal/ledirad_mod.F90 +++ b/src/trans/cpu/internal/ledirad_mod.F90 @@ -148,7 +148,7 @@ SUBROUTINE LEDIRAD(KM,KMLOC,KFC,KIFC,KDGLU,KLED2,PAIA,PSIA,POA1) DO JK=1,KFC,ISKIP IFLD=IFLD+1 DO J=1,KDGLU - PAIA(JK,ISL+J-1) = ZB(J,IFLD)*F%RW(ISL+J-1) + PAIA(JK,ISL+J-1) = ZB(J,IFLD)*REAL(F%RW(ISL+J-1),JPRB) ENDDO ENDDO @@ -180,7 +180,7 @@ SUBROUTINE LEDIRAD(KM,KMLOC,KFC,KIFC,KDGLU,KLED2,PAIA,PSIA,POA1) DO JK=1,KFC,ISKIP IFLD=IFLD+1 DO J=1,KDGLU - PSIA(JK,ISL+J-1) = ZB(J,IFLD)*F%RW(ISL+J-1) + PSIA(JK,ISL+J-1) = ZB(J,IFLD)*REAL(F%RW(ISL+J-1),JPRB) ENDDO ENDDO diff --git a/src/trans/cpu/internal/tpm_fields.F90 b/src/trans/cpu/internal/tpm_fields.F90 index 7a9ffb129..3e6b48767 100644 --- a/src/trans/cpu/internal/tpm_fields.F90 +++ b/src/trans/cpu/internal/tpm_fields.F90 @@ -19,7 +19,7 @@ MODULE TPM_FIELDS TYPE FIELDS_TYPE REAL(KIND=JPRD) ,ALLOCATABLE :: RPNM(:,:) ! Legendre polynomials REAL(KIND=JPRD) ,ALLOCATABLE :: RMU(:) ! sin(theta) for Gaussian latitudes -REAL(KIND=JPRB) ,ALLOCATABLE :: RW(:) ! Weights of the Gaussian quadrature +REAL(KIND=JPRD) ,ALLOCATABLE :: RW(:) ! Weights of the Gaussian quadrature REAL(KIND=JPRB) ,ALLOCATABLE :: R1MU2(:) ! 1.-MU*MU, cos(theta)**2 REAL(KIND=JPRB) ,ALLOCATABLE :: RACTHE(:) ! 1./SQRT(R1MU2), 1/(cos(theta)) diff --git a/src/trans/gpu/external/trans_inq.F90 b/src/trans/gpu/external/trans_inq.F90 index 6000d1d62..fb540d949 100755 --- a/src/trans/gpu/external/trans_inq.F90 +++ b/src/trans/gpu/external/trans_inq.F90 @@ -176,7 +176,7 @@ SUBROUTINE TRANS_INQ(KRESOL,KSPEC,KSPEC2,KSPEC2G,KSPEC2MX,KNUMP,& INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KNMENG(:) REAL(KIND=JPRD) ,OPTIONAL, INTENT(OUT) :: PMU(:) -REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PGW(:) +REAL(KIND=JPRD) ,OPTIONAL, INTENT(OUT) :: PGW(:) REAL(KIND=JPRB) ,OPTIONAL, INTENT(OUT) :: PRPNM(:,:) INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KLEI3 INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(OUT) :: KSPOLEGL @@ -417,7 +417,7 @@ SUBROUTINE TRANS_INQ(KRESOL,KSPEC,KSPEC2,KSPEC2G,KSPEC2MX,KNUMP,& IF(UBOUND(PGW,1) < R%NDGL) THEN CALL ABORT_TRANS('TRANS_INQ: PGW TOO SMALL') ELSE - PGW(1:R%NDGL) = REAl(F%RW,JPRB) + PGW(1:R%NDGL) = F%RW ENDIF ENDIF diff --git a/src/trans/gpu/internal/tpm_fields.F90 b/src/trans/gpu/internal/tpm_fields.F90 index 4b063f7d4..e37851db4 100755 --- a/src/trans/gpu/internal/tpm_fields.F90 +++ b/src/trans/gpu/internal/tpm_fields.F90 @@ -21,7 +21,7 @@ MODULE TPM_FIELDS TYPE FIELDS_TYPE REAL(KIND=JPRD) ,ALLOCATABLE :: RPNM(:,:) ! Legendre polynomials REAL(KIND=JPRD) ,ALLOCATABLE :: RMU(:) ! sin(theta) for Gaussian latitudes -REAL(KIND=JPRBT) ,ALLOCATABLE :: RW(:) ! Weights of the Gaussian quadrature +REAL(KIND=JPRD) ,ALLOCATABLE :: RW(:) ! Weights of the Gaussian quadrature REAL(KIND=JPRBT) ,ALLOCATABLE :: R1MU2(:) ! 1.-MU*MU, cos(theta)**2 REAL(KIND=JPRBT) ,ALLOCATABLE :: RACTHE(:) ! 1./SQRT(R1MU2), 1/(cos(theta)) @@ -35,7 +35,7 @@ MODULE TPM_FIELDS END TYPE FIELDS_TYPE !flat copies of the above -REAL(KIND=JPRBT) ,ALLOCATABLE :: F_RW(:) ! Weights of the Gaussian quadrature +REAL(KIND=JPRD) ,ALLOCATABLE :: F_RW(:) ! Weights of the Gaussian quadrature REAL(KIND=JPRBT) ,ALLOCATABLE :: F_RLAPIN(:) ! eigen-values of the inverse Laplace operator REAL(KIND=JPRBT) ,ALLOCATABLE :: F_RACTHE(:) ! eigen-values of the inverse Laplace operator diff --git a/src/trans/gpu/internal/trltom_pack_unpack.F90 b/src/trans/gpu/internal/trltom_pack_unpack.F90 index 3425f01fc..581ae3d5b 100755 --- a/src/trans/gpu/internal/trltom_pack_unpack.F90 +++ b/src/trans/gpu/internal/trltom_pack_unpack.F90 @@ -250,12 +250,12 @@ SUBROUTINE TRLTOM_UNPACK(ALLOCATOR,HTRLTOM_UNPACK,FOUBUF,ZINPS,ZINPA,ZINPS0,ZINP PAIS = PAIS*F_RACTHE(JGL) ENDIF IF (KM /= 0) THEN - ZINPA(JF+(JGL-ISL)*IIN_STRIDES0+IIN_STRIDES0*D_OFFSETS_GEMM1(KMLOC))=PAIA*F_RW(JGL) - ZINPS(JF+(JGL-ISL)*IIN_STRIDES0+IIN_STRIDES0*D_OFFSETS_GEMM1(KMLOC))=PAIS*F_RW(JGL) + ZINPA(JF+(JGL-ISL)*IIN_STRIDES0+IIN_STRIDES0*D_OFFSETS_GEMM1(KMLOC))=PAIA*REAL(F_RW(JGL),JPRBT) + ZINPS(JF+(JGL-ISL)*IIN_STRIDES0+IIN_STRIDES0*D_OFFSETS_GEMM1(KMLOC))=PAIS*REAL(F_RW(JGL),JPRBT) ELSEIF (MOD(JF-1,2) == 0) THEN ! every other field is sufficient because Im(KM=0) == 0 - ZINPA0((JF-1)/2+1+(JGL-1)*IIN0_STRIDES0)=PAIA*F_RW(JGL) - ZINPS0((JF-1)/2+1+(JGL-1)*IIN0_STRIDES0)=PAIS*F_RW(JGL) + ZINPA0((JF-1)/2+1+(JGL-1)*IIN0_STRIDES0)=PAIA*REAL(F_RW(JGL),JPRBT) + ZINPS0((JF-1)/2+1+(JGL-1)*IIN0_STRIDES0)=PAIS*REAL(F_RW(JGL),JPRBT) ENDIF ENDIF ENDDO From 5bfd6f1c0bcb03bf9a68436cd7e8d182ab19ac16 Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Thu, 12 Sep 2024 07:22:31 +0000 Subject: [PATCH 53/86] Make F%R1MU2 (cos(theta)^2) double precision --- src/trans/cpu/internal/tpm_fields.F90 | 2 +- src/trans/gpu/internal/tpm_fields.F90 | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/trans/cpu/internal/tpm_fields.F90 b/src/trans/cpu/internal/tpm_fields.F90 index 3e6b48767..876a5ea53 100644 --- a/src/trans/cpu/internal/tpm_fields.F90 +++ b/src/trans/cpu/internal/tpm_fields.F90 @@ -20,7 +20,7 @@ MODULE TPM_FIELDS REAL(KIND=JPRD) ,ALLOCATABLE :: RPNM(:,:) ! Legendre polynomials REAL(KIND=JPRD) ,ALLOCATABLE :: RMU(:) ! sin(theta) for Gaussian latitudes REAL(KIND=JPRD) ,ALLOCATABLE :: RW(:) ! Weights of the Gaussian quadrature -REAL(KIND=JPRB) ,ALLOCATABLE :: R1MU2(:) ! 1.-MU*MU, cos(theta)**2 +REAL(KIND=JPRD) ,ALLOCATABLE :: R1MU2(:) ! 1.-MU*MU, cos(theta)**2 REAL(KIND=JPRB) ,ALLOCATABLE :: RACTHE(:) ! 1./SQRT(R1MU2), 1/(cos(theta)) REAL(KIND=JPRB) ,ALLOCATABLE :: REPSNM(:) ! eps(n,m) used in the Legendre transforms diff --git a/src/trans/gpu/internal/tpm_fields.F90 b/src/trans/gpu/internal/tpm_fields.F90 index e37851db4..9a3c46f6c 100755 --- a/src/trans/gpu/internal/tpm_fields.F90 +++ b/src/trans/gpu/internal/tpm_fields.F90 @@ -22,7 +22,7 @@ MODULE TPM_FIELDS REAL(KIND=JPRD) ,ALLOCATABLE :: RPNM(:,:) ! Legendre polynomials REAL(KIND=JPRD) ,ALLOCATABLE :: RMU(:) ! sin(theta) for Gaussian latitudes REAL(KIND=JPRD) ,ALLOCATABLE :: RW(:) ! Weights of the Gaussian quadrature -REAL(KIND=JPRBT) ,ALLOCATABLE :: R1MU2(:) ! 1.-MU*MU, cos(theta)**2 +REAL(KIND=JPRD) ,ALLOCATABLE :: R1MU2(:) ! 1.-MU*MU, cos(theta)**2 REAL(KIND=JPRBT) ,ALLOCATABLE :: RACTHE(:) ! 1./SQRT(R1MU2), 1/(cos(theta)) REAL(KIND=JPRBT) ,ALLOCATABLE :: REPSNM(:) ! eps(n,m) used in the Legendre transforms From 9c0a9425c2e0b1105ac1aed679e770cb3ac88c1e Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Thu, 12 Sep 2024 07:44:15 +0000 Subject: [PATCH 54/86] Make F%RACTHE (1/cos(theta)) double precision --- src/trans/cpu/internal/fsc_mod.F90 | 6 +++--- src/trans/cpu/internal/fscad_mod.F90 | 2 +- src/trans/cpu/internal/ldfou2_mod.F90 | 8 ++++++-- src/trans/cpu/internal/ldfou2ad_mod.F90 | 8 ++++++-- src/trans/cpu/internal/ltinv_mod.F90 | 7 +++++-- src/trans/cpu/internal/tpm_fields.F90 | 4 ++-- src/trans/gpu/internal/fsc_mod.F90 | 8 ++++---- src/trans/gpu/internal/tpm_fields.F90 | 6 +++--- src/trans/gpu/internal/trltom_pack_unpack.F90 | 4 ++-- 9 files changed, 32 insertions(+), 21 deletions(-) diff --git a/src/trans/cpu/internal/fsc_mod.F90 b/src/trans/cpu/internal/fsc_mod.F90 index 37ef4a0fb..9246d69a2 100644 --- a/src/trans/cpu/internal/fsc_mod.F90 +++ b/src/trans/cpu/internal/fsc_mod.F90 @@ -73,15 +73,15 @@ SUBROUTINE FSC(KGL,KF_UV,KF_SCALARS,KF_SCDERS,& ! ------------------------------------------------------------------ IGLG = D%NPTRLS(MYSETW)+KGL-1 -ZACHTE = F%RACTHE(IGLG) +ZACHTE = REAL(F%RACTHE(IGLG),JPRB) IMEN = G%NMEN(IGLG) ISTAGTF = D%NSTAGTF(KGL) -ZACHTE2 = F%RACTHE(IGLG) +ZACHTE2 = REAL(F%RACTHE(IGLG),JPRB) IF( LATLON.AND.S%LDLL ) THEN ZPI = 2.0_JPRB*ASIN(1.0_JPRB) ZACHTE2 = 1._JPRB - ZACHTE = F%RACTHE2(IGLG) + ZACHTE = REAL(F%RACTHE2(IGLG),JPRB) ! apply shift for (even) lat-lon output grid IF( S%LSHIFTLL ) THEN diff --git a/src/trans/cpu/internal/fscad_mod.F90 b/src/trans/cpu/internal/fscad_mod.F90 index 390b4a3bd..0e2ff2a82 100644 --- a/src/trans/cpu/internal/fscad_mod.F90 +++ b/src/trans/cpu/internal/fscad_mod.F90 @@ -72,7 +72,7 @@ SUBROUTINE FSCAD(KGL,KF_UV,KF_SCALARS,KF_SCDERS,& ! ------------------------------------------------------------------ IGLG = D%NPTRLS(MYSETW)+KGL-1 -ZACHTE = F%RACTHE(IGLG) +ZACHTE = REAL(F%RACTHE(IGLG),JPRB) IMEN = G%NMEN(IGLG) ISTAGTF = D%NSTAGTF(KGL) diff --git a/src/trans/cpu/internal/ldfou2_mod.F90 b/src/trans/cpu/internal/ldfou2_mod.F90 index 4a63132a3..e5bd06272 100644 --- a/src/trans/cpu/internal/ldfou2_mod.F90 +++ b/src/trans/cpu/internal/ldfou2_mod.F90 @@ -70,6 +70,9 @@ SUBROUTINE LDFOU2(KM,KF_UV,PAIA,PSIA) REAL(KIND=JPRB) ,INTENT(INOUT) :: PSIA(:,:), PAIA(:,:) +! LOCAL REAL SCALARS +REAL(KIND=JPRB) :: ZACTHE + ! LOCAL INTEGER SCALARS INTEGER(KIND=JPIM) :: J, JGL ,IFLD ,ISL @@ -85,9 +88,10 @@ SUBROUTINE LDFOU2(KM,KF_UV,PAIA,PSIA) !* 1.1 U AND V DO JGL=ISL,R%NDGNH + ZACTHE = REAL(F%RACTHE(JGL),JPRB) DO J=1,IFLD - PAIA(J,JGL) = PAIA(J,JGL)*F%RACTHE(JGL) - PSIA(J,JGL) = PSIA(J,JGL)*F%RACTHE(JGL) + PAIA(J,JGL) = PAIA(J,JGL)*ZACTHE + PSIA(J,JGL) = PSIA(J,JGL)*ZACTHE ENDDO ENDDO diff --git a/src/trans/cpu/internal/ldfou2ad_mod.F90 b/src/trans/cpu/internal/ldfou2ad_mod.F90 index 681f686e4..44ca1ad6a 100644 --- a/src/trans/cpu/internal/ldfou2ad_mod.F90 +++ b/src/trans/cpu/internal/ldfou2ad_mod.F90 @@ -70,6 +70,9 @@ SUBROUTINE LDFOU2AD(KM,KF_UV,PAIA,PSIA) REAL(KIND=JPRB) ,INTENT(INOUT) :: PSIA(:,:), PAIA(:,:) +! LOCAL REAL SCALARS +REAL(KIND=JPRB) :: ZACTHE + ! LOCAL INTEGER SCALARS INTEGER(KIND=JPIM) :: J, JGL ,IFLD ,ISL @@ -85,9 +88,10 @@ SUBROUTINE LDFOU2AD(KM,KF_UV,PAIA,PSIA) !* 1.1 U AND V DO JGL=ISL,R%NDGNH + ZACTHE = REAL(F%RACTHE(JGL),JPRB) DO J=1,IFLD - PAIA(J,JGL) = PAIA(J,JGL)*F%RACTHE(JGL) - PSIA(J,JGL) = PSIA(J,JGL)*F%RACTHE(JGL) + PAIA(J,JGL) = PAIA(J,JGL)*ZACTHE + PSIA(J,JGL) = PSIA(J,JGL)*ZACTHE ENDDO ENDDO diff --git a/src/trans/cpu/internal/ltinv_mod.F90 b/src/trans/cpu/internal/ltinv_mod.F90 index 61464556d..963193b67 100644 --- a/src/trans/cpu/internal/ltinv_mod.F90 +++ b/src/trans/cpu/internal/ltinv_mod.F90 @@ -109,6 +109,7 @@ SUBROUTINE LTINV(KM,KMLOC,KF_OUT_LT,KF_UV,KF_SCALARS,KF_SCDERS,KLEI2,KDIM1,& EXTERNAL FSPGL_PROC OPTIONAL FSPGL_PROC +REAL(KIND=JPRB) :: ZACTHE REAL(KIND=JPRB) :: ZIA(R%NLEI1,KLEI2) REAL(KIND=JPRB) :: ZEPSNM(0:R%NTMAX+2) !REAL(KIND=JPRB) :: ZSOA1(KDIM1,R%NLEI3),ZAOA1(KDIM1,R%NLEI3) @@ -273,15 +274,17 @@ SUBROUTINE LTINV(KM,KMLOC,KF_OUT_LT,KF_UV,KF_SCALARS,KF_SCDERS,KLEI2,KDIM1,& IGLS = 2*R%NDGNH - ISL + 1 IF( KF_UV > 0 ) THEN DO JGL=ISL, IGLS + ZACTHE = REAL(F%RACTHE(JGL),JPRB) DO JFLD=IUVS,IUVE - ZALN(JFLD, JGL) = ZALN(JFLD,JGL)*F%RACTHE(JGL) + ZALN(JFLD, JGL) = ZALN(JFLD,JGL)*ZACTHE ENDDO ENDDO ENDIF IF( KF_SCDERS > 0 ) THEN DO JGL=ISL, IGLS + ZACTHE = REAL(F%RACTHE(JGL),JPRB) DO JFLD=INSDS,INSDE - ZALN(JFLD, JGL) = ZALN(JFLD,JGL)*F%RACTHE(JGL) + ZALN(JFLD, JGL) = ZALN(JFLD,JGL)*ZACTHE ENDDO ENDDO ENDIF diff --git a/src/trans/cpu/internal/tpm_fields.F90 b/src/trans/cpu/internal/tpm_fields.F90 index 876a5ea53..9d71a3615 100644 --- a/src/trans/cpu/internal/tpm_fields.F90 +++ b/src/trans/cpu/internal/tpm_fields.F90 @@ -21,7 +21,7 @@ MODULE TPM_FIELDS REAL(KIND=JPRD) ,ALLOCATABLE :: RMU(:) ! sin(theta) for Gaussian latitudes REAL(KIND=JPRD) ,ALLOCATABLE :: RW(:) ! Weights of the Gaussian quadrature REAL(KIND=JPRD) ,ALLOCATABLE :: R1MU2(:) ! 1.-MU*MU, cos(theta)**2 -REAL(KIND=JPRB) ,ALLOCATABLE :: RACTHE(:) ! 1./SQRT(R1MU2), 1/(cos(theta)) +REAL(KIND=JPRD) ,ALLOCATABLE :: RACTHE(:) ! 1./SQRT(R1MU2), 1/(cos(theta)) REAL(KIND=JPRB) ,ALLOCATABLE :: REPSNM(:) ! eps(n,m) used in the Legendre transforms REAL(KIND=JPRB) ,ALLOCATABLE :: RN(:) ! n (to avoid integer to real conversion) @@ -29,7 +29,7 @@ MODULE TPM_FIELDS INTEGER(KIND=JPIM) ,ALLOCATABLE :: NLTN(:) ! R%NTMAX+2-JN REAL(KIND=JPRB) ,ALLOCATABLE :: RMU2(:) ! sin(theta) for dual input/output latitudes -REAL(KIND=JPRB) ,ALLOCATABLE :: RACTHE2(:) ! 1./SQRT(R1MU2), 1/(cos(theta)) dual input/output latitudes +REAL(KIND=JPRD) ,ALLOCATABLE :: RACTHE2(:) ! 1./SQRT(R1MU2), 1/(cos(theta)) dual input/output latitudes END TYPE FIELDS_TYPE TYPE(FIELDS_TYPE),ALLOCATABLE,TARGET :: FIELDS_RESOL(:) diff --git a/src/trans/gpu/internal/fsc_mod.F90 b/src/trans/gpu/internal/fsc_mod.F90 index d5a06641b..0731b01c0 100755 --- a/src/trans/gpu/internal/fsc_mod.F90 +++ b/src/trans/gpu/internal/fsc_mod.F90 @@ -134,7 +134,7 @@ SUBROUTINE FSC(ALLOCATOR,HFSC,PREEL_COMPLEX, KF_FS, KF_UV, KF_SCALARS, KUV_OFFSE IOFF_LAT = KF_FS*D_NSTAGTF(KGL) IOFF_UV = IOFF_LAT+(KUV_OFFSET+JF-1)*(D_NSTAGTF(KGL+1)-D_NSTAGTF(KGL)) - ZACHTE2 = F_RACTHE(IGLG) + ZACHTE2 = REAL(F_RACTHE(IGLG),JPRBT) PREEL_COMPLEX(IOFF_UV+2*JM+1) = & & PREEL_COMPLEX(IOFF_UV+2*JM+1)*ZACHTE2 @@ -163,7 +163,7 @@ SUBROUTINE FSC(ALLOCATOR,HFSC,PREEL_COMPLEX, KF_FS, KF_UV, KF_SCALARS, KUV_OFFSE IOFF_LAT = KF_FS*D_NSTAGTF(KGL) IOFF_KSCALARS_NSDER = IOFF_LAT+(KSCALARS_NSDER_OFFSET+JF-1)*(D_NSTAGTF(KGL+1)-D_NSTAGTF(KGL)) - ZACHTE2 = F_RACTHE(IGLG) + ZACHTE2 = REAL(F_RACTHE(IGLG),JPRBT) PREEL_COMPLEX(IOFF_KSCALARS_NSDER+2*JM+1) = & & PREEL_COMPLEX(IOFF_KSCALARS_NSDER+2*JM+1)*ZACHTE2 @@ -206,7 +206,7 @@ SUBROUTINE FSC(ALLOCATOR,HFSC,PREEL_COMPLEX, KF_FS, KF_UV, KF_SCALARS, KUV_OFFSE RET_COMPLEX = 0.0_JPRBT IF (JM <= G_NMEN(IGLG)) THEN - ZACHTE2 = F_RACTHE(IGLG) + ZACHTE2 = REAL(F_RACTHE(IGLG),JPRBT) RET_REAL = & & -PREEL_COMPLEX(IOFF_UV+2*JM+2)*ZACHTE2*REAL(JM,JPRBT) @@ -247,7 +247,7 @@ SUBROUTINE FSC(ALLOCATOR,HFSC,PREEL_COMPLEX, KF_FS, KF_UV, KF_SCALARS, KUV_OFFSE RET_COMPLEX = 0.0_JPRBT IF (JM <= G_NMEN(IGLG)) THEN - ZACHTE2 = F_RACTHE(IGLG) + ZACHTE2 = REAL(F_RACTHE(IGLG),JPRBT) RET_REAL = & & -PREEL_COMPLEX(IOFF_SCALARS+2*JM+2)*ZACHTE2*REAL(JM,JPRBT) diff --git a/src/trans/gpu/internal/tpm_fields.F90 b/src/trans/gpu/internal/tpm_fields.F90 index 9a3c46f6c..17a0bb03e 100755 --- a/src/trans/gpu/internal/tpm_fields.F90 +++ b/src/trans/gpu/internal/tpm_fields.F90 @@ -23,7 +23,7 @@ MODULE TPM_FIELDS REAL(KIND=JPRD) ,ALLOCATABLE :: RMU(:) ! sin(theta) for Gaussian latitudes REAL(KIND=JPRD) ,ALLOCATABLE :: RW(:) ! Weights of the Gaussian quadrature REAL(KIND=JPRD) ,ALLOCATABLE :: R1MU2(:) ! 1.-MU*MU, cos(theta)**2 -REAL(KIND=JPRBT) ,ALLOCATABLE :: RACTHE(:) ! 1./SQRT(R1MU2), 1/(cos(theta)) +REAL(KIND=JPRD) ,ALLOCATABLE :: RACTHE(:) ! 1./SQRT(R1MU2), 1/(cos(theta)) REAL(KIND=JPRBT) ,ALLOCATABLE :: REPSNM(:) ! eps(n,m) used in the Legendre transforms REAL(KIND=JPRBT) ,ALLOCATABLE :: RN(:) ! n (to avoid integer to real conversion) @@ -31,13 +31,13 @@ MODULE TPM_FIELDS INTEGER(KIND=JPIM) ,ALLOCATABLE :: NLTN(:) ! R%NTMAX+2-JN REAL(KIND=JPRBT) ,ALLOCATABLE :: RMU2(:) ! sin(theta) for dual input/output latitudes -REAL(KIND=JPRBT) ,ALLOCATABLE :: RACTHE2(:)! 1./SQRT(R1MU2), 1/(cos(theta)) dual input/output latitudes +REAL(KIND=JPRD) ,ALLOCATABLE :: RACTHE2(:)! 1./SQRT(R1MU2), 1/(cos(theta)) dual input/output latitudes END TYPE FIELDS_TYPE !flat copies of the above REAL(KIND=JPRD) ,ALLOCATABLE :: F_RW(:) ! Weights of the Gaussian quadrature REAL(KIND=JPRBT) ,ALLOCATABLE :: F_RLAPIN(:) ! eigen-values of the inverse Laplace operator -REAL(KIND=JPRBT) ,ALLOCATABLE :: F_RACTHE(:) ! eigen-values of the inverse Laplace operator +REAL(KIND=JPRD) ,ALLOCATABLE :: F_RACTHE(:) ! eigen-values of the inverse Laplace operator TYPE(FIELDS_TYPE),ALLOCATABLE,TARGET :: FIELDS_RESOL(:) TYPE(FIELDS_TYPE),POINTER :: F diff --git a/src/trans/gpu/internal/trltom_pack_unpack.F90 b/src/trans/gpu/internal/trltom_pack_unpack.F90 index 581ae3d5b..e208f4ebc 100755 --- a/src/trans/gpu/internal/trltom_pack_unpack.F90 +++ b/src/trans/gpu/internal/trltom_pack_unpack.F90 @@ -246,8 +246,8 @@ SUBROUTINE TRLTOM_UNPACK(ALLOCATOR,HTRLTOM_UNPACK,FOUBUF,ZINPS,ZINPA,ZINPS0,ZINP PAIS = FOUBUF(OFFSET1+JF)+FOUBUF(OFFSET2+JF) IF (JF <= 4*KF_UV) THEN ! Multiply in case of velocity - PAIA = PAIA*F_RACTHE(JGL) - PAIS = PAIS*F_RACTHE(JGL) + PAIA = PAIA*REAL(F_RACTHE(JGL),JPRBT) + PAIS = PAIS*REAL(F_RACTHE(JGL),JPRBT) ENDIF IF (KM /= 0) THEN ZINPA(JF+(JGL-ISL)*IIN_STRIDES0+IIN_STRIDES0*D_OFFSETS_GEMM1(KMLOC))=PAIA*REAL(F_RW(JGL),JPRBT) From 99203584fae02336a59714b81e752bb49f759131 Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Thu, 12 Sep 2024 08:04:51 +0000 Subject: [PATCH 55/86] Make F%RMU2 (sin(theta)) double precision --- src/trans/cpu/algor/seefmm_mix.F90 | 4 ++-- src/trans/cpu/external/trans_pnm.F90 | 4 ++-- src/trans/cpu/internal/tpm_fields.F90 | 2 +- src/trans/gpu/algor/seefmm_mix.F90 | 6 +++--- src/trans/gpu/internal/tpm_fields.F90 | 2 +- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/trans/cpu/algor/seefmm_mix.F90 b/src/trans/cpu/algor/seefmm_mix.F90 index c278eb1a7..f5ad3062b 100644 --- a/src/trans/cpu/algor/seefmm_mix.F90 +++ b/src/trans/cpu/algor/seefmm_mix.F90 @@ -86,7 +86,7 @@ recursive subroutine setup_seefmm(kx,px,ky,py,ydfmm,pdiff) integer(kind=jpim),intent(in) :: kx real(kind=jprd) ,intent(in) :: px(:) integer(kind=jpim),intent(in) :: ky -real(kind=jprb) ,intent(in) :: py(:) +real(kind=jprd) ,intent(in) :: py(:) type(fmm_type) ,intent(out) :: ydfmm real(kind=jprb),optional,intent(in) :: pdiff(:,:) @@ -461,7 +461,7 @@ recursive subroutine comb_xy(kx,px,ky,py,kxy,pxy,kindex) integer(kind=jpim), intent(in) :: kx,ky real(kind=jprd), intent(in) :: px(:) -real(kind=jprb), intent(in) :: py(:) +real(kind=jprd), intent(in) :: py(:) integer(kind=jpim), intent(in) :: kxy real(kind=jprb), intent(out) :: pxy(:) integer(kind=jpim), intent(out) :: kindex(:) diff --git a/src/trans/cpu/external/trans_pnm.F90 b/src/trans/cpu/external/trans_pnm.F90 index 267e0b467..917f71230 100644 --- a/src/trans/cpu/external/trans_pnm.F90 +++ b/src/trans/cpu/external/trans_pnm.F90 @@ -167,7 +167,7 @@ SUBROUTINE TRANS_PNM(KRESOL,KM,PRPNM,LDTRANSPOSE,LDCHEAP) !$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JGL,ZLPOL,JI,JN) DO JGL=1,IDGLU - CALL SUPOLF(KM,INMAX,REAL (F%RMU(ISL+JGL-1), JPRD),ZLPOL(0:INMAX),KCHEAP=ICHEAP_ANTISYM) + CALL SUPOLF(KM,INMAX,F%RMU(ISL+JGL-1),ZLPOL(0:INMAX),KCHEAP=ICHEAP_ANTISYM) IF (LLTRANSPOSE) THEN DO JI=1,ILA PRPNM(IA+(JI-1)*2,ISL+JGL-1) = ZLPOL(KM+2*(ILA-JI)+1) @@ -177,7 +177,7 @@ SUBROUTINE TRANS_PNM(KRESOL,KM,PRPNM,LDTRANSPOSE,LDCHEAP) PRPNM(ISL+JGL-1,IA+(JI-1)*2) = ZLPOL(KM+2*(ILA-JI)+1) ENDDO ENDIF - CALL SUPOLF(KM,INMAX,REAL (F%RMU(ISL+JGL-1), JPRD),ZLPOL(0:INMAX),KCHEAP=ICHEAP_SYM) + CALL SUPOLF(KM,INMAX,F%RMU(ISL+JGL-1),ZLPOL(0:INMAX),KCHEAP=ICHEAP_SYM) IF (LLTRANSPOSE) THEN DO JI=1,ILS PRPNM(IS+(JI-1)*2,ISL+JGL-1) = ZLPOL(KM+2*(ILS-JI)) diff --git a/src/trans/cpu/internal/tpm_fields.F90 b/src/trans/cpu/internal/tpm_fields.F90 index 9d71a3615..2b2d503ed 100644 --- a/src/trans/cpu/internal/tpm_fields.F90 +++ b/src/trans/cpu/internal/tpm_fields.F90 @@ -28,7 +28,7 @@ MODULE TPM_FIELDS REAL(KIND=JPRB) ,ALLOCATABLE :: RLAPIN(:) ! eigen-values of the inverse Laplace operator INTEGER(KIND=JPIM) ,ALLOCATABLE :: NLTN(:) ! R%NTMAX+2-JN -REAL(KIND=JPRB) ,ALLOCATABLE :: RMU2(:) ! sin(theta) for dual input/output latitudes +REAL(KIND=JPRD) ,ALLOCATABLE :: RMU2(:) ! sin(theta) for dual input/output latitudes REAL(KIND=JPRD) ,ALLOCATABLE :: RACTHE2(:) ! 1./SQRT(R1MU2), 1/(cos(theta)) dual input/output latitudes END TYPE FIELDS_TYPE diff --git a/src/trans/gpu/algor/seefmm_mix.F90 b/src/trans/gpu/algor/seefmm_mix.F90 index 2cf7e2714..e82e66060 100644 --- a/src/trans/gpu/algor/seefmm_mix.F90 +++ b/src/trans/gpu/algor/seefmm_mix.F90 @@ -81,7 +81,7 @@ recursive subroutine setup_seefmm(kx,px,ky,py,ydfmm,pdiff) integer(kind=jpim),intent(in) :: kx real(kind=jprd) ,intent(in) :: px(:) integer(kind=jpim),intent(in) :: ky -real(kind=JPRBT) ,intent(in) :: py(:) +real(kind=jprd) ,intent(in) :: py(:) type(fmm_type) ,intent(out) :: ydfmm real(kind=JPRBT),optional,intent(in) :: pdiff(:,:) @@ -456,9 +456,9 @@ recursive subroutine comb_xy(kx,px,ky,py,kxy,pxy,kindex) integer(kind=jpim), intent(in) :: kx,ky real(kind=jprd), intent(in) :: px(:) -real(kind=JPRBT), intent(in) :: py(:) +real(kind=jprd), intent(in) :: py(:) integer(kind=jpim), intent(in) :: kxy -real(kind=JPRBT), intent(out) :: pxy(:) +real(kind=JPRBT), intent(out) :: pxy(:) integer(kind=jpim), intent(out) :: kindex(:) integer(kind=jpim) :: jxy,ix,iy,iret diff --git a/src/trans/gpu/internal/tpm_fields.F90 b/src/trans/gpu/internal/tpm_fields.F90 index 17a0bb03e..435c05ca8 100755 --- a/src/trans/gpu/internal/tpm_fields.F90 +++ b/src/trans/gpu/internal/tpm_fields.F90 @@ -30,7 +30,7 @@ MODULE TPM_FIELDS REAL(KIND=JPRBT) ,ALLOCATABLE :: RLAPIN(:) ! eigen-values of the inverse Laplace operator INTEGER(KIND=JPIM) ,ALLOCATABLE :: NLTN(:) ! R%NTMAX+2-JN -REAL(KIND=JPRBT) ,ALLOCATABLE :: RMU2(:) ! sin(theta) for dual input/output latitudes +REAL(KIND=JPRD) ,ALLOCATABLE :: RMU2(:) ! sin(theta) for dual input/output latitudes REAL(KIND=JPRD) ,ALLOCATABLE :: RACTHE2(:)! 1./SQRT(R1MU2), 1/(cos(theta)) dual input/output latitudes END TYPE FIELDS_TYPE From 591c94e106d87b105106a9f90f2ba4b67de6d697 Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Thu, 12 Sep 2024 08:33:27 +0000 Subject: [PATCH 56/86] Make F%REPSNM double precision, but not yet flattened GPU version ZEPSNM --- src/trans/cpu/internal/prepsnm_mod.F90 | 2 +- src/trans/cpu/internal/tpm_fields.F90 | 2 +- src/trans/gpu/internal/prepsnm_mod.F90 | 2 +- src/trans/gpu/internal/tpm_fields.F90 | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/trans/cpu/internal/prepsnm_mod.F90 b/src/trans/cpu/internal/prepsnm_mod.F90 index b7a99fd45..2870af214 100644 --- a/src/trans/cpu/internal/prepsnm_mod.F90 +++ b/src/trans/cpu/internal/prepsnm_mod.F90 @@ -76,7 +76,7 @@ SUBROUTINE PREPSNM(KM,KMLOC,PEPSNM) ENDIF DO JN=KM,R%NTMAX+2 - PEPSNM(JN) = F%REPSNM(D%NPMT(KM)+KMLOC-KM+JN) + PEPSNM(JN) = REAL(F%REPSNM(D%NPMT(KM)+KMLOC-KM+JN),JPRB) ENDDO ! ------------------------------------------------------------------ diff --git a/src/trans/cpu/internal/tpm_fields.F90 b/src/trans/cpu/internal/tpm_fields.F90 index 2b2d503ed..69da69aa1 100644 --- a/src/trans/cpu/internal/tpm_fields.F90 +++ b/src/trans/cpu/internal/tpm_fields.F90 @@ -23,7 +23,7 @@ MODULE TPM_FIELDS REAL(KIND=JPRD) ,ALLOCATABLE :: R1MU2(:) ! 1.-MU*MU, cos(theta)**2 REAL(KIND=JPRD) ,ALLOCATABLE :: RACTHE(:) ! 1./SQRT(R1MU2), 1/(cos(theta)) -REAL(KIND=JPRB) ,ALLOCATABLE :: REPSNM(:) ! eps(n,m) used in the Legendre transforms +REAL(KIND=JPRD) ,ALLOCATABLE :: REPSNM(:) ! eps(n,m) used in the Legendre transforms REAL(KIND=JPRB) ,ALLOCATABLE :: RN(:) ! n (to avoid integer to real conversion) REAL(KIND=JPRB) ,ALLOCATABLE :: RLAPIN(:) ! eigen-values of the inverse Laplace operator INTEGER(KIND=JPIM) ,ALLOCATABLE :: NLTN(:) ! R%NTMAX+2-JN diff --git a/src/trans/gpu/internal/prepsnm_mod.F90 b/src/trans/gpu/internal/prepsnm_mod.F90 index b6f165df7..3141257b8 100755 --- a/src/trans/gpu/internal/prepsnm_mod.F90 +++ b/src/trans/gpu/internal/prepsnm_mod.F90 @@ -89,7 +89,7 @@ SUBROUTINE PREPSNM ENDIF DO JN=KM,R%NTMAX+2 - ZEPSNM(KMLOC,JN) = F%REPSNM(D%NPMT(KM)+KMLOC-KM+JN) + ZEPSNM(KMLOC,JN) =REAL(F%REPSNM(D%NPMT(KM)+KMLOC-KM+JN),JPRBT) ENDDO ! end loop over wavenumber ENDDO diff --git a/src/trans/gpu/internal/tpm_fields.F90 b/src/trans/gpu/internal/tpm_fields.F90 index 435c05ca8..2881cafb7 100755 --- a/src/trans/gpu/internal/tpm_fields.F90 +++ b/src/trans/gpu/internal/tpm_fields.F90 @@ -25,7 +25,7 @@ MODULE TPM_FIELDS REAL(KIND=JPRD) ,ALLOCATABLE :: R1MU2(:) ! 1.-MU*MU, cos(theta)**2 REAL(KIND=JPRD) ,ALLOCATABLE :: RACTHE(:) ! 1./SQRT(R1MU2), 1/(cos(theta)) -REAL(KIND=JPRBT) ,ALLOCATABLE :: REPSNM(:) ! eps(n,m) used in the Legendre transforms +REAL(KIND=JPRD) ,ALLOCATABLE :: REPSNM(:) ! eps(n,m) used in the Legendre transforms REAL(KIND=JPRBT) ,ALLOCATABLE :: RN(:) ! n (to avoid integer to real conversion) REAL(KIND=JPRBT) ,ALLOCATABLE :: RLAPIN(:) ! eigen-values of the inverse Laplace operator INTEGER(KIND=JPIM) ,ALLOCATABLE :: NLTN(:) ! R%NTMAX+2-JN From 89ad16859cdfa0b11b44a73f78adfcded7aa2db0 Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Thu, 12 Sep 2024 08:47:08 +0000 Subject: [PATCH 57/86] Make F%RN double precision --- src/trans/cpu/internal/spnsde_mod.F90 | 4 ++-- src/trans/cpu/internal/spnsdead_mod.F90 | 4 ++-- src/trans/cpu/internal/uvtvd_mod.F90 | 2 +- src/trans/cpu/internal/uvtvdad_mod.F90 | 2 +- src/trans/cpu/internal/vdtuv_mod.F90 | 4 ++-- src/trans/cpu/internal/vdtuvad_mod.F90 | 4 ++-- src/trans/gpu/internal/tpm_fields.F90 | 2 +- 7 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/trans/cpu/internal/spnsde_mod.F90 b/src/trans/cpu/internal/spnsde_mod.F90 index d5782df81..a0247cba3 100644 --- a/src/trans/cpu/internal/spnsde_mod.F90 +++ b/src/trans/cpu/internal/spnsde_mod.F90 @@ -95,10 +95,10 @@ SUBROUTINE SPNSDE(KM,KF_SCALARS,PEPSNM,PF,PNSD) ISMAX = R%NSMAX DO JN=KM-1,ISMAX+2 IJ = ISMAX+3-JN - ZN(IJ) = F%RN(JN) + ZN(IJ) = REAL(F%RN(JN),JPRB) IF( JN >= 0 ) ZEPSNM(IJ) = PEPSNM(JN) ENDDO -ZN(0) = F%RN(ISMAX+3) +ZN(0) = REAL(F%RN(ISMAX+3),JPRB) IF(KM == 0) THEN ISKIP = 2 diff --git a/src/trans/cpu/internal/spnsdead_mod.F90 b/src/trans/cpu/internal/spnsdead_mod.F90 index d865fd553..f8de05762 100644 --- a/src/trans/cpu/internal/spnsdead_mod.F90 +++ b/src/trans/cpu/internal/spnsdead_mod.F90 @@ -91,11 +91,11 @@ SUBROUTINE SPNSDEAD(KM,KF_SCALARS,PEPSNM,PF,PNSD) ISMAX = R%NSMAX DO JN=KM-1,ISMAX+2 IJ = ISMAX+3-JN - ZN(IJ) = F%RN(JN) + ZN(IJ) = REAL(F%RN(JN),JPRB) IF( JN >= 0 ) ZEPSNM(IJ) = PEPSNM(JN) ENDDO -ZN(0) = F%RN(ISMAX+3) +ZN(0) = REAL(F%RN(ISMAX+3),JPRB) IF(KM == 0) THEN ISKIP = 2 ELSE diff --git a/src/trans/cpu/internal/uvtvd_mod.F90 b/src/trans/cpu/internal/uvtvd_mod.F90 index 9aa0c3a14..37bc37b91 100644 --- a/src/trans/cpu/internal/uvtvd_mod.F90 +++ b/src/trans/cpu/internal/uvtvd_mod.F90 @@ -90,7 +90,7 @@ SUBROUTINE UVTVD(KM,KFIELD,PEPSNM,PU,PV,PVOR,PDIV) ZKM = KM ITMAX = R%NTMAX -ZN(KM-1:ITMAX+3) = F%RN(KM-1:ITMAX+3) +ZN(KM-1:ITMAX+3) = REAL(F%RN(KM-1:ITMAX+3),JPRB) !* 1.1 SET N=KM-1 COMPONENT TO 0 FOR U AND V IN = F%NLTN(KM-1) diff --git a/src/trans/cpu/internal/uvtvdad_mod.F90 b/src/trans/cpu/internal/uvtvdad_mod.F90 index d3246a4e1..d7f6e8a25 100644 --- a/src/trans/cpu/internal/uvtvdad_mod.F90 +++ b/src/trans/cpu/internal/uvtvdad_mod.F90 @@ -88,7 +88,7 @@ SUBROUTINE UVTVDAD(KM,KFIELD,PEPSNM,PU,PV,PVOR,PDIV) ZKM = KM ITMAX = R%NTMAX -ZN(KM-1:ITMAX+3) = F%RN(KM-1:ITMAX+3) +ZN(KM-1:ITMAX+3) = REAL(F%RN(KM-1:ITMAX+3),JPRB) !* 1.2 COMPUTE VORTICITY AND DIVERGENCE. diff --git a/src/trans/cpu/internal/vdtuv_mod.F90 b/src/trans/cpu/internal/vdtuv_mod.F90 index bd840a7cf..ad13bd789 100644 --- a/src/trans/cpu/internal/vdtuv_mod.F90 +++ b/src/trans/cpu/internal/vdtuv_mod.F90 @@ -98,11 +98,11 @@ SUBROUTINE VDTUV(KM,KFIELD,PEPSNM,PVOR,PDIV,PU,PV) ISMAX = R%NSMAX DO JN=KM-1,ISMAX+2 IJ = ISMAX+3-JN - ZN(IJ) = F%RN(JN) + ZN(IJ) = REAL(F%RN(JN),JPRB) ZLAPIN(IJ) = F%RLAPIN(JN) IF( JN >= 0 ) ZEPSNM(IJ) = PEPSNM(JN) ENDDO -ZN(0) = F%RN(ISMAX+3) +ZN(0) = REAL(F%RN(ISMAX+3),JPRB) !* 1.1 U AND V (KM=0) . diff --git a/src/trans/cpu/internal/vdtuvad_mod.F90 b/src/trans/cpu/internal/vdtuvad_mod.F90 index 88589f026..d6d52a012 100644 --- a/src/trans/cpu/internal/vdtuvad_mod.F90 +++ b/src/trans/cpu/internal/vdtuvad_mod.F90 @@ -98,11 +98,11 @@ SUBROUTINE VDTUVAD(KM,KFIELD,PEPSNM,PVOR,PDIV,PU,PV) ISMAX = R%NSMAX DO JN=KM-1,ISMAX+2 IJ = ISMAX+3-JN - ZN(IJ) = F%RN(JN) + ZN(IJ) = REAL(F%RN(JN),JPRB) ZLAPIN(IJ) = F%RLAPIN(JN) IF( JN >= 0 ) ZEPSNM(IJ) = PEPSNM(JN) ENDDO -ZN(0) = F%RN(ISMAX+3) +ZN(0) = REAL(F%RN(ISMAX+3),JPRB) !* 1.1 U AND V (KM=0) . diff --git a/src/trans/gpu/internal/tpm_fields.F90 b/src/trans/gpu/internal/tpm_fields.F90 index 2881cafb7..c59ea4e65 100755 --- a/src/trans/gpu/internal/tpm_fields.F90 +++ b/src/trans/gpu/internal/tpm_fields.F90 @@ -26,7 +26,7 @@ MODULE TPM_FIELDS REAL(KIND=JPRD) ,ALLOCATABLE :: RACTHE(:) ! 1./SQRT(R1MU2), 1/(cos(theta)) REAL(KIND=JPRD) ,ALLOCATABLE :: REPSNM(:) ! eps(n,m) used in the Legendre transforms -REAL(KIND=JPRBT) ,ALLOCATABLE :: RN(:) ! n (to avoid integer to real conversion) +REAL(KIND=JPRD) ,ALLOCATABLE :: RN(:) ! n (to avoid integer to real conversion) REAL(KIND=JPRBT) ,ALLOCATABLE :: RLAPIN(:) ! eigen-values of the inverse Laplace operator INTEGER(KIND=JPIM) ,ALLOCATABLE :: NLTN(:) ! R%NTMAX+2-JN From b79a6f3c8aa158f3b98ba273b4d815699d3695a5 Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Thu, 12 Sep 2024 08:59:27 +0000 Subject: [PATCH 58/86] Make F%RLAPIN double precision, but not yet flattened GPU version F_RLAPIN --- src/trans/cpu/internal/vdtuv_mod.F90 | 2 +- src/trans/cpu/internal/vdtuvad_mod.F90 | 2 +- src/trans/gpu/external/setup_trans.F90 | 2 +- src/trans/gpu/internal/tpm_fields.F90 | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/trans/cpu/internal/vdtuv_mod.F90 b/src/trans/cpu/internal/vdtuv_mod.F90 index ad13bd789..3cea97c52 100644 --- a/src/trans/cpu/internal/vdtuv_mod.F90 +++ b/src/trans/cpu/internal/vdtuv_mod.F90 @@ -99,7 +99,7 @@ SUBROUTINE VDTUV(KM,KFIELD,PEPSNM,PVOR,PDIV,PU,PV) DO JN=KM-1,ISMAX+2 IJ = ISMAX+3-JN ZN(IJ) = REAL(F%RN(JN),JPRB) - ZLAPIN(IJ) = F%RLAPIN(JN) + ZLAPIN(IJ) = REAL(F%RLAPIN(JN),JPRB) IF( JN >= 0 ) ZEPSNM(IJ) = PEPSNM(JN) ENDDO ZN(0) = REAL(F%RN(ISMAX+3),JPRB) diff --git a/src/trans/cpu/internal/vdtuvad_mod.F90 b/src/trans/cpu/internal/vdtuvad_mod.F90 index d6d52a012..4644f19b7 100644 --- a/src/trans/cpu/internal/vdtuvad_mod.F90 +++ b/src/trans/cpu/internal/vdtuvad_mod.F90 @@ -99,7 +99,7 @@ SUBROUTINE VDTUVAD(KM,KFIELD,PEPSNM,PVOR,PDIV,PU,PV) DO JN=KM-1,ISMAX+2 IJ = ISMAX+3-JN ZN(IJ) = REAL(F%RN(JN),JPRB) - ZLAPIN(IJ) = F%RLAPIN(JN) + ZLAPIN(IJ) = REAL(F%RLAPIN(JN),JPRB) IF( JN >= 0 ) ZEPSNM(IJ) = PEPSNM(JN) ENDDO ZN(0) = REAL(F%RN(ISMAX+3),JPRB) diff --git a/src/trans/gpu/external/setup_trans.F90 b/src/trans/gpu/external/setup_trans.F90 index afbaa0429..d55db5f08 100755 --- a/src/trans/gpu/external/setup_trans.F90 +++ b/src/trans/gpu/external/setup_trans.F90 @@ -680,7 +680,7 @@ SUBROUTINE SETUP_TRANS(KSMAX,KDGL,KDLON,KLOEN,LDSPLIT,PSTRET,& F_RW(I)=F%RW(I) END DO DO I=-1,SIZE(F%RLAPIN)-2 - F_RLAPIN(I)=F%RLAPIN(I) + F_RLAPIN(I)=REAL(F%RLAPIN(I),JPRBT) END DO DO I=1,SIZE(F%RACTHE) F_RACTHE(I)=F%RACTHE(I) diff --git a/src/trans/gpu/internal/tpm_fields.F90 b/src/trans/gpu/internal/tpm_fields.F90 index c59ea4e65..01db6b026 100755 --- a/src/trans/gpu/internal/tpm_fields.F90 +++ b/src/trans/gpu/internal/tpm_fields.F90 @@ -27,7 +27,7 @@ MODULE TPM_FIELDS REAL(KIND=JPRD) ,ALLOCATABLE :: REPSNM(:) ! eps(n,m) used in the Legendre transforms REAL(KIND=JPRD) ,ALLOCATABLE :: RN(:) ! n (to avoid integer to real conversion) -REAL(KIND=JPRBT) ,ALLOCATABLE :: RLAPIN(:) ! eigen-values of the inverse Laplace operator +REAL(KIND=JPRD) ,ALLOCATABLE :: RLAPIN(:) ! eigen-values of the inverse Laplace operator INTEGER(KIND=JPIM) ,ALLOCATABLE :: NLTN(:) ! R%NTMAX+2-JN REAL(KIND=JPRD) ,ALLOCATABLE :: RMU2(:) ! sin(theta) for dual input/output latitudes From 7085d45c32e73ca5e568cf67132f7d0f3a94069c Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Thu, 12 Sep 2024 09:17:39 +0000 Subject: [PATCH 59/86] Move TPM_FIELDS flattened arrays into separate module TPM_FIELS_FLAT This makes TPM_FIELDS now all double precision --- src/trans/gpu/external/gpnorm_trans.F90 | 3 +- src/trans/gpu/external/gpnorm_trans_gpu.F90 | 2 +- src/trans/gpu/external/setup_trans.F90 | 3 +- src/trans/gpu/external/trans_end.F90 | 3 +- src/trans/gpu/internal/fsc_mod.F90 | 2 +- src/trans/gpu/internal/ledir_mod.F90 | 2 +- src/trans/gpu/internal/leinv_mod.F90 | 2 +- src/trans/gpu/internal/ltinv_mod.F90 | 3 +- src/trans/gpu/internal/prepsnm_mod.F90 | 3 +- src/trans/gpu/internal/spnsde_mod.F90 | 2 +- src/trans/gpu/internal/tpm_fields.F90 | 20 +--------- src/trans/gpu/internal/tpm_fields_flat.F90 | 37 +++++++++++++++++++ src/trans/gpu/internal/trltom_pack_unpack.F90 | 2 +- src/trans/gpu/internal/uvtvd_mod.F90 | 2 +- src/trans/gpu/internal/vdtuv_mod.F90 | 3 +- 15 files changed, 57 insertions(+), 32 deletions(-) create mode 100755 src/trans/gpu/internal/tpm_fields_flat.F90 diff --git a/src/trans/gpu/external/gpnorm_trans.F90 b/src/trans/gpu/external/gpnorm_trans.F90 index d92b92b0a..7a691a538 100755 --- a/src/trans/gpu/external/gpnorm_trans.F90 +++ b/src/trans/gpu/external/gpnorm_trans.F90 @@ -62,7 +62,8 @@ SUBROUTINE GPNORM_TRANS(PGP,KFIELDS,KPROMA,PAVE,PMIN,PMAX,LDAVE_ONLY,KRESOL) USE TPM_DISTR, ONLY: D, NPRCIDS, NPRTRV, NPRTRW, MYSETV, MYSETW, NPROC, D_NSTAGTF, & & D_NPTRLS, MYPROC USE TPM_GEOMETRY, ONLY: G, G_NLOEN -USE TPM_FIELDS, ONLY: F, F_RW +USE TPM_FIELDS, ONLY: F +USE TPM_FIELDS_FLAT, ONLY: F_RW USE SET_RESOL_MOD, ONLY: SET_RESOL USE SET2PE_MOD, ONLY: SET2PE USE MPL_MODULE, ONLY: MPL_RECV, MPL_SEND, JP_BLOCKING_STANDARD diff --git a/src/trans/gpu/external/gpnorm_trans_gpu.F90 b/src/trans/gpu/external/gpnorm_trans_gpu.F90 index 26801cc47..d72b7a1bf 100755 --- a/src/trans/gpu/external/gpnorm_trans_gpu.F90 +++ b/src/trans/gpu/external/gpnorm_trans_gpu.F90 @@ -60,7 +60,7 @@ SUBROUTINE GPNORM_TRANS_GPU(PGP,KFIELDS,KPROMA,PAVE,PMIN,PMAX,LDAVE_ONLY,KRESOL) USE TPM_TRANS, ONLY: LGPNORM, NGPBLKS, NPROMA USE TPM_DISTR, ONLY: D, NPRCIDS, NPRTRV, NPRTRW, MYSETV, MYSETW, NPROC, D_NSTAGTF, D_NPTRLS USE TPM_GEOMETRY, ONLY: G, G_NLOEN, G_NLOEN_MAX -USE TPM_FIELDS, ONLY: F_RW +USE TPM_FIELDS_FLAT, ONLY: F_RW USE SET_RESOL_MOD, ONLY: SET_RESOL USE TRGTOL_MOD, ONLY: TRGTOL USE SET2PE_MOD, ONLY: SET2PE diff --git a/src/trans/gpu/external/setup_trans.F90 b/src/trans/gpu/external/setup_trans.F90 index d55db5f08..f474ccba2 100755 --- a/src/trans/gpu/external/setup_trans.F90 +++ b/src/trans/gpu/external/setup_trans.F90 @@ -114,7 +114,8 @@ SUBROUTINE SETUP_TRANS(KSMAX,KDGL,KDLON,KLOEN,LDSPLIT,PSTRET,& & MYPROC, D_OFFSETS_GEMM1, D_OFFSETS_GEMM2 USE TPM_GEOMETRY, ONLY: G, GEOM_RESOL, G_NDGLU, G_NMEN, G_NMEN_MAX, G_NLOEN, & & G_NLOEN_MAX -USE TPM_FIELDS, ONLY: FIELDS_RESOL, F, F_RW, F_RLAPIN, F_RACTHE, ZEPSNM, & +USE TPM_FIELDS, ONLY: FIELDS_RESOL, F +USE TPM_FIELDS_FLAT, ONLY: F_RW, F_RLAPIN, F_RACTHE, ZEPSNM, & & ZAA, ZAS, ZAA0, ZAS0, KMLOC0 USE TPM_FLT, ONLY: FLT_RESOL, S USE TPM_CTL, ONLY: CTL_RESOL, C diff --git a/src/trans/gpu/external/trans_end.F90 b/src/trans/gpu/external/trans_end.F90 index d7081c94e..5329c8598 100755 --- a/src/trans/gpu/external/trans_end.F90 +++ b/src/trans/gpu/external/trans_end.F90 @@ -52,7 +52,8 @@ SUBROUTINE TRANS_END(CDMODE) & D_NPROCL, D_NPNTGTB1, D_NASM0, D_NSTAGTF, D_MSTABF, D_NPNTGTB0, & & D_NPROCM, D_NPTRLS USE TPM_GEOMETRY, ONLY: G, GEOM_RESOL, G_NDGLU, G_NMEN, G_NMEN_MAX,G_NLOEN, G_NLOEN_MAX -USE TPM_FIELDS, ONLY: F, FIELDS_RESOL, F_RW, ZEPSNM, ZAA, ZAS, ZAA0, ZAS0 +USE TPM_FIELDS, ONLY: F, FIELDS_RESOL +USE TPM_FIELDS_FLAT, ONLY: F_RW, ZEPSNM, ZAA, ZAS, ZAA0, ZAS0 USE TPM_CTL, ONLY: C, CTL_RESOL USE TPM_FLT, ONLY: S, FLT_RESOL USE TPM_TRANS, ONLY: FOUBUF, FOUBUF_IN diff --git a/src/trans/gpu/internal/fsc_mod.F90 b/src/trans/gpu/internal/fsc_mod.F90 index 0731b01c0..2254af6fb 100755 --- a/src/trans/gpu/internal/fsc_mod.F90 +++ b/src/trans/gpu/internal/fsc_mod.F90 @@ -67,7 +67,7 @@ SUBROUTINE FSC(ALLOCATOR,HFSC,PREEL_COMPLEX, KF_FS, KF_UV, KF_SCALARS, KUV_OFFSE USE TPM_TRANS, ONLY: LATLON USE TPM_DISTR, ONLY: MYSETW, MYPROC, NPROC, D_NUMP, D_NPTRLS, D_NSTAGTF USE TPM_GEOMETRY, ONLY: G_NMEN, G_NLOEN, G_NLOEN_MAX -USE TPM_FIELDS, ONLY: F_RACTHE +USE TPM_FIELDS_FLAT, ONLY: F_RACTHE USE TPM_GEN, ONLY: NOUT USE TPM_DIM, ONLY: R_NSMAX ! diff --git a/src/trans/gpu/internal/ledir_mod.F90 b/src/trans/gpu/internal/ledir_mod.F90 index 7655ab632..169a33aab 100755 --- a/src/trans/gpu/internal/ledir_mod.F90 +++ b/src/trans/gpu/internal/ledir_mod.F90 @@ -98,7 +98,7 @@ SUBROUTINE LEDIR(ALLOCATOR,ZINPS,ZINPA,ZINPS0,ZINPA0,ZOUT,ZOUT0,POA1,KF_FS) USE YOMHOOK, ONLY: LHOOK, DR_HOOK, JPHOOK USE TPM_DIM, ONLY: R_NDGNH,R_NSMAX,R_NTMAX,R_NDGL USE TPM_GEOMETRY, ONLY: G_NDGLU - USE TPM_FIELDS, ONLY: ZAA,ZAS,ZAA0,ZAS0,KMLOC0 + USE TPM_FIELDS_FLAT, ONLY: ZAA,ZAS,ZAA0,ZAS0,KMLOC0 USE TPM_DISTR, ONLY: D_NUMP, D_MYMS, D_OFFSETS_GEMM1, D_OFFSETS_GEMM2 USE HICBLAS_MOD, ONLY: HIP_DGEMM_BATCHED_OVERLOAD, & & HIP_DGEMM_GROUPED_OVERLOAD, HIP_SGEMM_GROUPED_OVERLOAD diff --git a/src/trans/gpu/internal/leinv_mod.F90 b/src/trans/gpu/internal/leinv_mod.F90 index 5f62d0a7a..18d0339fa 100755 --- a/src/trans/gpu/internal/leinv_mod.F90 +++ b/src/trans/gpu/internal/leinv_mod.F90 @@ -97,7 +97,7 @@ SUBROUTINE LEINV(ALLOCATOR,PIA,ZINP,ZINP0,ZOUTS,ZOUTA,ZOUTS0,ZOUTA0,KF_LEG) USE YOMHOOK, ONLY: LHOOK, DR_HOOK, JPHOOK USE TPM_DIM, ONLY: R_NDGNH, R_NSMAX, R_NDGL USE TPM_GEOMETRY, ONLY: G_NDGLU - USE TPM_FIELDS, ONLY: ZAA, ZAS, ZAA0, ZAS0, KMLOC0 + USE TPM_FIELDS_FLAT, ONLY: ZAA, ZAS, ZAA0, ZAS0, KMLOC0 USE TPM_DISTR, ONLY: D_NUMP, D_MYMS, MYPROC, D_OFFSETS_GEMM1, D_OFFSETS_GEMM2 USE HICBLAS_MOD, ONLY: HIP_DGEMM_BATCHED_OVERLOAD, & & HIP_DGEMM_GROUPED_OVERLOAD, HIP_SGEMM_GROUPED_OVERLOAD diff --git a/src/trans/gpu/internal/ltinv_mod.F90 b/src/trans/gpu/internal/ltinv_mod.F90 index 4fbfcf437..36ad2bee0 100755 --- a/src/trans/gpu/internal/ltinv_mod.F90 +++ b/src/trans/gpu/internal/ltinv_mod.F90 @@ -113,7 +113,8 @@ SUBROUTINE LTINV(ALLOCATOR,HLTINV,KF_UV,KF_SCALARS,& USE SPNSDE_MOD, ONLY: SPNSDE USE LEINV_MOD, ONLY: LEINV_STRIDES, LEINV USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS - USE TPM_FIELDS, ONLY: F,ZEPSNM + USE TPM_FIELDS, ONLY: F + USE TPM_FIELDS_FLAT, ONLY: ZEPSNM USE MPL_MODULE, ONLY: MPL_BARRIER,MPL_ALL_MS_COMM USE TPM_GEN, ONLY: LSYNC_TRANS USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX diff --git a/src/trans/gpu/internal/prepsnm_mod.F90 b/src/trans/gpu/internal/prepsnm_mod.F90 index 3141257b8..71841e4e1 100755 --- a/src/trans/gpu/internal/prepsnm_mod.F90 +++ b/src/trans/gpu/internal/prepsnm_mod.F90 @@ -52,7 +52,8 @@ SUBROUTINE PREPSNM USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT USE TPM_DIM, ONLY: R - USE TPM_FIELDS, ONLY: F, ZEPSNM + USE TPM_FIELDS, ONLY: F + USE TPM_FIELDS_FLAT, ONLY: ZEPSNM USE TPM_DISTR, ONLY: D ! diff --git a/src/trans/gpu/internal/spnsde_mod.F90 b/src/trans/gpu/internal/spnsde_mod.F90 index dea51b549..10acc5dc1 100755 --- a/src/trans/gpu/internal/spnsde_mod.F90 +++ b/src/trans/gpu/internal/spnsde_mod.F90 @@ -16,7 +16,7 @@ SUBROUTINE SPNSDE(KF_SCALARS,PEPSNM,PF,PNSD) USE PARKIND_ECTRANS, ONLY: JPIM, JPRB, JPRBT USE TPM_DIM, ONLY: R, R_NTMAX USE TPM_DISTR, ONLY: D, D_MYMS, D_NUMP -USE TPM_FIELDS, ONLY: ZEPSNM +USE TPM_FIELDS_FLAT, ONLY: ZEPSNM !**** *SPNSDE* - Compute North-South derivative in spectral space diff --git a/src/trans/gpu/internal/tpm_fields.F90 b/src/trans/gpu/internal/tpm_fields.F90 index 01db6b026..69deb0336 100755 --- a/src/trans/gpu/internal/tpm_fields.F90 +++ b/src/trans/gpu/internal/tpm_fields.F90 @@ -11,8 +11,7 @@ MODULE TPM_FIELDS -USE PARKIND_ECTRANS, ONLY: JPIM, JPRB, JPRBT, JPRD -USE ISO_C_BINDING +USE EC_PARKIND, ONLY: JPIM, JPRD IMPLICIT NONE @@ -34,24 +33,7 @@ MODULE TPM_FIELDS REAL(KIND=JPRD) ,ALLOCATABLE :: RACTHE2(:)! 1./SQRT(R1MU2), 1/(cos(theta)) dual input/output latitudes END TYPE FIELDS_TYPE -!flat copies of the above -REAL(KIND=JPRD) ,ALLOCATABLE :: F_RW(:) ! Weights of the Gaussian quadrature -REAL(KIND=JPRBT) ,ALLOCATABLE :: F_RLAPIN(:) ! eigen-values of the inverse Laplace operator -REAL(KIND=JPRD) ,ALLOCATABLE :: F_RACTHE(:) ! eigen-values of the inverse Laplace operator - TYPE(FIELDS_TYPE),ALLOCATABLE,TARGET :: FIELDS_RESOL(:) TYPE(FIELDS_TYPE),POINTER :: F -! scratch arrays for ltinv and ltdir and associated dimension variables - -REAL(KIND=JPRBT),ALLOCATABLE :: ZAA(:,:,:) !! JPRL for 1/2 -REAL(KIND=JPRBT),ALLOCATABLE :: ZAS(:,:,:) !! JPRL for 1/2 - -! for m=0 in ledir_mod: -REAL(KIND=JPRD),ALLOCATABLE :: ZAA0(:,:) -REAL(KIND=JPRD),ALLOCATABLE :: ZAS0(:,:) -INTEGER(KIND=JPIM) :: KMLOC0 - -REAL(KIND=JPRBT),ALLOCATABLE :: ZEPSNM(:,:) - END MODULE TPM_FIELDS diff --git a/src/trans/gpu/internal/tpm_fields_flat.F90 b/src/trans/gpu/internal/tpm_fields_flat.F90 new file mode 100755 index 000000000..780d0629c --- /dev/null +++ b/src/trans/gpu/internal/tpm_fields_flat.F90 @@ -0,0 +1,37 @@ +! (C) Copyright 2000- ECMWF. +! (C) Copyright 2000- Meteo-France. +! (C) Copyright 2022- NVIDIA. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! + +MODULE TPM_FIELDS_FLAT + +USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT, JPRD + +IMPLICIT NONE + +SAVE + +! flat copies of the fields defined in TPM_FIELDS +REAL(KIND=JPRD) ,ALLOCATABLE :: F_RW(:) ! Weights of the Gaussian quadrature +REAL(KIND=JPRBT) ,ALLOCATABLE :: F_RLAPIN(:) ! eigen-values of the inverse Laplace operator +REAL(KIND=JPRD) ,ALLOCATABLE :: F_RACTHE(:) ! eigen-values of the inverse Laplace operator + +! scratch arrays for ltinv and ltdir and associated dimension variables + +REAL(KIND=JPRBT),ALLOCATABLE :: ZAA(:,:,:) !! JPRL for 1/2 +REAL(KIND=JPRBT),ALLOCATABLE :: ZAS(:,:,:) !! JPRL for 1/2 + +! for m=0 in ledir_mod: +REAL(KIND=JPRD),ALLOCATABLE :: ZAA0(:,:) +REAL(KIND=JPRD),ALLOCATABLE :: ZAS0(:,:) +INTEGER(KIND=JPIM) :: KMLOC0 + +REAL(KIND=JPRBT),ALLOCATABLE :: ZEPSNM(:,:) + +END MODULE TPM_FIELDS_FLAT diff --git a/src/trans/gpu/internal/trltom_pack_unpack.F90 b/src/trans/gpu/internal/trltom_pack_unpack.F90 index e208f4ebc..18d4c7f74 100755 --- a/src/trans/gpu/internal/trltom_pack_unpack.F90 +++ b/src/trans/gpu/internal/trltom_pack_unpack.F90 @@ -169,7 +169,7 @@ SUBROUTINE TRLTOM_UNPACK(ALLOCATOR,HTRLTOM_UNPACK,FOUBUF,ZINPS,ZINPA,ZINPS0,ZINP USE TPM_DIM, ONLY: R_NDGNH, R_NDGL USE TPM_GEOMETRY, ONLY: G_NDGLU USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, ASSIGN_PTR, GET_ALLOCATION - USE TPM_FIELDS, ONLY: F_RW, F_RACTHE + USE TPM_FIELDS_FLAT, ONLY: F_RW, F_RACTHE USE TPM_DISTR, ONLY: D_NUMP, D_MYMS, D_NPNTGTB1, D_OFFSETS_GEMM1 USE LEDIR_MOD, ONLY: LEDIR_STRIDES USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_SIZE_T diff --git a/src/trans/gpu/internal/uvtvd_mod.F90 b/src/trans/gpu/internal/uvtvd_mod.F90 index 3ee8521a1..36646f8e9 100755 --- a/src/trans/gpu/internal/uvtvd_mod.F90 +++ b/src/trans/gpu/internal/uvtvd_mod.F90 @@ -61,7 +61,7 @@ SUBROUTINE UVTVD(KF_UV,PU,PV,PVOR,PDIV) USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT USE TPM_DIM, ONLY: R, R_NTMAX USE TPM_DISTR, ONLY: D, D_NUMP, D_MYMS -USE TPM_FIELDS, ONLY: ZEPSNM +USE TPM_FIELDS_FLAT, ONLY: ZEPSNM ! IMPLICIT NONE diff --git a/src/trans/gpu/internal/vdtuv_mod.F90 b/src/trans/gpu/internal/vdtuv_mod.F90 index 7cdb33ce9..68e8c47c8 100755 --- a/src/trans/gpu/internal/vdtuv_mod.F90 +++ b/src/trans/gpu/internal/vdtuv_mod.F90 @@ -15,7 +15,8 @@ SUBROUTINE VDTUV(KFIELD,PEPSNM,PVOR,PDIV,PU,PV) USE PARKIND_ECTRANS, ONLY: JPIM, JPRB, JPRBT USE TPM_DIM, ONLY: R, R_NTMAX -USE TPM_FIELDS, ONLY: F, F_RLAPIN +USE TPM_FIELDS, ONLY: F +USE TPM_FIELDS_FLAT, ONLY: F_RLAPIN USE TPM_DISTR, ONLY: D, D_NUMP, D_MYMS !**** *VDTUV* - Compute U,V in spectral space From 50abc74a4c37db287358dac39f2675bef4b51c7f Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Thu, 12 Sep 2024 09:25:32 +0000 Subject: [PATCH 60/86] Move tpm_fields.F90 to common --- src/trans/common/CMakeLists.txt | 1 + .../{gpu => common}/internal/tpm_fields.F90 | 0 src/trans/cpu/internal/tpm_fields.F90 | 38 ------------------- src/trans/sedrenames.txt | 2 +- 4 files changed, 2 insertions(+), 39 deletions(-) rename src/trans/{gpu => common}/internal/tpm_fields.F90 (100%) delete mode 100644 src/trans/cpu/internal/tpm_fields.F90 diff --git a/src/trans/common/CMakeLists.txt b/src/trans/common/CMakeLists.txt index aea3a9348..d6833962c 100644 --- a/src/trans/common/CMakeLists.txt +++ b/src/trans/common/CMakeLists.txt @@ -24,6 +24,7 @@ list( APPEND ectrans_common_src internal/tpm_constants.F90 internal/tpm_ctl.F90 internal/tpm_dim.F90 + internal/tpm_fields.F90 internal/tpm_gen.F90 internal/tpm_geometry.F90 internal/tpm_pol.F90 diff --git a/src/trans/gpu/internal/tpm_fields.F90 b/src/trans/common/internal/tpm_fields.F90 similarity index 100% rename from src/trans/gpu/internal/tpm_fields.F90 rename to src/trans/common/internal/tpm_fields.F90 diff --git a/src/trans/cpu/internal/tpm_fields.F90 b/src/trans/cpu/internal/tpm_fields.F90 deleted file mode 100644 index 69da69aa1..000000000 --- a/src/trans/cpu/internal/tpm_fields.F90 +++ /dev/null @@ -1,38 +0,0 @@ -! (C) Copyright 2000- ECMWF. -! (C) Copyright 2000- Meteo-France. -! -! This software is licensed under the terms of the Apache Licence Version 2.0 -! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -! In applying this licence, ECMWF does not waive the privileges and immunities -! granted to it by virtue of its status as an intergovernmental organisation -! nor does it submit to any jurisdiction. -! - -MODULE TPM_FIELDS - -USE PARKIND1 ,ONLY : JPIM, JPRB, JPRD - -IMPLICIT NONE - -SAVE - -TYPE FIELDS_TYPE -REAL(KIND=JPRD) ,ALLOCATABLE :: RPNM(:,:) ! Legendre polynomials -REAL(KIND=JPRD) ,ALLOCATABLE :: RMU(:) ! sin(theta) for Gaussian latitudes -REAL(KIND=JPRD) ,ALLOCATABLE :: RW(:) ! Weights of the Gaussian quadrature -REAL(KIND=JPRD) ,ALLOCATABLE :: R1MU2(:) ! 1.-MU*MU, cos(theta)**2 -REAL(KIND=JPRD) ,ALLOCATABLE :: RACTHE(:) ! 1./SQRT(R1MU2), 1/(cos(theta)) - -REAL(KIND=JPRD) ,ALLOCATABLE :: REPSNM(:) ! eps(n,m) used in the Legendre transforms -REAL(KIND=JPRB) ,ALLOCATABLE :: RN(:) ! n (to avoid integer to real conversion) -REAL(KIND=JPRB) ,ALLOCATABLE :: RLAPIN(:) ! eigen-values of the inverse Laplace operator -INTEGER(KIND=JPIM) ,ALLOCATABLE :: NLTN(:) ! R%NTMAX+2-JN - -REAL(KIND=JPRD) ,ALLOCATABLE :: RMU2(:) ! sin(theta) for dual input/output latitudes -REAL(KIND=JPRD) ,ALLOCATABLE :: RACTHE2(:) ! 1./SQRT(R1MU2), 1/(cos(theta)) dual input/output latitudes -END TYPE FIELDS_TYPE - -TYPE(FIELDS_TYPE),ALLOCATABLE,TARGET :: FIELDS_RESOL(:) -TYPE(FIELDS_TYPE),POINTER :: F - -END MODULE TPM_FIELDS diff --git a/src/trans/sedrenames.txt b/src/trans/sedrenames.txt index f39bb20a1..133dace6b 100644 --- a/src/trans/sedrenames.txt +++ b/src/trans/sedrenames.txt @@ -114,7 +114,7 @@ s/SULEG_MOD/SULEG_MOD_VARIANTDESIGNATOR/g s/SUTRLE_MOD/SUTRLE_MOD_VARIANTDESIGNATOR/g s/TPM_FFTW/TPM_FFTW_VARIANTDESIGNATOR/g s/TPM_FFT/TPM_FFT_VARIANTDESIGNATOR/g -s/TPM_FIELDS/TPM_FIELDS_VARIANTDESIGNATOR/g +s/TPM_FIELDS_FLAT/TPM_FIELDS_FLAT_VARIANTDESIGNATOR/g s/TPM_FLT/TPM_FLT_VARIANTDESIGNATOR/g s/TPM_TRANS/TPM_TRANS_VARIANTDESIGNATOR/g s/trans_end( *($|\(| |\*|\.h))/trans_end_VARIANTDESIGNATOR\1/g From fcd7d38b1a3d2b53572557e9898271ed38ce65ac Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Thu, 12 Sep 2024 10:39:16 +0000 Subject: [PATCH 61/86] Move pre_suleg_mod.F90 to common --- src/trans/common/CMakeLists.txt | 1 + .../internal/pre_suleg_mod.F90 | 2 +- src/trans/cpu/internal/pre_suleg_mod.F90 | 71 ------------------- src/trans/sedrenames.txt | 3 +- 4 files changed, 3 insertions(+), 74 deletions(-) rename src/trans/{gpu => common}/internal/pre_suleg_mod.F90 (98%) delete mode 100644 src/trans/cpu/internal/pre_suleg_mod.F90 diff --git a/src/trans/common/CMakeLists.txt b/src/trans/common/CMakeLists.txt index d6833962c..c492e8910 100644 --- a/src/trans/common/CMakeLists.txt +++ b/src/trans/common/CMakeLists.txt @@ -32,6 +32,7 @@ list( APPEND ectrans_common_src internal/pe2set_mod.F90 internal/set2pe_mod.F90 internal/eq_regions_mod.F90 + internal/pre_suleg_mod.F90 internal/shuffle_mod.F90 internal/sump_trans0_mod.F90 internal/sustaonl_mod.F90 diff --git a/src/trans/gpu/internal/pre_suleg_mod.F90 b/src/trans/common/internal/pre_suleg_mod.F90 similarity index 98% rename from src/trans/gpu/internal/pre_suleg_mod.F90 rename to src/trans/common/internal/pre_suleg_mod.F90 index 82fded5fd..ccbb63efb 100755 --- a/src/trans/gpu/internal/pre_suleg_mod.F90 +++ b/src/trans/common/internal/pre_suleg_mod.F90 @@ -12,7 +12,7 @@ MODULE PRE_SULEG_MOD IMPLICIT NONE CONTAINS SUBROUTINE PRE_SULEG -USE PARKIND1, ONLY: JPRD, JPIM +USE EC_PARKIND, ONLY: JPRD, JPIM USE TPM_GEN, ONLY: NPRINTLEV, NOUT USE TPM_DIM, ONLY: R USE TPM_CONSTANTS, ONLY: RA diff --git a/src/trans/cpu/internal/pre_suleg_mod.F90 b/src/trans/cpu/internal/pre_suleg_mod.F90 deleted file mode 100644 index 024341091..000000000 --- a/src/trans/cpu/internal/pre_suleg_mod.F90 +++ /dev/null @@ -1,71 +0,0 @@ -! (C) Copyright 2000- ECMWF. -! (C) Copyright 2000- Meteo-France. -! -! This software is licensed under the terms of the Apache Licence Version 2.0 -! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -! In applying this licence, ECMWF does not waive the privileges and immunities -! granted to it by virtue of its status as an intergovernmental organisation -! nor does it submit to any jurisdiction. -! - -MODULE PRE_SULEG_MOD -IMPLICIT NONE -CONTAINS -SUBROUTINE PRE_SULEG -USE PARKIND1 ,ONLY : JPRD, JPIM -USE TPM_GEN ,ONLY : NPRINTLEV,NOUT -USE TPM_DIM ,ONLY : R -USE TPM_CONSTANTS ,ONLY: RA -USE TPM_DISTR ,ONLY : D -USE TPM_FIELDS,ONLY : F - -INTEGER(KIND=JPIM) :: IM, ICOUNT,JMLOC,JN -LOGICAL :: LLP1,LLP2 - - -LLP1 = NPRINTLEV>0 -LLP2 = NPRINTLEV>1 - -ICOUNT = 0 -DO JMLOC=1,D%NUMP - IM = D%MYMS(JMLOC) - DO JN=IM,R%NTMAX+2 - ICOUNT = ICOUNT+1 - ENDDO -ENDDO - -ALLOCATE(F%REPSNM(ICOUNT)) -IF (LLP2) WRITE(NOUT,9) 'F%REPSNM ',SIZE(F%REPSNM ),SHAPE(F%REPSNM ) -ALLOCATE(F%RN(-1:R%NTMAX+3)) -IF (LLP2) WRITE(NOUT,9) 'F%RN ',SIZE(F%RN ),SHAPE(F%RN ) -ALLOCATE(F%RLAPIN(-1:R%NSMAX+2)) -IF (LLP2) WRITE(NOUT,9) 'F%RLAPIN ',SIZE(F%RLAPIN ),SHAPE(F%RLAPIN ) -ALLOCATE(F%NLTN(-1:R%NTMAX+3)) -IF (LLP2) WRITE(NOUT,9) 'F%NLTN ',SIZE(F%NLTN ),SHAPE(F%NLTN ) - -ICOUNT = 0 -DO JMLOC=1,D%NUMP - IM = D%MYMS(JMLOC) - DO JN=IM,R%NTMAX+2 - ICOUNT = ICOUNT+1 - F%REPSNM(ICOUNT) = SQRT(REAL(JN*JN-IM*IM,JPRD)/& - &REAL(4*JN*JN-1,JPRD)) - ENDDO -ENDDO - -DO JN=-1,R%NTMAX+3 - F%RN(JN) = REAL(JN,JPRD) - F%NLTN(JN) = R%NTMAX+2-JN -ENDDO -F%RLAPIN(:) = 0.0_JPRD -F%RLAPIN(0) = 0.0_JPRD -F%RLAPIN(-1) = 0.0_JPRD -DO JN=1,R%NSMAX+2 - F%RLAPIN(JN)=-(REAL(RA,JPRD)*REAL(RA,JPRD)/REAL(JN*(JN+1),JPRD)) -ENDDO - -! ------------------------------------------------------------------ -9 FORMAT(1X,'ARRAY ',A10,' ALLOCATED ',8I8) - -END SUBROUTINE PRE_SULEG -END MODULE PRE_SULEG_MOD diff --git a/src/trans/sedrenames.txt b/src/trans/sedrenames.txt index 133dace6b..e9122bd03 100644 --- a/src/trans/sedrenames.txt +++ b/src/trans/sedrenames.txt @@ -83,7 +83,6 @@ s/PARKIND1/EC_PARKIND/g s/PARKIND2/EC_PARKIND/g s/parkind_ectrans/ec_parkind/g s/PARKIND_ECTRANS/ec_parkind/g -s/PRE_SULEG_MOD/PRE_SULEG_MOD_VARIANTDESIGNATOR/g s/PREPSNM_MOD/PREPSNM_MOD_VARIANTDESIGNATOR/g s/PRFI1_MOD/PRFI1_MOD_VARIANTDESIGNATOR/g s/PRFI1AD_MOD/PRFI1AD_MOD_VARIANTDESIGNATOR/g @@ -110,7 +109,7 @@ s/SPNSDE_MOD/SPNSDE_MOD_VARIANTDESIGNATOR/g s/SPNSDEAD_MOD/SPNSDEAD_MOD_VARIANTDESIGNATOR/g s/SUFFT_MOD/SUFFT_MOD_VARIANTDESIGNATOR/g s/SUMP_TRANS_MOD/SUMP_TRANS_MOD_VARIANTDESIGNATOR/g -s/SULEG_MOD/SULEG_MOD_VARIANTDESIGNATOR/g +s/\ SULEG_MOD/\ SULEG_MOD_VARIANTDESIGNATOR/g s/SUTRLE_MOD/SUTRLE_MOD_VARIANTDESIGNATOR/g s/TPM_FFTW/TPM_FFTW_VARIANTDESIGNATOR/g s/TPM_FFT/TPM_FFT_VARIANTDESIGNATOR/g From 9efdc9d89fbbe156fb34dd77af5b6a853c6e64f7 Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Thu, 12 Sep 2024 11:33:17 +0000 Subject: [PATCH 62/86] Make wts500_mod.F90 double precision --- src/trans/cpu/algor/seefmm_mix.F90 | 76 +++++++++++++++--------------- src/trans/cpu/algor/wts500_mod.F90 | 4 +- src/trans/gpu/algor/seefmm_mix.F90 | 75 +++++++++++++++-------------- src/trans/gpu/algor/wts500_mod.F90 | 5 +- 4 files changed, 80 insertions(+), 80 deletions(-) diff --git a/src/trans/cpu/algor/seefmm_mix.F90 b/src/trans/cpu/algor/seefmm_mix.F90 index f5ad3062b..c1c4407df 100644 --- a/src/trans/cpu/algor/seefmm_mix.F90 +++ b/src/trans/cpu/algor/seefmm_mix.F90 @@ -39,7 +39,7 @@ module seefmm_mix ! ------------------------------------------------------------------ -use parkind1,only : jpim ,jprb, jprd +use parkind1, only : jpim, jprb, jprd use ecsort_mix, only : keysort use wts500_mod, only: wts500 @@ -88,10 +88,11 @@ recursive subroutine setup_seefmm(kx,px,ky,py,ydfmm,pdiff) integer(kind=jpim),intent(in) :: ky real(kind=jprd) ,intent(in) :: py(:) type(fmm_type) ,intent(out) :: ydfmm -real(kind=jprb),optional,intent(in) :: pdiff(:,:) +real(kind=jprd),optional,intent(in) :: pdiff(:,:) -real(kind=jprb) :: zxy(kx+ky),zrt(56),zcik((kx+ky)*(kx+ky)) -real(kind=jprb) :: zr +real(kind=jprd) :: zxy(kx+ky),zcik((kx+ky)*(kx+ky)) +real(kind=jprd) :: zr, zrt(56), zrw(56) +real(kind=jprd), allocatable :: zrdexp(:,:) integer(kind=jpim) :: ixy !--------------------------------------------------------------------------- ydfmm%nx=kx @@ -103,16 +104,19 @@ recursive subroutine setup_seefmm(kx,px,ky,py,ydfmm,pdiff) ! Combine px and py to form xxy, compute ascending index for xxy call comb_xy(kx,px,ky,py,ixy,zxy,ydfmm%index) ! Setup quadrature, scale (see 3.1.1 in [1]) -call suquad(ixy,zxy(ydfmm%index(1))-zxy(ydfmm%index(ixy)),& - & ydfmm%nquad,ydfmm%rw,zrt,zr) -allocate(ydfmm%rdexp(ydfmm%nquad,ixy)) +call suquad(ixy,zxy(ydfmm%index(1))-zxy(ydfmm%index(ixy)),ydfmm%nquad,& + & zrw,zrt,zr) +allocate(zrdexp(ydfmm%nquad,ixy)) allocate(ydfmm%nclose(ixy)) ! Main pre-computation -call prepotf(kx,ixy,ydfmm%nquad,ydfmm%rw,zrt,zr,zxy,ydfmm%index,& - & ydfmm%rdexp,ydfmm%nclose,zcik,ydfmm%ncik,pdiff) -! Needed as size of cik unknown beforehand +call prepotf(kx,ixy,ydfmm%nquad,zrw,zrt,zr,zxy,ydfmm%index,& + & zrdexp,ydfmm%nclose,zcik,ydfmm%ncik,pdiff) + +allocate(ydfmm%rdexp(ydfmm%nquad,ixy)) allocate(ydfmm%cik(ydfmm%ncik)) -ydfmm%cik(:)=zcik(1:ydfmm%ncik) +ydfmm%rw(:) = real(zrw(:),jprb) +ydfmm%rdexp(:,:) = real(zrdexp(:,:),jprb) +ydfmm%cik(:) = real(zcik(1:ydfmm%ncik),jprb) end subroutine setup_seefmm !========================================================================== @@ -432,19 +436,19 @@ end subroutine potfm recursive subroutine suquad(kn,prange,kquad,prw,prt,pr) implicit none -integer(kind=jpim) ,intent(in) :: kn -real(kind=jprb),intent(in) :: prange -integer(kind=jpim) ,intent(in) :: kquad -real(kind=jprb),intent(out) :: prw(:) -real(kind=jprb),intent(out) :: prt(:) -real(kind=jprb),intent(out) :: pr +integer(kind=jpim) ,intent(in) :: kn +real(kind=jprd) ,intent(in) :: prange +integer(kind=jpim) ,intent(in) :: kquad +real(kind=jprd) ,intent(out) :: prw(:) +real(kind=jprd) ,intent(out) :: prt(:) +real(kind=jprd) ,intent(out) :: pr -real(kind=jprb) :: za,zb,zs +real(kind=jprd) :: za,zb,zs integer(kind=jpim) :: jm !------------------------------------------------------------------------- -za=1.0 -zb=500.0 +za=1.0_jprd +zb=500.0_jprd zs=zb/prange pr=za/zs call wts500(prt,prw,kquad) @@ -463,21 +467,15 @@ recursive subroutine comb_xy(kx,px,ky,py,kxy,pxy,kindex) real(kind=jprd), intent(in) :: px(:) real(kind=jprd), intent(in) :: py(:) integer(kind=jpim), intent(in) :: kxy -real(kind=jprb), intent(out) :: pxy(:) +real(kind=jprd), intent(out) :: pxy(:) integer(kind=jpim), intent(out) :: kindex(:) integer(kind=jpim) :: iret -!integer(kind=jpim) :: jxy !------------------------------------------------------------------------- pxy(1:kx)=px(1:kx) pxy(kx+1:kx+ky)=py(1:ky) -!call m01daf(pxy,1,kxy,'D',irank,ifail) call keysort(iret,pxy,kxy,descending=.true.,index=kindex,init=.true.) -!!$do jxy=1,kxy -!!$ kindex(irank(jxy))=jxy -!!$enddo - end subroutine comb_xy !========================================================================== recursive subroutine prepotf(kx,kxy,kquad,prw,prt,pr,pxy,kindex,prdexp,& @@ -488,20 +486,20 @@ recursive subroutine prepotf(kx,kxy,kquad,prw,prt,pr,pxy,kindex,prdexp,& integer(kind=jpim), intent(in) :: kx integer(kind=jpim), intent(in) :: kxy integer(kind=jpim), intent(in) :: kquad -real(kind=jprb), intent(in) :: pxy(:) -real(kind=jprb), intent(in) :: prw(:) -real(kind=jprb), intent(in) :: pr -real(kind=jprb), intent(in) :: prt(:) +real(kind=jprd), intent(in) :: pxy(:) +real(kind=jprd), intent(in) :: prw(:) +real(kind=jprd), intent(in) :: pr +real(kind=jprd), intent(in) :: prt(:) integer(kind=jpim), intent(in) :: kindex(:) -real(kind=jprb), intent(out) :: prdexp(:,:) +real(kind=jprd), intent(out) :: prdexp(:,:) integer(kind=jpim), intent(out) :: kclosel(:) -real(kind=jprb), intent(out) :: pcik(:) +real(kind=jprd), intent(out) :: pcik(:) integer(kind=jpim), intent(out) :: knocik -real(kind=jprb),optional, intent(in) :: pdiff(:,:) +real(kind=jprd),optional, intent(in) :: pdiff(:,:) -real(kind=jprb) :: zdx -real(kind=jprb) :: zsum -real(kind=jprb) :: zdiff(kxy,kxy) +real(kind=jprd) :: zdx +real(kind=jprd) :: zsum +real(kind=jprd) :: zdiff(kxy,kxy) integer(kind=jpim) :: jxy,jq,isize,jdist,ixy,ixym1,i1,i1pd,j1,j2 logical :: llexit !------------------------------------------------------------------------- @@ -535,11 +533,11 @@ recursive subroutine prepotf(kx,kxy,kquad,prw,prt,pr,pxy,kindex,prdexp,& kclosel(jxy)=kclosel(jxy)+1 if((i1 > kx .and. i1pd <= kx) .or. (i1pd > kx .and. i1 <= kx)) then knocik=knocik+1 - zsum=0.0_jprb + zsum=0.0_jprd do jq=1,kquad zsum=zsum+prw(jq)*exp(-zdx*prt(jq)) enddo - pcik(knocik)=1.0_jprb/zdx-zsum + pcik(knocik)=1.0_jprd/zdx-zsum endif else exit diff --git a/src/trans/cpu/algor/wts500_mod.F90 b/src/trans/cpu/algor/wts500_mod.F90 index af9bb7e90..246d33e36 100644 --- a/src/trans/cpu/algor/wts500_mod.F90 +++ b/src/trans/cpu/algor/wts500_mod.F90 @@ -12,11 +12,11 @@ MODULE WTS500_MOD CONTAINS SUBROUTINE WTS500(PX,PW,KN) -USE PARKIND1 ,ONLY : JPIM ,JPRB +USE EC_PARKIND ,ONLY : JPIM, JPRD IMPLICIT NONE INTEGER(KIND=JPIM), INTENT(IN) :: KN -REAL(KIND=JPRB), INTENT(OUT) :: PX(:),PW(:) +REAL(KIND=JPRD), INTENT(OUT) :: PX(:),PW(:) ! This routine returns a set of Gaussian nodes and weights for ! integrating the functions exp(lambda*x)dx over the range x=0 to x=infinity. diff --git a/src/trans/gpu/algor/seefmm_mix.F90 b/src/trans/gpu/algor/seefmm_mix.F90 index e82e66060..596372196 100644 --- a/src/trans/gpu/algor/seefmm_mix.F90 +++ b/src/trans/gpu/algor/seefmm_mix.F90 @@ -42,6 +42,8 @@ module seefmm_mix use ecsort_mix, only: keysort use wts500_mod, only: wts500 +private + integer(kind=jpim) :: nfmm_lim=200 ! Appr. break-even limit for FMM integer(kind=jpim),parameter :: nquadEm14=28 ! Quadrature size for eps~=1.e-14 integer(kind=jpim),parameter :: nquadEm10=20! Quadrature size for eps~=1.e-10 @@ -60,6 +62,8 @@ module seefmm_mix end type fmm_type +public :: fmm_type, setup_seefmm, free_seefmm, seefmm_mulm + contains recursive subroutine setup_seefmm(kx,px,ky,py,ydfmm,pdiff) @@ -83,10 +87,11 @@ recursive subroutine setup_seefmm(kx,px,ky,py,ydfmm,pdiff) integer(kind=jpim),intent(in) :: ky real(kind=jprd) ,intent(in) :: py(:) type(fmm_type) ,intent(out) :: ydfmm -real(kind=JPRBT),optional,intent(in) :: pdiff(:,:) +real(kind=jprd),optional,intent(in) :: pdiff(:,:) -real(kind=JPRBT) :: zxy(kx+ky),zrt(56),zcik((kx+ky)*(kx+ky)) -real(kind=JPRBT) :: zr +real(kind=jprd) :: zxy(kx+ky), zcik((kx+ky)*(kx+ky)) +real(kind=jprd) :: zr, zrt(56), zrw(56) +real(kind=jprd), allocatable :: zrdexp(:,:) integer(kind=jpim) :: ixy !--------------------------------------------------------------------------- ydfmm%nx=kx @@ -98,16 +103,19 @@ recursive subroutine setup_seefmm(kx,px,ky,py,ydfmm,pdiff) ! Combine px and py to form xxy, compute ascending index for xxy call comb_xy(kx,px,ky,py,ixy,zxy,ydfmm%index) ! Setup quadrature, scale (see 3.1.1 in [1]) -call suquad(ixy,zxy(ydfmm%index(1))-zxy(ydfmm%index(ixy)),& - & ydfmm%nquad,ydfmm%rw,zrt,zr) -allocate(ydfmm%rdexp(ydfmm%nquad,ixy)) +call suquad(ixy,zxy(ydfmm%index(1))-zxy(ydfmm%index(ixy)),ydfmm%nquad,& + & zrw,zrt,zr) +allocate(zrdexp(ydfmm%nquad,ixy)) allocate(ydfmm%nclose(ixy)) ! Main pre-computation -call prepotf(kx,ixy,ydfmm%nquad,ydfmm%rw,zrt,zr,zxy,ydfmm%index,& - & ydfmm%rdexp,ydfmm%nclose,zcik,ydfmm%ncik,pdiff) -! Needed as size of cik unknown beforehand +call prepotf(kx,ixy,ydfmm%nquad,zrw,zrt,zr,zxy,ydfmm%index,& + & zrdexp,ydfmm%nclose,zcik,ydfmm%ncik,pdiff) + +allocate(ydfmm%rdexp(ydfmm%nquad,ixy)) allocate(ydfmm%cik(ydfmm%ncik)) -ydfmm%cik(:)=zcik(1:ydfmm%ncik) +ydfmm%rw(:) = real(zrw(:),JPRBT) +ydfmm%rdexp(:,:) = real(zrdexp(:,:),JPRBT) +ydfmm%cik(:) = real(zcik(1:ydfmm%ncik),JPRBT) end subroutine setup_seefmm !========================================================================== @@ -428,18 +436,18 @@ recursive subroutine suquad(kn,prange,kquad,prw,prt,pr) implicit none integer(kind=jpim) ,intent(in) :: kn -real(kind=JPRBT),intent(in) :: prange +real(kind=jprd),intent(in) :: prange integer(kind=jpim) ,intent(in) :: kquad -real(kind=JPRBT),intent(out) :: prw(:) -real(kind=JPRBT),intent(out) :: prt(:) -real(kind=JPRBT),intent(out) :: pr +real(kind=jprd),intent(out) :: prw(:) +real(kind=jprd),intent(out) :: prt(:) +real(kind=jprd),intent(out) :: pr -real(kind=JPRBT) :: za,zb,zs +real(kind=jprd) :: za,zb,zs integer(kind=jpim) :: jm !------------------------------------------------------------------------- -za=1.0 -zb=500.0 +za=1.0_jprd +zb=500.0_jprd zs=zb/prange pr=za/zs call wts500(prt,prw,kquad) @@ -458,20 +466,15 @@ recursive subroutine comb_xy(kx,px,ky,py,kxy,pxy,kindex) real(kind=jprd), intent(in) :: px(:) real(kind=jprd), intent(in) :: py(:) integer(kind=jpim), intent(in) :: kxy -real(kind=JPRBT), intent(out) :: pxy(:) +real(kind=jprd), intent(out) :: pxy(:) integer(kind=jpim), intent(out) :: kindex(:) -integer(kind=jpim) :: jxy,ix,iy,iret +integer(kind=jpim) :: iret !------------------------------------------------------------------------- pxy(1:kx)=px(1:kx) pxy(kx+1:kx+ky)=py(1:ky) -!call m01daf(pxy,1,kxy,'D',irank,ifail) call keysort(iret,pxy,kxy,descending=.true.,index=kindex,init=.true.) -!!$do jxy=1,kxy -!!$ kindex(irank(jxy))=jxy -!!$enddo - end subroutine comb_xy !========================================================================== recursive subroutine prepotf(kx,kxy,kquad,prw,prt,pr,pxy,kindex,prdexp,& @@ -482,20 +485,20 @@ recursive subroutine prepotf(kx,kxy,kquad,prw,prt,pr,pxy,kindex,prdexp,& integer(kind=jpim), intent(in) :: kx integer(kind=jpim), intent(in) :: kxy integer(kind=jpim), intent(in) :: kquad -real(kind=JPRBT), intent(in) :: pxy(:) -real(kind=JPRBT), intent(in) :: prw(:) -real(kind=JPRBT), intent(in) :: pr -real(kind=JPRBT), intent(in) :: prt(:) +real(kind=jprd), intent(in) :: pxy(:) +real(kind=jprd), intent(in) :: prw(:) +real(kind=jprd), intent(in) :: pr +real(kind=jprd), intent(in) :: prt(:) integer(kind=jpim), intent(in) :: kindex(:) -real(kind=JPRBT), intent(out) :: prdexp(:,:) +real(kind=jprd), intent(out) :: prdexp(:,:) integer(kind=jpim), intent(out) :: kclosel(:) -real(kind=JPRBT), intent(out) :: pcik(:) +real(kind=jprd), intent(out) :: pcik(:) integer(kind=jpim), intent(out) :: knocik -real(kind=JPRBT),optional, intent(in) :: pdiff(:,:) +real(kind=jprd),optional, intent(in) :: pdiff(:,:) -real(kind=JPRBT) :: zdx -real(kind=JPRBT) :: zsum -real(kind=JPRBT) :: zdiff(kxy,kxy) +real(kind=jprd) :: zdx +real(kind=jprd) :: zsum +real(kind=jprd) :: zdiff(kxy,kxy) integer(kind=jpim) :: jxy,jq,isize,jdist,ixy,ixym1,i1,i1pd,j1,j2 logical :: llexit !------------------------------------------------------------------------- @@ -529,11 +532,11 @@ recursive subroutine prepotf(kx,kxy,kquad,prw,prt,pr,pxy,kindex,prdexp,& kclosel(jxy)=kclosel(jxy)+1 if((i1 > kx .and. i1pd <= kx) .or. (i1pd > kx .and. i1 <= kx)) then knocik=knocik+1 - zsum=0.0_JPRBT + zsum=0.0_jprd do jq=1,kquad zsum=zsum+prw(jq)*exp(-zdx*prt(jq)) enddo - pcik(knocik)=1.0_JPRBT/zdx-zsum + pcik(knocik)=1.0_jprd/zdx-zsum endif else exit diff --git a/src/trans/gpu/algor/wts500_mod.F90 b/src/trans/gpu/algor/wts500_mod.F90 index dc45a4091..a4a0e4e12 100644 --- a/src/trans/gpu/algor/wts500_mod.F90 +++ b/src/trans/gpu/algor/wts500_mod.F90 @@ -11,12 +11,11 @@ MODULE WTS500_MOD CONTAINS SUBROUTINE WTS500(PX,PW,KN) -USE EC_PARKIND, ONLY: JPIM -USE PARKIND_ECTRANS, ONLY: JPRBT +USE EC_PARKIND, ONLY: JPIM, JPRD IMPLICIT NONE INTEGER(KIND=JPIM), INTENT(IN) :: KN -REAL(KIND=JPRBT), INTENT(OUT) :: PX(:),PW(:) +REAL(KIND=JPRD), INTENT(OUT) :: PX(:),PW(:) ! This routine returns a set of Gaussian nodes and weights for ! integrating the functions exp(lambda*x)dx over the range x=0 to x=infinity. From 8971be5b07a7142de97f60a787e26db2cc9aa711 Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Thu, 12 Sep 2024 11:39:05 +0000 Subject: [PATCH 63/86] Move wts500_mod.F90 to common --- src/trans/common/CMakeLists.txt | 1 + .../algor => common/internal}/wts500_mod.F90 | 0 src/trans/gpu/CMakeLists.txt | 1 - src/trans/gpu/algor/wts500_mod.F90 | 3764 ----------------- src/trans/sedrenames.txt | 2 - 5 files changed, 1 insertion(+), 3767 deletions(-) rename src/trans/{cpu/algor => common/internal}/wts500_mod.F90 (100%) delete mode 100644 src/trans/gpu/algor/wts500_mod.F90 diff --git a/src/trans/common/CMakeLists.txt b/src/trans/common/CMakeLists.txt index c492e8910..850609a3a 100644 --- a/src/trans/common/CMakeLists.txt +++ b/src/trans/common/CMakeLists.txt @@ -44,6 +44,7 @@ list( APPEND ectrans_common_src internal/myrecvset_mod.F90 internal/suwavedi_mod.F90 internal/sump_trans_preleg_mod.F90 + internal/wts500_mod.F90 external/get_current.F90 external/setup_trans0.F90 external/ini_spec_dist.F90 diff --git a/src/trans/cpu/algor/wts500_mod.F90 b/src/trans/common/internal/wts500_mod.F90 similarity index 100% rename from src/trans/cpu/algor/wts500_mod.F90 rename to src/trans/common/internal/wts500_mod.F90 diff --git a/src/trans/gpu/CMakeLists.txt b/src/trans/gpu/CMakeLists.txt index 37f21db0d..031f69722 100644 --- a/src/trans/gpu/CMakeLists.txt +++ b/src/trans/gpu/CMakeLists.txt @@ -91,7 +91,6 @@ function(generate_backend_sources) ) list( APPEND files algor/seefmm_mix.F90 - algor/wts500_mod.F90 ) ecbuild_list_exclude_pattern( LIST files REGEX parkind_ectrans.F90 diff --git a/src/trans/gpu/algor/wts500_mod.F90 b/src/trans/gpu/algor/wts500_mod.F90 deleted file mode 100644 index a4a0e4e12..000000000 --- a/src/trans/gpu/algor/wts500_mod.F90 +++ /dev/null @@ -1,3764 +0,0 @@ -! (C) Copyright 2014- ECMWF. -! -! This software is licensed under the terms of the Apache Licence Version 2.0 -! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -! In applying this licence, ECMWF does not waive the privileges and immunities -! granted to it by virtue of its status as an intergovernmental organisation -! nor does it submit to any jurisdiction. -! - -MODULE WTS500_MOD -CONTAINS -SUBROUTINE WTS500(PX,PW,KN) - -USE EC_PARKIND, ONLY: JPIM, JPRD -IMPLICIT NONE - -INTEGER(KIND=JPIM), INTENT(IN) :: KN -REAL(KIND=JPRD), INTENT(OUT) :: PX(:),PW(:) - -! This routine returns a set of Gaussian nodes and weights for -! integrating the functions exp(lambda*x)dx over the range x=0 to x=infinity. -! They work for lambda in the range [1,501]. The accuracy -! of the quadrature for each n is given in the tables below. - -! Input arguments: -! n - number of weights and nodes in the quadrature. This must -! be an integer in the range [2,56]. -! -! Output arguments: -! w - weights -! x - nodes -! -! -! The following table gives the approximate accuracy of the weights in -! this file, that is to say the experimentally determined maximum -! absolute error for lambda in the range [1,501]. -! -! 2 0.76126E-01 -! 3 0.26903E-01 -! 4 0.88758E-02 -! 5 0.28110E-02 -! 6 0.86785E-03 -! 7 0.26276E-03 -! 8 0.78346E-04 -! 9 0.23066E-04 -! 10 0.67184E-05 -! 11 0.19386E-05 -! 12 0.55482E-06 -! 13 0.15762E-06 -! 14 0.44478E-07 -! 15 0.12474E-07 -! 16 0.34787E-08 -! 17 0.96498E-09 -! 18 0.26636E-09 -! 19 0.73174E-10 -! 20 0.20013E-10 -! 21 0.54503E-11 -! 22 0.14783E-11 -! 23 0.39937E-12 -! 24 0.10749E-12 -! 25 0.28822E-13 -! 26 0.77011E-14 -! 27 0.20993E-14 -! 28 0.59593E-15 - -! (The accuracies beyond this point are -! only available if this routine is converted -! to extended precision.) -! -! 29 0.16665E-15 -! 30 0.45938E-16 -! 31 0.12483E-16 -! 32 0.33436E-17 -! 33 0.88209E-18 -! 34 0.22896E-18 -! 35 0.58363E-19 -! 36 0.15182E-19 -! 37 0.45892E-20 -! 38 0.13452E-20 -! 39 0.38384E-21 -! 40 0.10683E-21 -! 41 0.29025E-22 -! 42 0.76955E-23 -! 43 0.19878E-23 -! 44 0.49867E-24 -! 45 0.12879E-24 -! 46 0.38890E-25 -! 47 0.11493E-25 -! 48 0.32717E-26 -! 49 0.89977E-27 -! 50 0.23916E-27 -! 51 0.66534E-28 -! 52 0.20256E-28 -! 53 0.60754E-29 -! 54 0.17974E-29 -! 55 0.52173E-30 -! 56 0.14656E-30 -! 57 0.39867E-31 -! 58 0.17622E-31 -! 59 0.11941E-31 - - -if(kn < 1 .or. kn > 59) CALL ABOR1('kn out of bounds in wts500') - -if(kn == 1) then - px( 1)= 0.30029234138173323099658823269124393D+00 - pw( 1)= 0.10474544159373900054024730385996879D+01 -endif -if(kn == 2) then - px( 1)= 0.44614645646035084305052271657195780D-01 - px( 2)= 0.69921614559509068409005059560416520D+00 - pw( 1)= 0.15994862626671497398269903651565162D+00 - pw( 2)= 0.15511944041990193294522419186360192D+01 -endif -if(kn == 3) then - px( 1)= 0.11857389353662594950547350532174902D-01 - px( 2)= 0.16764835416208964726306668598724940D+00 - px( 3)= 0.11277491807394385305149473243132366D+01 - pw( 1)= 0.40534466810113107834140226328650886D-01 - pw( 2)= 0.36261372044374320167772351965467234D+00 - pw( 3)= 0.19347454032003660753035080254268649D+01 -endif -if(kn == 4) then - px( 1)= 0.57654208655188821571537226422468374D-02 - px( 2)= 0.62523167781181198280172417136126064D-01 - px( 3)= 0.36533207087496350173593536437526512D+00 - px( 4)= 0.16157524591822212021884702173778156D+01 - pw( 1)= 0.17868545000488806425866630797084082D-01 - pw( 2)= 0.12328668633600752343455555544669733D+00 - pw( 3)= 0.58372673164630844216911861958288950D+00 - pw( 4)= 0.22766170591050845697618451516268360D+01 -endif -if(kn == 5) then - px( 1)= 0.38286655100887720869535305553503767D-02 - px( 2)= 0.32638654131646742439488396483219523D-01 - px( 3)= 0.15979343932440245421190120326583297D+00 - px( 4)= 0.62593598592475461862138219614406608D+00 - px( 5)= 0.21448850159686373839369069493121931D+01 - pw( 1)= 0.11006282598632254556602552524231582D-01 - pw( 2)= 0.57230982733825644334586234468135637D-01 - pw( 3)= 0.23302193704059872350599634911774233D+00 - pw( 4)= 0.80399187922220639155300759119683454D+00 - pw( 5)= 0.25817835060095957471682462945418890D+01 -endif -if(kn == 6) then - px( 1)= 0.29149725976167556773168063131622477D-02 - px( 2)= 0.21103262424016434877304140537976324D-01 - px( 3)= 0.87672665103786085283260733993906787D-01 - px( 4)= 0.30362191434277672991331660483079722D+00 - px( 5)= 0.93772122523975475283401768866415608D+00 - px( 6)= 0.27034204049910086432949276908848690D+01 - pw( 1)= 0.80264082196721958922253358145693741D-02 - pw( 2)= 0.33206285080799577093824435206894388D-01 - pw( 3)= 0.11527314673621704717905199327206330D+00 - pw( 4)= 0.35781407343251622864391524621893261D+00 - pw( 5)= 0.10155006971456147012687855907386742D+01 - pw( 6)= 0.28564602273790248848620772701419901D+01 -endif -if(kn == 7) then - px( 1)= 0.23756434216797693908355583003051663D-02 - px( 2)= 0.15535118461800569190547935165674497D-01 - px( 3)= 0.56551273947723240042834250291153011D-01 - px( 4)= 0.17450996559452745580564075650691432D+00 - px( 5)= 0.49112500667928528178358415821262749D+00 - px( 6)= 0.12919504684055881399873474932878924D+01 - px( 7)= 0.32851412356973227675206901468229149D+01 - pw( 1)= 0.63886696350675163666711135442919764D-02 - pw( 2)= 0.22442242541224196499797804610442189D-01 - pw( 3)= 0.67162457789171790971598253749543852D-01 - pw( 4)= 0.18766490984256656291382113580254403D+00 - pw( 5)= 0.48993342911988760795594155472540210D+00 - pw( 6)= 0.12160105557427987424854903052204337D+01 - pw( 7)= 0.31065668529411483170389922159304207D+01 -endif -if(kn == 8) then - px( 1)= 0.20132180607225834852983700686031058D-02 - px( 2)= 0.12364145427770238507453563351438370D-01 - px( 3)= 0.40779487004322342225816753965428875D-01 - px( 4)= 0.11391148589791378330792648499651137D+00 - px( 5)= 0.29412883145944878287611616309747638D+00 - px( 6)= 0.71837098644914450185415022533227351D+00 - px( 7)= 0.16816853747148161279323645594606599D+01 - px( 8)= 0.38855951611162649511701947653116899D+01 - pw( 1)= 0.53408830450397378295721475271713427D-02 - pw( 2)= 0.16787984180357441921393643943808306D-01 - pw( 3)= 0.44204970292771876400860637563463140D-01 - pw( 4)= 0.11185011190895920238718302559774708D+00 - pw( 5)= 0.27016191944753418682586773667091217D+00 - pw( 6)= 0.62450892495416087531923817566640810D+00 - pw( 7)= 0.14051940584215133597891405916743820D+01 - pw( 8)= 0.33364764999199772405176908767598180D+01 -endif -if(kn == 9) then - px( 1)= 0.17503557878075214519879036524833375D-02 - px( 2)= 0.10330054871723184754037951908660005D-01 - px( 3)= 0.31732206325445549848937295392421049D-01 - px( 4)= 0.81679121803401719155156637120993002D-01 - px( 5)= 0.19535573613157974971806765507983509D+00 - px( 6)= 0.44595223788074039354969352640100852D+00 - px( 7)= 0.98135220945386835905397405725519927D+00 - px( 8)= 0.21015170770020791654126380210416672D+01 - px( 9)= 0.45016099625796459085057556421321067D+01 - pw( 1)= 0.46046107862322611600702922635137510D-02 - pw( 2)= 0.13429769279555420282230890098322098D-01 - pw( 3)= 0.31849272894061216190385847667286999D-01 - pw( 4)= 0.73619249380405585299506449662883408D-01 - pw( 5)= 0.16535947795274881422091783969932427D+00 - pw( 6)= 0.35934417872243713158616629815331565D+00 - pw( 7)= 0.75859660458155596407004967675485659D+00 - pw( 8)= 0.15835611580085742332645675327785911D+01 - pw( 9)= 0.35495334121390922353050388191171761D+01 -endif -if(kn == 10) then - px( 1)= 0.15499542669794147311761132610985078D-02 - px( 2)= 0.89096688398509678030924958952986541D-02 - px( 3)= 0.26016129693043413683698040760210971D-01 - px( 4)= 0.62749208598031754292711006026615926D-01 - px( 5)= 0.14050353407849426047312501552279980D+00 - px( 6)= 0.30181414123622395598472969521213967D+00 - px( 7)= 0.62857185766887058395879604119303521D+00 - px( 8)= 0.12763299399934852402803633611287669D+01 - px( 9)= 0.25471652578226896546390447678170800D+01 - px(10)= 0.51308067782658204682801321291953140D+01 - pw( 1)= 0.40548253986991389304086648382669812D-02 - pw( 2)= 0.11236188731318700188319947717229244D-01 - pw( 3)= 0.24538592970816425402207400802497825D-01 - pw( 4)= 0.52343517158460444258926972420783323D-01 - pw( 5)= 0.10996558531241373841512937168184451D+00 - pw( 6)= 0.22576984249893556435278829920283551D+00 - pw( 7)= 0.45261772769497660240184928082408646D+00 - pw( 8)= 0.89046194530658792052229355508350808D+00 - pw( 9)= 0.17519181692872931385803490555209237D+01 - pw(10)= 0.37483184442175079452559119390314762D+01 -endif -if(kn == 11) then - px( 1)= 0.13916503869954620980214122938112018D-02 - px( 2)= 0.78558050039620362116422912802704097D-02 - px( 3)= 0.22120351963459063625179184464273817D-01 - px( 4)= 0.50713242013627782652925282235350304D-01 - px( 5)= 0.10742319887758113859279057980251631D+00 - px( 6)= 0.21869117548117680828718663672766937D+00 - px( 7)= 0.43336030838054496927092430378951163D+00 - px( 8)= 0.84018288532692476091948497009100848D+00 - px( 9)= 0.15999462842086233335871841089805130D+01 - px(10)= 0.30151891304900922172275251687511795D+01 - px(11)= 0.57713337136325357659044166408988553D+01 - pw( 1)= 0.36266510989460802397873164288690734D-02 - pw( 2)= 0.96950369134995745371692684658412310D-02 - pw( 3)= 0.19868559916434820960232503677049332D-01 - pw( 4)= 0.39531505533000821496522977944503206D-01 - pw( 5)= 0.78139977968766268496958310477317134D-01 - pw( 6)= 0.15225813478813020210433348166511355D+00 - pw( 7)= 0.29137456345793780649576074277273596D+00 - pw( 8)= 0.54807495314077461646456170049418256D+00 - pw( 9)= 0.10191215020827390969142637205874466D+01 - pw(10)= 0.19111370110068844260874568369820828D+01 - pw(11)= 0.39348541670555911675625708842098543D+01 -endif -if(kn == 12) then - px( 1)= 0.12632156319939695579215561432558385D-02 - px( 2)= 0.70387146180879286642550585568639377D-02 - px( 3)= 0.19302659916972801592515201957656444D-01 - px( 4)= 0.42551194105513329489354627472007290D-01 - px( 5)= 0.86085992365439625679560348707191417D-01 - px( 6)= 0.16725813918676372888922850902058409D+00 - px( 7)= 0.31700589361943171070304767229555238D+00 - px( 8)= 0.58951191619791589683106843254408061D+00 - px( 9)= 0.10788413480839543106858840025589018D+01 - px(10)= 0.19492439011651360999619060490611511D+01 - px(11)= 0.35027911449093405858739078699931569D+01 - px(12)= 0.64217203451802643126959421992550844D+01 - pw( 1)= 0.32827550127604954815335522044868924D-02 - pw( 2)= 0.85507540197432347733759971108613029D-02 - pw( 3)= 0.16691067543209908975186757261638663D-01 - pw( 4)= 0.31305443669027793014029811945580099D-01 - pw( 5)= 0.58567896813505682343320309424861089D-01 - pw( 6)= 0.10877780693120356209434163632243069D+00 - pw( 7)= 0.19948368504455875259042629374456601D+00 - pw( 8)= 0.36073967408442558747147434040847228D+00 - pw( 9)= 0.64434179114194185133722660716595021D+00 - pw(10)= 0.11440591869200469793282529105709381D+01 - pw(11)= 0.20620613323447639907885178297356148D+01 - pw(12)= 0.41107547758236817466309181956604953D+01 -endif -if(kn == 13) then - px( 1)= 0.11568070520917003084172899381406952D-02 - px( 2)= 0.63841031288591798815234066736007959D-02 - px( 3)= 0.17167711063873432612672836118321923D-01 - px( 4)= 0.36716019315047555758945584779212588D-01 - px( 5)= 0.71538434902470635056036514447143755D-01 - px( 6)= 0.13352689214829569833793677431883262D+00 - px( 7)= 0.24327491638178311179091055382048232D+00 - px( 8)= 0.43570388054574854249210127119178039D+00 - px( 9)= 0.76944422789933215499607649746035088D+00 - px(10)= 0.13426076062689412695687622546331410D+01 - px(11)= 0.23216397056754496795639041380757885D+01 - px(12)= 0.40076692644610800412009568941378368D+01 - px(13)= 0.70807767026725092855482684561044825D+01 - pw( 1)= 0.29999461201538405335426373656007144D-02 - pw( 2)= 0.76647148893919466896210631066410433D-02 - pw( 3)= 0.14413099397767107036014918126327008D-01 - pw( 4)= 0.25735254465096214146045248201904623D-01 - pw( 5)= 0.45838911095175772187879197848974379D-01 - pw( 6)= 0.81479609103017647524386799211775902D-01 - pw( 7)= 0.14368582127715272775954689518716653D+00 - pw( 8)= 0.25069362938769053221329919008840415D+00 - pw( 9)= 0.43269827884396605348016529885550754D+00 - pw(10)= 0.74044407474304235266031833325121847D+00 - pw(11)= 0.12650477516195139055095418091901788D+01 - pw(12)= 0.22054684388256755415929800672572855D+01 - pw(13)= 0.42773285665753897495906588616832298D+01 -endif -if(kn == 14) then - px( 1)= 0.10671394835726993643401190254501129D-02 - px( 2)= 0.58463219387999205854721615603871185D-02 - px( 3)= 0.15490001691602291418567227600617406D-01 - px( 4)= 0.32358969391919711170669944447127038D-01 - px( 5)= 0.61149820125492142314645657324953302D-01 - px( 6)= 0.11030358671474348741774495105493271D+00 - px( 7)= 0.19410127886489828338777312891458955D+00 - px( 8)= 0.33608975279087339935499749076365196D+00 - px( 9)= 0.57467416136431217184690034730315880D+00 - px(10)= 0.97213677138209888520957230352753264D+00 - px(11)= 0.16296229550658990298708599801796051D+01 - px(12)= 0.27148835813646440924906285656041458D+01 - px(13)= 0.45279078084008377593720415669623160D+01 - px(14)= 0.77475240501693856896302484250295344D+01 - pw( 1)= 0.27629693325443496023249306681806622D-02 - pw( 2)= 0.69561289060164080762372517703735452D-02 - pw( 3)= 0.12707947219219552524391040926686251D-01 - pw( 4)= 0.21789865957161394780641970526254899D-01 - pw( 5)= 0.37165450147146819315861454426090832D-01 - pw( 6)= 0.63467023298815675308212729382023282D-01 - pw( 7)= 0.10796235746711791223910582849233129D+00 - pw( 8)= 0.18227038265561301471910246064953376D+00 - pw( 9)= 0.30504176678107803021400309104091856D+00 - pw(10)= 0.50632019264974725562617387788020652D+00 - pw(11)= 0.83570187185256971962451509332190849D+00 - pw(12)= 0.13820370228408991881296397708221031D+01 - pw(13)= 0.23420578929692185520173441027903024D+01 - pw(14)= 0.44356505634271121964311698817472400D+01 -endif -if(kn == 15) then - px( 1)= 0.99051088742239546911827900108699496D-03 - px( 2)= 0.53956859621998690434579925957171424D-02 - px( 3)= 0.14133099949775560668841655685250135D-01 - px( 4)= 0.28986898223070841544449976160392634D-01 - px( 5)= 0.53435705913102094881195404152789995D-01 - px( 6)= 0.93645206162765297669714244403422362D-01 - px( 7)= 0.15985934779166946137952895657247365D+00 - px( 8)= 0.26857062154409215737173322640809941D+00 - px( 9)= 0.44599642616020670957476031778753703D+00 - px(10)= 0.73354905454082877060570463263769745D+00 - px(11)= 0.11964718339441153959477756602491571D+01 - px(12)= 0.19381471569679916567453627291281352D+01 - px(13)= 0.31270149297417650854875367303912466D+01 - px(14)= 0.50618963332858424622137075227057629D+01 - px(15)= 0.84211464685237105475663901928632362D+01 - pw( 1)= 0.25613355491269301472247025631698555D-02 - pw( 2)= 0.63749573647622919143443050089843640D-02 - pw( 3)= 0.11385424145275303734569902166128714D-01 - pw( 4)= 0.18885541036270664981042580089520502D-01 - pw( 5)= 0.31017529676997219516938356388920347D-01 - pw( 6)= 0.51073868255802230619626900228351940D-01 - pw( 7)= 0.84046732560351404937544837831163072D-01 - pw( 8)= 0.13765675735555740692176543168654955D+00 - pw( 9)= 0.22395807420699105358370420342497012D+00 - pw(10)= 0.36179555862328041144887460703035761D+00 - pw(11)= 0.58087464283264030346041192108355511D+00 - pw(12)= 0.92964960101952292137644303560667260D+00 - pw(13)= 0.14950837580230651438254975578030658D+01 - pw(14)= 0.24724522131384988410164432454754806D+01 - pw(15)= 0.45866150536692880851084073088444439D+01 -endif -if(kn == 16) then - px( 1)= 0.92424556053971804395203579030996377D-03 - px( 2)= 0.50119857035276309167085959684639920D-02 - px( 3)= 0.13010148344461010136325444162584406D-01 - px( 4)= 0.26298847872231801244580127340702382D-01 - px( 5)= 0.47514597563807100058998705367026609D-01 - px( 6)= 0.81270265524596050675960333331275375D-01 - px( 7)= 0.13512212854897371400148043770796492D+00 - px( 8)= 0.22099831669763609789452208029307415D+00 - px( 9)= 0.35743533198723356545740999174337465D+00 - px(10)= 0.57303934564748771037392604303226621D+00 - px(11)= 0.91178803183594107838418321994075593D+00 - px(12)= 0.14412990877209031034459072051939979D+01 - px(13)= 0.22665732295803028877227882175486339D+01 - px(14)= 0.35563219327028564804732957321586739D+01 - px(15)= 0.56082683107347449605789692452752341D+01 - px(16)= 0.91009557053769985645449386780285663D+01 - pw( 1)= 0.23875730074587121522484947794868875D-02 - pw( 2)= 0.58885836354644078094393546205524519D-02 - pw( 3)= 0.10329316404953667658388475423521547D-01 - pw( 4)= 0.16675469828365142363971545189694061D-01 - pw( 5)= 0.26509288186703737427234519675535818D-01 - pw( 6)= 0.42238486164148838677474311842225533D-01 - pw( 7)= 0.67416914217096425505948847545251978D-01 - pw( 8)= 0.10736855475828194921843388490206803D+00 - pw( 9)= 0.17018723045468088538746817095795499D+00 - pw(10)= 0.26821766647062034548841613907117539D+00 - pw(11)= 0.42033354906191972945041277677914052D+00 - pw(12)= 0.65579398374758854246146234684717478D+00 - pw(13)= 0.10219768938841077866955934226960186D+01 - pw(14)= 0.16043073953579345130225854215735434D+01 - pw(15)= 0.25972025694404090887006269510370113D+01 - pw(16)= 0.47309741605753804978373532621330190D+01 -endif -if(kn == 17) then - px( 1)= 0.86635871969021922697368620380719171D-03 - px( 2)= 0.46809392638055264395521402125297454D-02 - px( 3)= 0.12063365169996780691336553708488918D-01 - px( 4)= 0.24103101327380368111361062755741909D-01 - px( 5)= 0.42839783531484774994275135488102184D-01 - px( 6)= 0.71796215026314425758110076991702409D-01 - px( 7)= 0.11667729790867741765510249018437646D+00 - px( 8)= 0.18634666322578944021297170782701070D+00 - px( 9)= 0.29431002802504656436686613469103506D+00 - px(10)= 0.46097947485749393786168585286436753D+00 - px(11)= 0.71707474488613285211211458276745483D+00 - px(12)= 0.11087399814864640384073502346191564D+01 - px(13)= 0.17054769509601451753469079240400202D+01 - px(14)= 0.26134294141755383267430920003219774D+01 - px(15)= 0.40013054555328633047267069424482020D+01 - px(16)= 0.61658542528138849120122933226961818D+01 - px(17)= 0.97863651496368699835621922821544266D+01 - pw( 1)= 0.22362031087732985183223720565899613D-02 - pw( 2)= 0.54748197495181657019575138084577960D-02 - pw( 3)= 0.94654762542541339323585186529928706D-02 - pw( 4)= 0.14944978294023789536885689508849665D-01 - pw( 5)= 0.23104141337215046610391678093559219D-01 - pw( 6)= 0.35743858431643263735692324511510319D-01 - pw( 7)= 0.55471485911701630593769305151799996D-01 - pw( 8)= 0.86081253422553517325473099895533971D-01 - pw( 9)= 0.13319208512009976788628468120231006D+00 - pw(10)= 0.20518383340535621745997474262468218D+00 - pw(11)= 0.31457026902282458163459435604989060D+00 - pw(12)= 0.48013605826872593355323421318271994D+00 - pw(13)= 0.73064164224806746286555643417201840D+00 - pw(14)= 0.11124853152432080885402590631214998D+01 - pw(15)= 0.17098620915289443753971369096340138D+01 - pw(16)= 0.27167962114974631513284929060677873D+01 - pw(17)= 0.48693666605809874327613580765223186D+01 -endif -if(kn == 18) then - px( 1)= 0.81534546944719247652148851160350705D-03 - px( 2)= 0.43921383032957290852797835084086745D-02 - px( 3)= 0.11252831287113979876218879126262041D-01 - px( 4)= 0.22272809918423842477230910840641111D-01 - px( 5)= 0.39059190621733757694717358109890744D-01 - px( 6)= 0.64351345855435130240707418934923649D-01 - px( 7)= 0.10254178168021558618169365620644254D+00 - px( 8)= 0.16036912239056014178785088963204099D+00 - px( 9)= 0.24793085437822483127871211432445117D+00 - px(10)= 0.38020606680363380296594904808373420D+00 - px(11)= 0.57931198537370336370768123946111904D+00 - px(12)= 0.87782030094596229067122251201903469D+00 - px(13)= 0.13236886505899049407415999004449793D+01 - px(14)= 0.19878981731926587403713993880950166D+01 - px(15)= 0.29773742063276571130424960439239811D+01 - px(16)= 0.44606479473592453417809733684642453D+01 - px(17)= 0.67336454736569532214104241445939923D+01 - px(18)= 0.10476870213785278025831291196419849D+02 - pw( 1)= 0.21031106134613254745224559686561387D-02 - pw( 2)= 0.51180225478690087298803997939678307D-02 - pw( 3)= 0.87447370935970058836913883371899363D-02 - pw( 4)= 0.13556333730223126136121123231259769D-01 - pw( 5)= 0.20464410216338036789779126385561199D-01 - pw( 6)= 0.30840826466947410241261173555971053D-01 - pw( 7)= 0.46646725563835069845910497771424517D-01 - pw( 8)= 0.70665992632893967524146851106623791D-01 - pw( 9)= 0.10691711075454395961181073477729829D+00 - pw(10)= 0.16126547210903485792084905962898804D+00 - pw(11)= 0.24229509616610255685205188073761953D+00 - pw(12)= 0.36259234617456851620630123413473169D+00 - pw(13)= 0.54077318770758097024862643911836327D+00 - pw(14)= 0.80508517291294611337977692389747849D+00 - pw(15)= 0.12010568605667241321654935386099247D+01 - pw(16)= 0.18119190754724382445935517697030275D+01 - pw(17)= 0.28316640971192016310500153967949394D+01 - pw(18)= 0.50023398513295527439214736503709354D+01 -endif -if(kn == 19) then - px( 1)= 0.77004314038027538186473565824644713D-03 - px( 2)= 0.41377999330050882914315571254404182D-02 - px( 3)= 0.10550059721986121680218103475882631D-01 - px( 4)= 0.20721165151890931602860316370210977D-01 - px( 5)= 0.35938464219963308088025295472146865D-01 - px( 6)= 0.58366846240533488939212086566679739D-01 - px( 7)= 0.91445878837421954880767431790624735D-01 - px( 8)= 0.14039874091074934933241224785817280D+00 - px( 9)= 0.21294392165350590964714225395783962D+00 - px(10)= 0.32034449388214261059174416345147381D+00 - px(11)= 0.47894630556295055579310759106626288D+00 - px(12)= 0.71240272881825772752142518345001522D+00 - px(13)= 0.10548945187200170944433178271823931D+01 - px(14)= 0.15558853016268620673915640887496031D+01 - px(15)= 0.22875048294126069831169385958820912D+01 - px(16)= 0.33571879025245227476638520498261293D+01 - px(17)= 0.49331870217088227943005162552018204D+01 - px(18)= 0.73107657529555946558412435928344152D+01 - px(19)= 0.11172033293322081236666941378477642D+02 - pw( 1)= 0.19851404434292128161572603749012797D-02 - pw( 2)= 0.48068259850224485883558544189622861D-02 - pw( 3)= 0.81333622551403917938013382271885652D-02 - pw( 4)= 0.12418290368147926797125266347472059D-01 - pw( 5)= 0.18370520996399132266929646675963512D-01 - pw( 6)= 0.27051260748772011764068846294046263D-01 - pw( 7)= 0.39965808856363914121506612420685851D-01 - pw( 8)= 0.59209186306872358867005896800202963D-01 - pw( 9)= 0.87733163743117132114765866319084195D-01 - pw(10)= 0.12975590467993236932824761605587432D+00 - pw(11)= 0.19133746801777096221136436300603368D+00 - pw(12)= 0.28119521004597734924657796391935552D+00 - pw(13)= 0.41191451657988898367993353735036478D+00 - pw(14)= 0.60189235614006428503841831846326255D+00 - pw(15)= 0.87887424161089233540150542700936886D+00 - pw(16)= 0.12876310724220064351766502884115249D+01 - pw(17)= 0.19106555786053829247748848866287425D+01 - pw(18)= 0.29421880423760456779000496029582960D+01 - pw(19)= 0.51303663807899157605725316747503964D+01 -endif -if(kn == 20) then - px( 1)= 0.72953870581824875898426590899779881D-03 - px( 2)= 0.39119765013576279090594732249736674D-02 - px( 3)= 0.99341482544177994609830844666447236D-02 - px( 4)= 0.19386948343749964006247704258211447D-01 - px( 5)= 0.33316937960159820123580449168857215D-01 - px( 6)= 0.53460240758464289437871006955420897D-01 - px( 7)= 0.82550373662718413944945968267436420D-01 - px( 8)= 0.12470333365766989270462564119778831D+00 - px( 9)= 0.18593248929460522396945432673813683D+00 - px(10)= 0.27488824477709750979845021236505632D+00 - px(11)= 0.40393075219294011341163041535306531D+00 - px(12)= 0.59066371193107531104946391158049526D+00 - px(13)= 0.86011311520208939992657730367961144D+00 - px(14)= 0.12478475018108315971898483004517581D+01 - px(15)= 0.18045718085352853467882826664457563D+01 - px(16)= 0.26032962838705539076929338309800964D+01 - px(17)= 0.37517626775845063477806691210141684D+01 - px(18)= 0.54178931704948062041809621881307184D+01 - px(19)= 0.78964489442873309920085649476831128D+01 - px(20)= 0.11871472067202549660846380067126100D+02 - pw( 1)= 0.18798307004678227911126701301382902D-02 - pw( 2)= 0.45327579066213798867372241143837770D-02 - pw( 3)= 0.76074915984009747299662992379957293D-02 - pw( 4)= 0.11468617772837279819666929117999041D-01 - pw( 5)= 0.16675567273387276975224875991907841D-01 - pw( 6)= 0.24060338209931569028899923305997740D-01 - pw( 7)= 0.34797589593340192429611675508702300D-01 - pw( 8)= 0.50498483539096564292114787807163509D-01 - pw( 9)= 0.73383972617080340693094946803759227D-01 - pw(10)= 0.10656241858153477344295213679085888D+00 - pw(11)= 0.15442089967699606004539206241769081D+00 - pw(12)= 0.22316517325789412150938929616587900D+00 - pw(13)= 0.32158746804288047814456613183637546D+00 - pw(14)= 0.46221802166864090447194287669501892D+00 - pw(15)= 0.66320650916245908698703462902787156D+00 - pw(16)= 0.95182294118921990028099509733606027D+00 - pw(17)= 0.13721884223731472188837264029436159D+01 - pw(18)= 0.20062479833990429444286070644467212D+01 - pw(19)= 0.30487071357319501782576369389563967D+01 - pw(20)= 0.52538573669990067214113994100342659D+01 -endif -if(kn == 21) then - px( 1)= 0.69310486169589490054767779485652272D-03 - px( 2)= 0.37100374076510691098395748526914731D-02 - px( 3)= 0.93893881986074420164580675137225674D-02 - px( 4)= 0.18225818290917406339248365551994452D-01 - px( 5)= 0.31081498488793808069000194256763116D-01 - px( 6)= 0.49367420729419602783748969736699563D-01 - px( 7)= 0.75284896750519291461000850881359128D-01 - px( 8)= 0.11212409497044480168252045403791321D+00 - px( 9)= 0.16464732212808249809258249926002432D+00 - px(10)= 0.23962225781352520103022311097593359D+00 - px(11)= 0.34658518012264822513642471749928831D+00 - px(12)= 0.49892252217405192191396093341852149D+00 - px(13)= 0.71538674271382417280638826801508549D+00 - px(14)= 0.10222215706608203725632212510312547D+01 - px(15)= 0.14561846864798943048112298539958500D+01 - px(16)= 0.20689966860645877444413948503855558D+01 - px(17)= 0.29343325491322992822962883455667422D+01 - px(18)= 0.41600923418527605289681744860608923D+01 - px(19)= 0.59138510177367345119623187111536027D+01 - px(20)= 0.84900211059998571872908583995965399D+01 - px(21)= 0.12574850278864023350809778583936857D+02 - pw( 1)= 0.17852307235103793959959450796716884D-02 - pw( 2)= 0.42893635684453385047417019848134615D-02 - pw( 3)= 0.71497939239830042802199211798008382D-02 - pw( 4)= 0.10663735418026519276123538929713931D-01 - pw( 5)= 0.15278738558420813530874306732239620D-01 - pw( 6)= 0.21655078725685226823011775486802689D-01 - pw( 7)= 0.30722016510142553890174563226245394D-01 - pw( 8)= 0.43741403514742541630405757195910036D-01 - pw( 9)= 0.62420477800170703110133642765679935D-01 - pw(10)= 0.89101011801730193914410815797468243D-01 - pw(11)= 0.12703103910528559102170033988553577D+00 - pw(12)= 0.18073427743399167030424190804804963D+00 - pw(13)= 0.25651888703698665673377349665161239D+00 - pw(14)= 0.36320523524230891449268691236436560D+00 - pw(15)= 0.51323006313409951821184870857512202D+00 - pw(16)= 0.72448352105440485035350350605592673D+00 - pw(17)= 0.10237957354057707173113819962280747D+01 - pw(18)= 0.14547382360170350806458262214201030D+01 - pw(19)= 0.20988676887781728328899280598296916D+01 - pw(20)= 0.31515233601433750723678379202457850D+01 - pw(21)= 0.53731727475008532237843822389416189D+01 -endif -if(kn == 22) then - px( 1)= 0.66015487793114579672026064530843580D-03 - px( 2)= 0.35283188763257655749934705032913356D-02 - px( 3)= 0.89037268484852140335335192775132156D-02 - px( 4)= 0.17204887313904086178312640698870646D-01 - px( 5)= 0.29150571838447577747778236163957035D-01 - px( 6)= 0.45901615793714000736387715594203021D-01 - px( 7)= 0.69252127065257752538232461849428663D-01 - px( 8)= 0.10186530460793644200891559033979032D+00 - px( 9)= 0.14756646191534939234956638050262194D+00 - px(10)= 0.21173627302990231446602872154763540D+00 - px(11)= 0.30186392834135377102923595739844832D+00 - px(12)= 0.42832456502483048109845734510065518D+00 - px(13)= 0.60545815580026546162531862085612489D+00 - px(14)= 0.85305900599439063193443527117705937D+00 - px(15)= 0.11984444149249300732981072917233551D+01 - px(16)= 0.16793849380767164321266153811733636D+01 - px(17)= 0.23484259421990250059871794075320909D+01 - px(18)= 0.32797346839031417791643463913803933D+01 - px(19)= 0.45812624093642886254220734381797637D+01 - px(20)= 0.64202435545325521457505355440378480D+01 - px(21)= 0.90908861089411744472574137099194608D+01 - px(22)= 0.13281870386631548366752315803495071D+02 - pw( 1)= 0.16997739815079876291270142303147830D-02 - pw( 2)= 0.40716314869569174969368080298995427D-02 - pw( 3)= 0.67473810622389953786984914722964259D-02 - pw( 4)= 0.99723772588960829502246883384624672D-02 - pw( 5)= 0.14109253034463888804804922921072868D-01 - pw( 6)= 0.19687945390172770224722572299963933D-01 - pw( 7)= 0.27452171417575159428955066586690985D-01 - pw( 8)= 0.38405330139190366155267988240486796D-01 - pw( 9)= 0.53884692766803369761362767720533598D-01 - pw(10)= 0.75690346610129349832179626694927492D-01 - pw(11)= 0.10627642236395592176596973365055862D+00 - pw(12)= 0.14901040431853077087285757833822327D+00 - pw(13)= 0.20852197820697925399881980495903384D+00 - pw(14)= 0.29118491216948479495401093995994782D+00 - pw(15)= 0.40581134430662519733362245846705041D+00 - pw(16)= 0.56471875154845478179304492458155087D+00 - pw(17)= 0.78553695828755046999062010778990119D+00 - pw(18)= 0.10946963582465286266450172551190923D+01 - pw(19)= 0.15353099122697920414083249854348604D+01 - pw(20)= 0.21886787280255501097880613119022761D+01 - pw(21)= 0.32509064591655802312953073550183727D+01 - pw(22)= 0.54886295320492726768301302193581765D+01 -endif -if(kn == 23) then - px( 1)= 0.63021006363433841831474149369938629D-03 - px( 2)= 0.33638809698722088589479529444503357D-02 - px( 3)= 0.84677484260852652320615476282127309D-02 - px( 4)= 0.16299243886097943905725295278638059D-01 - px( 5)= 0.27464030423556247673984741054128575D-01 - px( 6)= 0.42927866788160825736932098072471507D-01 - px( 7)= 0.64169139230662564082303838471065529D-01 - px( 8)= 0.93367767657860048911427490092659179D-01 - px( 9)= 0.13363456957733254533882679272072078D+00 - px(10)= 0.18930799757692553449088970134122038D+00 - px(11)= 0.26636229253087365091814709204751323D+00 - px(12)= 0.37297580801326442775998852633706569D+00 - px(13)= 0.52031366792206514283148817023645741D+00 - px(14)= 0.72359600291057382682653618452186879D+00 - px(15)= 0.10035561979187412137041345167147008D+01 - px(16)= 0.13884527722049286720427614547615987D+01 - px(17)= 0.19169141925914051220031027488430615D+01 - px(18)= 0.26421501677657864516298531009004741D+01 - px(19)= 0.36386833380196488331409942198293287D+01 - px(20)= 0.50144408008467284169777642211293427D+01 - px(21)= 0.69363388633219643316723260573935269D+01 - px(22)= 0.96985139442985089392873665294609340D+01 - px(23)= 0.13992267642385099650757505185917464D+02 - pw( 1)= 0.16221873306891233611184498841470861D-02 - pw( 2)= 0.38756062368028647892929037543456805D-02 - pw( 3)= 0.63904681228155448198104664799116312D-02 - pw( 4)= 0.93716130533642383510190970859726025D-02 - pw( 5)= 0.13116359133127231514779092120465823D-01 - pw( 6)= 0.18054564644654175010476006376716577D-01 - pw( 7)= 0.24787545528949442975874855915817576D-01 - pw( 8)= 0.34123347100699979880664417166869481D-01 - pw( 9)= 0.47126644724531756094751076593005203D-01 - pw(10)= 0.65206569654682359265380728357008572D-01 - pw(11)= 0.90252198579742822099518198813744744D-01 - pw(12)= 0.12481812207643272140418893825548652D+00 - pw(13)= 0.17237035227520038668196439463328275D+00 - pw(14)= 0.23761696881618806722148067227743494D+00 - pw(15)= 0.32696691109252817236452507243569941D+00 - pw(16)= 0.44919654054160466696484020754729982D+00 - pw(17)= 0.61648810596906235080169489216474494D+00 - pw(18)= 0.84621818814094735945360350883771170D+00 - pw(19)= 0.11644590806092632658493343351752018D+01 - pw(20)= 0.16139465349437736570093587472680170D+01 - pw(21)= 0.22758365152455412423280162288521126D+01 - pw(22)= 0.33470981208584956190993329351906399D+01 - pw(23)= 0.56005084488693785143169408072395655D+01 -endif -if(kn == 24) then - px( 1)= 0.60287590220675965883655392550708199D-03 - px( 2)= 0.32143351024974124786544026559297415D-02 - px( 3)= 0.80739802149419819753399469823810803D-02 - px( 4)= 0.15489662347070758196056101724102825D-01 - px( 5)= 0.25976667055963881164895151817795199D-01 - px( 6)= 0.40346714011175627204924600258024553D-01 - px( 7)= 0.59830440001259694739569609351980817D-01 - px( 8)= 0.86230308264184025703796753562250373D-01 - px( 9)= 0.12210371973165121163051429982451776D+00 - px(10)= 0.17099203191550984584070639962768156D+00 - px(11)= 0.23772719534691312495205955287685848D+00 - px(12)= 0.32885366959546582684337055007155510D+00 - px(13)= 0.45320556860130100192674333119232671D+00 - px(14)= 0.62268761188094067470211791701931515D+00 - px(15)= 0.85332750333845602641945072572045710D+00 - px(16)= 0.11667004858019451293051164929872329D+01 - px(17)= 0.15918861333740258915421115101398109D+01 - px(18)= 0.21682356132478165458928387387641436D+01 - px(19)= 0.29494889123245918407695084553095216D+01 - px(20)= 0.40104161879498382932676206134824540D+01 - px(21)= 0.54588693247348625992863433295378994D+01 - px(22)= 0.74614789122023638202247166496000371D+01 - px(23)= 0.10312431148373425365575271866282924D+02 - px(24)= 0.14705805275028081231659123827294642D+02 - pw( 1)= 0.15514249847644583248110080072115634D-02 - pw( 2)= 0.36981205611282502501592938773909080D-02 - pw( 3)= 0.60714888966684178001398442739385021D-02 - pw( 4)= 0.88442862964874279166007983229029862D-02 - pw( 5)= 0.12262956109957475661400953934190561D-01 - pw( 6)= 0.16679687959122678642462992762851947D-01 - pw( 7)= 0.22585160392521754760445705422367202D-01 - pw( 8)= 0.30637073979795749450832614038637313D-01 - pw( 9)= 0.41694955770756579068288981525602823D-01 - pw(10)= 0.56880053645721037701497821632682136D-01 - pw(11)= 0.77671508906845583575064408093070968D-01 - pw(12)= 0.10604093207274572585305524523636662D+00 - pw(13)= 0.14462967696642788409146183249874326D+00 - pw(14)= 0.19698231180562617603349065754159954D+00 - pw(15)= 0.26786134570104304270817950330971854D+00 - pw(16)= 0.36368625547483729027913381140943346D+00 - pw(17)= 0.49317740444788268405884668163369343D+00 - pw(18)= 0.66837334895456593354822252636195142D+00 - pw(19)= 0.90640972513509685558875644594827397D+00 - pw(20)= 0.12330418540439459719440490731353612D+01 - pw(21)= 0.16907002256383886184087957665579435D+01 - pw(22)= 0.23604873140910405927871490349730829D+01 - pw(23)= 0.34403155641323731993571442081042100D+01 - pw(24)= 0.57090593461669661084141390492124683D+01 -endif -if(kn == 25) then - px( 1)= 0.57782426626576571901902524785967824D-03 - px( 2)= 0.30777191085612843075800020147563616D-02 - px( 3)= 0.77164086160794063370485387996841129D-02 - px( 4)= 0.14761056704626219657716466804157254D-01 - px( 5)= 0.24653875400014389680962462419049193D-01 - px( 6)= 0.38083523913823039614894844760656273D-01 - px( 7)= 0.56084067240729530165951958613013393D-01 - px( 8)= 0.80159727346580499135438064461924847D-01 - px( 9)= 0.11243339120217788148662085401543468D+00 - px(10)= 0.15582715928119113928223955420169306D+00 - px(11)= 0.21429698577498381337290048538797344D+00 - px(12)= 0.29315054947238081211765761052065623D+00 - px(13)= 0.39947904356246012931942329550682875D+00 - px(14)= 0.54273750890918728697108592749256418D+00 - px(15)= 0.73551917539468211697041027264605642D+00 - px(16)= 0.99458876650351865510403034983347981D+00 - px(17)= 0.13422725867611823095435983972364716D+01 - px(18)= 0.18083631354696402035562158086478313D+01 - px(19)= 0.24328170422327489041615980739248420D+01 - px(20)= 0.32697931199298648820663632485790012D+01 - px(21)= 0.43942247576362161609644700980578340D+01 - px(22)= 0.59138559751958628054368604601307713D+01 - px(23)= 0.79950700685028878658475844781145359D+01 - px(24)= 0.10932212901976999668719864628273880D+02 - px(25)= 0.15422270538187376197239585881075748D+02 - pw( 1)= 0.14866196466384881027031457863846341D-02 - pw( 2)= 0.35366057944120495831689126436344645D-02 - pw( 3)= 0.57844969402891304739165399375745324D-02 - pw( 4)= 0.83773253924994338826352544649181741D-02 - pw( 5)= 0.11521432119169876731207250699755894D-01 - pw( 6)= 0.15508119705558278186014992003540867D-01 - pw( 7)= 0.20741213416907048628693122469214068D-01 - pw( 8)= 0.27760968610727861118254402368893756D-01 - pw( 9)= 0.37269537287121025856202203197461521D-01 - pw(10)= 0.50172098341486540969354039874345299D-01 - pw(11)= 0.67645050740466106776500656782887390D-01 - pw(12)= 0.91234375023821592290664320838692913D-01 - pw(13)= 0.12298569754451901426257783074280505D+00 - pw(14)= 0.16561310646694880188580709432419945D+00 - pw(15)= 0.22272153335996509389159575477724314D+00 - pw(16)= 0.29910756989135904284632341805878551D+00 - pw(17)= 0.40118169825940023204127855629976398D+00 - pw(18)= 0.53759403804464314015326297165302822D+00 - pw(19)= 0.72023661788993173698912869326221741D+00 - pw(20)= 0.96601967120274407325842915516032939D+00 - pw(21)= 0.13004209336797472935438387369012951D+01 - pw(22)= 0.17656287674999046987390122872170472D+01 - pw(23)= 0.24427681629137658222037012630588612D+01 - pw(24)= 0.35307546103133160002458888441211917D+01 - pw(25)= 0.58145056190411423438373377369723833D+01 -endif -if(kn == 26) then - px( 1)= 0.55477997323631075943068059436768679D-03 - px( 2)= 0.29524051251057578595813790584093855D-02 - px( 3)= 0.73901342939831943165981049698675567D-02 - px( 4)= 0.14101412872584234971190262785177463D-01 - px( 5)= 0.23468728331528566192654626405819210D-01 - px( 6)= 0.36081354457771003437927577005080307D-01 - px( 7)= 0.52815777662090440120493976780463113D-01 - px( 8)= 0.74938112557816762151465822385814066D-01 - px( 9)= 0.10422602290395549848995179022541619D+00 - px(10)= 0.14311379843177524483131355370021475D+00 - px(11)= 0.19487526595566569453971371848061316D+00 - px(12)= 0.26386679946717607062454498658707156D+00 - px(13)= 0.35585463477981546153201478673425350D+00 - px(14)= 0.47845228003668789580973663677048048D+00 - px(15)= 0.64169971258466993649179145930192297D+00 - px(16)= 0.85882779202859317392945336605019422D+00 - px(17)= 0.11472706757497270472184438297710363D+01 - px(18)= 0.15300218181930435465993945481075142D+01 - px(19)= 0.20374900555366768284567833572011801D+01 - px(20)= 0.27101363667403762213767958798451042D+01 - px(21)= 0.36024461925895008266595796162422643D+01 - px(22)= 0.47894509506391986224214442346207227D+01 - px(23)= 0.63787680281602106661197309343576824D+01 - px(24)= 0.85365750410497181159653468391328011D+01 - px(25)= 0.11557476465733555694328533876083768D+02 - px(26)= 0.16141471440934226918800792463390524D+02 - pw( 1)= 0.14270457888511795867992646035078330D-02 - pw( 2)= 0.33889549852374609060800978480342579D-02 - pw( 3)= 0.55247507422590039192859514393700253D-02 - pw( 4)= 0.79606058991690688534072920870913751D-02 - pw( 5)= 0.10870893529433384055135393826453360D-01 - pw( 6)= 0.14498723203402237674680094273702632D-01 - pw( 7)= 0.19179116372035461554716799339102955D-01 - pw( 8)= 0.25359449796805531683017831774564858D-01 - pw( 9)= 0.33619093306389351199382063563600456D-01 - pw(10)= 0.44698102947503307879411438578940957D-01 - pw(11)= 0.59545498569788992362177569759542076D-01 - pw(12)= 0.79391390756669405807121619168425508D-01 - pw(13)= 0.10584364527942384492476511067473085D+00 - pw(14)= 0.14101237929374191599558996117239049D+00 - pw(15)= 0.18767097313836850144218059320809785D+00 - pw(16)= 0.24946862930962964921754344345139933D+00 - pw(17)= 0.33121906265157831801486301115689293D+00 - pw(18)= 0.43930860961154203611926221969681638D+00 - pw(19)= 0.58230770087411222173176311911927048D+00 - pw(20)= 0.77196313761963428481446391672957021D+00 - pw(21)= 0.10249770981257475300463739775919616D+01 - pw(22)= 0.13665866627564720110102420582560601D+01 - pw(23)= 0.18387931590238615303343058387625137D+01 - pw(24)= 0.25228070818980592228138174960180006D+01 - pw(25)= 0.36185923153701902774641344711352413D+01 - pw(26)= 0.59170478659334870019465678075549678D+01 -endif -if(kn == 27) then - px( 1)= 0.53351049035404632679792315747760926D-03 - px( 2)= 0.28370305792687798355464055618059207D-02 - px( 3)= 0.70911216690725917919709661777310467D-02 - px( 4)= 0.13501035871356777604646936827660401D-01 - px( 5)= 0.22399961695605187088994736400122990D-01 - px( 6)= 0.34296092152256244946377676950669096D-01 - px( 7)= 0.49938369643043464349412733250120371D-01 - px( 8)= 0.70401007218931052290887962696404315D-01 - px( 9)= 0.97184515439527403402719960641852663D-01 - px(10)= 0.13233419679650342626453438780924062D+00 - px(11)= 0.17858535768221430523992541052027664D+00 - px(12)= 0.23955186967002279067082055689473205D+00 - px(13)= 0.31997738343786115574032202180084953D+00 - px(14)= 0.42606917808870858677237819912563285D+00 - px(15)= 0.56593757336952698275709554352322422D+00 - px(16)= 0.75017087050170008688238981674255895D+00 - px(17)= 0.99258767627203407312800418206037430D+00 - px(18)= 0.13112276119862221456283706350994460D+01 - px(19)= 0.17296743945926671138206419268165977D+01 - px(20)= 0.22788674435500397192690477951071238D+01 - px(21)= 0.29996852862458953100640794763905730D+01 - px(22)= 0.39468640965897806748062604397538661D+01 - px(23)= 0.51954835049739316659023548298272390D+01 - px(24)= 0.68530258860308177417175245026171396D+01 - px(25)= 0.90855060116903171155308252608094055D+01 - px(26)= 0.12187875689542501045614051917426613D+02 - px(27)= 0.16863234023420842677139640506273211D+02 - pw( 1)= 0.13720916834443970883055143999864006D-02 - pw( 2)= 0.32534222826312859680330393960878445D-02 - pw( 3)= 0.52884203958454954233826913886323010D-02 - pw( 4)= 0.75861682815905379946822876423968075D-02 - pw( 5)= 0.10295285639112777079908697606376495D-01 - pw( 6)= 0.13620382459545697869019461721069283D-01 - pw( 7)= 0.17841523884178755603525268045244929D-01 - pw( 8)= 0.23331884086195134219312146516778989D-01 - pw( 9)= 0.30573638051336305998992920739412241D-01 - pw(10)= 0.40178513173563771124515253029295068D-01 - pw(11)= 0.52921980029762362141777841892525060D-01 - pw(12)= 0.69796141123370781915050562748149785D-01 - pw(13)= 0.92082178946617789793924526199756695D-01 - pw(14)= 0.12144364501581543284664137255263233D+00 - pw(15)= 0.16004546398125829773973180676428128D+00 - pw(16)= 0.21070793305215927164839501305159383D+00 - pw(17)= 0.27711057981819535787363684480367458D+00 - pw(18)= 0.36407033983503565406434335329539225D+00 - pw(19)= 0.47793795110360227457939282572737921D+00 - pw(20)= 0.62719851165950099532441318332470642D+00 - pw(21)= 0.82345785449976155240467438227528273D+00 - pw(22)= 0.10832282284259041619866566987699138D+01 - pw(23)= 0.14315401678117573067846471990768551D+01 - pw(24)= 0.19102558501900861570470238950653852D+01 - pw(25)= 0.26007234476536547511161089599236255D+01 - pw(26)= 0.37039892290540711004993184577181103D+01 - pw(27)= 0.60168669305235587630364842700538762D+01 -endif -if(kn == 28) then - px( 1)= 0.51381795837439443363513212680713947D-03 - px( 2)= 0.27304457410704199858650659828620367D-02 - px( 3)= 0.68160137769119765426643490207146340D-02 - px( 4)= 0.12952008943105054557760398423212956D-01 - px( 5)= 0.21430556623995591107026232001066913D-01 - px( 6)= 0.32693077435416129189765100617149847D-01 - px( 7)= 0.47384339361452651219408943088094832D-01 - px( 8)= 0.66422538658869906464895332297645726D-01 - px( 9)= 0.91083609265701992171879031038032271D-01 - px(10)= 0.12309931515680468843361103443044334D+00 - px(11)= 0.16477454361350713713658223760718724D+00 - px(12)= 0.21913585560877712250760133646822826D+00 - px(13)= 0.29012664768975497883815066595179815D+00 - px(14)= 0.38286487674840579443374001038670456D+00 - px(15)= 0.50398058179729621330814443966676445D+00 - px(16)= 0.66205451410726746024107068348273274D+00 - px(17)= 0.86818664253955900521872813145997494D+00 - px(18)= 0.11367350670636899400604869200366967D+01 - px(19)= 0.14862849200104679887431662518488448D+01 - px(20)= 0.19409402276831122951996033127610769D+01 - px(21)= 0.25320952507423009788362491203678215D+01 - px(22)= 0.33009718613493033820591483341014688D+01 - px(23)= 0.43024948141012255149771188379035923D+01 - px(24)= 0.56117545045133971321428621533897157D+01 - px(25)= 0.73360976078470116280524716610880778D+01 - px(26)= 0.96414187592042593009284671180300365D+01 - px(27)= 0.12823096392169689557014539066503465D+02 - px(28)= 0.17587400071042305461725873529323834D+02 - pw( 1)= 0.13212378315760757100801294124815478D-02 - pw( 2)= 0.31285477801160114283501913408130908D-02 - pw( 3)= 0.50723762615025144279692888527822532D-02 - pw( 4)= 0.72476698125814896247817149986171532D-02 - pw( 5)= 0.97820955598317266921336611328577874D-02 - pw( 6)= 0.12849233584715652412922486858932270D-01 - pw( 7)= 0.16684914458309663575212478712400069D-01 - pw( 8)= 0.21602517467719514930691067702478109D-01 - pw( 9)= 0.28006326728639319667000527469785398D-01 - pw(10)= 0.36406794940307839233444412699072219D-01 - pw(11)= 0.47444873633013210207704765440613480D-01 - pw(12)= 0.61930727493054842346818886062879310D-01 - pw(13)= 0.80898273560459582900261577117872215D-01 - pw(14)= 0.10567594748292560400210313567244803D+00 - pw(15)= 0.13797620661979101239766714101470269D+00 - pw(16)= 0.18000947429659496345933986602370689D+00 - pw(17)= 0.23463186153608726981379368239281040D+00 - pw(18)= 0.30554132794139847744947603213040709D+00 - pw(19)= 0.39754682244155206328794798554812257D+00 - pw(20)= 0.51695511158335980560554454196568031D+00 - pw(21)= 0.67216328370835447637864341433166332D+00 - pw(22)= 0.87464250694849374310652866430377750D+00 - pw(23)= 0.11407332853853718344895028360165344D+01 - pw(24)= 0.14952907674277354153396565164371127D+01 - pw(25)= 0.19800794802566744638917545875047599D+01 - pw(26)= 0.26766284602650483548086237399782999D+01 - pw(27)= 0.37870913379223214170502998382833988D+01 - pw(28)= 0.61141264493435815963177300026057897D+01 -endif -if(kn == 29) then - px( 1)= 0.49553294523216557706121043094899937D-03 - px( 2)= 0.26316733478032430241054271443133571D-02 - px( 3)= 0.65619932921719095950945647167842097D-02 - px( 4)= 0.12447798146998465679635850497317547D-01 - px( 5)= 0.20546725218853503579585565380355127D-01 - px( 6)= 0.31244723477287375693064121565353181D-01 - px( 7)= 0.45100744250336127000529420647899915D-01 - px( 8)= 0.62905104337181109718071876082818174D-01 - px( 9)= 0.85750236217882191338049895922209411D-01 - px(10)= 0.11511276616778913843095284495474961D+00 - px(11)= 0.15294975780371902992191410154541288D+00 - px(12)= 0.20181754709239365590998602589064153D+00 - px(13)= 0.26502522465295890912360438565915637D+00 - px(14)= 0.34683572458174845871862351357320832D+00 - px(15)= 0.45272795494425557401215215391241853D+00 - px(16)= 0.58973560996706034541847942850272405D+00 - px(17)= 0.76688297098562182278932098742313991D+00 - px(18)= 0.99574549679776398032257261690761656D+00 - px(19)= 0.12911746195797983348858102746352939D+01 - px(20)= 0.16722452518287396697078251913438203D+01 - px(21)= 0.21635184617418031866723236843209620D+01 - px(22)= 0.27967767483991669703409180904294707D+01 - px(23)= 0.36135221397956750546277975652240522D+01 - px(24)= 0.46688173594398401975825823496477303D+01 - px(25)= 0.60377360285854709978780788792051344D+01 - px(26)= 0.78274940573494599108741305430804683D+01 - px(27)= 0.10203907613754336363839677845176062D+02 - px(28)= 0.13462852450631673420328416214301979D+02 - px(29)= 0.18313825184349771602172383716912527D+02 - pw( 1)= 0.12740401404642070584625642978316674D-02 - pw( 2)= 0.30131005567070877653374960845857871D-02 - pw( 3)= 0.48740340828768558996439695359657849D-02 - pw( 4)= 0.69399937174499322748248258405693356D-02 - pw( 5)= 0.93214421069590292260830707831625273D-02 - pw( 6)= 0.12166736320428934613437189621950636D-01 - pw( 7)= 0.15675841191975760517355581582896129D-01 - pw( 8)= 0.20113588991116143831392394792507242D-01 - pw( 9)= 0.25821196242227502811986031677074846D-01 - pw(10)= 0.33228090412405890048263783552877789D-01 - pw(11)= 0.42869421776728397919368187941044920D-01 - pw(12)= 0.55414351585478714766063918835679369D-01 - pw(13)= 0.71707159949828379722440698604318357D-01 - pw(14)= 0.92821391683596645191185627202531776D-01 - pw(15)= 0.12012815275736363503642593112420207D+00 - pw(16)= 0.15538195214533728663259981412132171D+00 - pw(17)= 0.20083006704520997993532092391882379D+00 - pw(18)= 0.25935464058941676571399243604292685D+00 - pw(19)= 0.33466206352796625147483719389400201D+00 - pw(20)= 0.43154442816926686793937626854331538D+00 - pw(21)= 0.55625869020489676787739935371459343D+00 - pw(22)= 0.71711352998990444136642620781136560D+00 - pw(23)= 0.92545309501307835297306650109719589D+00 - pw(24)= 0.11974638996261713170731294345952752D+01 - pw(25)= 0.15578539402639460936307650070163272D+01 - pw(26)= 0.20483259849497670771611527406588667D+01 - pw(27)= 0.27506256539950821112799212513803514D+01 - pw(28)= 0.38680317406459942528573299847997115D+01 - pw(29)= 0.62089749987608497692655462325987093D+01 -endif -if(kn == 30) then - px( 1)= 0.47850950616663001797979108558563715D-03 - px( 2)= 0.25398771556977649765987433184628366D-02 - px( 3)= 0.63266767276005636636918971079049107D-02 - px( 4)= 0.11982958698830863481679603697851263D-01 - px( 5)= 0.19737172990592845187334346934200923D-01 - px( 6)= 0.29928809539955562795388150514477224D-01 - px( 7)= 0.43045554614365273736449489448924963D-01 - px( 8)= 0.59772102008795894644542468648119854D-01 - px( 9)= 0.81049794509547598102960610576252333D-01 - px(10)= 0.10814588049834038786751975710018494D+00 - px(11)= 0.14273357759635461536908362003919156D+00 - px(12)= 0.18698857852398412902900671067259807D+00 - px(13)= 0.24371127323293364985980734512310093D+00 - px(14)= 0.31648527240822183351218895583590417D+00 - px(15)= 0.40988303161683259239808922937843934D+00 - px(16)= 0.52973043830387724896707606257507982D+00 - px(17)= 0.68344507067026327204085106743008657D+00 - px(18)= 0.88046769972296521320190133680773247D+00 - px(19)= 0.11328140045719997686900182451437385D+01 - px(20)= 0.14557849987988940136735253178131959D+01 - px(21)= 0.18688939211845819607060281336643708D+01 - px(22)= 0.23971019483057447245002052718973892D+01 - px(23)= 0.30725214791534315669485745292475362D+01 - px(24)= 0.39368810885201871521056281051541589D+01 - px(25)= 0.50453405183264296277555982679398449D+01 - px(26)= 0.64729369863961542161639397127927023D+01 - px(27)= 0.83267646024511909496921957165518850D+01 - px(28)= 0.10772601108679545696397581453580679D+02 - px(29)= 0.14106882472397927743465848536293191D+02 - px(30)= 0.19042377139722396576925840568797290D+02 - pw( 1)= 0.12301166672602858132749403305585138D-02 - pw( 2)= 0.29060349388917145213464128010319648D-02 - pw( 3)= 0.46912396989831005493100286944698875D-02 - pw( 4)= 0.66589658633863613568668981891460303D-02 - pw( 5)= 0.89054272701749525193189873717149483D-02 - pw( 6)= 0.11558310934603853176421212815818196D-01 - pw( 7)= 0.14788296604172157875123267577237353D-01 - pw( 8)= 0.18820536178193913710320280585533146D-01 - pw( 9)= 0.23944736371120032144351948458331453D-01 - pw(10)= 0.30524720054213913983988370110313231D-01 - pw(11)= 0.39011278230203159299747340391501401D-01 - pw(12)= 0.49962838489107032100208706917355628D-01 - pw(13)= 0.64076419786867156599743093793566683D-01 - pw(14)= 0.82229278957901320956487642041730955D-01 - pw(15)= 0.10553162871986467114052832148689872D+00 - pw(16)= 0.13539231869402525697097331669289294D+00 - pw(17)= 0.17360130147051396517985693386136263D+00 - pw(18)= 0.22243486135302704942601690670544171D+00 - pw(19)= 0.28479267765730766074850970825016903D+00 - pw(20)= 0.36438127580923131688892141299329716D+00 - pw(21)= 0.46596902265107476258412993277856600D+00 - pw(22)= 0.59575928418635779724375261556025579D+00 - pw(23)= 0.76197365269330802425420319974692784D+00 - pw(24)= 0.97583770654840390135150485526335360D+00 - pw(25)= 0.12534009762825410047987823857212526D+01 - pw(26)= 0.16192497318165861534514297610388291D+01 - pw(27)= 0.21150559758743170143427430064927639D+01 - pw(28)= 0.28228114202692982120171862876336191D+01 - pw(29)= 0.39469320964297990895048746179623006D+01 - pw(30)= 0.63015479148815916165232775496615043D+01 -endif -if(kn == 31) then - px( 1)= 0.46262124155980990900658158435387435D-03 - px( 2)= 0.24543371823976573134043990222488764D-02 - px( 3)= 0.61080328529962740945375030974273550D-02 - px( 4)= 0.11552913696384311289392080456928671D-01 - px( 5)= 0.18992554406021427042667932899509712D-01 - px( 6)= 0.28727239767175649142836330874634688D-01 - px( 7)= 0.41185025043396586435794925588583817D-01 - px( 8)= 0.56962729337650718045473296117050346D-01 - px( 9)= 0.76876408592321896648696565625767740D-01 - px(10)= 0.10202017613750426995368814592543654D+00 - px(11)= 0.13383358226439616664891093380483172D+00 - px(12)= 0.17418111785696519564699242954723688D+00 - px(13)= 0.22545079288954335283945305080525555D+00 - px(14)= 0.29068039334037578871072959727864135D+00 - px(15)= 0.37372028651206644042103206921271373D+00 - px(16)= 0.47944207171169672906363477630137984D+00 - px(17)= 0.61400401892256187595697112035275391D+00 - px(18)= 0.78518739405456531463788153132800130D+00 - px(19)= 0.10028226298243299869022545200635749D+01 - px(20)= 0.12793315955946422764804637028085576D+01 - px(21)= 0.16304237404723823714635864157191701D+01 - px(22)= 0.20760033968995280036753528016386266D+01 - px(23)= 0.26413808369702250796358745676110842D+01 - px(24)= 0.33589474379157213932868619266312477D+01 - px(25)= 0.42706130083047912404753160217494471D+01 - px(26)= 0.54316014237021642862351869609909389D+01 - px(27)= 0.69169001596879784406900118967237735D+01 - px(28)= 0.88334933035422052410960663437339313D+01 - px(29)= 0.11347158219793510037091924273351551D+02 - px(30)= 0.14754946949122040546764367410101515D+02 - px(31)= 0.19772934489331283651619534072783573D+02 - pw( 1)= 0.11891370745192323541501363233610476D-02 - pw( 2)= 0.28064565117632969596616229162402416D-02 - pw( 3)= 0.45221820039177658202628966334564400D-02 - pw( 4)= 0.64011462699419650278998414745094563D-02 - pw( 5)= 0.85276672843451997393681235521893987D-02 - pw( 6)= 0.11012361472143567538937306026897082D-01 - pw( 7)= 0.14001834101185433452944676115710984D-01 - pw( 8)= 0.17688602970920088694526112750101821D-01 - pw( 9)= 0.22319993264776239711478935025076064D-01 - pw(10)= 0.28206159264133299993412221361362783D-01 - pw(11)= 0.35729779624362463483758438125027756D-01 - pw(12)= 0.45361209930192015109636119041934318D-01 - pw(13)= 0.57681745959882977997939097436077625D-01 - pw(14)= 0.73415730766069123013128503531856238D-01 - pw(15)= 0.93471597750154348901340915093546613D-01 - pw(16)= 0.11899278980540690788248491553604027D+00 - pw(17)= 0.15142094037875549556146133150023155D+00 - pw(18)= 0.19257526453331204560446543583285304D+00 - pw(19)= 0.24475404687248924789069417791551804D+00 - pw(20)= 0.31086721331790994056051025981812504D+00 - pw(21)= 0.39461463811187169999440896691515521D+00 - pw(22)= 0.50073578853625257469356342726971100D+00 - pw(23)= 0.63537831862561035023942323431330555D+00 - pw(24)= 0.80667931860754124288487929852367162D+00 - pw(25)= 0.10257546571965496140352844249415068D+01 - pw(26)= 0.13085329420794059702124934248171024D+01 - pw(27)= 0.16795015057251400866456256943483557D+01 - pw(28)= 0.21803283205051475095028864283497279D+01 - pw(29)= 0.28932755231818831561701787327064654D+01 - pw(30)= 0.40239038808605675586573707403263756D+01 - pw(31)= 0.63919688446180182394784276398611383D+01 -endif -if(kn == 32) then - px( 1)= 0.44775812457284018024389459757135253D-03 - px( 2)= 0.23744300256081390660731331947131448D-02 - px( 3)= 0.59043190530701947087959015074527131D-02 - px( 4)= 0.11153785163059738085602585751236982D-01 - px( 5)= 0.18305065328015338438613004118489460D-01 - px( 6)= 0.27625127970368047372371104083117018D-01 - px( 7)= 0.39491775186977265606198670142306198D-01 - px( 8)= 0.54428213199640073895058009136940445D-01 - px( 9)= 0.73145916598392207837439901430911881D-01 - px(10)= 0.96594842317146387626737019235932073D-01 - px(11)= 0.12602067507135613816122919853447211D+00 - px(12)= 0.16303119243744080803051027229475087D+00 - px(13)= 0.20967680096838206807696535069330795D+00 - px(14)= 0.26855214787001775388966483767701730D+00 - px(15)= 0.34292617657317298468348748407414908D+00 - px(16)= 0.43690813207789007326976756359678361D+00 - px(17)= 0.55565789063347173772943038765658058D+00 - px(18)= 0.70565099753808518503172131364185913D+00 - px(19)= 0.89501204411614229330337167600565325D+00 - px(20)= 0.11339348063954267650829490706793417D+01 - px(21)= 0.14352147947875517113501672147412666D+01 - px(22)= 0.18149314500489324404656062560197686D+01 - px(23)= 0.22933370525154514327968223407435420D+01 - px(24)= 0.28960454333191297694038803298227801D+01 - px(25)= 0.36556826421582258456995650363192002D+01 - px(26)= 0.46143015669042314317688786912840126D+01 - px(27)= 0.58271640488452237241101113536108246D+01 - px(28)= 0.73691994618471152327249495515078891D+01 - px(29)= 0.93472955333283431319402613994267131D+01 - px(30)= 0.11927265101386731589772177284607045D+02 - px(31)= 0.15406825810545224143258580731663182D+02 - px(32)= 0.20505385359315843909738804845728816D+02 - pw( 1)= 0.11508141693310289431516002902194819D-02 - pw( 2)= 0.27135954170378174829279299706770743D-02 - pw( 3)= 0.43653263744209382080072687149869796D-02 - pw( 4)= 0.61636735125379730211346708661816263D-02 - pw( 5)= 0.81829488620066496094938528145972129D-02 - pw( 6)= 0.10519566782024470065554644097490174D-01 - pw( 7)= 0.13300211432891686360240172032295580D-01 - pw( 8)= 0.16690405167066082789755182863245534D-01 - pw( 9)= 0.20902378399561394483383583702025306D-01 - pw(10)= 0.26201993681677797113818409392334214D-01 - pw(11)= 0.32916309597556149563466321558626500D-01 - pw(12)= 0.41444783686249708806280965671571355D-01 - pw(13)= 0.52276724501596285458733044616038927D-01 - pw(14)= 0.66016045915514602434595691235181054D-01 - pw(15)= 0.83413404413409668999356206612085646D-01 - pw(16)= 0.10540610055778962043592758135111163D+00 - pw(17)= 0.13316716078519390466674733202657980D+00 - pw(18)= 0.16816620873421840783275153829642565D+00 - pw(19)= 0.21224605690896243222098764241728942D+00 - pw(20)= 0.26772081279710238967904075331055395D+00 - pw(21)= 0.33750446543867324459731453510204593D+00 - pw(22)= 0.42528477444911122706373139673310844D+00 - pw(23)= 0.53576855465657922120503664249148172D+00 - pw(24)= 0.67504694124786279657121038348068956D+00 - pw(25)= 0.85117601343595945001255999545786031D+00 - pw(26)= 0.10751709034800588277387396074134080D+01 - pw(27)= 0.13628543051087082785935353862196062D+01 - pw(28)= 0.17386349660199069072599128814460612D+01 - pw(29)= 0.22441998697757930470131299950344772D+01 - pw(30)= 0.29621015955004271568195551910771395D+01 - pw(31)= 0.40990494779803030065007758020292907D+01 - pw(32)= 0.64803510743879183341918928981358705D+01 -endif -if(kn == 33) then - px( 1)= 0.43382392840086627333673371770410791D-03 - px( 2)= 0.22996130762133902306219348170634844D-02 - px( 3)= 0.57140311482877757226936621231899489D-02 - px( 4)= 0.10782263460575904849778834680881905D-01 - px( 5)= 0.17668133941325186930637700801463696D-01 - px( 6)= 0.26610114005745400437311454869685139D-01 - px( 7)= 0.37943370827578419250767127256762961D-01 - px( 8)= 0.52129041240975080218154665697626616D-01 - px( 9)= 0.69790754616119288011923380403081974D-01 - px(10)= 0.91757677110053287920628073294896820D-01 - px(11)= 0.11911351898045213836878312985804824D+00 - px(12)= 0.15325258383592569039382713986292282D+00 - px(13)= 0.19594640045311118270956256575773479D+00 - px(14)= 0.24942637714448661140410427601352782D+00 - px(15)= 0.31648861015431273270737411206235176D+00 - px(16)= 0.40062704984338027141229486836727772D+00 - px(17)= 0.50620162303052368324000671362943743D+00 - px(18)= 0.63864913663861731335041615079256755D+00 - px(19)= 0.80474696130540033570426941368276095D+00 - px(20)= 0.10129427268888740212312948144945017D+01 - px(21)= 0.12737679814899102853785194013828910D+01 - px(22)= 0.16003609719649203492661497770183358D+01 - px(23)= 0.20091354103290292174609108669156369D+01 - px(24)= 0.25206522780436439397099624663858851D+01 - px(25)= 0.31607884518783084627961967518655955D+01 - px(26)= 0.39623662204110196024719874486085284D+01 - px(27)= 0.49675495548589556752790709024651368D+01 - px(28)= 0.62316176745314993859698590995521575D+01 - px(29)= 0.78294374119411180631665027515007813D+01 - px(30)= 0.98678149766403120157427026982285647D+01 - px(31)= 0.12512632243648644312040507648114218D+02 - px(32)= 0.16062316312799007384705365546800182D+02 - px(33)= 0.21239626413140571003413447643564263D+02 - pw( 1)= 0.11148970595521907012019732812607055D-02 - pw( 2)= 0.26267851653678976569258782421234610D-02 - pw( 3)= 0.42193631422457547168621098688321193D-02 - pw( 4)= 0.59441470797152969047545135707624789D-02 - pw( 5)= 0.78669738899974671911070000143132306D-02 - pw( 6)= 0.10072359597590157564525137294462730D-01 - pw( 7)= 0.12670399361862170180506056379944600D-01 - pw( 8)= 0.15804160264438711138334353671976234D-01 - pw( 9)= 0.19656646222033544724001127769742948D-01 - pw(10)= 0.24456891716604317647752096255377977D-01 - pw(11)= 0.30486078976025291042502816433302855D-01 - pw(12)= 0.38085942822052281180984432235900915D-01 - pw(13)= 0.47671859587250413816889365465789990D-01 - pw(14)= 0.59751905747580346721160295363733596D-01 - pw(15)= 0.74952101551419066714546625345670463D-01 - pw(16)= 0.94047940645329027189027157260871966D-01 - pw(17)= 0.11800297644299036213439491803576584D+00 - pw(18)= 0.14801615944289654934244517014490498D+00 - pw(19)= 0.18558059212548173745682851099099352D+00 - pw(20)= 0.23255756871893273043599614945440298D+00 - pw(21)= 0.29127163202367449192902938057774536D+00 - pw(22)= 0.36463564879320343679994202632934432D+00 - pw(23)= 0.45632094652154984485718193074718634D+00 - pw(24)= 0.57099911532700834474505095568484057D+00 - pw(25)= 0.71470499479041253227766745277939393D+00 - pw(26)= 0.89541776346343118676696712049315123D+00 - pw(27)= 0.11240606917291058823868093321319567D+01 - pw(28)= 0.14163644716761897658439794410119483D+01 - pw(29)= 0.17966773927205272792519385809395869D+01 - pw(30)= 0.23067252939728463992747791116176630D+01 - pw(31)= 0.30293676082841986853392483236331039D+01 - pw(32)= 0.41724631327442166841549815997399232D+01 - pw(33)= 0.65667986737889171343050053820031099D+01 -endif -if(kn == 34) then - px( 1)= 0.42073412472404638893756652613349609D-03 - px( 2)= 0.22294117501223585040813842121338791D-02 - px( 3)= 0.55358634424631592178894736933123541D-02 - px( 4)= 0.10435505221909818911938312253494801D-01 - px( 5)= 0.17076183522530106340715524871890215D-01 - px( 6)= 0.25671846892716699934599581849440412D-01 - px( 7)= 0.36521262393930458152775347026295878D-01 - px( 8)= 0.50032905506982571537052624490236219D-01 - px( 9)= 0.66756179329851771946428938115562858D-01 - px(10)= 0.87418440517600995276435533200107158D-01 - px(11)= 0.11296720871152482793171861979004167D+00 - px(12)= 0.14461798209060507185491668160548863D+00 - px(13)= 0.18391003445626398421131159916752177D+00 - px(14)= 0.23277438392452600064632190536358711D+00 - px(15)= 0.29361900416298607661742971283165405D+00 - px(16)= 0.36943648509469646191303516064166821D+00 - px(17)= 0.46393948817397095233405427748578045D+00 - px(18)= 0.58173003909628716647282591793935082D+00 - px(19)= 0.72851013492593350843955115700236997D+00 - px(20)= 0.91134335818633179015533214138153748D+00 - px(21)= 0.11389803746065945729570613409749246D+01 - px(22)= 0.14222658613929989350760708533653619D+01 - px(23)= 0.17746516415212834951524711400325322D+01 - px(24)= 0.22128526659161258391280177907354742D+01 - px(25)= 0.27577030475922356628315880749135163D+01 - px(26)= 0.34353067716163467499117583277012187D+01 - px(27)= 0.42786491225875840394307069342936437D+01 - px(28)= 0.53299784436903981664272138662385839D+01 - px(29)= 0.66445753694632339412682622877981687D+01 - px(30)= 0.82972428161435530717136911066799313D+01 - px(31)= 0.10394720964339982803669830220514570D+02 - px(32)= 0.13102991988894450504911092547924425D+02 - px(33)= 0.16721231207604090231249152699717947D+02 - px(34)= 0.21975561953387058269749184334695681D+02 - pw( 1)= 0.10811655766145357018010507192110778D-02 - pw( 2)= 0.25454456692504695230374260776656758D-02 - pw( 3)= 0.40831672721796992810291858236827121D-02 - pw( 4)= 0.57405373464649057612542381878716024D-02 - pw( 5)= 0.75761674957710259616551754950037804D-02 - pw( 6)= 0.96645393330211172110854747921485211D-02 - pw( 7)= 0.12101849132180839391980047456808267D-01 - pw( 8)= 0.15012385166203703798856896165367048D-01 - pw( 9)= 0.18554685664439102794960148200939741D-01 - pw(10)= 0.22926965460919156862511787954586100D-01 - pw(11)= 0.28372234446912034618472293586026471D-01 - pw(12)= 0.35184737546237040728179133891634893D-01 - pw(13)= 0.43719795721391424383284929004984363D-01 - pw(14)= 0.54408450285118084483187831793976080D-01 - pw(15)= 0.67777311678116535143958157494852949D-01 - pw(16)= 0.84473623053475603301350103260219498D-01 - pw(17)= 0.10529589998535694380806537413267952D+00 - pw(18)= 0.13123120457466970233581600829200986D+00 - pw(19)= 0.16350086670529523991639926152378893D+00 - pw(20)= 0.20361729924366521691008806803033027D+00 - pw(21)= 0.25345571161549014721855872141678839D+00 - pw(22)= 0.31534642949457072923890964012293452D+00 - pw(23)= 0.39219690177745437575400107500840528D+00 - pw(24)= 0.48765869057533013816866773942301700D+00 - pw(25)= 0.60636656060951114856441189594293935D+00 - pw(26)= 0.75430007270808930870580918261471173D+00 - pw(27)= 0.93936601064002840188234207354112438D+00 - pw(28)= 0.11724044094922051599035401593357573D+01 - pw(29)= 0.14690667743714044794229623979582298D+01 - pw(30)= 0.18536570456604450828475022635489700D+01 - pw(31)= 0.23679569977291335149484204767547713D+01 - pw(32)= 0.30951463106197815052582135293694448D+01 - pw(33)= 0.42442317841513325464351782732665369D+01 - pw(34)= 0.66514074844570206292427905989816795D+01 -endif -if(kn == 35) then - px( 1)= 0.40841415548604021515920964002399699D-03 - px( 2)= 0.21634090823285078411638264192646176D-02 - px( 3)= 0.53686766256969092483856645653707325D-02 - px( 4)= 0.10111052749147679432588829471203234D-01 - px( 5)= 0.16524448303260385083329478751404387D-01 - px( 6)= 0.24801589512429266354562963867459375D-01 - px( 7)= 0.35209981971294629902224557027954734D-01 - px( 8)= 0.48113158246267914322027983645621137D-01 - px( 9)= 0.63997447579595411241665965285069235D-01 - px(10)= 0.83503921106837065850088749934585509D-01 - px(11)= 0.10746492183189159858451002479100191D+00 - px(12)= 0.13694520147247420040072594436271529D+00 - px(13)= 0.17328916996785784591140856733770825D+00 - px(14)= 0.21817740121438829093802932764502942D+00 - px(15)= 0.27369653832797779961249433698659440D+00 - px(16)= 0.34242699906149186851837117836266744D+00 - px(17)= 0.42755291549796611179923030387067599D+00 - px(18)= 0.53299909554182158428637150014155340D+00 - px(19)= 0.66360070964681593124502281725999578D+00 - px(20)= 0.82531292882837242125696121653082133D+00 - px(21)= 0.10254699397214209538105608220109125D+01 - px(22)= 0.12731058972360937198805843671213415D+01 - px(23)= 0.15793550240911338542780101952325349D+01 - px(24)= 0.19579553515472245197868811537799063D+01 - px(25)= 0.24258926480980508600328748991363509D+01 - px(26)= 0.30042420249956940401285542677229005D+01 - px(27)= 0.37193027838966272865717795755051677D+01 - px(28)= 0.46041945352246751335584778757254638D+01 - px(29)= 0.57012278072022800341169042054436900D+01 - px(30)= 0.70656725104389005421933634969655744D+01 - px(31)= 0.87722686454754899579881428713693370D+01 - px(32)= 0.10927706100825575628425432470397431D+02 - px(33)= 0.13698096354347216687650292593862930D+02 - px(34)= 0.17383397148588794631800252981912492D+02 - px(35)= 0.22713103140185533251728138884147593D+02 - pw( 1)= 0.10494256988526777696106600190246748D-02 - pw( 2)= 0.24690695403322712989700976425347600D-02 - pw( 3)= 0.39557664886781607598248979520245308D-02 - pw( 4)= 0.55511159047852988512979064225320046D-02 - pw( 5)= 0.73075320876513631863948214849392405D-02 - pw( 6)= 0.92909810870320629850049703997767543D-02 - pw( 7)= 0.11585945487945439996692114097667348D-01 - pw( 8)= 0.14300927726198362348901143963929085D-01 - pw( 9)= 0.17573886933485282780152536853558582D-01 - pw(10)= 0.21577101857175674214083208317224449D-01 - pw(11)= 0.26521578525384159056539231969282277D-01 - pw(12)= 0.32662117530178998186687978899517756D-01 - pw(13)= 0.40304760583700356024679394121082348D-01 - pw(14)= 0.49818024406493848323885700890345396D-01 - pw(15)= 0.61648498278387927702353594142316029D-01 - pw(16)= 0.76340842519971003579540175209086024D-01 - pw(17)= 0.94562313841812711417919902804637839D-01 - pw(18)= 0.11713243611448668905011510723033361D+00 - pw(19)= 0.14505903302187588429490257418340878D+00 - pw(20)= 0.17958246056420660249030476513346108D+00 - pw(21)= 0.22223064193925674596660827103307295D+00 - pw(22)= 0.27488866495140173131704069241445894D+00 - pw(23)= 0.33988865913831065785773004058455433D+00 - pw(24)= 0.42012914561889293897263514396102618D+00 - pw(25)= 0.51923942630667304009247143973261919D+00 - pw(26)= 0.64181663147268449329914151912099321D+00 - pw(27)= 0.79378665914719288398623606417929212D+00 - pw(28)= 0.98298862630823929161929765759110274D+00 - pw(29)= 0.12201876100804632086249205242578178D+01 - pw(30)= 0.15209676736777719585360185637783971D+01 - pw(31)= 0.19096027011177832508222236236206329D+01 - pw(32)= 0.24279450923678125311674033890233300D+01 - pw(33)= 0.31595056381881229211813965570928908D+01 - pw(34)= 0.43144357961085612515538420099471387D+01 - pw(35)= 0.67342659787038143488625655899472123D+01 -endif -if(kn == 36) then - px( 1)= 0.39679800273035937692456569764529929D-03 - px( 2)= 0.21012371857514574429814634636370868D-02 - px( 3)= 0.52114717733736355471865173697424917D-02 - px( 4)= 0.98067697534794548776488857189272695D-02 - px( 5)= 0.16008829048610610861085787414831567D-01 - px( 6)= 0.23991913025422802197171963117636148D-01 - px( 7)= 0.33996529414545629919769339896568646D-01 - px( 8)= 0.46347640349961743012995653495638967D-01 - px( 9)= 0.61477688475695067453069318914244194D-01 - px(10)= 0.79954234262530625440149978974196142D-01 - px(11)= 0.10251169364078550312390054066216880D+00 - px(12)= 0.13008697511618887542147079025562156D+00 - px(13)= 0.16385989560749921185926347303565033D+00 - px(14)= 0.20530066324396880599546126878370330D+00 - px(15)= 0.25622775706568656239933185280550197D+00 - px(16)= 0.31887992047473434247218676095668498D+00 - px(17)= 0.39600601270766777602081700578327931D+00 - px(18)= 0.49097660869784478263571961512354166D+00 - px(19)= 0.60792179107232897416464078744063313D+00 - px(20)= 0.75190061348713360022081088193051979D+00 - px(21)= 0.92910925563205451252801298079690580D+00 - px(22)= 0.11471370551908310980346755006364606D+01 - px(23)= 0.14152827011474745738425470935393175D+01 - px(24)= 0.17449475260933662620173991491940897D+01 - px(25)= 0.21501302020535220058465237512287759D+01 - px(26)= 0.26480593984665582591226977578025315D+01 - px(27)= 0.32600222787200297912657865580064671D+01 - px(28)= 0.40124854077257132134482639353308378D+01 - px(29)= 0.49386780680939238173283121803851184D+01 - px(30)= 0.60809546519546778854776584514884910D+01 - px(31)= 0.74945653594942201002132544001003708D+01 - px(32)= 0.92541900969385753944715137178141946D+01 - px(33)= 0.11466484149563363201326491294746713D+02 - px(34)= 0.14297715117728357842590639672319382D+02 - px(35)= 0.18048653298710078396919876650411551D+02 - px(36)= 0.23452167308415151157069277254133065D+02 - pw( 1)= 0.10195057714700151361278566695804673D-02 - pw( 2)= 0.23972109360881477362914694357034945D-02 - pw( 3)= 0.38363158447342177123425836110343923D-02 - pw( 4)= 0.53744010839767438647013296257593752D-02 - pw( 5)= 0.70585351522933460613686733697482296D-02 - pw( 6)= 0.89474146486528988019752703037120466D-02 - pw( 7)= 0.11115594177177423398237576162176609D-01 - pw( 8)= 0.13658239363054657454269471936854599D-01 - pw( 9)= 0.16695920500566358906782047930663171D-01 - pw(10)= 0.20378981713905551927970998274789754D-01 - pw(11)= 0.24891420496520210335919294073059272D-01 - pw(12)= 0.30454995468399097263969267647541847D-01 - pw(13)= 0.37334924544188163864246559109559135D-01 - pw(14)= 0.45848499525488242728225859778156080D-01 - pw(15)= 0.56377323577091624057815057252308315D-01 - pw(16)= 0.69383296835632602012769529156119272D-01 - pw(17)= 0.85428366438632838815674340562473751D-01 - pw(18)= 0.10519836473842813187954075539197942D+00 - pw(19)= 0.12953172788969281249628541902884322D+00 - pw(20)= 0.15945437502320699870857044488189859D+00 - pw(21)= 0.19622256830996434214924230362068723D+00 - pw(22)= 0.24137631349243177937384183525770442D+00 - pw(23)= 0.29680703930305420431908018221051720D+00 - pw(24)= 0.36484530976842630509122266207581834D+00 - pw(25)= 0.44837789623740881716704249442275046D+00 - pw(26)= 0.55101005426642996452229825498001978D+00 - pw(27)= 0.67730110868383751051906031936646973D+00 - pw(28)= 0.83312535104283525888024444527691004D+00 - pw(29)= 0.10262590489160128936079715697075387D+01 - pw(30)= 0.12674001847129782122095271537034034D+01 - pw(31)= 0.15720761022006313395632370048802129D+01 - pw(32)= 0.19645432933987013230055443656345311D+01 - pw(33)= 0.24867374093856289898664232230940440D+01 - pw(34)= 0.32225090907915900251361180974071379D+01 - pw(35)= 0.43831496004036045715808964702697293D+01 - pw(36)= 0.68154560080748566632769223367688120D+01 -endif -if(kn == 37) then - px( 1)= 0.38582699810911109314412610582600285D-03 - px( 2)= 0.20425701942290192876130910611009103D-02 - px( 3)= 0.50633691210660737148684629408797998D-02 - px( 4)= 0.95207896708291718422773083291578438D-02 - px( 5)= 0.15525778682735758953134513377993514D-01 - px( 6)= 0.23236458251477143892762148491421503D-01 - px( 7)= 0.32869898315502381365977793756399700D-01 - px( 8)= 0.44717783821182009237592740999277480D-01 - px( 9)= 0.59166282378413474386219652173387790D-01 - px(10)= 0.76720016407611302507577044991006663D-01 - px(11)= 0.98029725568599136751267367485311366D-01 - px(12)= 0.12392331356464118155739304688774820D+00 - px(13)= 0.15544072437387706131982741857227680D+00 - px(14)= 0.19387425549171156055801006554376491D+00 - px(15)= 0.24081693190720646255336253748180444D+00 - px(16)= 0.29822205905402883495704063798959865D+00 - px(17)= 0.36847715057286422135924881742734873D+00 - px(18)= 0.45449546602004465241771763312107545D+00 - px(19)= 0.55982869948207391699702170763669914D+00 - px(20)= 0.68880504649787296744511192196333508D+00 - px(21)= 0.84669796033901533360995646038438838D+00 - px(22)= 0.10399324356558473561934598413040205D+01 - px(23)= 0.12763377882779834661586906782407420D+01 - px(24)= 0.15654589805265212137286807835074586D+01 - px(25)= 0.19189432205583964070987477122905362D+01 - px(26)= 0.23510260307961591423359389441199440D+01 - px(27)= 0.28791534439182687258499291870153821D+01 - px(28)= 0.35247986674670581588018777649051460D+01 - px(29)= 0.43145708343894232186639323097990119D+01 - px(30)= 0.52817877652195029549414854087069499D+01 - px(31)= 0.64688326916686594614365796118970700D+01 - px(32)= 0.79309297085796581254349823893132444D+01 - px(33)= 0.97427028243618144296296506331896705D+01 - px(34)= 0.12010788145502681400420281251059076D+02 - px(35)= 0.14901634128904268951043131468444574D+02 - px(36)= 0.18716850108988162034804184337830585D+02 - px(37)= 0.24192677368936148334878341907713337D+02 - pw( 1)= 0.99125336551824862194889482679760082D-03 - pw( 2)= 0.23294764153889055024996601218914981D-02 - pw( 3)= 0.37240772547834537676742266421935443D-02 - pw( 4)= 0.52091149342528848273366607113774535D-02 - pw( 5)= 0.68270221227859789716885624541247654D-02 - pw( 6)= 0.86302549871569688435939302492406456D-02 - pw( 7)= 0.10684907929606001826647765915296935D-01 - pw( 8)= 0.13074823650284587459124044679067858D-01 - pw( 9)= 0.15905815137012769943635728692865399D-01 - pw(10)= 0.19309592949421631021202995586098476D-01 - pw(11)= 0.23447231955796843102356124526512731D-01 - pw(12)= 0.28512602643555882480364043596258204D-01 - pw(13)= 0.34736803269082359319855891282489532D-01 - pw(14)= 0.42394778196431500823106022890118590D-01 - pw(15)= 0.51814900990726921245206073768999637D-01 - pw(16)= 0.63391754887352977638748380428399567D-01 - pw(17)= 0.77602103176771937423249866012581660D-01 - pw(18)= 0.95024188094564273453247510650970162D-01 - pw(19)= 0.11636084820208068310423973571076075D+00 - pw(20)= 0.14246733764500807361275561785541076D+00 - pw(21)= 0.17438513757282787980358846897741684D+00 - pw(22)= 0.21338354906834882772151573726954513D+00 - pw(23)= 0.26101159566291693588807442987832548D+00 - pw(24)= 0.31916397294414160306456288368801219D+00 - pw(25)= 0.39016685675452558044568607656388734D+00 - pw(26)= 0.47689304458379631259135969323199221D+00 - pw(27)= 0.58292255373275454371013059783773154D+00 - pw(28)= 0.71277724055369180393369777699867214D+00 - pw(29)= 0.87228215826925362283182363826585130D+00 - pw(30)= 0.10691555317433509592820312414251932D+01 - pw(31)= 0.13140356602358313756291999585335256D+01 - pw(32)= 0.16224029261605229346735838359067505D+01 - pw(33)= 0.20185076394260072518838683149008566D+01 - pw(34)= 0.25443795429863924820797324346795088D+01 - pw(35)= 0.32842160798477830990525297742207531D+01 - pw(36)= 0.44504422639266402367212097908451198D+01 - pw(37)= 0.68950534584189288110300715731079937D+01 -endif -if(kn == 38) then - px( 1)= 0.37544882641214739544404487691314902D-03 - px( 2)= 0.19871183957226785505693683210337852D-02 - px( 3)= 0.49235906141676391015802154321946533D-02 - px( 4)= 0.92514737512580193448510992395727495D-02 - px( 5)= 0.15072210889929832914852564552062275D-01 - px( 6)= 0.22529747565558688599709359818783270D-01 - px( 7)= 0.31820706479818463453808764992549706D-01 - px( 8)= 0.43207917804566229579979815937989733D-01 - px( 9)= 0.57037614703055219788340245932475170D-01 - px(10)= 0.73760278305643433875885418326364923D-01 - px(11)= 0.93954814072806105125535625384970094D-01 - px(12)= 0.11835572279751275812340426098616966D+00 - px(13)= 0.14788342387597970850254556771271985D+00 - px(14)= 0.18367881184867197784381084444678116D+00 - px(15)= 0.22714406717510235393336995794234796D+00 - px(16)= 0.27999230770730436478836636945630613D+00 - px(17)= 0.34430881911279975709736095638802572D+00 - px(18)= 0.42262660561797928237112864643574725D+00 - px(19)= 0.51801914793576724661446993655612171D+00 - px(20)= 0.63421369082081852676499438226656058D+00 - px(21)= 0.77572913890901541203968257694714068D+00 - px(22)= 0.94804373045176433040869132064288662D+00 - px(23)= 0.11577991608735723735204737419458983D+01 - px(24)= 0.14130499332620481162978195094240590D+01 - px(25)= 0.17235697910838497297077019443263907D+01 - px(26)= 0.21012318099421305470189826928381899D+01 - px(27)= 0.25604863031155004117350909468147106D+01 - px(28)= 0.31189733700354116534218512680792939D+01 - px(29)= 0.37983289491193925871766329237375059D+01 - px(30)= 0.46252830528399268638347963582117115D+01 - px(31)= 0.56332239825265713634766419255858690D+01 - px(32)= 0.68645515916046830561016745830456758D+01 - px(33)= 0.83744595975704688906750898197385297D+01 - px(34)= 0.10237521325244942798063968606118549D+02 - px(35)= 0.12560368707162995423911721031640974D+02 - px(36)= 0.15509653816608570103011114649053437D+02 - px(37)= 0.19387848243793684608247770401886161D+02 - px(38)= 0.24934561281635323945874914765928781D+02 - pw( 1)= 0.96453265301318427548094362468201990D-03 - pw( 2)= 0.22655173902979609807058505788853476D-02 - pw( 3)= 0.36184028900176044156504296271444603D-02 - pw( 4)= 0.50541489537150279336470105887486457D-02 - pw( 5)= 0.66111480676528721493086153967962652D-02 - pw( 6)= 0.83364709970141383682744167961798317D-02 - pw( 7)= 0.10288965239682486008404941690219778D-01 - pw( 8)= 0.12542814650103402794331337460320134D-01 - pw( 9)= 0.15191255363614986206935175799741358D-01 - pw(10)= 0.18350103409130288787013350611437802D-01 - pw(11)= 0.22160882105318682870151221782836377D-01 - pw(12)= 0.26793767627663720473790893510507021D-01 - pw(13)= 0.32451109787899399883524910095936794D-01 - pw(14)= 0.39372541954009629417292538828294592D-01 - pw(15)= 0.47842474290684012119499234323721647D-01 - pw(16)= 0.58200301955035707564120883548436910D-01 - pw(17)= 0.70853350044334604149559560301899009D-01 - pw(18)= 0.86292591208817884216614254072269241D-01 - pw(19)= 0.10511141477535783724613732416344133D+00 - pw(20)= 0.12802804286358628862797652201096219D+00 - pw(21)= 0.15591251149396720172437659323068423D+00 - pw(22)= 0.18981949259837187914390190669151597D+00 - pw(23)= 0.23102871661757404151219056182072738D+00 - pw(24)= 0.28109550616216987958688311291596089D+00 - pw(25)= 0.34191517456611856887400710976438959D+00 - pw(26)= 0.41580717321823436217712748531850542D+00 - pw(27)= 0.50562861773538920954280122211069833D+00 - pw(28)= 0.61493358954575527626904288123143367D+00 - pw(29)= 0.74820721198453838888752387245859917D+00 - pw(30)= 0.91122787666637256187790465660240598D+00 - pw(31)= 0.11116604876702928314541969577850448D+01 - pw(32)= 0.13600906035248579424433723117814169D+01 - pw(33)= 0.16719605033700192144793651594580592D+01 - pw(34)= 0.20715242290059813187830495943964160D+01 - pw(35)= 0.26009149126553161918961209793496358D+01 - pw(36)= 0.33446822473746498943306001989892373D+01 - pw(37)= 0.45163779904182883931050846768355054D+01 - pw(38)= 0.69731288252045659086112524629580346D+01 -endif -if(kn == 39) then - px( 1)= 0.36561668715257863266695867839020084D-03 - px( 2)= 0.19346233268023844607752680140092771D-02 - px( 3)= 0.47914454654721250429608171020940566D-02 - px( 4)= 0.89973768150331118636464557997751850D-02 - px( 5)= 0.14645426459456932116061991386768235D-01 - px( 6)= 0.21867035291160181873696376563265340D-01 - px( 7)= 0.30840905275941465760959181463130484D-01 - px( 8)= 0.41804727278858172089611260923812570D-01 - px( 9)= 0.55070109596011254633584203260158210D-01 - px(10)= 0.71040748192925559559810356000599258D-01 - px(11)= 0.90233607305000360208735980487986444D-01 - px(12)= 0.11330278644013276387294932973826412D+00 - px(13)= 0.14106605202734226946949558771740988D+00 - px(14)= 0.17453471922456742887333856505611605D+00 - px(15)= 0.21494840120203884103453089817606487D+00 - px(16)= 0.26381673974086082406623827494703215D+00 - px(17)= 0.32297045833724193805138734154727157D+00 - px(18)= 0.39462409178162717260419023658129017D+00 - px(19)= 0.48145279757007806226628785793904093D+00 - px(20)= 0.58668591241992432419881469135301121D+00 - px(21)= 0.71422043441619441875490470925759117D+00 - px(22)= 0.86875839397007365611188934469437619D+00 - px(23)= 0.10559731561016268223241013124184012D+01 - px(24)= 0.12827111749931695832113875967148563D+01 - px(25)= 0.15572378120557119987867239447717751D+01 - px(26)= 0.18895389181496921659373840308737441D+01 - px(27)= 0.22916946565666188923157402572614733D+01 - px(28)= 0.27783497396142931899333530655336223D+01 - px(29)= 0.33673171344416414054075236409384439D+01 - px(30)= 0.40803746579421435360342926127423516D+01 - px(31)= 0.49443541982197303363350679681860647D+01 - px(32)= 0.59926991656565097356612507658187921D+01 - px(33)= 0.72678162022521051390335450639073869D+01 - px(34)= 0.88248661080276432761936204880867089D+01 - px(35)= 0.10738377470289614240120984197486960D+02 - px(36)= 0.13114992524642316318648160324994074D+02 - px(37)= 0.16121587864037157860973324745148775D+02 - px(38)= 0.20061517632007615495343028194490514D+02 - px(39)= 0.25677751590102375173572742100364982D+02 - pw( 1)= 0.93922220165197278758235278705125378D-03 - pw( 2)= 0.22050238560788499833482429614154492D-02 - pw( 3)= 0.35187216063020038804486790499409390D-02 - pw( 4)= 0.49085365506617880092326144941967522D-02 - pw( 5)= 0.64093236535073721253961413565734053D-02 - pw( 6)= 0.80634829478885867255854065621590218D-02 - pw( 7)= 0.99236234747694546900134742610818704D-02 - pw( 8)= 0.12055651790884970603397688654161516D-01 - pw( 9)= 0.14542041549622581450180752510105879D-01 - pw(10)= 0.17484998327571243644029476510788471D-01 - pw(11)= 0.21009295862219636601744654763649681D-01 - pw(12)= 0.25264862509860073296774992740630911D-01 - pw(13)= 0.30429647240913452161840939756263150D-01 - pw(14)= 0.36713599504420717007290949145895236D-01 - pw(15)= 0.44364526109307181291869691370038599D-01 - pw(16)= 0.53676231693459070390920038412533532D-01 - pw(17)= 0.64999020675906355562370375168522980D-01 - pw(18)= 0.78752553989958863994856994484156486D-01 - pw(19)= 0.95441198327713183692637121288756175D-01 - pw(20)= 0.11567225339206538896170499893449828D+00 - pw(21)= 0.14017770592600709076251201422928256D+00 - pw(22)= 0.16984043156916862434884153031547730D+00 - pw(23)= 0.20572609864904223612685592628540649D+00 - pw(24)= 0.24912251113833289072621517399741476D+00 - pw(25)= 0.30158889670929686638710684005186011D+00 - pw(26)= 0.36501892348491283180440889799004221D+00 - pw(27)= 0.44172341189659552954879007161166844D+00 - pw(28)= 0.53454253015480261760607954897116394D+00 - pw(29)= 0.64700413373280623862585575306735597D+00 - pw(30)= 0.78355765539183802574608463599148275D+00 - pw(31)= 0.94993752822514424638014213628449162D+00 - pw(32)= 0.11537599191614455897031555123375337D+01 - pw(33)= 0.14055641164435818777056131176331966D+01 - pw(34)= 0.17207623206557334615184702866032957D+01 - pw(35)= 0.21236210670677286367475649834301224D+01 - pw(36)= 0.26563848390712257212223490954097065D+01 - pw(37)= 0.34039597582687380169009802962894561D+01 - pw(38)= 0.45810165654673262153491823163228067D+01 - pw(39)= 0.70497477205153040846297597338553262D+01 -endif -if(kn == 40) then - px( 1)= 0.35628858567646346686837324510549370D-03 - px( 2)= 0.18848536486770311807591243848705546D-02 - px( 3)= 0.46663181280317856746168443158323968D-02 - px( 4)= 0.87572190745324032833162010260613025D-02 - px( 5)= 0.14243053462025729117427416370397854D-01 - px( 6)= 0.21244187714177671012923575144774521D-01 - px( 7)= 0.29923549070178329860704059006245261D-01 - px( 8)= 0.40496827285887221934804797848083118D-01 - px( 9)= 0.53245474481585595162008164942410512D-01 - px(10)= 0.68532582430157398594384714629729053D-01 - px(11)= 0.86821479567961854490494392488927025D-01 - px(12)= 0.10869675959524067911380119096839804D+00 - px(13)= 0.13488761722649275069059959653024014D+00 - px(14)= 0.16629388885455273807852247897310019D+00 - px(15)= 0.20401590432120076472183920609098846D+00 - px(16)= 0.24938984749465339706889280753165521D+00 - px(17)= 0.30403061355446670063478194165459695D+00 - px(18)= 0.36988419909316149270323787724602738D+00 - px(19)= 0.44929166750312582192671647579960907D+00 - px(20)= 0.54506686870623140996608099899850594D+00 - px(21)= 0.66059043398617261201896956069992349D+00 - px(22)= 0.79992312516554243682533149390065526D+00 - px(23)= 0.96794240207734869040410161024196399D+00 - px(24)= 0.11705071340447720718704866053067197D+01 - px(25)= 0.14146568411163837104501550620112388D+01 - px(26)= 0.17088539364613371655586392377700409D+01 - px(27)= 0.20632805527205428543659205472373927D+01 - px(28)= 0.24902063739619416723013658330633921D+01 - px(29)= 0.30044517126674338175114949672570867D+01 - px(30)= 0.36239831566943951780098581431720407D+01 - px(31)= 0.43707017882279510268089904263627705D+01 - px(32)= 0.52715247584257797079839546521646347D+01 - px(33)= 0.63599375545114314246728093354610643D+01 - px(34)= 0.76783457965158492083638000098773087D+01 - px(35)= 0.92818762327705176821137326934557272D+01 - px(36)= 0.11245019162985129683252871377987811D+02 - px(37)= 0.13674441002824139783929760948169562D+02 - px(38)= 0.16737262031081031207287624813172862D+02 - px(39)= 0.20737736626704553460329127303821744D+02 - px(40)= 0.26422185009410264431343933932848600D+02 - pw( 1)= 0.91521311273862401110414707754929676D-03 - pw( 2)= 0.21477191521928219197974773759587103D-02 - pw( 3)= 0.34245277733745781136392870149979323D-02 - pw( 4)= 0.47714307428687057651870425259278087D-02 - pw( 5)= 0.62201720373839252470536835470530964D-02 - pw( 6)= 0.78090816762799270025355432702422127D-02 - pw( 7)= 0.95853728776285881753692296587104674D-02 - pw( 8)= 0.11607827206228475028893871423142139D-01 - pw( 9)= 0.13949671718334027814092300225470600D-01 - pw(10)= 0.16701414648594482319533882783791014D-01 - pw(11)= 0.19973423842092704666661066907283955D-01 - pw(12)= 0.23898237345016617036372142753573396D-01 - pw(13)= 0.28632957303620028137537791558953675D-01 - pw(14)= 0.34362389756581172586707762233025920D-01 - pw(15)= 0.41303629060426009015015262453667296D-01 - pw(16)= 0.49712538117407657728121783442595623D-01 - pw(17)= 0.59892265779927519547458491064456622D-01 - pw(18)= 0.72203793116717427042782050909270304D-01 - pw(19)= 0.87078559354632307141427375007562753D-01 - pw(20)= 0.10503340154082661838457995134010675D+00 - pw(21)= 0.12668825810872029062869005987170137D+00 - pw(22)= 0.15278730399841792433216511718627756D+00 - pw(23)= 0.18422442714170367120354764302353497D+00 - pw(24)= 0.22207427969059917966672557751489214D+00 - pw(25)= 0.26763062752730913486480650477422812D+00 - pw(26)= 0.32245451049240744596849032983153986D+00 - pw(27)= 0.38843603675318055142995014015287773D+00 - pw(28)= 0.46787586661529213993652263387714350D+00 - pw(29)= 0.56359633220992492533897924842606201D+00 - pw(30)= 0.67909910572915995345517074647986275D+00 - pw(31)= 0.81879920277693732798543935642566763D+00 - pw(32)= 0.98838986255598396167621024931216632D+00 - pw(33)= 0.11954429228293928218210248618287692D+01 - pw(34)= 0.14504574076282353803601676013439121D+01 - pw(35)= 0.17688226967485673903033088821262440D+01 - pw(36)= 0.21748255570140773474399755838047824D+01 - pw(37)= 0.27108286283847583355764140776741630D+01 - pw(38)= 0.34620975677969687591016085409314313D+01 - pw(39)= 0.46444137521824838114376662229079725D+01 - pw(40)= 0.71249713212825268738104553603659849D+01 -endif -if(kn == 41) then - px( 1)= 0.34742673101001169298125786189192247D-03 - px( 2)= 0.18376016626066348589572611512977315D-02 - px( 3)= 0.45476582212664459469467031011454626D-02 - px( 4)= 0.85298627941631364768947297314276555D-02 - px( 5)= 0.13862998302969016172574004035541326D-01 - px( 6)= 0.20657586094570613395846290407493021D-01 - px( 7)= 0.29062610848297752288163047809306784D-01 - px( 8)= 0.39274425358723378021088136325757383D-01 - px( 9)= 0.51548104852787877566528890250659503D-01 - px(10)= 0.66211354354233299700140534259286291D-01 - px(11)= 0.83680871232624778086967339679729237D-01 - px(12)= 0.10448092042565902676173295142297193D+00 - px(13)= 0.12926394759018944327153762885108248D+00 - px(14)= 0.15883342588000497563552833153551216D+00 - px(15)= 0.19416971506631978910646170849840836D+00 - px(16)= 0.23646026989593002896655227863024343D+00 - px(17)= 0.28713586785719640965554266593644607D+00 - px(18)= 0.34791462052391896376462581083932962D+00 - px(19)= 0.42085552853141849349998238918417102D+00 - px(20)= 0.50842339932407666570140739641291868D+00 - px(21)= 0.61356716058210435249023757196485689D+00 - px(22)= 0.73981399451706291428067431492187345D+00 - px(23)= 0.89138229125166542251073985530751701D+00 - px(24)= 0.10733171950376849976883148880289368D+01 - px(25)= 0.12916535619608503598632452641060769D+01 - px(26)= 0.15536125962495445292827177181517718D+01 - px(27)= 0.18678405221272713224762723085314113D+01 - px(28)= 0.22447007897223064544921318025044505D+01 - px(29)= 0.26966362208010185020303083586792707D+01 - px(30)= 0.32386254398637317225899745563640301D+01 - px(31)= 0.38887712166747737454815122744695001D+01 - px(32)= 0.46690813167550880509031095796259491D+01 - px(33)= 0.56065436673995350526181894949125319D+01 - px(34)= 0.67346748355004755553478375342343373D+01 - px(35)= 0.80958733206377174584850755308484861D+01 - px(36)= 0.97452318197102039374768068435419398D+01 - px(37)= 0.11757209117436549173752389088784801D+02 - px(38)= 0.14238509041698890616269777375250039D+02 - px(39)= 0.17356513104267573046562740943972043D+02 - px(40)= 0.21416391258741087275776630427364534D+02 - px(41)= 0.27167802059826190961056594217938860D+02 - pw( 1)= 0.89240744144922233411809208993761819D-03 - pw( 2)= 0.20933555605342582529457970383304286D-02 - pw( 3)= 0.33353720205083480430061442115773668D-02 - pw( 4)= 0.46420859644861606778799308270413399D-02 - pw( 5)= 0.60424942046045364422720752662654441D-02 - pw( 6)= 0.75713643924191847990582966217707736D-02 - pw( 7)= 0.92712216138022670728066109402065568D-02 - pw( 8)= 0.11194687905391631640874958915231129D-01 - pw( 9)= 0.13407015211365284658909716719520612D-01 - pw(10)= 0.15988623214362774067334406180515621D-01 - pw(11)= 0.19037444847571012625771486857897508D-01 - pw(12)= 0.22671015569010270758765332286863717D-01 - pw(13)= 0.27028523256092179219838374149240132D-01 - pw(14)= 0.32273327230549150524506142920429224D-01 - pw(15)= 0.38596560710149393493004578367090095D-01 - pw(16)= 0.46222282500302385970118307137327059D-01 - pw(17)= 0.55414377032731402665476377100171599D-01 - pw(18)= 0.66485217878875067216476264141658569D-01 - pw(19)= 0.79806099857296581997108727302834078D-01 - pw(20)= 0.95819567831341990475832520086841170D-01 - pw(21)= 0.11505394471321825395206987370902480D+00 - pw(22)= 0.13814054039338535506658629671808687D+00 - pw(23)= 0.16583420916823077593541262981490681D+00 - pw(24)= 0.19903814998014368860888042911909173D+00 - pw(25)= 0.23883416675372027779180574899396133D+00 - pw(26)= 0.28652010702264135124796999487042423D+00 - pw(27)= 0.34365700663826115610743525613053815D+00 - pw(28)= 0.41212981100557300459399743393003457D+00 - pw(29)= 0.49422782047790986141383860345034700D+00 - pw(30)= 0.59275496121528084438932784828140258D+00 - pw(31)= 0.71118703347840582157967515903715531D+00 - pw(32)= 0.85390607737164494406520343373727721D+00 - pw(33)= 0.10265669138579149759586965414767668D+01 - pw(34)= 0.12367012590999922651850699786000809D+01 - pw(35)= 0.14947734294405908884100490275594732D+01 - pw(36)= 0.18161565391675246807943861856738552D+01 - pw(37)= 0.22251644165626364386518263812767700D+01 - pw(38)= 0.27642836611922099334373971118073338D+01 - pw(39)= 0.35191416662378097344643779658011469D+01 - pw(40)= 0.47066216438742057965343731115092608D+01 - pw(41)= 0.71988567667853812451766331822159390D+01 -endif -if(kn == 42) then - px( 1)= 0.33899702213207280303556565703741990D-03 - px( 2)= 0.17926803514586918122543179233298713D-02 - px( 3)= 0.44349720472477033588788261295928634D-02 - px( 4)= 0.83142928381370954944600712429031326D-02 - px( 5)= 0.13503405398912139202218387144909328D-01 - px( 6)= 0.20104047687222803992574530787065418D-01 - px( 7)= 0.28252833643724993821423023769509983D-01 - px( 8)= 0.38129051839561461833536104861505433D-01 - px( 9)= 0.49964611823304399745794720191846551D-01 - px(10)= 0.64056255466771758626847757441909748D-01 - px(11)= 0.80779982442382913230902383270370264D-01 - px(12)= 0.10060749467110151157610640414769097D+00 - px(13)= 0.12412446888143911749100232205634547D+00 - px(14)= 0.15205071694865663047292718989423185D+00 - px(15)= 0.18526275842687218439176583557408384D+00 - px(16)= 0.22481983515906206676758114216568014D+00 - px(17)= 0.27199475262711723613680204884879081D+00 - px(18)= 0.32831107191481630404052581830045252D+00 - px(19)= 0.39558818386929789909718409480658300D+00 - px(20)= 0.47599581233231530250697095670252798D+00 - px(21)= 0.57211961616026616366050109797592391D+00 - px(22)= 0.68703982771101430011400322499023268D+00 - px(23)= 0.82442528240215569955771735381318182D+00 - px(24)= 0.98864576667159865631116805089235696D+00 - px(25)= 0.11849063745873302190560203655789150D+01 - px(26)= 0.14194085949505718343903162487063753D+01 - px(27)= 0.16995442942632463982698312503343140D+01 - px(28)= 0.20341308620656637127889186046327503D+01 - px(29)= 0.24336989628881715953740329050451161D+01 - px(30)= 0.29108493179186889815360617341447733D+01 - px(31)= 0.34807029996457758665062194913386789D+01 - px(32)= 0.41614831892896507985708808145127823D+01 - px(33)= 0.49752895914290239340663575251573810D+01 - px(34)= 0.59491683086745559122027634515663744D+01 - px(35)= 0.71166577579837023101788394059159047D+01 - px(36)= 0.85201446660709564225208283238116453D+01 - px(37)= 0.10214688587324610607847909840137625D+02 - px(38)= 0.12274723743508615874901666750507663D+02 - px(39)= 0.14807003938009410302904866873102566D+02 - px(40)= 0.17979187958243548948159702687043315D+02 - px(41)= 0.22097374571880291313408725390611447D+02 - px(42)= 0.27914546740390271130933515005738687D+02 - pw( 1)= 0.87071685062486345289172157206777171D-03 - pw( 2)= 0.20417105878963868357298871515770460D-02 - pw( 3)= 0.32508535230574150323908782582998050D-02 - pw( 4)= 0.45198431212516341141392418533095799D-02 - pw( 5)= 0.58752408889593859079959250826252330D-02 - pw( 6)= 0.73486832934924937540882060804195150D-02 - pw( 7)= 0.89786045784956752332612520710862566D-02 - pw( 8)= 0.10812279756759579335639253148771645D-01 - pw( 9)= 0.12908056239717444457676015564846892D-01 - pw(10)= 0.15337623022218768136530241218842739D-01 - pw(11)= 0.18188143417513304840817866168341234D-01 - pw(12)= 0.21564159053085002516938026776853287D-01 - pw(13)= 0.25589384327944258948303670138994570D-01 - pw(14)= 0.30408768217117848936156421895538494D-01 - pw(15)= 0.36191344708155839281431088511826248D-01 - pw(16)= 0.43134326776658801296020544503200249D-01 - pw(17)= 0.51468687405796338041097105695766697D-01 - pw(18)= 0.61466278505771427877600196882159870D-01 - pw(19)= 0.73448477231342626126309188685807545D-01 - pw(20)= 0.87796417742468560052699023987407758D-01 - pw(21)= 0.10496300171125714320281462758628502D+00 - pw(22)= 0.12548703017795381911118827176515123D+00 - pw(23)= 0.15000994875313221416387070158771740D+00 - pw(24)= 0.17929586510892162331076812701124762D+00 - pw(25)= 0.21425571841544983392913772225490756D+00 - pw(26)= 0.25597680754299265400920520155620068D+00 - pw(27)= 0.30575939808892163354965075699382712D+00 - pw(28)= 0.36516295819168300959340602426394554D+00 - pw(29)= 0.43606594548816172375527529406447798D+00 - pw(30)= 0.52074538635856855968547701789163874D+00 - pw(31)= 0.62198649880275266624161896479324475D+00 - pw(32)= 0.74323973657036909817079545894769618D+00 - pw(33)= 0.88885572273739267168333040860537136D+00 - pw(34)= 0.10644536078580082753759483481056876D+01 - pw(35)= 0.12775289785980491630656102334631630D+01 - pw(36)= 0.15385165702010908217004674883905600D+01 - pw(37)= 0.18627791456676687262611182457669180D+01 - pw(38)= 0.22746636192228746631618234661193503D+01 - pw(39)= 0.28167854835143406837432208820997977D+01 - pw(40)= 0.35751353025579259567249078677450433D+01 - pw(41)= 0.47676889791677097956471776553934610D+01 - pw(42)= 0.72714575121908501417344894592801099D+01 -endif -if(kn == 43) then - px( 1)= 0.33096860786757750154898847120917057D-03 - px( 2)= 0.17499208566123461094898550046151874D-02 - px( 3)= 0.43278154096847331355188904178071447D-02 - px( 4)= 0.81096003646317825364067625754120221D-02 - px( 5)= 0.13162623744349233672062735003036974D-01 - px( 6)= 0.19580760979979784765554480104376286D-01 - px( 7)= 0.27489609951663532195103068673087354D-01 - px( 8)= 0.37053342869799653592283933396820960D-01 - px( 9)= 0.48483444452707627033448163720522761D-01 - px(10)= 0.62049459953072389488156028383179189D-01 - px(11)= 0.78091737934971368286739663026054961D-01 - px(12)= 0.97036017163386886705197231591959380D-01 - px(13)= 0.11940967168918372175774848212559756D+00 - px(14)= 0.14585958712300992040134315459436264D+00 - px(15)= 0.17717200022543900754299287107723618D+00 - px(16)= 0.21429507577429224752942159606670995D+00 - px(17)= 0.25836535107930525415060613221934697D+00 - px(18)= 0.31073935543877479582966549813156744D+00 - px(19)= 0.37303174367650550564362643090028699D+00 - px(20)= 0.44716127946900452807387231860775343D+00 - px(21)= 0.53540606537110705599704958284246183D+00 - px(22)= 0.64046959170961720285075028881968045D+00 - px(23)= 0.76555947611245457114222548768598476D+00 - px(24)= 0.91448119032995475750346963701721081D+00 - px(25)= 0.10917496366295753151128433644125688D+01 - px(26)= 0.13027221869155879323859021531153006D+01 - px(27)= 0.15537578204695570075088633454278321D+01 - px(28)= 0.18524084405877676482171967152697008D+01 - px(29)= 0.22076505684206197863173711190960219D+01 - px(30)= 0.26301688303884504911030778880813732D+01 - px(31)= 0.31327077074207879423269172652003364D+01 - px(32)= 0.37305161917200709143270025763858640D+01 - px(33)= 0.44419236396092482776254163193454757D+01 - px(34)= 0.52891086092067566005114585507151512D+01 - px(35)= 0.62991644484541209348967251319712091D+01 - px(36)= 0.75056437278338836321053009023954332D+01 - px(37)= 0.89509179672327616775374703555124487D+01 - px(38)= 0.10690015208487653327362214867738207D+02 - px(39)= 0.12797352129263264861734948864857745D+02 - px(40)= 0.15379744394428552764554128123427935D+02 - px(41)= 0.18605142714959248273487455172506479D+02 - px(42)= 0.22780586028957730000926904102977113D+02 - px(43)= 0.28662366237232832381924388811320297D+02 - pw( 1)= 0.85006145870848429334757469982573891D-03 - pw( 2)= 0.19925838109657703878469708707415329D-02 - pw( 3)= 0.31706135365278565227115398483457554D-02 - pw( 4)= 0.44041172343279662518790702958553625D-02 - pw( 5)= 0.57174896648731729427179253475841935D-02 - pw( 6)= 0.71396041280184287136596813041547263D-02 - pw( 7)= 0.87053105293648014281590926123488040D-02 - pw( 8)= 0.10457223593582426133616067442291247D-01 - pw( 9)= 0.12447691003362071183889633075575652D-01 - pw(10)= 0.14740821116468595319728157346456377D-01 - pw(11)= 0.17414420402114891406970230872875573D-01 - pw(12)= 0.20561736545282396913149978083028889D-01 - pw(13)= 0.24293057998247351709738591334952020D-01 - pw(14)= 0.28737438822145949516145087089517893D-01 - pw(15)= 0.34044977279791239841839057487094614D-01 - pw(16)= 0.40390072912737209513481740353650117D-01 - pw(17)= 0.47975933504865389303614731289416436D-01 - pw(18)= 0.57040421240269648291641111667509080D-01 - pw(19)= 0.67863232374253637764436196090399013D-01 - pw(20)= 0.80774426563059878523617995686503505D-01 - pw(21)= 0.96164420051293883535914595526514204D-01 - pw(22)= 0.11449568019439733156432157598398321D+00 - pw(23)= 0.13631648279559252800439663886965342D+00 - pw(24)= 0.16227722290947194423527738296713650D+00 - pw(25)= 0.19314992693280818601029240961091576D+00 - pw(26)= 0.22985183401280394837726875217480564D+00 - pw(27)= 0.27347424857196858115127321375763169D+00 - pw(28)= 0.32531839177175415639431272795218179D+00 - pw(29)= 0.38694082912221267186717847996208730D+00 - pw(30)= 0.46021245154573169215949688893911839D+00 - pw(31)= 0.54739734403912358955407316606691874D+00 - pw(32)= 0.65126193729202891760603881973382504D+00 - pw(33)= 0.77523203175189474974013051456241330D+00 - pw(34)= 0.92362846689464044743151918155531626D+00 - pw(35)= 0.11020374135362457925484267398369432D+01 - pw(36)= 0.13179220978816197700516342241847467D+01 - pw(37)= 0.15816923933252917714931130477654841D+01 - pw(38)= 0.19087060424955494066311220499293247D+01 - pw(39)= 0.23233483559535813743753279936284135D+01 - pw(40)= 0.28683678978287494020672145638838434D+01 - pw(41)= 0.36301191889236335565768841054222539D+01 - pw(42)= 0.48276614242022139042477288493084378D+01 - pw(43)= 0.73428236438762783138460481364952627D+01 -endif -if(kn == 44) then - px( 1)= 0.32331350836957546603744084419849788D-03 - px( 2)= 0.17091703169716826666769331280363544D-02 - px( 3)= 0.42257875064315455206561743770986376D-02 - px( 4)= 0.79149690832372160827916911872302865D-02 - px( 5)= 0.12839179023186853233140061142661062D-01 - px( 6)= 0.19085232241568949577899855509474977D-01 - px( 7)= 0.26768883186791699077493305776558926D-01 - px( 8)= 0.36040864557027892650839059295103373D-01 - px( 9)= 0.47094585778667144264845412823636449D-01 - px(10)= 0.60175615761544264401796703227341314D-01 - px(11)= 0.75592961225388113581704029754538741D-01 - px(12)= 0.93732029387412788080736336661325883D-01 - px(13)= 0.11506910593622805003749038841255850D+00 - px(14)= 0.14018727035639085866142640394142696D+00 - px(15)= 0.16979393946354437271616691990993614D+00 - px(16)= 0.20474060430292067802523043906109989D+00 - px(17)= 0.24604566740448100845633933054720796D+00 - px(18)= 0.29492149096685690036119497895105667D+00 - px(19)= 0.35280682708785925696562700896642017D+00 - px(20)= 0.42140579664772257270434756016372204D+00 - px(21)= 0.50273460611520704557595850856713180D+00 - px(22)= 0.59917729885865279633574477547906320D+00 - px(23)= 0.71355204803416102949757204396579206D+00 - px(24)= 0.84918981260514039761248477195520438D+00 - px(25)= 0.10100276016887074982125673033831345D+01 - px(26)= 0.12007191488994777606413935958511916D+01 - px(27)= 0.14267665395522811775994265815554724D+01 - px(28)= 0.16946773518359189852395063278947950D+01 - px(29)= 0.20121533231038550963171865171844820D+01 - px(30)= 0.23883186915697571717337880438736199D+01 - px(31)= 0.28339996248002878664983052972434529D+01 - px(32)= 0.33620712713198530053650295556681017D+01 - px(33)= 0.39878972622760453246577762221089257D+01 - px(34)= 0.47299002992789324698627794306541718D+01 - px(35)= 0.56103262027415485179449762966436919D+01 - px(36)= 0.66563061139504917392538125507749794D+01 - px(37)= 0.79014003881557845126238132373008625D+01 - px(38)= 0.93879629282953731177093056973506411D+01 - px(39)= 0.11170992458953013913700288894894255D+02 - px(40)= 0.13324895111520577881664368628734089D+02 - px(41)= 0.15956559624176004069472090284905753D+02 - px(42)= 0.19234241988629565032886618545011883D+02 - px(43)= 0.23465930980103016989495364982244146D+02 - px(44)= 0.29411210662212999597346209126697707D+02 - pw( 1)= 0.83036884987403513181911166534208663D-03 - pw( 2)= 0.19457941864115460112729277032771097D-02 - pw( 3)= 0.30943299472937825975912487677647078D-02 - pw( 4)= 0.42943871620956718877444401513682917D-02 - pw( 5)= 0.55684261348013693314562032089920088D-02 - pw( 6)= 0.69428725530123470368722128932621244D-02 - pw( 7)= 0.84494234604398957532432243635753483D-02 - pw( 8)= 0.10126616172976027067397699017267216D-01 - pw( 9)= 0.12021566160747622141057949671782224D-01 - pw(10)= 0.14191778414242213809164508501045497D-01 - pw(11)= 0.16706905485358088359523445571590018D-01 - pw(12)= 0.19650346938023694904494456157357191D-01 - pw(13)= 0.23120695042785977245044097938079732D-01 - pw(14)= 0.27233209875082837015975056631389154D-01 - pw(15)= 0.32121665600246232435212867479076854D-01 - pw(16)= 0.37940949845194110884289250317530407D-01 - pw(17)= 0.44870698984096406199701289089460328D-01 - pw(18)= 0.53120093303752446444612781898982047D-01 - pw(19)= 0.62933823516449817737731500047201570D-01 - pw(20)= 0.74599224373678150636698930396457382D-01 - pw(21)= 0.88454634536330462816566048592135618D-01 - pw(22)= 0.10489914083790778708121373941299966D+00 - pw(23)= 0.12440396953470249784020277878699461D+00 - pw(24)= 0.14752589155074737609776847913675882D+00 - pw(25)= 0.17492312579420798336376479913219788D+00 - pw(26)= 0.20737437787760038519638439806427540D+00 - pw(27)= 0.24580187433567244121381237836878940D+00 - pw(28)= 0.29129959389673368311222620480570831D+00 - pw(29)= 0.34516843605203008284533614069195425D+00 - pw(30)= 0.40896093500380134048288051036096437D+00 - pw(31)= 0.48453955284019804235911876681118743D+00 - pw(32)= 0.57415497731524515579134334664918167D+00 - pw(33)= 0.68055495684587130214889340452349758D+00 - pw(34)= 0.80714146055673592921708826105860249D+00 - pw(35)= 0.95820721891454345652447310233062356D+00 - pw(36)= 0.11393080348437704220650007990276423D+01 - pw(37)= 0.13578783180672521841897407574039451D+01 - pw(38)= 0.16243074162684344243952826001273292D+01 - pw(39)= 0.19539528530618256474917072512577991D+01 - pw(40)= 0.23712430126501098428281754997758214D+01 - pw(41)= 0.29190630527561174415810627420409994D+01 - pw(42)= 0.36841316877299013922727992267156054D+01 - pw(43)= 0.48865818259133309055220603437140743D+01 - pw(44)= 0.74130021614058319715789731577046943D+01 -endif -if(kn == 45) then - px( 1)= 0.31600628834973779292559127048190037D-03 - px( 2)= 0.16702900106190273328935895866739893D-02 - px( 3)= 0.41285257114804008864642830706317964D-02 - px( 4)= 0.77296636135218507544800673352591724D-02 - px( 5)= 0.12531750213154611452085965505028540D-01 - px( 6)= 0.18615241131540077290889348326202186D-01 - px( 7)= 0.26087066631843252886061485477515189D-01 - px( 8)= 0.35085969566222178513934460224332887D-01 - px( 9)= 0.45789306577033888813174278261897663D-01 - px(10)= 0.58421434435784388362459047968056087D-01 - px(11)= 0.73263711622882621260808485115770070D-01 - px(12)= 0.90666037430716786669379135384670844D-01 - px(13)= 0.11105978210403381887459852100321712D+00 - px(14)= 0.13497200418547898709664745719407710D+00 - px(15)= 0.16304104546216750230118904547804422D+00 - px(16)= 0.19603390202336562386932779699909816D+00 - px(17)= 0.23486608544820513725469302847128384D+00 - px(18)= 0.28062490631545969684155812688765467D+00 - px(19)= 0.33459720050887095268667406972580638D+00 - px(20)= 0.39830252448023971951147973257838648D+00 - px(21)= 0.47353284765622731594152923198004578D+00 - px(22)= 0.56239982933240531713761978532829046D+00 - px(23)= 0.66739091019498584233201402129369733D+00 - px(24)= 0.79143567909811363215071199148744656D+00 - px(25)= 0.93798429449898007763809445265874043D+00 - px(26)= 0.11111001581385252565298438994097319D+01 - px(27)= 0.13155695863345333806714751926528375D+01 - px(28)= 0.15570319587074322621285835341493748D+01 - px(29)= 0.18421348417902183390620002381054988D+01 - px(30)= 0.21787200444979152172206336828110265D+01 - px(31)= 0.25760487257631498506605264603704116D+01 - px(32)= 0.30450769803228709332081128619189764D+01 - px(33)= 0.35987985256158263799485414495379126D+01 - px(34)= 0.42526795117768400994862312123212388D+01 - px(35)= 0.50252244422171176606674140586086445D+01 - px(36)= 0.59387361521365530542536690567593329D+01 - px(37)= 0.70203754298618509877391417240792276D+01 - px(38)= 0.83037051949719291418317245580593492D+01 - px(39)= 0.98310601809174348734008913125923897D+01 - px(40)= 0.11657412426662143324540788749464127D+02 - px(41)= 0.13857164426266931434200566791289955D+02 - px(42)= 0.16537288540540952756080764303901777D+02 - px(43)= 0.19866358206258941542582357564294186D+02 - px(44)= 0.24153320185396072162240284411839110D+02 - px(45)= 0.30161032818176703094230836042654050D+02 - pw( 1)= 0.81157322026241886094392714295783692D-03 - pw( 2)= 0.19011777475163676304804330211208542D-02 - pw( 3)= 0.30217126568442239903479316417011601D-02 - pw( 4)= 0.41901870013204838998208477401165665D-02 - pw( 5)= 0.54273283828323581210343431353844446D-02 - pw( 6)= 0.67573866388465515363650925512746062D-02 - pw( 7)= 0.82092751225427288897814574011554460D-02 - pw( 8)= 0.98179504955536258107199257981433439D-02 - pw( 9)= 0.11625949429528062575538287604998941D-01 - pw(10)= 0.13685006654390859174410475886206370D-01 - pw(11)= 0.16057648452306267238525538418198228D-01 - pw(12)= 0.18818661412340581662760322741958838D-01 - pw(13)= 0.22056412054981119160700553625749748D-01 - pw(14)= 0.25874134633155885709619608590472202D-01 - pw(15)= 0.30391451859719384524106580753360271D-01 - pw(16)= 0.35746460932048448724214073937351505D-01 - pw(17)= 0.42098664685115079575098628197326142D-01 - pw(18)= 0.49632898864791727296990224145235518D-01 - pw(19)= 0.58564289944594589064829987997677920D-01 - pw(20)= 0.69144234514984419908558444457124034D-01 - pw(21)= 0.81667423637721859042077146566843517D-01 - pw(22)= 0.96480011247988074883978780802066204D-01 - pw(23)= 0.11398911344507944971212279753122991D+00 - pw(24)= 0.13467391267458107409324694436955753D+00 - pw(25)= 0.15909873207292332703499649578337212D+00 - pw(26)= 0.18792855598692739186974920544448162D+00 - pw(27)= 0.22194762553095863671304473597844931D+00 - pw(28)= 0.26208196494751982936477406183051153D+00 - pw(29)= 0.30942704406840347450420829122426871D+00 - pw(30)= 0.36528233311686115136869183803961988D+00 - pw(31)= 0.43119539125683582249214467187083528D+00 - pw(32)= 0.50901957973734783536801637831942790D+00 - pw(33)= 0.60099191357850333533977421143095798D+00 - pw(34)= 0.70984171451926250865508439176773176D+00 - pw(35)= 0.83894803839540918949226578045417203D+00 - pw(36)= 0.99257719538081268693583754948673040D+00 - pw(37)= 0.11762571380368294152931731126534612D+01 - pw(38)= 0.13973967807100820386062004887130792D+01 - pw(39)= 0.16663689232723581345802345799199851D+01 - pw(40)= 0.19985351917633034604206124546990186D+01 - pw(41)= 0.24183711599931949498296065604711404D+01 - pw(42)= 0.29689015304223315699189706042837967D+01 - pw(43)= 0.37372089827137538865937716176457237D+01 - pw(44)= 0.49444904398530770204722137069339350D+01 - pw(45)= 0.74820372303276097896128738491191685D+01 -endif -if(kn == 46) then - px( 1)= 0.30902377399120893788354424898855426D-03 - px( 2)= 0.16331537506671398843215268321368443D-02 - px( 3)= 0.40357010980175320995569579449408174D-02 - px( 4)= 0.75530195763806797812195436379199476D-02 - px( 5)= 0.12239149854724502952182976196758722D-01 - px( 6)= 0.18168803621919060471207612598191755D-01 - px( 7)= 0.25440976363694145320552734001486569D-01 - px( 8)= 0.34183679422904259346408703316308100D-01 - px( 9)= 0.44559964643964573543760075913259475D-01 - px(10)= 0.56775358517699538859278511856645348D-01 - px(11)= 0.71086748757462020933356688085151754D-01 - px(12)= 0.87812673145710441814424785984121246D-01 - px(13)= 0.10734488854150171164189046430233515D+00 - px(14)= 0.13016110718036063404346759877348097D+00 - px(15)= 0.15683892196176944176774935350653314D+00 - px(16)= 0.18807118924767432734960867652772029D+00 - px(17)= 0.22468341948583423108691031411123462D+00 - px(18)= 0.26765394743983501400088905846999460D+00 - px(19)= 0.31813776531045013135926232571664330D+00 - px(20)= 0.37749492399880062738311332663588965D+00 - px(21)= 0.44732440247018853385136813722855067D+00 - px(22)= 0.52950437210775343555603255060398518D+00 - px(23)= 0.62623987488687805361419457851831370D+00 - px(24)= 0.74011910006308993968762196795007188D+00 - px(25)= 0.87417968361959824522374584708126947D+00 - px(26)= 0.10319867719667534605241559659266134D+01 - px(27)= 0.12177250028776661981781648200867157D+01 - px(28)= 0.14363070973204841479279696750353849D+01 - px(29)= 0.16935024864806789035793857077605729D+01 - px(30)= 0.19960904185444808460178685393801009D+01 - px(31)= 0.23520434618090888941520463681053941D+01 - px(32)= 0.27707495103418382337989450786653183D+01 - px(33)= 0.32632837492864188626066742354315931D+01 - px(34)= 0.38427473041855077266339137871017428D+01 - px(35)= 0.45246978009253960226440639549407685D+01 - px(36)= 0.53277111750428588477775437304742452D+01 - px(37)= 0.62741382355089597665586817773799499D+01 - px(38)= 0.73911624234084726194161817156844889D+01 - px(39)= 0.87123449938120379656808053147144537D+01 - px(40)= 0.10280000673796667050592943428615469D+02 - px(41)= 0.12149077777983097408939533602647387D+02 - px(42)= 0.14393981931423795298134131653931530D+02 - px(43)= 0.17121779022116518315203538273624565D+02 - px(44)= 0.20501370994989532793115106347627070D+02 - px(45)= 0.24842669385551363138030331359754804D+02 - px(46)= 0.30911787987864913738727989247192145D+02 - pw( 1)= 0.79361463897360195929686273302248409D-03 - pw( 2)= 0.18585856237700307630029880256700761D-02 - pw( 3)= 0.29524996535416808392477309033103772D-02 - pw( 4)= 0.40910988545579399083185141944050702D-02 - pw( 5)= 0.52935540515794390997929859327326122D-02 - pw( 6)= 0.65821742586378490181117941215141045D-02 - pw( 7)= 0.79834063288880762331149927032946792D-02 - pw( 8)= 0.95290513069482303557158918089126201D-02 - pw( 9)= 0.11257625314697109024742951848074159D-01 - pw(10)= 0.13215805246967286000173950239611213D-01 - pw(11)= 0.15459871723048351392169871195100160D-01 - pw(12)= 0.18057057593147761999943506400696980D-01 - pw(13)= 0.21086760440076034016469007977829453D-01 - pw(14)= 0.24641687279129405797304341158994351D-01 - pw(15)= 0.28829130387470289935818007648098355D-01 - pw(16)= 0.33772655230285352209194232463306431D-01 - pw(17)= 0.39614465986840122572670076622475638D-01 - pw(18)= 0.46518617394519992951670269787487787D-01 - pw(19)= 0.54675131186708511535131483491430821D-01 - pw(20)= 0.64305014007496806948186778346285377D-01 - pw(21)= 0.75666179654741774777744798242374952D-01 - pw(22)= 0.89060332032660676125924796117040007D-01 - pw(23)= 0.10484093752103114356647229133814935D+00 - pw(24)= 0.12342248951027967461470307235875341D+00 - pw(25)= 0.14529134199630383752180561542530020D+00 - pw(26)= 0.17101847250539549716018144547422270D+00 - pw(27)= 0.20127464277089058198410341836023683D+00 - pw(28)= 0.23684858001359149093932878615193101D+00 - pw(29)= 0.27866903348006793962100039913921415D+00 - pw(30)= 0.32783191849208153321924464180174769D+00 - pw(31)= 0.38563432289935890460881756854144066D+00 - pw(32)= 0.45361805217372703197887339399899876D+00 - pw(33)= 0.53362686058844350813392769068823926D+00 - pw(34)= 0.62788396771362463542546047416846800D+00 - pw(35)= 0.73910064578742344795091934329070699D+00 - pw(36)= 0.87063402416829911198578365788259413D+00 - pw(37)= 0.10267256741807495552845728772900802D+01 - pw(38)= 0.12128781106626951287021151112542470D+01 - pw(39)= 0.14364778560404412196974965116370614D+01 - pw(40)= 0.17078848068460459936312054922566995D+01 - pw(41)= 0.20424685786283419841913953641602057D+01 - pw(42)= 0.24647555529206493569854377641616308D+01 - pw(43)= 0.30179124308683067944200720815335092D+01 - pw(44)= 0.37893852356394974569134386395867514D+01 - pw(45)= 0.50014251355936200780723745695031871D+01 - pw(46)= 0.75499704094498894150628414754712103D+01 -endif -if(kn == 47) then - px( 1)= 0.30234480684949870042066556848012510D-03 - px( 2)= 0.15976464953770958135599778441796202D-02 - px( 3)= 0.39470145814685269106194100213451950D-02 - px( 4)= 0.73844351214347704596731706115099534D-02 - px( 5)= 0.11960307325670953208852468939751834D-01 - px( 6)= 0.17744140854528926114461559850667081D-01 - px( 7)= 0.24827775421109866384949286855042231D-01 - px( 8)= 0.33329587337597891764863922994014555D-01 - px( 9)= 0.43399840203304717839217570161011672D-01 - px(10)= 0.55227290263964079065971805634237348D-01 - px(11)= 0.69047097585245923081163450963219921D-01 - px(12)= 0.85150014954361349896311981444336255D-01 - px(13)= 0.10389275615680304450176721539326430D+00 - px(14)= 0.12570943253264044220914087598374078D+00 - px(15)= 0.15112403488297752197534393801499475D+00 - px(16)= 0.18076413027130191024242456354592003D+00 - px(17)= 0.21537618776652269918043941270387835D+00 - px(18)= 0.25584316244558279667676325421037561D+00 - px(19)= 0.30320509530959575930710602679134976D+00 - px(20)= 0.35868352704264273864602902528848421D+00 - px(21)= 0.42371052026058185226325968835995444D+00 - px(22)= 0.49996309228509504682760172037312828D+00 - px(23)= 0.58940391513730639900316144249394630D+00 - px(24)= 0.69432925544472178824662119719828653D+00 - px(25)= 0.81742530558526965352748082134155990D+00 - px(26)= 0.96183429918308909086642705501460506D+00 - px(27)= 0.11312321171355587417637105018144043D+01 - px(28)= 0.13299194954977380580964549469885521D+01 - px(29)= 0.15629294821033111247460192707385455D+01 - px(30)= 0.18361545182421012128782235750936341D+01 - px(31)= 0.21564975473013180685304927966921050D+01 - px(32)= 0.25320530389913426066138968271032273D+01 - px(33)= 0.29723260351325192884401956439321237D+01 - px(34)= 0.34885007189662000039152029330111157D+01 - px(35)= 0.40937753452277820469237089959367401D+01 - px(36)= 0.48037889682999426543864264911219419D+01 - px(37)= 0.56371796552402367761125198516216112D+01 - px(38)= 0.66163382295353580459275111274902267D+01 - px(39)= 0.77684648059929853861219213698259439D+01 - px(40)= 0.91271156012149482363243828361549091D+01 - px(41)= 0.10734585093601096152552856517558275D+02 - px(42)= 0.12645801076306643342528896213826369D+02 - px(43)= 0.14935178894323617316649392464504258D+02 - px(44)= 0.17709887244612807742256277065719222D+02 - px(45)= 0.21139166627448714331281414920298006D+02 - px(46)= 0.25533898913637103231788330664255958D+02 - px(47)= 0.31663433742152343555689779493983534D+02 - pw( 1)= 0.77643840612392469558233568878288438D-03 - pw( 2)= 0.18178823313702654997790171434491737D-02 - pw( 3)= 0.28864536542127938374932216849800024D-02 - pw( 4)= 0.39967467153526294501560789189634526D-02 - pw( 5)= 0.51665295391631236792192347882343893D-02 - pw( 6)= 0.64163743838733321438814922251353746D-02 - pw( 7)= 0.77705352297994246418018494260403994D-02 - pw( 8)= 0.92580225788462023682293420241402079D-02 - pw( 9)= 0.10913810605238600296521875414785753D-01 - pw(10)= 0.12780129454852165115348470540659794D-01 - pw(11)= 0.14907770873253776058864543333587366D-01 - pw(12)= 0.17357325452550337474212111231031977D-01 - pw(13)= 0.20200301191038969448754000899427036D-01 - pw(14)= 0.23520156080274576647610305543693418D-01 - pw(15)= 0.27413389239659681777619137570103249D-01 - pw(16)= 0.31990921824324695951826704036400849D-01 - pw(17)= 0.37380010931288310349162200884914999D-01 - pw(18)= 0.43726873583833196750460928410582440D-01 - pw(19)= 0.51200100443073974036149329120700021D-01 - pw(20)= 0.59994868260574886373334055836155630D-01 - pw(21)= 0.70337945020122803930065727247244319D-01 - pw(22)= 0.82493514750755835638795021587104324D-01 - pw(23)= 0.96769906495425836064670059167481399D-01 - pw(24)= 0.11352737495114584133685628457043583D+00 - pw(25)= 0.13318714247714292913023639085868125D+00 - pw(26)= 0.15624197763113426788311768418626933D+00 - pw(27)= 0.18326866459826594224170442595536027D+00 - pw(28)= 0.21494282570862350034472593398023850D+00 - pw(29)= 0.25205671631617380517032575867290262D+00 - pw(30)= 0.29554084830882541265073842592331588D+00 - pw(31)= 0.34649066398539118260401510223685881D+00 - pw(32)= 0.40620005573936491411597998731411544D+00 - pw(33)= 0.47620444337408185641294547642135994D+00 - pw(34)= 0.55833761203594537949491272468091006D+00 - pw(35)= 0.65480899160066358211037008014529884D+00 - pw(36)= 0.76831227871509708311671525382157320D+00 - pw(37)= 0.90218370924525609794334519875989345D+00 - pw(38)= 0.10606417731219318086869090064772246D+01 - pw(39)= 0.12491658485426170682375819102086492D+01 - pw(40)= 0.14751229591934938816623767074938966D+01 - pw(41)= 0.17488634335620615325269065049873221D+01 - pw(42)= 0.20857683710668100441568568919092486D+01 - pw(43)= 0.25104181373755596577440533667319287D+01 - pw(44)= 0.30661234529278624717819692903450825D+01 - pw(45)= 0.38406927297086597567375854792414220D+01 - pw(46)= 0.50574215821071962594952640986635070D+01 - pw(47)= 0.76168408553950413361259182112809766D+01 -endif -if(kn == 48) then - px( 1)= 0.29595002899720089598048509729240858D-03 - px( 2)= 0.15636631385928177658857961799981344D-02 - px( 3)= 0.38621935810659565095543543710661065D-02 - px( 4)= 0.72233636458828631437195788212396685D-02 - px( 5)= 0.11694254587023519966120947669484770D-01 - px( 6)= 0.17339652834546167382576155326885531D-01 - px( 7)= 0.24244927054495278219738066751504960D-01 - px( 8)= 0.32519777490587042844934209629601974D-01 - px( 9)= 0.42303000131579013083618262933841541D-01 - px(10)= 0.53768369083049019518863347968872785D-01 - px(11)= 0.67131693007024900369022557250738329D-01 - px(12)= 0.82659034800333661590098251027713411D-01 - px(13)= 0.10067601794995718748717592252167950D+00 - px(14)= 0.12157811670283109994038391671204829D+00 - px(15)= 0.14584188025331271469532975454293184D+00 - px(16)= 0.17403718708094337911584686617663455D+00 - px(17)= 0.20684083223970121872293135877750441D+00 - px(18)= 0.24505195226053213810882336066380088D+00 - px(19)= 0.28960993053758356990356801037005909D+00 - px(20)= 0.34161548376800915367159074480126869D+00 - px(21)= 0.40235563464585336049157109682672131D+00 - px(22)= 0.47333327369938541669100296412222937D+00 - px(23)= 0.55630204160312520623134123151678925D+00 - px(24)= 0.65330734108265942786541154430622162D+00 - px(25)= 0.76673441875356785236785315155850152D+00 - px(26)= 0.89936464142274902321479563230619771D+00 - px(27)= 0.10544413316663370284059399235987637D+01 - px(28)= 0.12357468356208411453551987100600846D+01 - px(29)= 0.14476928958143963682846712437006035D+01 - px(30)= 0.16954269342764845689137922115223447D+01 - px(31)= 0.19849575816646326604339901913933094D+01 - px(32)= 0.23233038211583159605827592291675721D+01 - px(33)= 0.27186736189401563011050998256085975D+01 - px(34)= 0.31806801589007531007613766645355501D+01 - px(35)= 0.37206072396727045282926190726802162D+01 - px(36)= 0.43517407927213647930655693816847034D+01 - px(37)= 0.50897921727561576322594218577457258D+01 - px(38)= 0.59534532497729963738598299894774892D+01 - px(39)= 0.69651478712196520721782791126144665D+01 - px(40)= 0.81520877402662049285349974267562675D+01 - px(41)= 0.95478213967711421151781315680800985D+01 - px(42)= 0.11194623319536797531467272595329506D+02 - px(43)= 0.13147404152263011059682758275653972D+02 - px(44)= 0.15480595341123512509584325553694590D+02 - px(45)= 0.18301477075836510534702330167968349D+02 - px(46)= 0.21779637522565706043537218918652790D+02 - px(47)= 0.26226933347488070796043946213342574D+02 - px(48)= 0.32415929770269557720899017070958329D+02 - pw( 1)= 0.75999449282242525498888775227070420D-03 - pw( 2)= 0.17789442907920029044278935841370033D-02 - pw( 3)= 0.28233592185581488393945265516117315D-02 - pw( 4)= 0.39067912705549311303324714399927545D-02 - pw( 5)= 0.50457409167121635114256894882489535D-02 - pw( 6)= 0.62592215195201857161143217251824231D-02 - pw( 7)= 0.75695311456923674293727515403732119D-02 - pw( 8)= 0.90032044940945145010068536558863271D-02 - pw( 9)= 0.10592085506839418968886878624905230D-01 - pw(10)= 0.12374483314193827446279695880913988D-01 - pw(11)= 0.14396352965559528046529852650738501D-01 - pw(12)= 0.16712429540159363311802064676696312D-01 - pw(13)= 0.19387262265658075348794679025267961D-01 - pw(14)= 0.22496156590284673178523069618335189D-01 - pw(15)= 0.26126125032895265676414993536188666D-01 - pw(16)= 0.30377032282566849853328475248140570D-01 - pw(17)= 0.35363150724079551172748521328984452D-01 - pw(18)= 0.41215303456842402904128482278539693D-01 - pw(19)= 0.48083691328284411416338444917462566D-01 - pw(20)= 0.56141427938196717060539102662208221D-01 - pw(21)= 0.65588776105316823321919603829156998D-01 - pw(22)= 0.76658094133956830129535839504567249D-01 - pw(23)= 0.89619543419632664042116217247700679D-01 - pw(24)= 0.10478766192717970201342332439078370D+00 - pw(25)= 0.12252896123742271255427206161487662D+00 - pw(26)= 0.14327075828486752574672706211131092D+00 - pw(27)= 0.16751151313821774235835757278858330D+00 - pw(28)= 0.19583302163349966411912965960570620D+00 - pw(29)= 0.22891492049551029451481007469340092D+00 - pw(30)= 0.26755212283797206211340817029488241D+00 - pw(31)= 0.31267604416833112127254011348613261D+00 - pw(32)= 0.36538085205037423661137648346933768D+00 - pw(33)= 0.42695655668634922335366944118783047D+00 - pw(34)= 0.49893168999297620297105696955480497D+00 - pw(35)= 0.58312983017364951950857474996168896D+00 - pw(36)= 0.68174673032193870559875685439839375D+00 - pw(37)= 0.79745906091752647870294820280336694D+00 - pw(38)= 0.93358322488211662322908533451921218D+00 - pw(39)= 0.10943162513760950011430094815688344D+01 - pw(40)= 0.12851165678797307366335354072388698D+01 - pw(41)= 0.15133343910778260993594764587330245D+01 - pw(42)= 0.17893135308834332452949764559455513D+01 - pw(43)= 0.21284497101212601640922819767089646D+01 - pw(44)= 0.25553800629427160180922388763367066D+01 - pw(45)= 0.31135609716998794233456050246781466D+01 - pw(46)= 0.38911620012948672120134104867773046D+01 - pw(47)= 0.51125134158119019180657428081205730D+01 - pw(48)= 0.76826855076385203411408423144047970D+01 -endif -if(kn == 49) then - px( 1)= 0.28982169661609259925974979296199715D-03 - px( 2)= 0.15311074628945562334530690364685912D-02 - px( 3)= 0.37809891427061498988417968394892015D-02 - px( 4)= 0.70693075551075721251308809528074262D-02 - px( 5)= 0.11440114051043380640580809174873546D-01 - px( 6)= 0.16953896205505568651585337372141638D-01 - px( 7)= 0.23690155524198397880316141824091359D-01 - px( 8)= 0.31750757826730201808237900276821105D-01 - px( 9)= 0.41264185607363735111209847472395680D-01 - px(10)= 0.52390788296171869395123759706096667D-01 - px(11)= 0.65329088415812943007951010158851832D-01 - px(12)= 0.80323145971104464698417160245071838D-01 - px(13)= 0.97670923692398180646763841694521778D-01 - px(14)= 0.11773356193272553739176999855177642D+00 - px(15)= 0.14094549927124676527452589226795674D+00 - px(16)= 0.16782548211787615810300942831732404D+00 - px(17)= 0.19898867709139685594599685847717735D+00 - px(18)= 0.23516028351061490409755316887724481D+00 - px(19)= 0.27719118480638011914316315022611666D+00 - px(20)= 0.32607625000818758221137748238031822D+00 - px(21)= 0.38297591208948031388506643958671483D+00 - px(22)= 0.44924164526126338307000295982995931D+00 - px(23)= 0.52644597452520216232192817378809657D+00 - px(24)= 0.61641769981161764547143482902113440D+00 - px(25)= 0.72128311119138160917726410382682506D+00 - px(26)= 0.84351411110478227605310059636301640D+00 - px(27)= 0.98598434482611919288728229535556820D+00 - px(28)= 0.11520346774611530676609320335187635D+01 - px(29)= 0.13455496592189661370306885171963069D+01 - px(30)= 0.15710470166859281003411527674326210D+01 - px(31)= 0.18337827389082834345983893278310805D+01 - px(32)= 0.21398750605157030327208430120547271D+01 - px(33)= 0.24964516861943417767039670584804599D+01 - px(34)= 0.29118261213763337159423664681275174D+01 - px(35)= 0.33957112485642962686719816570055312D+01 - px(36)= 0.39594817726282308325228740791813504D+01 - px(37)= 0.46165026215025079679688539374253711D+01 - px(38)= 0.53825491684099448048962771604007379D+01 - px(39)= 0.62763596401674826758369077814162276D+01 - px(40)= 0.73203847840971806343130509762279760D+01 - px(41)= 0.85418435921307030922239809237588315D+01 - px(42)= 0.99742749209607261981040189301013931D+01 - px(43)= 0.11659933902343682924575082774292738D+02 - px(44)= 0.13653717512175632633212031175648009D+02 - px(45)= 0.16030079451319704520212844485652403D+02 - px(46)= 0.18896419514536338819621757969794054D+02 - px(47)= 0.22422681781072229358731374027395797D+02 - px(48)= 0.26921701181159762662101522272189023D+02 - px(49)= 0.33169237709310133472812163437610434D+02 - pw( 1)= 0.74423705557468321912970974468971512D-03 - pw( 2)= 0.17416585473194369192981311717828790D-02 - pw( 3)= 0.27630202767273380717880972293549723D-02 - pw( 4)= 0.38209254857123832686825908868042634D-02 - pw( 5)= 0.49307262850810818969143173604709699D-02 - pw( 6)= 0.61100327216620297105479543777143453D-02 - pw( 7)= 0.73793929142489472766538930130422414D-02 - pw( 8)= 0.87631380820224889920133086400922484D-02 - pw( 9)= 0.10290337290113942038466358200627605D-01 - pw(10)= 0.11995832288470777205381700382547014D-01 - pw(11)= 0.13921304955008894938778083468431898D-01 - pw(12)= 0.16116315842092009170407309532189491D-01 - pw(13)= 0.18639261057732707055179140040607199D-01 - pw(14)= 0.21558238862705300435120252889679016D-01 - pw(15)= 0.24951892641527327511221421263371520D-01 - pw(16)= 0.28910375260277562929269397616107752D-01 - pw(17)= 0.33536621862509534402237961505810400D-01 - pw(18)= 0.38948101356497020534071631819477111D-01 - pw(19)= 0.45279154574446597766189875771988559D-01 - pw(20)= 0.52683958194920854977905259654150493D-01 - pw(21)= 0.61340113116439272718820743217394536D-01 - pw(22)= 0.71452855489322219691816819653732876D-01 - pw(23)= 0.83259918269799951973839847297358939D-01 - pw(24)= 0.97037114546374637815371486877102608D-01 - pw(25)= 0.11310475960635753107742085931930780D+00 - pw(26)= 0.13183509368952202468340313538628788D+00 - pw(27)= 0.15366091498274177979540995552638069D+00 - pw(28)= 0.17908568983953526314195097583061572D+00 - pw(29)= 0.20869548441677582027963091462587493D+00 - pw(30)= 0.24317317246706239890716235938537564D+00 - pw(31)= 0.28331553768501980083448486564952605D+00 - pw(32)= 0.33005413638688135108429388605654924D+00 - pw(33)= 0.38448116653084696134266394851745611D+00 - pw(34)= 0.44788218293156059076871251650381853D+00 - pw(35)= 0.52177844169743715495966266074704591D+00 - pw(36)= 0.60798318304777180959943540325649013D+00 - pw(37)= 0.70867868475109945308012661652305988D+00 - pw(38)= 0.82652519802949145510247997374762292D+00 - pw(39)= 0.96482036576439726113478189858839344D+00 - pw(40)= 0.11277413304728328935012105843530012D+01 - pw(41)= 0.13207276384535346696169033036111123D+01 - pw(42)= 0.15511151994361137315814825047961978D+01 - pw(43)= 0.18292440906220512275179521250816934D+01 - pw(44)= 0.21705274773423284009902208179302011D+01 - pw(45)= 0.25996616983900981131162894691162348D+01 - pw(46)= 0.31602501106700710441030906881720498D+01 - pw(47)= 0.39408219589198821554199569326106068D+01 - pw(48)= 0.51667323905056736314902652284285907D+01 - pw(49)= 0.77475392524477876553320371372226975D+01 -endif -if(kn == 50) then - px( 1)= 0.28394351112953297748994559783102511D-03 - px( 2)= 0.14998911954688725774856401484033629D-02 - px( 3)= 0.37031733634109224832474380329849766D-02 - px( 4)= 0.69218127312350228406485064567062889D-02 - px( 5)= 0.11197087943784824700478952929697149D-01 - px( 6)= 0.16585564985245590131106268275448670D-01 - px( 7)= 0.23161412500517665834595664639261402D-01 - px( 8)= 0.31019403043677038055911010779170235D-01 - px( 9)= 0.40278717647655643037391307728658037D-01 - px(10)= 0.51087642190675989042234002017798487D-01 - px(11)= 0.63629213809160599368347959049041399D-01 - px(12)= 0.78127829475675329032296849094039453D-01 - px(13)= 0.94856775739499734452627500076424815D-01 - px(14)= 0.11414660148335876745918230546444594D+00 - px(15)= 0.13639426440178507981483041317230735D+00 - px(16)= 0.16207305783142124363194134470205347D+00 - px(17)= 0.19174346191330080395615598185222217D+00 - px(18)= 0.22606522557261662878988725676839652D+00 - px(19)= 0.26581112483702385355663297234020894D+00 - px(20)= 0.31188292620679003206265405632135533D+00 - px(21)= 0.36533011131301634013423764015010912D+00 - px(22)= 0.42737191711071555589396273204369958D+00 - px(23)= 0.49942324705020639374541286536052638D+00 - px(24)= 0.58312503667951418472684968751768941D+00 - px(25)= 0.68037972242344487980438298325566643D+00 - px(26)= 0.79339256658260940896953648094143840D+00 - px(27)= 0.92471973446804368904394005417532194D+00 - px(28)= 0.10773242035057135225415445929049064D+01 - px(29)= 0.12546408175407001854792111537007726D+01 - px(30)= 0.14606520990918797361653158063467201D+01 - px(31)= 0.16999768257560608388590285583482666D+01 - px(32)= 0.19779739082773843717702150700803880D+01 - px(33)= 0.23008648456607864889580088643288349D+01 - px(34)= 0.26758790850855905290663568936243531D+01 - px(35)= 0.31114281531421848676256035865658428D+01 - px(36)= 0.36173167280945029265548597355573382D+01 - px(37)= 0.42050023489136640359196188798116726D+01 - px(38)= 0.48879209793922610146974780441291671D+01 - px(39)= 0.56819045073984933891246336517741761D+01 - px(40)= 0.66057308705566818707545720531514149D+01 - px(41)= 0.76818723662188017987959388336177329D+01 - px(42)= 0.89375516656298253147474053069187414D+01 - px(43)= 0.10406296477480503239729452647809424D+02 - px(44)= 0.12130343567869783578305872825995901D+02 - px(45)= 0.14164579787764828196349642136898031D+02 - px(46)= 0.16583487005376427804066458589286026D+02 - px(47)= 0.19494592188944424125485472332517264D+02 - px(48)= 0.23068202782732093092557399540639023D+02 - px(49)= 0.27618134557703623179690912010041751D+02 - px(50)= 0.33923321036009768333728618541703920D+02 - pw( 1)= 0.72912399684308635017096115410335056D-03 - pw( 2)= 0.17059216228519449314704147393870548D-02 - pw( 3)= 0.27052579385463033654518158126252911D-02 - pw( 4)= 0.37388707495255014610097719618737042D-02 - pw( 5)= 0.48210691945239681765906911252349642D-02 - pw( 6)= 0.59681965658636724574936823069437344D-02 - pw( 7)= 0.71992306962236770905536136519431856D-02 - pw( 8)= 0.85365357689533034566484478187242343D-02 - pw( 9)= 0.10006713677868850439161417859632759D-01 - pw(10)= 0.11641531351319812896113767691847514D-01 - pw(11)= 0.13478885680498147705267761047018866D-01 - pw(12)= 0.15563753671054168683182913698539199D-01 - pw(13)= 0.17949077884564351780048811341841315D-01 - pw(14)= 0.20696568103487745734831198958981091D-01 - pw(15)= 0.23877459843864173187990416648243479D-01 - pw(16)= 0.27573340091582106763900700105376174D-01 - pw(17)= 0.31877198204074685512995246247087960D-01 - pw(18)= 0.36894860399462428730109186000241340D-01 - pw(19)= 0.42746921785947037355880124311792745D-01 - pw(20)= 0.49571228532988571977201451960147272D-01 - pw(21)= 0.57525917359503163899312028414966321D-01 - pw(22)= 0.66793006913272852902897497204087823D-01 - pw(23)= 0.77582552855288975335680322806095329D-01 - pw(24)= 0.90137412508095941124740779645748160D-01 - pw(25)= 0.10473870396294021438411474581832967D+00 - pw(26)= 0.12171208319009365070220694175921390D+00 - pw(27)= 0.14143500171709279543618029697282644D+00 - pw(28)= 0.16434515151851001863886624876712371D+00 - pw(29)= 0.19095035999365251682074028234920175D+00 - pw(30)= 0.22184027575178191861381490980142531D+00 - pw(31)= 0.25770029856713080526288855613179455D+00 - pw(32)= 0.29932837398793730342885176518781178D+00 - pw(33)= 0.34765552544247195237609202772613096D+00 - pw(34)= 0.40377138372726537143521552364635440D+00 - pw(35)= 0.46895657668751681273934682099459395D+00 - pw(36)= 0.54472479692767961940426388856919793D+00 - pw(37)= 0.63287890678130081055700714208328112D+00 - pw(38)= 0.73558798250487807944749198073740843D+00 - pw(39)= 0.85549650522124124655227075039375323D+00 - pw(40)= 0.99588443073362582872294183483256617D+00 - pw(41)= 0.11609105355219986209989146786182259D+01 - pw(42)= 0.13559974384949635725484334023159422D+01 - pw(43)= 0.15884690610914334611993355370110858D+01 - pw(44)= 0.18686642910659719608475187905997900D+01 - pw(45)= 0.22120162660997500537540215058314098D+01 - pw(46)= 0.26432826564193229543032457728289448D+01 - pw(47)= 0.32062148179068205963629824250927723D+01 - pw(48)= 0.39897000025706165355011208836568943D+01 - pw(49)= 0.52201085267504392582124308457577559D+01 - pw(50)= 0.78114350904888610285435916161990246D+01 -endif -if(kn == 51) then - px( 1)= 0.27830048331694745146626670725003848D-03 - px( 2)= 0.14699332467589495012214296420135614D-02 - px( 3)= 0.36285373076700958382889506232860957D-02 - px( 4)= 0.67804640421568805932750563900953938D-02 - px( 5)= 0.10964449641475153555650317586921338D-01 - px( 6)= 0.16233474841656748209131399424045480D-01 - px( 7)= 0.22656849606742837535759660703189982D-01 - px( 8)= 0.30322907994226545082257492807954794D-01 - px( 9)= 0.39342419929775285730416663052397862D-01 - px(10)= 0.49852801135275356633188650023787799D-01 - px(11)= 0.62023178388698049878991488778075058D-01 - px(12)= 0.76060329272036397821127225875497380D-01 - px(13)= 0.92215469045402426815676149451909456D-01 - px(14)= 0.11079181963288772179472869891025245D+00 - px(15)= 0.13215289192145648970655359123013550D+00 - px(16)= 0.15673146375949390138182262372756365D+00 - px(17)= 0.18503934411172685168403222212810987D+00 - px(18)= 0.21767815425667758980667415659377157D+00 - px(19)= 0.25535148902919646272821292167480645D+00 - px(20)= 0.29887891101338956958304222194317216D+00 - px(21)= 0.34921226930606420176060402127989888D+00 - px(22)= 0.40745483836472093899131085922052085D+00 - px(23)= 0.47488376920818361679977841816170633D+00 - px(24)= 0.55297635865326739663234570169538373D+00 - px(25)= 0.64344068535303042620990947995897496D+00 - px(26)= 0.74825123790612149826635700308884120D+00 - px(27)= 0.86969026995696913003053787424248180D+00 - px(28)= 0.10103957605313364799613553988071921D+01 - px(29)= 0.11734170393027801618266450011528511D+01 - px(30)= 0.13622793663980858168640978903164547D+01 - px(31)= 0.15810590528845442660473438585783950D+01 - px(32)= 0.18344711002533369402909515294518176D+01 - px(33)= 0.21279718700902070387593931590109607D+01 - px(34)= 0.24678800386227544163678584032999521D+01 - px(35)= 0.28615201544104645116093866929518600D+01 - px(36)= 0.33173946748043709269495501699195293D+01 - px(37)= 0.38453926882082167185869634333011899D+01 - px(38)= 0.44570470958112522799784043488271277D+01 - px(39)= 0.51658576054751924442339769813379872D+01 - px(40)= 0.59877058293844537789530450556075720D+01 - px(41)= 0.69414034941300311714983901016833412D+01 - px(42)= 0.80494397879808445218602940428136405D+01 - px(43)= 0.93390380570666107887290025527994856D+01 - px(44)= 0.10843713859839632308899371210210092D+02 - px(45)= 0.12605686843057786008572981971452711D+02 - px(46)= 0.14679837296994134584444426535907765D+02 - px(47)= 0.17140680920657630892046419784075754D+02 - px(48)= 0.20095878907672599896988947474029035D+02 - px(49)= 0.23716108785958653467610019526488283D+02 - px(50)= 0.28316168939449414612178766745269557D+02 - px(51)= 0.34678144812502309409979036489309933D+02 - pw( 1)= 0.71461661125073618577714141168068574D-03 - pw( 2)= 0.16716385878603382275615092392545966D-02 - pw( 3)= 0.26499087137809206936489894716951881D-02 - pw( 4)= 0.36603737280291944817450300201489801D-02 - pw( 5)= 0.47163932617577667464231453856574222D-02 - pw( 6)= 0.58331641163766286245746932185477944D-02 - pw( 7)= 0.70282510906775725331369422805168084D-02 - pw( 8)= 0.83222573157223186581625603590243717D-02 - pw( 9)= 0.97395848045625290059070106665033066D-02 - pw(10)= 0.11309266338983814689829245251610443D-01 - pw(11)= 0.13065837815189701273276681449362080D-01 - pw(12)= 0.15050206824622500427636027066505286D-01 - pw(13)= 0.17310471604431563174843365788224820D-01 - pw(14)= 0.19902665286202140668402395991957144D-01 - pw(15)= 0.22891446750016473901306940204721219D-01 - pw(16)= 0.26350819616702910168573443501795867D-01 - pw(17)= 0.30365009714194435950502154476602598D-01 - pw(18)= 0.35029644330122852911457703936925401D-01 - pw(19)= 0.40453348181743022646044235704229779D-01 - pw(20)= 0.46759819486523129479791042090399526D-01 - pw(21)= 0.54090403179921190709407057449782309D-01 - pw(22)= 0.62607156912964304588823654347725686D-01 - pw(23)= 0.72496411785130069944054386511386143D-01 - pw(24)= 0.83972854815470973907373202544753383D-01 - pw(25)= 0.97284192841846608857881098157165865D-01 - pw(26)= 0.11271649103721755058596361082284142D+00 - pw(27)= 0.13060031219247571052268358514865611D+00 - pw(28)= 0.15131781797811823631871970343701060D+00 - pw(29)= 0.17531103556726408969705872673699634D+00 - pw(30)= 0.20309154904134215752202092778295388D+00 - pw(31)= 0.23525195397891395078569762059044835D+00 - pw(32)= 0.27247952854009146289412622775860388D+00 - pw(33)= 0.31557274490479512420214647476118810D+00 - pw(34)= 0.36546150205090068791751615370418030D+00 - pw(35)= 0.42323235431744446559543345018447882D+00 - pw(36)= 0.49016062156217930068156275755723687D+00 - pw(37)= 0.56775223265564612092706940030068368D+00 - pw(38)= 0.65779970939872780221136201150769308D+00 - pw(39)= 0.76245925865498574135551621597788489D+00 - pw(40)= 0.88436027001142532199927186614862192D+00 - pw(41)= 0.10267660753266581074365019471728486D+01 - pw(42)= 0.11938185471963720076518527335025432D+01 - pw(43)= 0.13909252171844327541246452342529830D+01 - pw(44)= 0.16254001662180292675140925291992103D+01 - pw(45)= 0.19075834129441296076092154965785291D+01 - pw(46)= 0.22529303361525593567401491175421270D+01 - pw(47)= 0.26862617892200971765670039600369766D+01 - pw(48)= 0.32514778994133779406256785815727141D+01 - pw(49)= 0.40378220858095016239167419409137174D+01 - pw(50)= 0.52726701882528895779137444037618021D+01 - pw(51)= 0.78744042074131644922750515876542347D+01 -endif -if(kn == 52) then - px( 1)= 0.27287878963778979318704099764976979D-03 - px( 2)= 0.14411589143135589309910830756029883D-02 - px( 3)= 0.35568888680936775757132750276695479D-02 - px( 4)= 0.66448808369926751816655531142947976D-02 - px( 5)= 0.10741535203421646235306609286779033D-01 - px( 6)= 0.15896548124765745985567116234974430D-01 - px( 7)= 0.22174792928607557748215031029525379D-01 - px( 8)= 0.29658745389012720604963104746455902D-01 - px( 9)= 0.38451550078535438960644609304592703D-01 - px(10)= 0.48680802154773981897502480211717432D-01 - px(11)= 0.60503099784552369897160088385145451D-01 - px(12)= 0.74109391214600302911242894598461603D-01 - px(13)= 0.89731100249233358933686871661989877D-01 - px(14)= 0.10764697753584156876969725074282997D+00 - px(15)= 0.12819061301398605430851761152295284D+00 - px(16)= 0.15175857698395808054368327545045674D+00 - px(17)= 0.17881924018609623916197948354619579D+00 - px(18)= 0.20992244196465392763256291141326856D+00 - px(19)= 0.24571029757551810503161176844891960D+00 - px(20)= 0.28692952833595684927481466342699473D+00 - px(21)= 0.33444574647578471028054731992057917D+00 - px(22)= 0.38926013793139080772351691924522347D+00 - px(23)= 0.45252898276941636881394275842894023D+00 - px(24)= 0.52558645699160000314972991962565110D+00 - px(25)= 0.60997118585865879765197048254341330D+00 - px(26)= 0.70745707342364955349644867464416164D+00 - px(27)= 0.82008901627275511415852311384357046D+00 - px(28)= 0.95022422122473475017786104441461093D+00 - px(29)= 0.11005799888944176702824780847485411D+01 - px(30)= 0.12742890037078181125625630187828027D+01 - px(31)= 0.14749633979156047676958663456164965D+01 - px(32)= 0.17067691516542664863528589999065341D+01 - px(33)= 0.19745127831074488312044849464771686D+01 - px(34)= 0.22837428183447160072986023000468876D+01 - px(35)= 0.26408692804764055817543358579323038D+01 - px(36)= 0.30533055131576702465080896333143573D+01 - px(37)= 0.35296382295469342100204889068765045D+01 - px(38)= 0.40798340368281393790208425276818839D+01 - px(39)= 0.47154942919829600315975357986809138D+01 - px(40)= 0.54501757769677042873771351203731296D+01 - px(41)= 0.62998036929575447450649046764701569D+01 - px(42)= 0.72832182898545812637798931719272845D+01 - px(43)= 0.84229216047310103819935078476256484D+01 - px(44)= 0.97461351851146406033204591955381092D+01 - px(45)= 0.11286361832018459964955857919026558D+02 - px(46)= 0.13085805530780086242892657357654051D+02 - px(47)= 0.15199343565107891341827069072222156D+02 - px(48)= 0.17701530875675334700619075035054618D+02 - px(49)= 0.20700169445006682515691632852528091D+02 - px(50)= 0.24366312928914848142376439425799257D+02 - px(51)= 0.29015743374923417716784066471530138D+02 - px(52)= 0.35433676271462821390664880501380081D+02 - pw( 1)= 0.70067921248512656450750262380470989D-03 - pw( 2)= 0.16387221028734144344416061643064582D-02 - pw( 3)= 0.25968227302892772688118502909161922D-02 - pw( 4)= 0.35852033221618217540588075672036645D-02 - pw( 5)= 0.46163571334057459414651619546435755D-02 - pw( 6)= 0.57044407177542779516540853577376776D-02 - pw( 7)= 0.68657439258466701284209801337979038D-02 - pw( 8)= 0.81192888642852760066695573485214022D-02 - pw( 9)= 0.94875105621027290016976460295608743D-02 - pw(10)= 0.10997004127430955685924352255220372D-01 - pw(11)= 0.12679313613778005716132640549939369D-01 - pw(12)= 0.14571725519850453422967526162847044D-01 - pw(13)= 0.16718025683719221072849622604121732D-01 - pw(14)= 0.19169191632602931509237238888017571D-01 - pw(15)= 0.21984027773632325146538486813698425D-01 - pw(16)= 0.25229801526425832500489421276402268D-01 - pw(17)= 0.28982985579859711073310312330962579D-01 - pw(18)= 0.33330232727989722055601631534296414D-01 - pw(19)= 0.38369695026986305327273292677722862D-01 - pw(20)= 0.44212758195453754037273486501473222D-01 - pw(21)= 0.50986218113528798819802588773272107D-01 - pw(22)= 0.58834899219859082186151202608595314D-01 - pw(23)= 0.67924711805560266218170048520749527D-01 - pw(24)= 0.78446161793212774417972991998090425D-01 - pw(25)= 0.90618353089338093281832862102708959D-01 - pw(26)= 0.10469355154807580861182066828267783D+00 - pw(27)= 0.12096240801949377693281668581524884D+00 - pw(28)= 0.13975996681192581185584791335037507D+00 - pw(29)= 0.16147261854647111561641477797041919D+00 - pw(30)= 0.18654619771836799217214159212739546D+00 - pw(31)= 0.21549548172910307786548214775429476D+00 - pw(32)= 0.24891542853693721010435838752243091D+00 - pw(33)= 0.28749460731095023155833868404490821D+00 - pw(34)= 0.33203145035164386811290281590906319D+00 - pw(35)= 0.38345421620808062938143753495150014D+00 - pw(36)= 0.44284595340052667020006768746408026D+00 - pw(37)= 0.51147637334768443075077758376770258D+00 - pw(38)= 0.59084351705391666304914198628651331D+00 - pw(39)= 0.68272966822652633355509618616923460D+00 - pw(40)= 0.78927854268555272214405511831212374D+00 - pw(41)= 0.91310513570381533776392939171880649D+00 - pw(42)= 0.10574572006171471782817226512789618D+01 - pw(43)= 0.12264611065174215544931924431769419D+01 - pw(44)= 0.14255110268002246494143307711209540D+01 - pw(45)= 0.16619131890762190850734830449377183D+01 - pw(46)= 0.19460108584649183568553387986228161D+01 - pw(47)= 0.22932836878593317486878267058034769D+01 - pw(48)= 0.27286173208169332405445956554845995D+01 - pw(49)= 0.32960612083291505532244371231002347D+01 - pw(50)= 0.40852129572893008083223241182607456D+01 - pw(51)= 0.53244443718708726283429813785502501D+01 - pw(52)= 0.79364763274033460235909669710486787D+01 -endif -if(kn == 53) then - px( 1)= 0.26766570337770727528170071246520492D-03 - px( 2)= 0.14134994888717316196512141359660423D-02 - px( 3)= 0.34880516519107403970758408932728686D-02 - px( 4)= 0.65147144546990128201893001657184119D-02 - px( 5)= 0.10527738385818098548917273507385571D-01 - px( 6)= 0.15573804518451021912970981414950172D-01 - px( 7)= 0.21713726252830234938777297490158659D-01 - px( 8)= 0.29024636763636674603190500526652300D-01 - px( 9)= 0.37602750842526664184644761079770188D-01 - px(10)= 0.47566769031392334409003814047662918D-01 - px(11)= 0.59061976687480610814082596844534784D-01 - px(12)= 0.72265065129161553717681820608077034D-01 - px(13)= 0.87389667514901234333023139285043850D-01 - px(14)= 0.10469256811716735138855696367380459D+00 - px(15)= 0.12448052659100388617653071564391793D+00 - px(16)= 0.14711767656647036665309334227739246D+00 - px(17)= 0.17303351979129595018460716414084922D+00 - px(18)= 0.20273163525968242510069482779745799D+00 - px(19)= 0.23679933260205764383581082765297944D+00 - px(20)= 0.27591857089423386003627193488624766D+00 - px(21)= 0.32087851933017891678519245554349762D+00 - px(22)= 0.37259015544714682612264560035248998D+00 - px(23)= 0.43210329558213462840685308411772863D+00 - px(24)= 0.50062645115169427006331566594320087D+00 - px(25)= 0.57954991863929172590111454477122563D+00 - px(26)= 0.67047254853929904297053555520671948D+00 - px(27)= 0.77523270078396767247020251586833006D+00 - px(28)= 0.89594398099521297070364552635590329D+00 - px(29)= 0.10350364637633983911134434941276054D+01 - px(30)= 0.11953042494029295972874450876803876D+01 - px(31)= 0.13799603766985451495079757379930050D+01 - px(32)= 0.15927003388200029769046949954648636D+01 - px(33)= 0.18377757429440148519213952318668567D+01 - px(34)= 0.21200800469036983170430724963400520D+01 - px(35)= 0.24452488452376621255574183769872919D+01 - px(36)= 0.28197779343346462148849901776278631D+01 - px(37)= 0.32511634739679835132078134711319616D+01 - px(38)= 0.37480701572008120067601493824866792D+01 - px(39)= 0.43205356864545827578744429205984365D+01 - px(40)= 0.49802234963192223418120333779580923D+01 - px(41)= 0.57407413466949074338019856932754499D+01 - px(42)= 0.66180524896462577279982649502155498D+01 - px(43)= 0.76310210235736203134564527928377033D+01 - px(44)= 0.88021583399572758584848067436385530D+01 - px(45)= 0.10158682176971824690390966321630474D+02 - px(46)= 0.11734082305033337839638441179290729D+02 - px(47)= 0.13570548670500422261838718291627263D+02 - px(48)= 0.15722959067717953266995872699981198D+02 - px(49)= 0.18265912835593835999181449267925846D+02 - px(50)= 0.21307358846087134932506642776378880D+02 - px(51)= 0.25018732302848078195180809407087663D+02 - px(52)= 0.29716799310767845023829953456809187D+02 - px(53)= 0.36189883296712405656605145166567620D+02 - pw( 1)= 0.68727895311546056672417632556060663D-03 - pw( 2)= 0.16070919274300689247610198780862261D-02 - pw( 3)= 0.25458627429238886643700982329323737D-02 - pw( 4)= 0.35131488282427671750845547282299960D-02 - pw( 5)= 0.45206512132016346669133873690489887D-02 - pw( 6)= 0.55815803493477684696883612785461534D-02 - pw( 7)= 0.67110727810055987477008256080221629D-02 - pw( 8)= 0.79267274337364857488746447630284210D-02 - pw( 9)= 0.92492158987355566092832344937099689D-02 - pw(10)= 0.10702954335651349887056984538693118D-01 - pw(11)= 0.12316817159979657159477748940914048D-01 - pw(12)= 0.14124861608422353998695630535116428D-01 - pw(13)= 0.16167026688242225669124227980203198D-01 - pw(14)= 0.18489777850309322975434696546792003D-01 - pw(15)= 0.21146695079329887928180449259296579D-01 - pw(16)= 0.24199043898015304232347701717858443D-01 - pw(17)= 0.27716412274949780256699383093693372D-01 - pw(18)= 0.31777522531875989191876025347365721D-01 - pw(19)= 0.36471323604726302437811675512366006D-01 - pw(20)= 0.41898438814168148316223242028468085D-01 - pw(21)= 0.48173004736710585958446082731843080D-01 - pw(22)= 0.55424906910282799399495351801946886D-01 - pw(23)= 0.63802408146079784845031445360740918D-01 - pw(24)= 0.73475174123291445512755683573657914D-01 - pw(25)= 0.84637721415891914195614185274215305D-01 - pw(26)= 0.97513337772431529356043555523731762D-01 - pw(27)= 0.11235854926743224508497153739859653D+00 - pw(28)= 0.12946823338997306958433565807904794D+00 - pw(29)= 0.14918150326264123120255847449750618D+00 - pw(30)= 0.17188851951158054500285261747356087D+00 - pw(31)= 0.19803842749441616169319688491554627D+00 - pw(32)= 0.22814867480320173542334621460640035D+00 - pw(33)= 0.26281604587933027973782593098613157D+00 - pw(34)= 0.30272987012943215567370818850475517D+00 - pw(35)= 0.34868803701893003485156344297085159D+00 - pw(36)= 0.40161671758475143724478782201274920D+00 - pw(37)= 0.46259509691660897469854774936577795D+00 - pw(38)= 0.53288704858998218510998379047232847D+00 - pw(39)= 0.61398266720080789199956346065784397D+00 - pw(40)= 0.70765415497287477021582999321525042D+00 - pw(41)= 0.81603315079286754570846341816085723D+00 - pw(42)= 0.94172096259934976721215747985393842D+00 - pw(43)= 0.10879507869255648374416119995114567D+01 - pw(44)= 0.12588348263164235013060998002290594D+01 - pw(45)= 0.14597555182997943253538598754860232D+01 - pw(46)= 0.16980130743144824162948847512548702D+01 - pw(47)= 0.19839559340047588104729344306844787D+01 - pw(48)= 0.23330898446405588037915551380406672D+01 - pw(49)= 0.27703666290115585782188239169095417D+01 - pw(50)= 0.33399854209196729287026099030910255D+01 - pw(51)= 0.41318959182038353492816091595673578D+01 - pw(52)= 0.53754564214506164841297568531690314D+01 - pw(53)= 0.79976793171584998097855897868836215D+01 -endif -if(kn == 54) then - px( 1)= 0.26264930117348429139290996151281804D-03 - px( 2)= 0.13868906738912660879236605780496720D-02 - px( 3)= 0.34218609389365101881877496771598208D-02 - px( 4)= 0.63896402666404078488978957960735342D-02 - px( 5)= 0.10322496845387460828947292469844905D-01 - px( 6)= 0.15264338738352449380895430335814458D-01 - px( 7)= 0.21272256455056810596723516130341971D-01 - px( 8)= 0.28418500058997028295178690126035519D-01 - px( 9)= 0.36792971921502915811121828397263107D-01 - px(10)= 0.46506297013822420204383294802715589D-01 - px(11)= 0.57693520466708188814704660142883650D-01 - px(12)= 0.70518461365129452425223403139712676D-01 - px(13)= 0.85178722402024364366080809274553805D-01 - px(14)= 0.10191132396313916367597436543864246D+00 - px(15)= 0.12099891148146303511007328280200670D+00 - px(16)= 0.14277649208960370764126858545635037D+00 - px(17)= 0.16763870128060588007507633768412165D+00 - px(18)= 0.19604767989373694212165892917330544D+00 - px(19)= 0.22854173844903711798992185084177464D+00 - px(20)= 0.26574507414867220998786541508194950D+00 - px(21)= 0.30837886575038189865518112878881341D+00 - px(22)= 0.35727409810156122358477142059464743D+00 - px(23)= 0.41338647141242328011007195602712556D+00 - px(24)= 0.47781374735909608618161123128437916D+00 - px(25)= 0.55181589013188334829869817262449587D+00 - px(26)= 0.63683838468354892203676383796234712D+00 - px(27)= 0.73453915990617498625433330235628987D+00 - px(28)= 0.84681961134902351472977037719948889D+00 - px(29)= 0.97586030619425103922072317930864173D+00 - px(30)= 0.11241620640819289778534857523108689D+01 - px(31)= 0.12945932455227004001780815602771556D+01 - px(32)= 0.14904442534576446040956072788546865D+01 - px(33)= 0.17154904765677712078499252639933687D+01 - px(34)= 0.19740651959466252026766816200100743D+01 - px(35)= 0.22711443710983615908928413364407126D+01 - px(36)= 0.26124457644974208978288838251819154D+01 - px(37)= 0.30045456290378285221329470833486270D+01 - px(38)= 0.34550172827009750892780305838794194D+01 - px(39)= 0.39725975078115629937328151923773107D+01 - px(40)= 0.45673891239659119753724152909459628D+01 - px(41)= 0.52511117619579365605498903719507942D+01 - px(42)= 0.60374185967841005869825918871706947D+01 - px(43)= 0.69423059418930191122711841795147301D+01 - px(44)= 0.79846576055279832356713682899398131D+01 - px(45)= 0.91869913294368492995853064476380612D+01 - px(46)= 0.10576519438659101773107722114056823D+02 - px(47)= 0.12186718685254033866362337537527451D+02 - px(48)= 0.14059766727898227283287996455084238D+02 - px(49)= 0.16250545334714196738451761057164415D+02 - px(50)= 0.18833703150856317701270861469808901D+02 - px(51)= 0.21917341606064316865260155485126217D+02 - px(52)= 0.25673282302478359060862571209297198D+02 - px(53)= 0.30419275213506007788817412536110986D+02 - px(54)= 0.36946729449797287732704130194371723D+02 - pw( 1)= 0.67438506760335604670255898997892204D-03 - pw( 2)= 0.15766730795758068292367252096595575D-02 - pw( 3)= 0.24969010052497357618191188751223522D-02 - pw( 4)= 0.34440151585437669876044394575278031D-02 - pw( 5)= 0.44289906416819842278218395665719176D-02 - pw( 6)= 0.54641754472827296169646638159218540D-02 - pw( 7)= 0.65636602656678083773300472655876417D-02 - pw( 8)= 0.77437596216345299454674361542457447D-02 - pw( 9)= 0.90235600454359303552602178356724465D-02 - pw(10)= 0.10425525098028000202217706397525802D-01 - pw(11)= 0.11976141415278903324803398081755125D-01 - pw(12)= 0.13706580129463991816022845593243222D-01 - pw(13)= 0.15653341765432086164765180782288484D-01 - pw(14)= 0.17858856637993859978963097726835278D-01 - pw(15)= 0.20372031993532375434976430093094102D-01 - pw(16)= 0.23248770947110602095264163700609205D-01 - pw(17)= 0.26552527146765384924391807859062013D-01 - pw(18)= 0.30354987338193146683949319082938468D-01 - pw(19)= 0.34736978572221172926784909895879575D-01 - pw(20)= 0.39789676392153673565656352708108709D-01 - pw(21)= 0.45616156632160377923066059864692116D-01 - pw(22)= 0.52333303181858472447283586460111768D-01 - pw(23)= 0.60074068936523280823710913114576385D-01 - pw(24)= 0.68990089360969502777474092821797731D-01 - pw(25)= 0.79254662793149992222714102849062817D-01 - pw(26)= 0.91066132125499050058268572358948574D-01 - pw(27)= 0.10465172413589772775444532528313942D+00 - pw(28)= 0.12027192389960804141840562034167157D+00 - pw(29)= 0.13822548329913942877038198233392259D+00 - pw(30)= 0.15885518710020893698686679081438881D+00 - pw(31)= 0.18255453080015552034691247423767820D+00 - pw(32)= 0.20977550590982571093431208426850228D+00 - pw(33)= 0.24103774649612254567565390373246475D+00 - pw(34)= 0.27693937431815337409878200394680401D+00 - pw(35)= 0.31817000170668241587144774061721975D+00 - pw(36)= 0.36552653149467718606906358503261169D+00 - pw(37)= 0.41993266332639492829188554992634159D+00 - pw(38)= 0.48246342603227571148223509952223773D+00 - pw(39)= 0.55437668888757955166034857478955110D+00 - pw(40)= 0.63715459820483206603034925190427611D+00 - pw(41)= 0.73255947628580611457525194256778522D+00 - pw(42)= 0.84271132620425470830602701627255624D+00 - pw(43)= 0.97019847802255398271823872088809738D+00 - pw(44)= 0.11182405648635278467279403261340421D+01 - pw(45)= 0.12909368955309583178279432436240182D+01 - pw(46)= 0.14936596913948188663729129981816013D+01 - pw(47)= 0.17337048448489904526596452227676677D+01 - pw(48)= 0.20214277264670434357889736680445585D+01 - pw(49)= 0.23723618064324051170579931680799604D+01 - pw(50)= 0.28115262822357354520836275890201874D+01 - pw(51)= 0.33832701615264074051804266553052254D+01 - pw(52)= 0.41778930411553237976334045242367218D+01 - pw(53)= 0.54257303561036728308093456793849096D+01 - pw(54)= 0.80580396891998440461864612470334815D+01 -endif -if(kn == 55) then - px( 1)= 0.25781894164194012434309377930977151D-03 - px( 2)= 0.13612751031594850206775668181403743D-02 - px( 3)= 0.33581698458852455723214904600493989D-02 - px( 4)= 0.62693690413467873882583513720977616D-02 - px( 5)= 0.10125310136665687971355854015542415D-01 - px( 6)= 0.14967346351298576942771412456996075D-01 - px( 7)= 0.20849147888131304033053596047200679D-01 - px( 8)= 0.27838492572990899694403556585128016D-01 - px( 9)= 0.36019520279673976527480271121267550D-01 - px(10)= 0.45495507499439646885138144349015201D-01 - px(11)= 0.56392208694907062019327988961512485D-01 - px(12)= 0.68861794220674272839206633722494427D-01 - px(13)= 0.83087389784370585294971502938757785D-01 - px(14)= 0.99288194535557030324060332733237417D-01 - px(15)= 0.11772513411433556959001042015562167D+00 - px(16)= 0.13870700468575135671370372276360989D+00 - px(17)= 0.16259709487800222734179448321576442D+00 - px(18)= 0.18982033572772041366926851284912359D+00 - px(19)= 0.22087111215348614429940107012361562D+00 - px(20)= 0.25632195206452177680013231303976781D+00 - px(21)= 0.29683337125272468206259039405023979D+00 - px(22)= 0.34316518501958180617486163242847902D+00 - px(23)= 0.39618960592837583486273888758703435D+00 - px(24)= 0.45690644457913117767731277056887464D+00 - px(25)= 0.52646073119585672648955063711186170D+00 - px(26)= 0.60616309005755762712921556869139875D+00 - px(27)= 0.69751323092341496562260118366147217D+00 - px(28)= 0.80222697243582393516449526286850739D+00 - px(29)= 0.92226728168578278287496843309443837D+00 - px(30)= 0.10598799021605760545446954338407357D+01 - px(31)= 0.12176342516911872309482334694729854D+01 - px(32)= 0.13984704081792695653834769855727069D+01 - px(33)= 0.16057531729310064020228982054108760D+01 - px(34)= 0.18433344234846220454903450952844757D+01 - px(35)= 0.21156252611660201772094982169604363D+01 - px(36)= 0.24276798551755632926179359953601148D+01 - px(37)= 0.27852934330452205314765679671405731D+01 - px(38)= 0.31951176405057028717258322130425570D+01 - px(39)= 0.36647976062397870566073557102018905D+01 - px(40)= 0.42031366779907243753664331178612278D+01 - px(41)= 0.48202972331234718143106561639280279D+01 - px(42)= 0.55280496786915488679928146699422870D+01 - px(43)= 0.63400875319084531166198158934082316D+01 - px(44)= 0.72724356766190935880980645516629891D+01 - px(45)= 0.83439939737765991151533907320407321D+01 - px(46)= 0.95772839454414406682247983833388953D+01 - px(47)= 0.10999511253155353900722704602341888D+02 - px(48)= 0.12644139881765944224874013113714956D+02 - px(49)= 0.14553337047736742937007375682160668D+02 - px(50)= 0.16781991883653111960448246164544552D+02 - px(51)= 0.19404807001908978674636447631480543D+02 - px(52)= 0.22530041649957455873120088574563768D+02 - px(53)= 0.26329908156600549662558562174411129D+02 - px(54)= 0.31123139662609753954221361691834122D+02 - px(55)= 0.37704208661211514525413812039019937D+02 - pw( 1)= 0.66197010017150176035001943503791612D-03 - pw( 2)= 0.15473986838973462236228078334406893D-02 - pw( 3)= 0.24498237063923531340555158367628164D-02 - pw( 4)= 0.33776287859319922812719456819175282D-02 - pw( 5)= 0.43411225775038708068974684768107364D-02 - pw( 6)= 0.53518651954260805354105863317050609D-02 - pw( 7)= 0.64229967432330231179415055321225939D-02 - pw( 8)= 0.75696698103592431759756449888418760D-02 - pw( 9)= 0.88095427367590507023412664390713554D-02 - pw(10)= 0.10163325161348951569439668089332658D-01 - pw(11)= 0.11655363240567140813674097334911855D-01 - pw(12)= 0.13314243348515058027067949270881268D-01 - pw(13)= 0.15173386635166955733192695121243376D-01 - pw(14)= 0.17271608028279645041513857207256865D-01 - pw(15)= 0.19653626901657341159725115198646941D-01 - pw(16)= 0.22370543498465587594423534082963911D-01 - pw(17)= 0.25480329057857957803698239091828550D-01 - pw(18)= 0.29048405984491612079844523910580431D-01 - pw(19)= 0.33148404854392620654285014862228099D-01 - pw(20)= 0.37863173170688566168679168381831541D-01 - pw(21)= 0.43286083546221674152033551748574709D-01 - pw(22)= 0.49522660152701866580552473522264330D-01 - pw(23)= 0.56692523854953065740425656620598762D-01 - pw(24)= 0.64931653102898776070657891627893174D-01 - pw(25)= 0.74394966967471873327666467149784361D-01 - pw(26)= 0.85259253137910454276787766290602372D-01 - pw(27)= 0.97726482395605256876361711073457773D-01 - pw(28)= 0.11202756962445724523518796251371951D+00 - pw(29)= 0.12842665975742201914453661139337216D+00 - pw(30)= 0.14722603673926230968313687924515801D+00 - pw(31)= 0.16877177710790063461389831494800560D+00 - pw(32)= 0.19346030041623174522759333491248744D+00 - pw(33)= 0.22174601068744206398277053899636196D+00 - pw(34)= 0.25415028231904386989193251104458640D+00 - pw(35)= 0.29127212894662705701804000206750184D+00 - pw(36)= 0.33380101778748297671825708760982179D+00 - pw(37)= 0.38253247505964083905642476699352139D+00 - pw(38)= 0.43838740189609651989190850364739389D+00 - pw(39)= 0.50243643541811971842045067743697266D+00 - pw(40)= 0.57593132895181935261643585397322053D+00 - pw(41)= 0.66034632680141149455034526996759720D+00 - pw(42)= 0.75743410948623930877746056875550536D+00 - pw(43)= 0.86930350487407763483137649785144665D+00 - pw(44)= 0.99853057127864209173395623916848905D+00 - pw(45)= 0.11483223403291556759761625679375804D+01 - pw(46)= 0.13227664361633706905774477225292859D+01 - pw(47)= 0.15272262858299299746420952422140745D+01 - pw(48)= 0.17689950292216856169402049903686857D+01 - pw(49)= 0.20584365667376688423615789562990280D+01 - pw(50)= 0.24111135465716154769717216299933567D+01 - pw(51)= 0.28521135641444982391089026540840319D+01 - pw(52)= 0.34259355468040848187361378226527105D+01 - pw(53)= 0.42232267265008794416077388243344182D+01 - pw(54)= 0.54752904412621487070917591943555763D+01 - pw(55)= 0.81175842524558588852537391429064074D+01 -endif -if(kn == 56) then - px( 1)= 0.25316296361047986979222704481695825D-03 - px( 2)= 0.13365901075979378376059799633963547D-02 - px( 3)= 0.32968187867088652972746128579161298D-02 - px( 4)= 0.61535889280423947494589016534393146D-02 - px( 5)= 0.99356438145931069871255487366443905D-02 - px( 6)= 0.14681977810167156277573501598213788D-01 - px( 7)= 0.20443111678193472909091734497338465D-01 - px( 8)= 0.27282717458052044439797443722558727D-01 - px( 9)= 0.35279661237909244138393562138044293D-01 - px(10)= 0.44530515113621224893066466054989604D-01 - px(11)= 0.55152581833421451048846493819641517D-01 - px(12)= 0.67287461982589797279652325494947566D-01 - px(13)= 0.81105172698463510127775615113455155D-01 - px(14)= 0.96808802192712951405093425956382214D-01 - px(15)= 0.11463966370426778674305108921559686D+00 - px(16)= 0.13488290707039924587391549901601733D+00 - px(17)= 0.15787356616502069558695735577341045D+00 - px(18)= 0.18400306948311470201893098688372094D+00 - px(19)= 0.21372631172205091315476055948506776D+00 - px(20)= 0.24756945964825417876144186941921348D+00 - px(21)= 0.28613872754179262008712244069824825D+00 - px(22)= 0.33013039508573925156714087895320715D+00 - px(23)= 0.38034235430595768392206349401494352D+00 - px(24)= 0.43768747182717257038476928803614522D+00 - px(25)= 0.50320905100551354990170632643254963D+00 - px(26)= 0.57809868566859133942855647933251305D+00 - px(27)= 0.66371681885395609603081291900385610D+00 - px(28)= 0.76161635782113158520389067408690172D+00 - px(29)= 0.87356975058892174416448481275277481D+00 - px(30)= 0.10015999992219932485646137689436440D+01 - px(31)= 0.11480161722880751230648197166792158D+01 - px(32)= 0.13154540866711925408169656894558134D+01 - px(33)= 0.15069229633419957830339656064577451D+01 - px(34)= 0.17258590324121035302982461790011709D+01 - px(35)= 0.19761872844199233411044622815807802D+01 - px(36)= 0.22623928590392104330658026022941003D+01 - px(37)= 0.25896039617088162412729887555211206D+01 - px(38)= 0.29636887519045083121995707405502705D+01 - px(39)= 0.33913694283389338916129064504517107D+01 - px(40)= 0.38803578612346352983211862453492671D+01 - px(41)= 0.44395187703404766581147350250334258D+01 - px(42)= 0.50790689079212912344545291525845662D+01 - px(43)= 0.58108244499131239038620507188960897D+01 - px(44)= 0.66485146168191270558987216454001789D+01 - px(45)= 0.76081888077726672589265324613407006D+01 - px(46)= 0.87087596930040374091019857045701857D+01 - px(47)= 0.99727503684543062151028368853863376D+01 - px(48)= 0.11427358864753300284154140246256655D+02 - px(49)= 0.13106036822460464759217266858454354D+02 - px(50)= 0.15050942893772414274408031537252686D+02 - px(51)= 0.17316976836533264840463009058352564D+02 - px(52)= 0.19978899582721245916009884455806908D+02 - px(53)= 0.23145132959066649402957025145184967D+02 - px(54)= 0.26988283646585129742188842153188599D+02 - px(55)= 0.31828066291287360025547176837209814D+02 - px(56)= 0.38461992963555384952401394913962473D+02 - pw( 1)= 0.65000398579028612344523107765589882D-03 - pw( 2)= 0.15191959531332206241117313092588598D-02 - pw( 3)= 0.24045082453807306790230685497814259D-02 - pw( 4)= 0.33138053120687689905848031561566419D-02 - pw( 5)= 0.42567825727886822265204218213429885D-02 - pw( 6)= 0.52442786039205566768969009014580993D-02 - pw( 7)= 0.62885672143814305898041140071920990D-02 - pw( 8)= 0.74037469337422375540169840316881048D-02 - pw( 9)= 0.86061856387838724696966695518776445D-02 - pw(10)= 0.99150131408851259539215604893253718D-02 - pw(11)= 0.11352651712245621514495559004367745D-01 - pw(12)= 0.12945367107597103270871461076597819D-01 - pw(13)= 0.14723816350026995714384635716386299D-01 - pw(14)= 0.16723567921281172025119870508991203D-01 - pw(15)= 0.18985579339702223096072943965050960D-01 - pw(16)= 0.21556638092713088509079450535846802D-01 - pw(17)= 0.24489800737865776493594940653820355D-01 - pw(18)= 0.27844892154300886517220254607831059D-01 - pw(19)= 0.31689141187091977270960955143508341D-01 - pw(20)= 0.36098024102012043921594821940947720D-01 - pw(21)= 0.41156366531808391430233301214909888D-01 - pw(22)= 0.46959728529751547723837911521797776D-01 - pw(23)= 0.53616077334483042468992867566071648D-01 - pw(24)= 0.61247744777900280040241146591130434D-01 - pw(25)= 0.69993670734405775857440933512557351D-01 - pw(26)= 0.80011946473114843210018552446024989D-01 - pw(27)= 0.91482687623385083454637087474649108D-01 - pw(28)= 0.10461128275712219242263696233465467D+00 - pw(29)= 0.11963207950030344698937570198730592D+00 - pw(30)= 0.13681258639855890430946376814155933D+00 - pw(31)= 0.15645828725394505928296678986188780D+00 - pw(32)= 0.17891818774495993299205732107037433D+00 - pw(33)= 0.20459124493705478925339465025762632D+00 - pw(34)= 0.23393387292296502681950858384393993D+00 - pw(35)= 0.26746877815837016930288816174815801D+00 - pw(36)= 0.30579546472966641872754674677137451D+00 - pw(37)= 0.34960287592590148622876766322786034D+00 - pw(38)= 0.39968482435244330897498051519252302D+00 - pw(39)= 0.45695914023983362115797429164759424D+00 - pw(40)= 0.52249188739719558018523893289646747D+00 - pw(41)= 0.59752864128661802876559766037133258D+00 - pw(42)= 0.68353583227497725169589493456556878D+00 - pw(43)= 0.78225676673770165676669761907089189D+00 - pw(44)= 0.89578957040518890914540096033762699D+00 - pw(45)= 0.10266987265790718339050961200134187D+01 - pw(46)= 0.11781795916828648269313771182072094D+01 - pw(47)= 0.13543092491251640404184331203718613D+01 - pw(48)= 0.15604436574176829660651085498995850D+01 - pw(49)= 0.18038746446191042006169647129755746D+01 - pw(50)= 0.20949760879837258079028998605170623D+01 - pw(51)= 0.24493410971001376006947638889013500D+01 - pw(52)= 0.28921265037563651103311910556760302D+01 - pw(53)= 0.34679810059374950896183058369361721D+01 - pw(54)= 0.42678969880522956032021408252168488D+01 - pw(55)= 0.55241361350341697933563283440847313D+01 - pw(56)= 0.81763098024027352046055737635386992D+01 -endif -if(kn == 57) then - px( 1)= 0.24868815335455860789772463829278776D-03 - px( 2)= 0.13128708050967756872359979837581267D-02 - px( 3)= 0.32378911411275754242453920014506796D-02 - px( 4)= 0.60424464389102377765134145508034116D-02 - px( 5)= 0.97537140744139523885808398942622953D-02 - px( 6)= 0.14408512776948249945597359956767419D-01 - px( 7)= 0.20054465803924087173957630667856565D-01 - px( 8)= 0.26751478412571506944557766303264093D-01 - px( 9)= 0.34573592603908669936189665703461548D-01 - px(10)= 0.43611265799596486187161513111221049D-01 - px(11)= 0.53974113832253153451922084032312655D-01 - px(12)= 0.65794147604388044446693612420778637D-01 - px(13)= 0.79229515286929499370440341287450519D-01 - px(14)= 0.94468740236461490024906772241244278D-01 - px(15)= 0.11173542497728950196695294189711635D+00 - px(16)= 0.13129338283356472153949413479135588D+00 - px(17)= 0.15345217055234222351399145423334369D+00 - px(18)= 0.17857303240016059517363555848698504D+00 - px(19)= 0.20707532489301037480311227730726369D+00 - px(20)= 0.23944355888568364571611377084388653D+00 - px(21)= 0.27623525598764641501713800314236035D+00 - px(22)= 0.31808985711331926597193028676339348D+00 - px(23)= 0.36573893965362420393418300064924317D+00 - px(24)= 0.42001800250816678509315665005653842D+00 - px(25)= 0.48188007571772045658000477338572683D+00 - px(26)= 0.55241141366947723712332135298933352D+00 - px(27)= 0.63284954422789175567060868091083275D+00 - px(28)= 0.72460397335389008193055638126809504D+00 - px(29)= 0.82927988618893103652768340546531032D+00 - px(30)= 0.94870524097232991512519332944115950D+00 - px(31)= 0.10849617219948828718866932170405878D+01 - px(32)= 0.12404201040143688064542721435857076D+01 - px(33)= 0.14177806871234317057617629235375845D+01 - px(34)= 0.16201195944991234133183611776590787D+01 - px(35)= 0.18509418957597958737177594237457576D+01 - px(36)= 0.21142427406807205051181553190404025D+01 - px(37)= 0.24145779838341929681853920899859986D+01 - px(38)= 0.27571461831926801287738694538112624D+01 - px(39)= 0.31478844142641227801141212544520951D+01 - px(40)= 0.35935811310102033845744816463442230D+01 - px(41)= 0.41020104413877221957804467425939289D+01 - px(42)= 0.46820938302391253474628239056559849D+01 - px(43)= 0.53440978451440606526835982001965101D+01 - px(44)= 0.60998800348958771477889866745643881D+01 - px(45)= 0.69632012890426281929489833882602384D+01 - px(46)= 0.79501320435284778078392913107490947D+01 - px(47)= 0.90795950558462101790938546852706470D+01 - px(48)= 0.10374113225206633587054123726355172D+02 - px(49)= 0.11860876304821553920803819021800250D+02 - px(50)= 0.13573324197835825157192895761042269D+02 - px(51)= 0.15553608911071130591094968547049722D+02 - px(52)= 0.17856644009398398334027651945364308D+02 - px(53)= 0.20557252972621646881571546529989799D+02 - px(54)= 0.23764024975150934121434317734353805D+02 - px(55)= 0.27649965226105594436613117812938989D+02 - px(56)= 0.32535770450738589606101398766605691D+02 - px(57)= 0.39221977092927801982893352173282336D+02 - pw( 1)= 0.63850406831505099738052476325581961D-03 - pw( 2)= 0.14921037986204782494894229786059731D-02 - pw( 3)= 0.23610114029305975624758426308210372D-02 - pw( 4)= 0.32526128541318438561754430100405078D-02 - pw( 5)= 0.41760397770836242676248541207424580D-02 - pw( 6)= 0.51414703399848581702756105177606384D-02 - pw( 7)= 0.61603891216042467763280301070012439D-02 - pw( 8)= 0.72459385496994110627368996655782062D-02 - pw( 9)= 0.84133204261124725154997593044037002D-02 - pw(10)= 0.96802413566072434418561861828501851D-02 - pw(11)= 0.11067394131957919015201993364016383D-01 - pw(12)= 0.12598960733898038415484720369114255D-01 - pw(13)= 0.14303116690973784728070109566032249D-01 - pw(14)= 0.16212514694032192842344075317239116D-01 - pw(15)= 0.18364731480288355645565506966903663D-01 - pw(16)= 0.20802678116001007719911702716244623D-01 - pw(17)= 0.23574997949263392825181094032890825D-01 - pw(18)= 0.26736501608313518430082020410358951D-01 - pw(19)= 0.30348704821972667356360038317766333D-01 - pw(20)= 0.34480535573107543829081934087773551D-01 - pw(21)= 0.39209262449594068155056630028607624D-01 - pw(22)= 0.44621673558809258382775767008026743D-01 - pw(23)= 0.50815515123079612189194336850658426D-01 - pw(24)= 0.57901188015516540741960287519003280D-01 - pw(25)= 0.66003700690181420273674403867109883D-01 - pw(26)= 0.75264885586780504125481386653660311D-01 - pw(27)= 0.85845899110248805481309245561278484D-01 - pw(28)= 0.97930039537017286368934956545788095D-01 - pw(29)= 0.11172593126446497910307506265827343D+00 - pw(30)= 0.12747113783580932628750351291841410D+00 - pw(31)= 0.14543628129357976189998302298577329D+00 - pw(32)= 0.16592976340385132601117576239709225D+00 - pw(33)= 0.18930320741347600799686592812399379D+00 - pw(34)= 0.21595777019635163082254656264589623D+00 - pw(35)= 0.24635151796681218166031807821910603D+00 - pw(36)= 0.28100812005137097745126867432418044D+00 - pw(37)= 0.32052720324236309080623061527301095D+00 - pw(38)= 0.36559683724134186969654741198617812D+00 - pw(39)= 0.41700881002095348086483260374822063D+00 - pw(40)= 0.47567763253365164936264867132129384D+00 - pw(41)= 0.54266463607013432598384544728159736D+00 - pw(42)= 0.61920917583183104118650977432015768D+00 - pw(43)= 0.70676996963437778493319526328106689D+00 - pw(44)= 0.80708121914722438188398403384545040D+00 - pw(45)= 0.92223080476089922545257576125601390D+00 - pw(46)= 0.10547722906765838946076024117162519D+01 - pw(47)= 0.12078902142342952742295563718452734D+01 - pw(48)= 0.13856521611695241681852077225258712D+01 - pw(49)= 0.15934078194682780100598346033527563D+01 - pw(50)= 0.18384490024798631769423787694389017D+01 - pw(51)= 0.21311608735825934018647767200886737D+01 - pw(52)= 0.24871681932345592025950623054739285D+01 - pw(53)= 0.29316978875123326489119117358318389D+01 - pw(54)= 0.35095485536224900059615141531182325D+01 - pw(55)= 0.43120561559297553662108510075135951D+01 - pw(56)= 0.55724340409598520094151224510004002D+01 - pw(57)= 0.82344141971751154156256216589492573D+01 -endif -if(kn == 58) then - px( 1)= 0.24434033394451184831332088274972614D-03 - px( 2)= 0.12898297855758084794990735012824795D-02 - px( 3)= 0.31806717757901164181366233077269143D-02 - px( 4)= 0.59345896357739671411939250656689172D-02 - px( 5)= 0.95773007935647589740131510133326933D-02 - px( 6)= 0.14143599916031750336288464659818307D-01 - px( 7)= 0.19678421471099466334328004427638463D-01 - px( 8)= 0.26238185206866063929609421458766724D-01 - px( 9)= 0.33892482730395244265993385655932016D-01 - px(10)= 0.42726147636423146495787112568669502D-01 - px(11)= 0.52841746547136186264664886791106024D-01 - px(12)= 0.64362519550464307152961734809637400D-01 - px(13)= 0.77435783944099717432445083945677444D-01 - px(14)= 0.92236796334736021064525541096205779D-01 - px(15)= 0.10897304960833762497368263739776640D+00 - px(16)= 0.12788897058127535161532867316020021D+00 - px(17)= 0.14927098966869470910429970787632708D+00 - px(18)= 0.17345298118495822795560109857844673D+00 - px(19)= 0.20082212068477187733619503336787526D+00 - px(20)= 0.23182526492157509727427115110610957D+00 - px(21)= 0.26697601677385238610408934208835716D+00 - px(22)= 0.30686267976111180750915583977482233D+00 - px(23)= 0.35215732947079012938113786105805915D+00 - px(24)= 0.40362623554087235036623549904310772D+00 - px(25)= 0.46214186627722416843651050017300999D+00 - px(26)= 0.52869670779772405928275015906498196D+00 - px(27)= 0.60441913758500942132872353014906915D+00 - px(28)= 0.69059161190173186977923080505129435D+00 - px(29)= 0.78867145852021566336079167775472318D+00 - px(30)= 0.90031461037842010940804732183197484D+00 - px(31)= 0.10274026720013124888351576363117054D+01 - px(32)= 0.11720737797358867139870870772361403D+01 - px(33)= 0.13367578014779413237552856365102921D+01 - px(34)= 0.15242165260123579138727083350791305D+01 - px(35)= 0.17375896232753198202855353912673167D+01 - px(36)= 0.19804473251817493601350778629165516D+01 - px(37)= 0.22568509975250307614430520806868566D+01 - px(38)= 0.25714230695306034625372310347575688D+01 - px(39)= 0.29294281925088078526353597436284922D+01 - px(40)= 0.33368680628505624786084197643705525D+01 - px(41)= 0.38005931431891197468681483744664603D+01 - px(42)= 0.43284356654318541562597981524559705D+01 - px(43)= 0.49293699827199698643746537542247355D+01 - px(44)= 0.56137088447382837752405108085311518D+01 - px(45)= 0.63933479763797142778167198658680473D+01 - px(46)= 0.72820772388702724076103924229296585D+01 - px(47)= 0.82959860217191960273479806666773572D+01 - px(48)= 0.94540058246887833757920831704609262D+01 - px(49)= 0.10778658868884060967669218732823142D+02 - px(50)= 0.12297127123681394967235226888868496D+02 - px(51)= 0.14042840296906789602475373898648457D+02 - px(52)= 0.16057946341424185599637733181299734D+02 - px(53)= 0.18397376144609008660129440553886667D+02 - px(54)= 0.21136019829441359432294268771943368D+02 - px(55)= 0.24382637830465314053970048093781607D+02 - px(56)= 0.28310635883229105824662724848457888D+02 - px(57)= 0.33241688156491882652244516947157465D+02 - px(58)= 0.39979324854730350560380760310991567D+02 - pw( 1)= 0.62733109048409588703595816867276199D-03 - pw( 2)= 0.14657936036557848053358560746138226D-02 - pw( 3)= 0.23188039412803108240913072658971970D-02 - pw( 4)= 0.31933033503497732665912080929284740D-02 - pw( 5)= 0.40979006451946182384357721499095629D-02 - pw( 6)= 0.50421643688026695837303498584333168D-02 - pw( 7)= 0.60368540232012649771110859344923619D-02 - pw( 8)= 0.70942376382161779284891095459836907D-02 - pw( 9)= 0.82284548942576416691892667668446138D-02 - pw(10)= 0.94559203825706345816982076964964040D-02 - pw(11)= 0.10795760712764688238988523701079555D-01 - pw(12)= 0.12270273749594030379848145803872545D-01 - pw(13)= 0.13905392767905254997074541953428596D-01 - pw(14)= 0.15731135789435791324224040917880572D-01 - pw(15)= 0.17782024053941809054144110879593548D-01 - pw(16)= 0.20097465728382493095345225347477278D-01 - pw(17)= 0.22722120795621834323838288686748639D-01 - pw(18)= 0.25706285556264310174964040630928441D-01 - pw(19)= 0.29106352315788903005352971189456581D-01 - pw(20)= 0.32985404610440883148326461811537831D-01 - pw(21)= 0.37413999049710482757623335110546207D-01 - pw(22)= 0.42471166374810272031277917315265662D-01 - pw(23)= 0.48245645122530704885901911499992297D-01 - pw(24)= 0.54837348649234653699497897577026759D-01 - pw(25)= 0.62359063229977042722185054531332040D-01 - pw(26)= 0.70938380290334218998361325437647613D-01 - pw(27)= 0.80719876243549163356586986213036934D-01 - pw(28)= 0.91867565718926819665093909060870324D-01 - pw(29)= 0.10456766632576442199326583277715739D+00 - pw(30)= 0.11903172509910440103985964305294291D+00 - pw(31)= 0.13550016907993941758980848856857065D+00 - pw(32)= 0.15424635639536902570179058036488656D+00 - pw(33)= 0.17558122145590420153365262455970893D+00 - pw(34)= 0.19985863067967063227542616526431877D+00 - pw(35)= 0.22748159641691842637396351126311924D+00 - pw(36)= 0.25890954059680923306019616384762021D+00 - pw(37)= 0.29466686202428234212246962384893466D+00 - pw(38)= 0.33535315105334115324523894344163479D+00 - pw(39)= 0.38165552584862205474730474843490408D+00 - pw(40)= 0.43436375602075320325783351993460999D+00 - pw(41)= 0.49438912403001643077208212506747590D+00 - pw(42)= 0.56278840340808053436660241948913705D+00 - pw(43)= 0.64079498868817746739885701660482042D+00 - pw(44)= 0.72986023375748618090030528388862540D+00 - pw(45)= 0.83170968157393847753518524949795654D+00 - pw(46)= 0.94842152201009860199494152865552426D+00 - pw(47)= 0.10825390752513919957077425880259437D+01 - pw(48)= 0.12372368612987946539691608858506276D+01 - pw(49)= 0.14165738915071239610049524678978730D+01 - pw(50)= 0.16258946038408501735019290717589660D+01 - pw(51)= 0.18724918616166796018089411475069718D+01 - pw(52)= 0.21667631278657422335640585222239716D+01 - pw(53)= 0.25243656135025940850141001255242091D+01 - pw(54)= 0.29705966338517197699939016107637128D+01 - pw(55)= 0.35504038802739003258026682341550974D+01 - pw(56)= 0.43554634619204172726228503521716960D+01 - pw(57)= 0.56199288682781632030749398177962085D+01 - pw(58)= 0.82915991164603216241594990786246181D+01 -endif -if(kn == 59) then - px( 1)= 0.24017044279779292814629839617243450D-03 - px( 2)= 0.12677357311318245204340430050679257D-02 - px( 3)= 0.31258222629100185940076720172210997D-02 - px( 4)= 0.58312499497336295459920293987796347D-02 - px( 5)= 0.94083841957770971773603088185375410D-02 - px( 6)= 0.13890148671899779318807930205580027D-01 - px( 7)= 0.19318997801708323654202530335822085D-01 - px( 8)= 0.25748144263471581820366574897844328D-01 - px( 9)= 0.33243095406506599413902998194831696D-01 - px(10)= 0.41883537206821979634762367168491752D-01 - px(11)= 0.51765600230040784734413248495245234D-01 - px(12)= 0.63004534953194280528857501029401263D-01 - px(13)= 0.75737811688489614503254126944665394D-01 - px(14)= 0.90128644081599828866050645306435530D-01 - px(15)= 0.10636991817245456656640674998882425D+00 - px(16)= 0.12468849734796975917708256298381635D+00 - px(17)= 0.14534987449428730815870543082646967D+00 - px(18)= 0.16866316194781867947963273417414352D+00 - px(19)= 0.19498644807014926777480289949553731D+00 - px(20)= 0.22473260025151679743882657221256944D+00 - px(21)= 0.25837564665252656619516749971741362D+00 - px(22)= 0.29645791158221640514428954276611043D+00 - px(23)= 0.33959810551825596456611237045554791D+00 - px(24)= 0.38850058094912455808801515111809716D+00 - px(25)= 0.44396596557949377138507728624091020D+00 - px(26)= 0.50690338304849533824426898328468191D+00 - px(27)= 0.57834447492359249583873987633512907D+00 - px(28)= 0.65945945037149605268093152551109157D+00 - px(29)= 0.75157541311753698549281538653947915D+00 - px(30)= 0.85619724923675556880411715666490139D+00 - px(31)= 0.97503140385009140556694003085244755D+00 - px(32)= 0.11100129303358947930559321811557219D+01 - px(33)= 0.12633362636366230834317390982154572D+01 - px(34)= 0.14374902524376435821063430688457459D+01 - px(35)= 0.16352980880451859683554892839580029D+01 - px(36)= 0.18599628978969427131632789868222316D+01 - px(37)= 0.21151199396574527695286927569279424D+01 - px(38)= 0.24048965537486799801854034104807364D+01 - px(39)= 0.27339813311614543030519151016136162D+01 - px(40)= 0.31077043634013766751037593199231006D+01 - px(41)= 0.35321310127538594381872851430342547D+01 - px(42)= 0.40141724497033117837380156117688880D+01 - px(43)= 0.45617173671065324231220714157123560D+01 - px(44)= 0.51837909802502262421170872012101186D+01 - px(45)= 0.58907499495757681122761284245660513D+01 - px(46)= 0.66945256944244027786731357744463874D+01 - px(47)= 0.76089345000234897625689957165319158D+01 - px(48)= 0.86500822381865882984046085645261005D+01 - px(49)= 0.98369069052844445818188741380057179D+01 - px(50)= 0.11191928172141132463081500916511436D+02 - px(51)= 0.12742318869317979750127012420299866D+02 - px(52)= 0.14521497812382557030759980660881216D+02 - px(53)= 0.16571609002403235432729979108495388D+02 - px(54)= 0.18947601696583022289134197021834890D+02 - px(55)= 0.21724434819141199485567406823338980D+02 - px(56)= 0.25011043569653595241704228425344735D+02 - px(57)= 0.28981239539754668238717062448027987D+02 - px(58)= 0.33957682861442574052880069059391550D+02 - px(59)= 0.40746915059614258028591599305878563D+02 - pw( 1)= 0.61661580843002818474199774713820435D-03 - pw( 2)= 0.14405704070514471179034636856714158D-02 - pw( 3)= 0.22783669060606481294489644369246025D-02 - pw( 4)= 0.31365358110962922351769249715961996D-02 - pw( 5)= 0.40232040175153947001508790877931979D-02 - pw( 6)= 0.49473803193663214737587320055863858D-02 - pw( 7)= 0.59191612038299517714653169073172258D-02 - pw( 8)= 0.69500182956008661963671339567494311D-02 - pw( 9)= 0.80531272876872073780988243944927837D-02 - pw(10)= 0.92437335233207542919233267694815444D-02 - pw(11)= 0.10539549268603537847804050413706850D-01 - pw(12)= 0.11961173126551657822377551429583253D-01 - pw(13)= 0.13532517058206043238763114996607351D-01 - pw(14)= 0.15281223576152454708908655408040014D-01 - pw(15)= 0.17239057603729693052163344051819092D-01 - pw(16)= 0.19442266458961476989325558184455495D-01 - pw(17)= 0.21931917469546401944382556564398834D-01 - pw(18)= 0.24754242433746641427431268228835038D-01 - pw(19)= 0.27961035103797151420897993567315053D-01 - pw(20)= 0.31610155514745908523448651363558848D-01 - pw(21)= 0.35766190345276654301560678957468317D-01 - pw(22)= 0.40501304167252803397704165063681030D-01 - pw(23)= 0.45896299064297235092911388363815978D-01 - pw(24)= 0.52041886574768681501627601462919698D-01 - pw(25)= 0.59040170127350183777203679746281117D-01 - pw(26)= 0.67006338167425270345951219198202241D-01 - pw(27)= 0.76070575665505304542628473598135760D-01 - pw(28)= 0.86380211831507007368019890251542642D-01 - pw(29)= 0.98102132636666032695755579145966437D-01 - pw(30)= 0.11142549737101106673605436812360518D+00 - pw(31)= 0.12656480912918894407887994146661412D+00 - pw(32)= 0.14376340063800550321586640229070301D+00 - pw(33)= 0.16329741041010875411766070944591531D+00 - pw(34)= 0.18548034139204283612129505343431894D+00 - pw(35)= 0.21066831722133520869183148142544999D+00 - pw(36)= 0.23926618297652114852711313447455029D+00 - pw(37)= 0.27173464220061857023595757633130234D+00 - pw(38)= 0.30859868602838502541227655874922030D+00 - pw(39)= 0.35045766213824028933896241045941680D+00 - pw(40)= 0.39799746405898883695366965337609207D+00 - pw(41)= 0.45200551538659797325171404962194440D+00 - pw(42)= 0.51338951072222608042425293298227461D+00 - pw(43)= 0.58320130681121704145811176462102830D+00 - pw(44)= 0.66266801682533587367291708503855157D+00 - pw(45)= 0.75323338727944523148159051268884834D+00 - pw(46)= 0.85661417022141803759749168951905898D+00 - pw(47)= 0.97487886818484736512715176832767718D+00 - pw(48)= 0.11105607080548591676445817081025167D+01 - pw(49)= 0.12668244926674778382406916781590943D+01 - pw(50)= 0.14477211050240886139248358867709201D+01 - pw(51)= 0.16585903209293941982874152551002654D+01 - pw(52)= 0.19067267819120553533052269878705101D+01 - pw(53)= 0.22025411213234530205031685493395576D+01 - pw(54)= 0.25617240229328119031730477599209231D+01 - pw(55)= 0.30096444005352699081587070335073080D+01 - pw(56)= 0.35914005587611010508966597641663763D+01 - pw(57)= 0.43990110029151586972435889347654638D+01 - pw(58)= 0.56675740145564914236518411943414860D+01 - pw(59)= 0.83489730609712306624049954354151761D+01 -endif -end subroutine wts500 -end MODULE WTS500_MOD diff --git a/src/trans/sedrenames.txt b/src/trans/sedrenames.txt index e9122bd03..b1d9e9a37 100644 --- a/src/trans/sedrenames.txt +++ b/src/trans/sedrenames.txt @@ -141,5 +141,3 @@ s/VDTUV_MOD/VDTUV_MOD_VARIANTDESIGNATOR/g s/VDTUVAD_MOD/VDTUVAD_MOD_VARIANTDESIGNATOR/g s/VORDIV_TO_UV/VORDIV_TO_UV_VARIANTDESIGNATOR/g s/WRITE_LEGPOL_MOD/WRITE_LEGPOL_MOD_VARIANTDESIGNATOR/g -s/wts500_mod/wts500_mod_VARIANTDESIGNATOR/g -s/WTS500_MOD/WTS500_MOD_VARIANTDESIGNATOR/g From 4a508881603cc2e5836103e7a8757fa7acd234ac Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Thu, 12 Sep 2024 11:47:13 +0000 Subject: [PATCH 64/86] Move interpol_decomp_mod.F90 to common --- src/trans/common/CMakeLists.txt | 1 + src/trans/{cpu/algor => common/internal}/interpol_decomp_mod.F90 | 0 src/trans/sedrenames.txt | 1 - 3 files changed, 1 insertion(+), 1 deletion(-) rename src/trans/{cpu/algor => common/internal}/interpol_decomp_mod.F90 (100%) diff --git a/src/trans/common/CMakeLists.txt b/src/trans/common/CMakeLists.txt index 850609a3a..e1a2102a8 100644 --- a/src/trans/common/CMakeLists.txt +++ b/src/trans/common/CMakeLists.txt @@ -18,6 +18,7 @@ list( APPEND ectrans_common_src internal/cpledn_mod.F90 internal/field_split_mod.F90 internal/gawl_mod.F90 + internal/interpol_decomp_mod.F90 internal/sugaw_mod.F90 internal/supol_mod.F90 internal/supolf_mod.F90 diff --git a/src/trans/cpu/algor/interpol_decomp_mod.F90 b/src/trans/common/internal/interpol_decomp_mod.F90 similarity index 100% rename from src/trans/cpu/algor/interpol_decomp_mod.F90 rename to src/trans/common/internal/interpol_decomp_mod.F90 diff --git a/src/trans/sedrenames.txt b/src/trans/sedrenames.txt index b1d9e9a37..c33ad4ec4 100644 --- a/src/trans/sedrenames.txt +++ b/src/trans/sedrenames.txt @@ -52,7 +52,6 @@ s/GPNORM_TRANS_CTL_MOD/GPNORM_TRANS_CTL_MOD_VARIANTDESIGNATOR/g s/gpnorm_trans( *($|\(| |\*))/gpnorm_trans_VARIANTDESIGNATOR\1/g s/GPNORM_TRANS( *($|\(| |\*))/GPNORM_TRANS_VARIANTDESIGNATOR\1/g s/INIGPTR_MOD/INIGPTR_MOD_VARIANTDESIGNATOR/g -s/INTERPOL_DECOMP_MOD/INTERPOL_DECOMP_MOD_VARIANTDESIGNATOR/g s/INV_TRANS_CTL_MOD/INV_TRANS_CTL_MOD_VARIANTDESIGNATOR/g s/INV_TRANS_CTLAD_MOD/INV_TRANS_CTLAD_MOD_VARIANTDESIGNATOR/g s/inv_trans( *($|\(| |\*))/inv_trans_VARIANTDESIGNATOR\1/g From 5a2ff3d1feb5d7151967476d2be3b171512ed70f Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Thu, 12 Sep 2024 12:10:59 +0000 Subject: [PATCH 65/86] Move setup_geom_mod.F90 to common --- src/trans/common/CMakeLists.txt | 1 + .../internal/setup_geom_mod.F90 | 2 +- src/trans/gpu/internal/setup_geom_mod.F90 | 109 ------------------ src/trans/sedrenames.txt | 1 - 4 files changed, 2 insertions(+), 111 deletions(-) rename src/trans/{cpu => common}/internal/setup_geom_mod.F90 (98%) delete mode 100755 src/trans/gpu/internal/setup_geom_mod.F90 diff --git a/src/trans/common/CMakeLists.txt b/src/trans/common/CMakeLists.txt index e1a2102a8..362937d0d 100644 --- a/src/trans/common/CMakeLists.txt +++ b/src/trans/common/CMakeLists.txt @@ -34,6 +34,7 @@ list( APPEND ectrans_common_src internal/set2pe_mod.F90 internal/eq_regions_mod.F90 internal/pre_suleg_mod.F90 + internal/setup_geom_mod.F90 internal/shuffle_mod.F90 internal/sump_trans0_mod.F90 internal/sustaonl_mod.F90 diff --git a/src/trans/cpu/internal/setup_geom_mod.F90 b/src/trans/common/internal/setup_geom_mod.F90 similarity index 98% rename from src/trans/cpu/internal/setup_geom_mod.F90 rename to src/trans/common/internal/setup_geom_mod.F90 index 68de63ed1..bf1f9b779 100644 --- a/src/trans/cpu/internal/setup_geom_mod.F90 +++ b/src/trans/common/internal/setup_geom_mod.F90 @@ -12,7 +12,7 @@ MODULE SETUP_GEOM_MOD CONTAINS SUBROUTINE SETUP_GEOM -USE PARKIND1 ,ONLY : JPRD, JPIM +USE EC_PARKIND ,ONLY : JPRD, JPIM USE TPM_GEN ,ONLY : NOUT, NPRINTLEV USE TPM_DIM ,ONLY : R diff --git a/src/trans/gpu/internal/setup_geom_mod.F90 b/src/trans/gpu/internal/setup_geom_mod.F90 deleted file mode 100755 index 12d89af51..000000000 --- a/src/trans/gpu/internal/setup_geom_mod.F90 +++ /dev/null @@ -1,109 +0,0 @@ -! (C) Copyright 2000- ECMWF. -! (C) Copyright 2000- Meteo-France. -! -! This software is licensed under the terms of the Apache Licence Version 2.0 -! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -! In applying this licence, ECMWF does not waive the privileges and immunities -! granted to it by virtue of its status as an intergovernmental organisation -! nor does it submit to any jurisdiction. -! - -MODULE SETUP_GEOM_MOD -CONTAINS -SUBROUTINE SETUP_GEOM - -USE PARKIND1, ONLY: JPRD, JPIM -USE TPM_GEN, ONLY: NOUT, NPRINTLEV -USE TPM_DIM, ONLY: R -USE TPM_DISTR, ONLY: D -USE TPM_FIELDS, ONLY: F -USE TPM_GEOMETRY, ONLY: G -! - -IMPLICIT NONE - -REAL(KIND=JPRD) :: ZSQM2(R%NDGL) -INTEGER(KIND=JPIM) :: IDGLU(0:R%NSMAX,R%NDGNH) -INTEGER(KIND=JPIM) :: JGL,JM,NSMAXLIN - -LOGICAL :: LLP1,LLP2 - -! ------------------------------------------------------------------ - -IF(.NOT.D%LGRIDONLY) THEN - - LLP1 = NPRINTLEV>0 - LLP2 = NPRINTLEV>1 - - IF(LLP1) WRITE(NOUT,*) '=== ENTER ROUTINE SETUP_GEOM ===' - - ALLOCATE (G%NMEN(R%NDGL)) - IF(LLP2)WRITE(NOUT,9) 'G%NMEN ',SIZE(G%NMEN ),SHAPE(G%NMEN ) - - NSMAXLIN = R%NDGL-1 - IF (R%NSMAX>=NSMAXLIN .OR. .NOT. G%LREDUCED_GRID) THEN - ! linear or full grid - DO JGL=1,R%NDGL - G%NMEN(JGL) = MIN(R%NSMAX,(G%NLOEN(JGL)-1)/2) - ENDDO - ELSEIF (R%NSMAX>=R%NDGL*2/3-1) THEN - ! quadratic grid - ZSQM2(:) = 3*(NSMAXLIN-R%NSMAX)/R%NDGL*F%R1MU2(:) - G%NMEN(1) = MIN(R%NSMAX,INT(REAL(G%NLOEN(1)-1,JPRD)/(2.0_JPRD+ZSQM2(1)))) - DO JGL=2,R%NDGNH - G%NMEN(JGL) = MIN(R%NSMAX,MAX(G%NMEN(JGL-1),& - &INT(REAL(G%NLOEN(JGL)-1,JPRD)/(2.0_JPRD+ ZSQM2(JGL))))) - ENDDO - ! * SOUTHERN HEMISPHERE : - G%NMEN(R%NDGL) = MIN(R%NSMAX,INT(REAL(G%NLOEN(R%NDGL)-1,JPRD)/(2.0_JPRD+ZSQM2(R%NDGL)))) - DO JGL=R%NDGL-1, R%NDGNH+1, -1 - G%NMEN(JGL) = MIN(R%NSMAX,MAX(G%NMEN(JGL+1),& - &INT(REAL(G%NLOEN(JGL)-1,JPRD)/(2.0_JPRD+ ZSQM2(JGL))))) - ENDDO - ELSE - ! cubic grid - ZSQM2(:) = F%R1MU2(:) - G%NMEN(1) = MIN(R%NSMAX,INT(REAL(G%NLOEN(1)-1,JPRD)/(2.0_JPRD+ZSQM2(1)))-1) - DO JGL=2,R%NDGNH - G%NMEN(JGL) = MIN(R%NSMAX,MAX(G%NMEN(JGL-1),& - &INT(REAL(G%NLOEN(JGL)-1,JPRD)/(2.0_JPRD+ ZSQM2(JGL)))-1)) - ENDDO - ! * SOUTHERN HEMISPHERE : - G%NMEN(R%NDGL) = MIN(R%NSMAX,INT(REAL(G%NLOEN(R%NDGL)-1,JPRD)/(2.0_JPRD+ZSQM2(R%NDGL)))-1) - DO JGL=R%NDGL-1, R%NDGNH+1, -1 - G%NMEN(JGL) = MIN(R%NSMAX,MAX(G%NMEN(JGL+1),& - &INT(REAL(G%NLOEN(JGL)-1,JPRD)/(2.0_JPRD+ ZSQM2(JGL)))-1)) - ENDDO - ENDIF - IF(LLP1) THEN - WRITE(NOUT,FMT='('' (JGL,G%NLOEN,G%NMEN) '')') - WRITE(NOUT,FMT='(8(1X,''('',I4,I4,I4,'')''))')& - &(JGL,G%NLOEN(JGL),G%NMEN(JGL),JGL=1,R%NDGL) - ENDIF - ALLOCATE(G%NDGLU(0:R%NSMAX)) - IF(LLP2)WRITE(NOUT,9) 'G%NDGLU ',SIZE(G%NDGLU ),SHAPE(G%NDGLU ) - IDGLU(:,:) = 0 - G%NDGLU(:) = 0 - DO JGL=1,R%NDGNH - DO JM=0,G%NMEN(JGL) - IDGLU(JM,JGL) = 1 - ENDDO - ENDDO - DO JM=0,R%NSMAX - DO JGL=1,R%NDGNH - G%NDGLU(JM) = G%NDGLU(JM)+IDGLU(JM,JGL) - ENDDO - ENDDO - IF(LLP1) THEN - WRITE(NOUT,FMT='('' (JM,G%NDGLU) '')') - WRITE(NOUT,FMT='(10(1X,''('',I4,I4,'')''))')& - &(JM,G%NDGLU(JM),JM=0,R%NSMAX) - ENDIF - -ENDIF - -! ------------------------------------------------------------------ -9 FORMAT(1X,'ARRAY ',A10,' ALLOCATED ',8I8) - -END SUBROUTINE SETUP_GEOM -END MODULE SETUP_GEOM_MOD diff --git a/src/trans/sedrenames.txt b/src/trans/sedrenames.txt index c33ad4ec4..813bbc674 100644 --- a/src/trans/sedrenames.txt +++ b/src/trans/sedrenames.txt @@ -96,7 +96,6 @@ s/seefmm_mix/seefmm_mix_VARIANTDESIGNATOR/g s/SEEFMM_MIX/SEEFMM_MIX_VARIANTDESIGNATOR/g s/SET_RESOL_MOD/SET_RESOL_MOD_VARIANTDESIGNATOR/g s/SETUP_DIMS_MOD/SETUP_DIMS_MOD_VARIANTDESIGNATOR/g -s/SETUP_GEOM_MOD/SETUP_GEOM_MOD_VARIANTDESIGNATOR/g s/SETUP_TRANS( *($|\(| |\*))/SETUP_TRANS_VARIANTDESIGNATOR\1/g s/setup_trans( *($|\(| |\*|\.h))/setup_trans_VARIANTDESIGNATOR\1/g s/specnorm/specnorm_VARIANTDESIGNATOR/g From d9ccd88279008a2de78e9b1310dacf0a79d5b96b Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Thu, 12 Sep 2024 12:44:38 +0000 Subject: [PATCH 66/86] Move setup_dims_mod.F90 to common --- src/trans/common/CMakeLists.txt | 1 + .../internal/setup_dims_mod.F90 | 11 ++-- src/trans/cpu/external/setup_trans.F90 | 7 +-- src/trans/cpu/internal/setup_dims_mod.F90 | 50 ------------------- src/trans/gpu/external/setup_trans.F90 | 6 +-- src/trans/sedrenames.txt | 1 - 6 files changed, 14 insertions(+), 62 deletions(-) rename src/trans/{gpu => common}/internal/setup_dims_mod.F90 (90%) mode change 100755 => 100644 delete mode 100644 src/trans/cpu/internal/setup_dims_mod.F90 diff --git a/src/trans/common/CMakeLists.txt b/src/trans/common/CMakeLists.txt index 362937d0d..7dd28c41d 100644 --- a/src/trans/common/CMakeLists.txt +++ b/src/trans/common/CMakeLists.txt @@ -34,6 +34,7 @@ list( APPEND ectrans_common_src internal/set2pe_mod.F90 internal/eq_regions_mod.F90 internal/pre_suleg_mod.F90 + internal/setup_dims_mod.F90 internal/setup_geom_mod.F90 internal/shuffle_mod.F90 internal/sump_trans0_mod.F90 diff --git a/src/trans/gpu/internal/setup_dims_mod.F90 b/src/trans/common/internal/setup_dims_mod.F90 old mode 100755 new mode 100644 similarity index 90% rename from src/trans/gpu/internal/setup_dims_mod.F90 rename to src/trans/common/internal/setup_dims_mod.F90 index db6e47bfb..b672d84d4 --- a/src/trans/gpu/internal/setup_dims_mod.F90 +++ b/src/trans/common/internal/setup_dims_mod.F90 @@ -11,11 +11,13 @@ MODULE SETUP_DIMS_MOD CONTAINS SUBROUTINE SETUP_DIMS +! EXPECTED TO BE SET ALREADY: +! - NSMAX +! - NTMAX +! - NDGL -USE PARKIND1, ONLY: JPIM -USE TPM_DIM, ONLY: R -USE TPM_FLT, ONLY: S -! +USE EC_PARKIND ,ONLY : JPIM +USE TPM_DIM ,ONLY : R IMPLICIT NONE @@ -38,7 +40,6 @@ SUBROUTINE SETUP_DIMS R%NLEI1 = R%NSMAX+4+MOD(R%NSMAX+4+1,2) R%NLEI3 = R%NDGNH+MOD(R%NDGNH+2,2) -IF (S%LSOUTHPNM) R%NLEI3=2*R%NLEI3 R%NLED3 = R%NTMAX+2+MOD(R%NTMAX+3,2) R%NLED4 = R%NTMAX+3+MOD(R%NTMAX+4,2) diff --git a/src/trans/cpu/external/setup_trans.F90 b/src/trans/cpu/external/setup_trans.F90 index bd394ddd1..cbe7df786 100644 --- a/src/trans/cpu/external/setup_trans.F90 +++ b/src/trans/cpu/external/setup_trans.F90 @@ -337,11 +337,15 @@ SUBROUTINE SETUP_TRANS(KSMAX,KDGL,KDLON,KLOEN,LDSPLIT,PSTRET,& WRITE(NOUT,*) 'FFTW is now mandatory so this option is deprecated' ENDIF +! Setup distribution independent dimensions +CALL SETUP_DIMS + S%LSOUTHPNM=.FALSE. IF(PRESENT(PSTRET)) THEN IF (ABS(PSTRET-1.0_JPRD)>100._JPRD*EPSILON(1._JPRD)) THEN G%RSTRET=PSTRET S%LSOUTHPNM=.TRUE. + R%NLEI3=2*R%NLEI3 ! double ENDIF ENDIF @@ -388,9 +392,6 @@ SUBROUTINE SETUP_TRANS(KSMAX,KDGL,KDLON,KLOEN,LDSPLIT,PSTRET,& ! Setup resolution dependent structures ! ------------------------------------- -! Setup distribution independent dimensions -CALL SETUP_DIMS - ! First part of setup of distributed environment CALL SUMP_TRANS_PRELEG diff --git a/src/trans/cpu/internal/setup_dims_mod.F90 b/src/trans/cpu/internal/setup_dims_mod.F90 deleted file mode 100644 index 97449f9be..000000000 --- a/src/trans/cpu/internal/setup_dims_mod.F90 +++ /dev/null @@ -1,50 +0,0 @@ -! (C) Copyright 2000- ECMWF. -! (C) Copyright 2000- Meteo-France. -! -! This software is licensed under the terms of the Apache Licence Version 2.0 -! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -! In applying this licence, ECMWF does not waive the privileges and immunities -! granted to it by virtue of its status as an intergovernmental organisation -! nor does it submit to any jurisdiction. -! - -MODULE SETUP_DIMS_MOD -CONTAINS -SUBROUTINE SETUP_DIMS - -USE PARKIND1 ,ONLY : JPIM - -USE TPM_DIM ,ONLY : R -USE TPM_FLT ,ONLY : S -! - -IMPLICIT NONE - -INTEGER(KIND=JPIM) :: JM,JN,ISPOLEG - -! ------------------------------------------------------------------ - -ISPOLEG = 0 -DO JM=0,R%NSMAX - DO JN=JM,R%NTMAX+1 - ISPOLEG = ISPOLEG+1 - ENDDO -ENDDO -R%NSPOLEG = ISPOLEG - -R%NSPEC_G = (R%NSMAX+1)*(R%NSMAX+2)/2 -R%NSPEC2_G = R%NSPEC_G*2 - -R%NDGNH = (R%NDGL+1)/2 - -R%NLEI1 = R%NSMAX+4+MOD(R%NSMAX+4+1,2) -R%NLEI3 = R%NDGNH+MOD(R%NDGNH+2,2) -IF (S%LSOUTHPNM) R%NLEI3=2*R%NLEI3 - -R%NLED3 = R%NTMAX+2+MOD(R%NTMAX+3,2) -R%NLED4 = R%NTMAX+3+MOD(R%NTMAX+4,2) - -! ------------------------------------------------------------------ - -END SUBROUTINE SETUP_DIMS -END MODULE SETUP_DIMS_MOD diff --git a/src/trans/gpu/external/setup_trans.F90 b/src/trans/gpu/external/setup_trans.F90 index f474ccba2..85c6bf7ab 100755 --- a/src/trans/gpu/external/setup_trans.F90 +++ b/src/trans/gpu/external/setup_trans.F90 @@ -374,12 +374,15 @@ SUBROUTINE SETUP_TRANS(KSMAX,KDGL,KDLON,KLOEN,LDSPLIT,PSTRET,& D%LCPNMONLY=LDPNMONLY ENDIF +! Setup distribution independent dimensions +CALL SETUP_DIMS S%LSOUTHPNM=.FALSE. IF(PRESENT(PSTRET)) THEN IF (ABS(PSTRET-1.0_JPRBT)>100._JPRBT*EPSILON(1._JPRBT)) THEN G%RSTRET=PSTRET S%LSOUTHPNM=.TRUE. + R%NLEI3=2*R%NLEI3 ! double ENDIF ENDIF @@ -422,9 +425,6 @@ SUBROUTINE SETUP_TRANS(KSMAX,KDGL,KDLON,KLOEN,LDSPLIT,PSTRET,& ! Setup resolution dependent structures ! ------------------------------------- -! Setup distribution independent dimensions -CALL SETUP_DIMS - ! First part of setup of distributed environment CALL SUMP_TRANS_PRELEG diff --git a/src/trans/sedrenames.txt b/src/trans/sedrenames.txt index 813bbc674..49cb51484 100644 --- a/src/trans/sedrenames.txt +++ b/src/trans/sedrenames.txt @@ -95,7 +95,6 @@ s/READ_LEGPOL_MOD/READ_LEGPOL_MOD_VARIANTDESIGNATOR/g s/seefmm_mix/seefmm_mix_VARIANTDESIGNATOR/g s/SEEFMM_MIX/SEEFMM_MIX_VARIANTDESIGNATOR/g s/SET_RESOL_MOD/SET_RESOL_MOD_VARIANTDESIGNATOR/g -s/SETUP_DIMS_MOD/SETUP_DIMS_MOD_VARIANTDESIGNATOR/g s/SETUP_TRANS( *($|\(| |\*))/SETUP_TRANS_VARIANTDESIGNATOR\1/g s/setup_trans( *($|\(| |\*|\.h))/setup_trans_VARIANTDESIGNATOR\1/g s/specnorm/specnorm_VARIANTDESIGNATOR/g From a7237a08880e425789ccaad1eb1133388ea99749 Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Fri, 13 Sep 2024 07:33:17 +0000 Subject: [PATCH 67/86] Remove spurious print --- src/trans/gpu/internal/ledir_mod.F90 | 5 ----- src/trans/gpu/internal/leinv_mod.F90 | 4 ---- 2 files changed, 9 deletions(-) diff --git a/src/trans/gpu/internal/ledir_mod.F90 b/src/trans/gpu/internal/ledir_mod.F90 index 7655ab632..7f305f90c 100755 --- a/src/trans/gpu/internal/ledir_mod.F90 +++ b/src/trans/gpu/internal/ledir_mod.F90 @@ -154,11 +154,6 @@ SUBROUTINE LEDIR(ALLOCATOR,ZINPS,ZINPA,ZINPS0,ZINPA0,ZOUT,ZOUT0,POA1,KF_FS) !$ACC& PRESENT(ZAA,ZAS,POA1,D_OFFSETS_GEMM1,D_OFFSETS_GEMM2) #endif - ! anti-symmetric - IF(KMLOC0 > 0) THEN - WRITE(NOUT,*) 'computing m=0 in double precision' - ENDIF - IF (LSYNC_TRANS) THEN #ifdef ACCGPU !$ACC WAIT(1) diff --git a/src/trans/gpu/internal/leinv_mod.F90 b/src/trans/gpu/internal/leinv_mod.F90 index 5f62d0a7a..590d6c163 100755 --- a/src/trans/gpu/internal/leinv_mod.F90 +++ b/src/trans/gpu/internal/leinv_mod.F90 @@ -152,10 +152,6 @@ SUBROUTINE LEINV(ALLOCATOR,PIA,ZINP,ZINP0,ZOUTS,ZOUTA,ZOUTS0,ZOUTA0,KF_LEG) !$ACC& PRESENT(R_NSMAX,G_NDGLU,D_OFFSETS_GEMM2) #endif - IF (KMLOC0 > 0) THEN - WRITE(NOUT,*) 'computing m=0 in double precision' - ENDIF - ! READ 2:NSMAX+3 !IF KM=0 and NSMAX is 6: From 68e6ab92b277fe55ace6f087a980319a417cf2bb Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Fri, 13 Sep 2024 08:05:07 +0000 Subject: [PATCH 68/86] Add --niter-warmup command line option to be able to configure warmup iterations --- src/programs/ectrans-benchmark.F90 | 58 ++++++++++++++++-------------- 1 file changed, 31 insertions(+), 27 deletions(-) diff --git a/src/programs/ectrans-benchmark.F90 b/src/programs/ectrans-benchmark.F90 index 7701fd1f5..49b6a7e80 100644 --- a/src/programs/ectrans-benchmark.F90 +++ b/src/programs/ectrans-benchmark.F90 @@ -51,9 +51,6 @@ program ectrans_benchmark ! Number of points in top/bottom latitudes integer(kind=jpim), parameter :: min_octa_points = 20 -! Number of warm up steps (for which timing statistics should be ignored) -integer(kind=jpim), parameter :: n_warm_up = 2 - integer(kind=jpim) :: istack, getstackusage real(kind=jprd), dimension(1) :: zmaxerr(5), zerr(5) real(kind=jprd) :: zmaxerrg @@ -68,6 +65,7 @@ program ectrans_benchmark integer(kind=jpim) :: iters = 10 ! Number of iterations for transform test integer(kind=jpim) :: nfld = 1 ! Number of scalar fields integer(kind=jpim) :: nlev = 1 ! Number of vertical levels +integer(kind=jpim) :: iters_warmup = 2 ! Number of warm up steps (for which timing statistics should be ignored) integer(kind=jpim) :: nflevg integer(kind=jpim) :: ndgl ! Number of latitudes @@ -223,7 +221,7 @@ program ectrans_benchmark luse_mpi = detect_mpirun() ! Setup -call get_command_line_arguments(nsmax, cgrid, iters, nfld, nlev, lvordiv, lscders, luvders, & +call get_command_line_arguments(nsmax, cgrid, iters, iters_warmup, nfld, nlev, lvordiv, lscders, luvders, & & luseflt, nopt_mem_tr, nproma, verbosity, ldump_values, lprint_norms, lmeminfo, nprtrv, nprtrw, ncheck) if (cgrid == '') cgrid = cubic_octahedral_gaussian_grid(nsmax) call parse_grid(cgrid, ndgl, nloen) @@ -571,16 +569,16 @@ program ectrans_benchmark if (verbosity >= 0 .and. myproc == 1) then write(nout,'(" ")') - write(nout,'(a,i6,a,f9.2,a)') "ectrans_benchmark initialisation, on",nproc,& + write(nout,'(a,i0,a,f9.2,a)') "ectrans_benchmark initialisation, on ",nproc,& & " tasks, took",ztinit," sec" write(nout,'(" ")') endif if (iters <= 0) call abor1('ectrans_benchmark:iters <= 0') -allocate(ztstep(iters+n_warm_up)) -allocate(ztstep1(iters+n_warm_up)) -allocate(ztstep2(iters+n_warm_up)) +allocate(ztstep(iters+iters_warmup)) +allocate(ztstep1(iters+iters_warmup)) +allocate(ztstep2(iters+iters_warmup)) if (verbosity >= 1 .and. myproc == 1) then write(nout,'(a)') '======= Start of spectral transforms =======' @@ -595,12 +593,12 @@ program ectrans_benchmark gstats_lstats = .false. -write(nout,'(a,i5,a,i5,a)') 'Running for ', iters, ' iterations with', n_warm_up, & - & 'extra warm-up iterations' +write(nout,'(a,i0,a,i0,a)') 'Running for ', iters, ' iterations with ', iters_warmup, & + & ' extra warm-up iterations' write(nout,'(" ")') -do jstep = 1, iters+n_warm_up - if (jstep == n_warm_up + 1) then +do jstep = 1, iters+iters_warmup + if (jstep == iters_warmup + 1) then gstats_lstats = .true. ztloop = timef() endif @@ -706,8 +704,6 @@ program ectrans_benchmark if (myproc == 1) then zmaxerr(:) = -999.0 do ifld = 1, 1 - write(nout,*) "znormsp", znormsp - flush(nout) zerr(1) = abs(znormsp1(ifld)/znormsp(ifld) - 1.0_jprb) zmaxerr(1) = max(zmaxerr(1), zerr(1)) enddo @@ -843,15 +839,15 @@ program ectrans_benchmark ! Calculate timings !=================================================================================================== -ztstepavg = sum(ztstep(n_warm_up+1:)) -ztstepmin = minval(ztstep(n_warm_up+1:)) -ztstepmax = maxval(ztstep(n_warm_up+1:)) -ztstepavg1 = sum(ztstep1(n_warm_up+1:)) -ztstepmin1 = minval(ztstep1(n_warm_up+1:)) -ztstepmax1 = maxval(ztstep1(n_warm_up+1:)) -ztstepavg2 = sum(ztstep2(n_warm_up+1:)) -ztstepmin2 = minval(ztstep2(n_warm_up+1:)) -ztstepmax2 = maxval(ztstep2(n_warm_up+1:)) +ztstepavg = sum(ztstep(iters_warmup+1:)) +ztstepmin = minval(ztstep(iters_warmup+1:)) +ztstepmax = maxval(ztstep(iters_warmup+1:)) +ztstepavg1 = sum(ztstep1(iters_warmup+1:)) +ztstepmin1 = minval(ztstep1(iters_warmup+1:)) +ztstepmax1 = maxval(ztstep1(iters_warmup+1:)) +ztstepavg2 = sum(ztstep2(iters_warmup+1:)) +ztstepmin2 = minval(ztstep2(iters_warmup+1:)) +ztstepmax2 = maxval(ztstep2(iters_warmup+1:)) if (luse_mpi) then call mpl_allreduce(ztloop, 'sum', ldreprod=.false.) @@ -874,15 +870,15 @@ program ectrans_benchmark ztstepavg = (ztstepavg/real(nproc,jprb))/real(iters,jprd) ztloop = ztloop/real(nproc,jprd) ztstep(:) = ztstep(:)/real(nproc,jprd) -ztstepmed = get_median(ztstep(n_warm_up+1:)) +ztstepmed = get_median(ztstep(iters_warmup+1:)) ztstepavg1 = (ztstepavg1/real(nproc,jprb))/real(iters,jprd) ztstep1(:) = ztstep1(:)/real(nproc,jprd) -ztstepmed1 = get_median(ztstep1(n_warm_up+1:)) +ztstepmed1 = get_median(ztstep1(iters_warmup+1:)) ztstepavg2 = (ztstepavg2/real(nproc,jprb))/real(iters,jprd) ztstep2(:) = ztstep2(:)/real(nproc,jprd) -ztstepmed2 = get_median(ztstep2(n_warm_up+1:)) +ztstepmed2 = get_median(ztstep2(iters_warmup+1:)) write(nout,'(a)') '======= Start of time step stats =======' write(nout,'(" ")') @@ -1056,13 +1052,14 @@ subroutine parsing_failed(message) !=================================================================================================== -subroutine get_command_line_arguments(nsmax, cgrid, iters, nfld, nlev, lvordiv, lscders, luvders, & +subroutine get_command_line_arguments(nsmax, cgrid, iters, iters_warmup, nfld, nlev, lvordiv, lscders, luvders, & & luseflt, nopt_mem_tr, nproma, verbosity, ldump_values, lprint_norms, & & lmeminfo, nprtrv, nprtrw, ncheck) integer, intent(inout) :: nsmax ! Spectral truncation character(len=16), intent(inout) :: cgrid ! Spectral truncation integer, intent(inout) :: iters ! Number of iterations for transform test + integer, intent(inout) :: iters_warmup ! Number of iterations for transform test integer, intent(inout) :: nfld ! Number of scalar fields integer, intent(inout) :: nlev ! Number of vertical levels logical, intent(inout) :: lvordiv ! Also transform vorticity/divergence @@ -1107,6 +1104,11 @@ subroutine get_command_line_arguments(nsmax, cgrid, iters, nfld, nlev, lvordiv, if (iters < 1) then call parsing_failed("Invalid argument for -n: must be > 0") end if + case('--niter-warmup') + iters_warmup = get_int_value('--niter-warmup', iarg) + if (iters_warmup < 0) then + call parsing_failed("Invalid argument for --niter-warmup: must be >= 0") + end if ! Parse spectral truncation argument case('-t', '--truncation') nsmax = get_int_value('-t', iarg) @@ -1245,6 +1247,8 @@ subroutine print_help(unit) & (cubic relation)" write(nout, "(a)") " -n, --niter NITER Run for this many inverse/direct transform& & iterations (default = 10)" + write(nout, "(a)") " --niter-warmup Number of warm up iterations,& + & for which timing statistics should be ignored (default = 2)" write(nout, "(a)") " -f, --nfld NFLD Number of scalar fields (default = 1)" write(nout, "(a)") " -l, --nlev NLEV Number of vertical levels (default = 1)" write(nout, "(a)") " --vordiv Also transform vorticity-divergence to wind" From ba7e9f36a73f8cef6427866bb79c08573c6f1649 Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Fri, 13 Sep 2024 08:06:25 +0000 Subject: [PATCH 69/86] Make default for --niter-warmup 3 if not specified, experimentally a better value --- src/programs/ectrans-benchmark.F90 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/programs/ectrans-benchmark.F90 b/src/programs/ectrans-benchmark.F90 index 49b6a7e80..aaff0f448 100644 --- a/src/programs/ectrans-benchmark.F90 +++ b/src/programs/ectrans-benchmark.F90 @@ -65,7 +65,7 @@ program ectrans_benchmark integer(kind=jpim) :: iters = 10 ! Number of iterations for transform test integer(kind=jpim) :: nfld = 1 ! Number of scalar fields integer(kind=jpim) :: nlev = 1 ! Number of vertical levels -integer(kind=jpim) :: iters_warmup = 2 ! Number of warm up steps (for which timing statistics should be ignored) +integer(kind=jpim) :: iters_warmup = 3 ! Number of warm up steps (for which timing statistics should be ignored) integer(kind=jpim) :: nflevg integer(kind=jpim) :: ndgl ! Number of latitudes @@ -1248,7 +1248,7 @@ subroutine print_help(unit) write(nout, "(a)") " -n, --niter NITER Run for this many inverse/direct transform& & iterations (default = 10)" write(nout, "(a)") " --niter-warmup Number of warm up iterations,& - & for which timing statistics should be ignored (default = 2)" + & for which timing statistics should be ignored (default = 3)" write(nout, "(a)") " -f, --nfld NFLD Number of scalar fields (default = 1)" write(nout, "(a)") " -l, --nlev NLEV Number of vertical levels (default = 1)" write(nout, "(a)") " --vordiv Also transform vorticity-divergence to wind" From 17e852df187711f7bb40ac427841834ad4469c08 Mon Sep 17 00:00:00 2001 From: Lukas Mosimann Date: Mon, 16 Sep 2024 04:56:23 -0700 Subject: [PATCH 70/86] add ungrouepd gemms --- CMakeLists.txt | 5 + README.md | 1 + src/trans/gpu/CMakeLists.txt | 1 + src/trans/gpu/algor/hicfft.hip.cpp | 249 ++++++++++++++----------- src/trans/gpu/external/setup_trans.F90 | 3 + 5 files changed, 151 insertions(+), 108 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a8652716a..9a8ff71aa 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -140,6 +140,11 @@ ecbuild_add_option( FEATURE GPU_GRAPHS_GEMM CONDITION HAVE_GPU DESCRIPTION "Enable graph-based optimisation of Legendre transform GEMM kernel" ) + ecbuild_add_option( FEATURE GPU_GRAPHS_FFT + DEFAULT ON + CONDITION HAVE_GPU + DESCRIPTION "Enable graph-based optimisation of Legendre transform FFT kernel" ) + if( BUILD_SHARED_LIBS ) set( GPU_STATIC_DEFAULT OFF ) else() diff --git a/README.md b/README.md index 90000c1c0..82cf05f87 100644 --- a/README.md +++ b/README.md @@ -82,6 +82,7 @@ Extra options can be added to the `cmake` command to control the build: Specific extra options exist for GPU installation: - `-DENABLE_GPU_AWARE_MPI=` default=OF - `-DENABLE_GPU_GRAPHS_GEMM=` default=ON + - `-DENABLE_GPU_GRAPHS_FFT=` default=ON - `-DENABLE_CUTLASS=` default=OFF - `-DENABLE_3XTF32=` default=OFF diff --git a/src/trans/gpu/CMakeLists.txt b/src/trans/gpu/CMakeLists.txt index 37f21db0d..6eae5bf8c 100644 --- a/src/trans/gpu/CMakeLists.txt +++ b/src/trans/gpu/CMakeLists.txt @@ -61,6 +61,7 @@ ecbuild_add_library( $<${HAVE_CUTLASS}:USE_CUTLASS> $<${HAVE_CUTLASS_3XTF32}:USE_CUTLASS_3XTF32> $<${HAVE_GPU_GRAPHS_GEMM}:USE_GRAPHS_GEMM> + $<${HAVE_GPU_GRAPHS_FFT}:USE_GRAPHS_FFT> ) ectrans_target_fortran_module_directory( diff --git a/src/trans/gpu/algor/hicfft.hip.cpp b/src/trans/gpu/algor/hicfft.hip.cpp index b7a8581a8..22931c464 100644 --- a/src/trans/gpu/algor/hicfft.hip.cpp +++ b/src/trans/gpu/algor/hicfft.hip.cpp @@ -34,9 +34,38 @@ struct Float { using cmplx = hipfftComplex; }; +template class hicfft_plan { + using real = typename Type::real; + using cmplx = typename Type::cmplx; + +public: + void exec(real *data_real, cmplx *data_complex) const { + real *data_real_l = &data_real[offset]; + cmplx *data_complex_l = &data_complex[offset / 2]; + if constexpr (Direction == HIPFFT_R2C) + fftSafeCall(hipfftExecR2C(handle, data_real_l, data_complex_l)); + else if constexpr (Direction == HIPFFT_C2R) + fftSafeCall(hipfftExecC2R(handle, data_complex_l, data_real_l)); + else if constexpr (Direction == HIPFFT_D2Z) + fftSafeCall(hipfftExecD2Z(handle, data_real_l, data_complex_l)); + else if constexpr (Direction == HIPFFT_Z2D) + fftSafeCall(hipfftExecZ2D(handle, data_complex_l, data_real_l)); + } + void set_stream(cudaStream_t stream) { + fftSafeCall(hipfftSetStream(handle, stream)); + } + hicfft_plan(hipfftHandle handle_, int offset_) + : handle(handle_), offset(offset_) {} + +private: + hipfftHandle handle; + int offset; +}; + // kfield -> handles template auto &get_fft_plan_cache() { - static std::unordered_map> fftPlansCache; + static std::unordered_map>> + fftPlansCache; return fftPlansCache; } // kfield -> graphs @@ -58,137 +87,141 @@ void free_fft_cache(float *, size_t) { get_ptr_cache().clear(); } - +template +std::vector> plan_all(int kfield, int *loens, + int nfft, int *offsets) { + static constexpr bool is_forward = + Direction == HIPFFT_R2C || Direction == HIPFFT_D2Z; + + auto key = kfield; + auto &fftPlansCache = get_fft_plan_cache(); + auto fftPlans = fftPlansCache.find(key); + if (fftPlans == fftPlansCache.end()) { + // the fft plans do not exist yet + std::vector> newPlans; + newPlans.reserve(nfft); + for (int i = 0; i < nfft; ++i) { + int nloen = loens[i]; + + hipfftHandle plan; + fftSafeCall(hipfftCreate(&plan)); + int dist = offsets[i + 1] - offsets[i]; + int embed[] = {1}; + fftSafeCall(hipfftPlanMany( + &plan, 1, &nloen, embed, 1, is_forward ? dist : dist / 2, embed, 1, + is_forward ? dist / 2 : dist, Direction, kfield)); + newPlans.emplace_back(plan, kfield * offsets[i]); + } + fftPlansCache.insert({key, newPlans}); + } + return fftPlansCache.find(key)->second; +} template -void execute_fft_new(typename Type::real *data_real, typename Type::cmplx *data_complex, - int kfield, int *loens, int *offsets, int nfft, void *growing_allocator) { +void run_group_graph(typename Type::real *data_real, + typename Type::cmplx *data_complex, int kfield, int *loens, + int *offsets, int nfft, void *growing_allocator) { growing_allocator_register_free_c(growing_allocator, free_fft_cache); - constexpr bool is_forward = Direction == HIPFFT_R2C || Direction == HIPFFT_D2Z; - using real = typename Type::real; - using cmplx = typename Type::cmplx; - - /* static std::unordered_map allocationCache; // nloens -> ptr */ -//* static std::unordered_map> fftPlansCache; // kfield -> handles -//* static std::unordered_map graphCache; // kfield -> graphs - // if the pointers are changed, we need to update the graph -//* static std::unordered_map> ptrCache; // kfield -> ptrs auto &ptrCache = get_ptr_cache(); // kfield -> ptrs auto &graphCache = get_graph_cache(); // kfield -> graphs - auto ptrs = ptrCache.find(kfield); - if (ptrs != ptrCache.end() && ( - ptrs->second.first != data_real || ptrs->second.second != data_complex)) { - // the plan is cached, but the pointers are not correct. we remove and delete the graph, - // but we keep the FFT plans, if this happens more often, we should cache this... - std::cout << "WARNING FFT: POINTER CHANGE --> THIS MIGHT BE SLOW" - << std::endl; - HIC_CHECK(hipGraphExecDestroy(graphCache[kfield])); - graphCache.erase(kfield); - ptrCache.erase(kfield); + auto key = kfield; + auto ptrs = ptrCache.find(key); + if (ptrs != ptrCache.end() && (ptrs->second.first != data_real || + ptrs->second.second != data_complex)) { + // the plan is cached, but the pointers are not correct. we remove and + // delete the graph, but we keep the FFT plans, if this happens more often, + // we should cache this... + std::cout << "WARNING FFT: POINTER CHANGE --> THIS MIGHT BE SLOW" + << std::endl; + HIC_CHECK(hipGraphExecDestroy(graphCache[key])); + graphCache.erase(key); + ptrCache.erase(key); } -//* auto &fftPlansCache = -//* get_fft_plan_cache(); // kfield -> handles - auto graph = graphCache.find(kfield); + auto graph = graphCache.find(key); if (graph == graphCache.end()) { - // this graph does not exist yet - - auto &fftPlansCache = - get_fft_plan_cache(); // kfield -> handles - auto fftPlans = fftPlansCache.find(kfield); - if (fftPlans == fftPlansCache.end()) { - // the fft plans do not exist yet - std::vector newPlans; - newPlans.resize(nfft); - for (int i = 0; i < nfft; ++i) { - int nloen = loens[i]; - - hipfftHandle plan; - fftSafeCall(hipfftCreate(&plan)); - int dist = offsets[i+1] - offsets[i]; - int embed[] = {1}; - //fftSafeCall(hipfftPlanMany(&plan, 1, &nloen, embed, 1, dist, embed, - // 1, dist / 2, Direction, kfield)); - fftSafeCall(hipfftPlanMany(&plan, 1, &nloen, embed, 1, is_forward ? dist : dist / 2, embed, - 1, is_forward ? dist / 2 : dist, Direction, kfield)); - newPlans[i] = plan; - } - fftPlansCache.insert({kfield, newPlans}); - } - fftPlans = fftPlansCache.find(kfield); - - // create a temporary stream - hipStream_t stream; - HIC_CHECK(hipStreamCreate(&stream)); - - for (auto &plan : fftPlans->second) // set the streams - fftSafeCall(hipfftSetStream(plan, stream)); - - // now create the graph - hipGraph_t new_graph; - hipGraphCreate(&new_graph, 0); - for (int i = 0; i < nfft; ++i) { - int offset = offsets[i]; - real *data_real_l = &data_real[kfield * offset]; - cmplx *data_complex_l = &data_complex[kfield * offset / 2]; - HIC_CHECK(hipStreamBeginCapture(stream, hipStreamCaptureModeGlobal)); - if constexpr(Direction == HIPFFT_R2C) - fftSafeCall(hipfftExecR2C(fftPlans->second[i], data_real_l, data_complex_l)); - else if constexpr(Direction == HIPFFT_C2R) - fftSafeCall(hipfftExecC2R(fftPlans->second[i], data_complex_l, data_real_l)); - else if constexpr(Direction == HIPFFT_D2Z) - fftSafeCall(hipfftExecD2Z(fftPlans->second[i], data_real_l, data_complex_l)); - else if constexpr(Direction == HIPFFT_Z2D) - fftSafeCall(hipfftExecZ2D(fftPlans->second[i], data_complex_l, data_real_l)); - hipGraph_t my_graph; - HIC_CHECK(hipStreamEndCapture(stream, &my_graph)); - hipGraphNode_t my_node; - HIC_CHECK(hipGraphAddChildGraphNode(&my_node, new_graph, nullptr, 0, my_graph)); - } - hipGraphExec_t instance; - HIC_CHECK(hipGraphInstantiate(&instance, new_graph, NULL, NULL, 0)); - HIC_CHECK(hipStreamDestroy(stream)); - HIC_CHECK(hipGraphDestroy(new_graph)); - - graphCache.insert({kfield, instance}); - ptrCache.insert({kfield, std::make_pair(data_real, data_complex)}); + // this graph does not exist yet + auto plans = plan_all(kfield, loens, nfft, offsets); + + // create a temporary stream + hipStream_t stream; + HIC_CHECK(hipStreamCreate(&stream)); + + for (auto &plan : plans) // set the streams + plan.set_stream(stream); + + // now create the graph + hipGraph_t new_graph; + hipGraphCreate(&new_graph, 0); + for (auto &plan : plans) { + HIC_CHECK(hipStreamBeginCapture(stream, hipStreamCaptureModeGlobal)); + plan.exec(data_real, data_complex); + hipGraph_t my_graph; + HIC_CHECK(hipStreamEndCapture(stream, &my_graph)); + hipGraphNode_t my_node; + HIC_CHECK( + hipGraphAddChildGraphNode(&my_node, new_graph, nullptr, 0, my_graph)); + } + hipGraphExec_t instance; + HIC_CHECK(hipGraphInstantiate(&instance, new_graph, NULL, NULL, 0)); + HIC_CHECK(hipStreamDestroy(stream)); + HIC_CHECK(hipGraphDestroy(new_graph)); + + graphCache.insert({key, instance}); + ptrCache.insert({key, std::make_pair(data_real, data_complex)}); } - HIC_CHECK(hipGraphLaunch(graphCache.at(kfield), 0)); + HIC_CHECK(hipGraphLaunch(graphCache.at(key), 0)); HIC_CHECK(hipDeviceSynchronize()); } -} // namespace +template +void run_group(typename Type::real *data_real, + typename Type::cmplx *data_complex, int kfield, int *loens, + int *offsets, int nfft, void *growing_allocator) { + auto plans = plan_all(kfield, loens, nfft, offsets); + + for (auto &plan : plans) + plan.exec(data_real, data_complex); + HIC_CHECK(hipDeviceSynchronize()); +} +} // namespace extern "C" { +#ifdef USE_GRAPHS_FFT +#define RUN run_group_graph +#else +#define RUN run_group +#endif void execute_dir_fft_float(float *data_real, hipfftComplex *data_complex, - int kfield, int *loens, int *offsets, int nfft, - void *growing_allocator) { - execute_fft_new(data_real, data_complex, kfield, loens, offsets, - nfft, growing_allocator); + int kfield, int *loens, int *offsets, int nfft, + void *growing_allocator) { + RUN(data_real, data_complex, kfield, loens, offsets, nfft, + growing_allocator); } void execute_inv_fft_float(hipfftComplex *data_complex, float *data_real, - int kfield, int *loens, int *offsets, int nfft, - void *growing_allocator) { - execute_fft_new(data_real, data_complex, kfield, loens, offsets, - nfft, growing_allocator); + int kfield, int *loens, int *offsets, int nfft, + void *growing_allocator) { + RUN(data_real, data_complex, kfield, loens, offsets, nfft, + growing_allocator); } -void execute_dir_fft_double(double *data_real, hipfftDoubleComplex *data_complex, - int kfield, int *loens, int *offsets, int nfft, - void *growing_allocator) { - execute_fft_new(data_real, data_complex, kfield, loens, - offsets, nfft, growing_allocator); +void execute_dir_fft_double(double *data_real, + hipfftDoubleComplex *data_complex, int kfield, + int *loens, int *offsets, int nfft, + void *growing_allocator) { + RUN(data_real, data_complex, kfield, loens, offsets, nfft, + growing_allocator); } -void execute_inv_fft_double(hipfftDoubleComplex *data_complex, double *data_real, - int kfield, int *loens, int *offsets, int nfft, - void *growing_allocator) { - execute_fft_new(data_real, data_complex, kfield, loens, - offsets, nfft, growing_allocator); +void execute_inv_fft_double(hipfftDoubleComplex *data_complex, + double *data_real, int kfield, int *loens, + int *offsets, int nfft, void *growing_allocator) { + RUN(data_real, data_complex, kfield, loens, offsets, nfft, + growing_allocator); } +#undef RUN } - diff --git a/src/trans/gpu/external/setup_trans.F90 b/src/trans/gpu/external/setup_trans.F90 index bdb59be24..bc65a834a 100755 --- a/src/trans/gpu/external/setup_trans.F90 +++ b/src/trans/gpu/external/setup_trans.F90 @@ -211,6 +211,9 @@ SUBROUTINE SETUP_TRANS(KSMAX,KDGL,KDLON,KLOEN,LDSPLIT,PSTRET,& #ifdef USE_GPU_AWARE_MPI WRITE(NOUT,'(A)') " - GPU-aware MPI" #endif +#ifdef USE_GRAPHS_FFT + WRITE(NOUT,'(A)') " - graph-based FFT scheduling" +#endif #ifdef USE_GRAPHS_GEMM WRITE(NOUT,'(A)') " - graph-based GEMM scheduling" #endif From ab0edc2ac825ddf0b52c1244f317fb64e2541be8 Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Mon, 16 Sep 2024 13:08:03 +0000 Subject: [PATCH 71/86] Fixup diagnostic prints missing since #141 --- src/trans/gpu/CMakeLists.txt | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/trans/gpu/CMakeLists.txt b/src/trans/gpu/CMakeLists.txt index 37f21db0d..626a98c20 100644 --- a/src/trans/gpu/CMakeLists.txt +++ b/src/trans/gpu/CMakeLists.txt @@ -147,8 +147,11 @@ foreach( prec dp sp ) $<${HAVE_OMP}:OpenMP::OpenMP_Fortran> $<${HAVE_MPI}:MPI::MPI_Fortran> PRIVATE_DEFINITIONS ${GPU_RUNTIME}GPU ${GPU_OFFLOAD}GPU - ECTRANS_HAVE_MPI=${ectrans_HAVE_MPI} + $<${HAVE_CUTLASS}:USE_CUTLASS> + $<${HAVE_CUTLASS_3XTF32}:USE_CUTLASS_3XTF32> + $<${HAVE_GPU_GRAPHS_GEMM}:USE_GRAPHS_GEMM> $<${HAVE_GPU_AWARE_MPI}:USE_GPU_AWARE_MPI> + ECTRANS_HAVE_MPI=${ectrans_HAVE_MPI} ) ectrans_target_fortran_module_directory( From 949d35bace856bdbd17a8655c8e6a86e9dab1c2f Mon Sep 17 00:00:00 2001 From: Lukas Mosimann Date: Mon, 16 Sep 2024 06:16:20 -0700 Subject: [PATCH 72/86] add gpu graphs define --- src/trans/gpu/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/src/trans/gpu/CMakeLists.txt b/src/trans/gpu/CMakeLists.txt index c93c506a2..da652f16f 100644 --- a/src/trans/gpu/CMakeLists.txt +++ b/src/trans/gpu/CMakeLists.txt @@ -151,6 +151,7 @@ foreach( prec dp sp ) $<${HAVE_CUTLASS}:USE_CUTLASS> $<${HAVE_CUTLASS_3XTF32}:USE_CUTLASS_3XTF32> $<${HAVE_GPU_GRAPHS_GEMM}:USE_GRAPHS_GEMM> + $<${HAVE_GPU_GRAPHS_FFT}:USE_GRAPHS_FFT> $<${HAVE_GPU_AWARE_MPI}:USE_GPU_AWARE_MPI> ECTRANS_HAVE_MPI=${ectrans_HAVE_MPI} ) From 587c535716f4f2f91bd2d26a642d8fac6546501c Mon Sep 17 00:00:00 2001 From: Sam Hatfield Date: Mon, 16 Sep 2024 17:53:16 +0300 Subject: [PATCH 73/86] Update paths to HIP includes to support ROCm > 6 Co-authored-by: Paul Mullowney --- src/trans/gpu/algor/hicblas_hip.h | 2 +- src/trans/gpu/algor/hicfft_hip.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/trans/gpu/algor/hicblas_hip.h b/src/trans/gpu/algor/hicblas_hip.h index 1fa0d0cb2..a4c60b424 100644 --- a/src/trans/gpu/algor/hicblas_hip.h +++ b/src/trans/gpu/algor/hicblas_hip.h @@ -18,7 +18,7 @@ #pragma clang diagnostic push #pragma clang diagnostic ignored "-W#pragma-messages" #endif -#include "hipblas.h" +#include "hipblas/hipblas.h" #ifdef __clang__ #pragma clang diagnostic pop #endif diff --git a/src/trans/gpu/algor/hicfft_hip.h b/src/trans/gpu/algor/hicfft_hip.h index 3fc176eda..b780ba3e1 100644 --- a/src/trans/gpu/algor/hicfft_hip.h +++ b/src/trans/gpu/algor/hicfft_hip.h @@ -16,7 +16,7 @@ #pragma clang diagnostic ignored "-W#pragma-messages" #endif #include -#include "hipfft.h" +#include "hipfft/hipfft.h" #ifdef __clang__ #pragma clang diagnostic pop #endif From ede091b199b6f1bd1ff133fcb690e4e24619a8db Mon Sep 17 00:00:00 2001 From: lukasm91 Date: Mon, 16 Sep 2024 19:41:51 +0200 Subject: [PATCH 74/86] Update src/trans/gpu/algor/hicfft.hip.cpp Co-authored-by: Sam Hatfield --- src/trans/gpu/algor/hicfft.hip.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/trans/gpu/algor/hicfft.hip.cpp b/src/trans/gpu/algor/hicfft.hip.cpp index 22931c464..8278d9a4b 100644 --- a/src/trans/gpu/algor/hicfft.hip.cpp +++ b/src/trans/gpu/algor/hicfft.hip.cpp @@ -51,7 +51,7 @@ template class hicfft_plan { else if constexpr (Direction == HIPFFT_Z2D) fftSafeCall(hipfftExecZ2D(handle, data_complex_l, data_real_l)); } - void set_stream(cudaStream_t stream) { + void set_stream(hipStream_t stream) { fftSafeCall(hipfftSetStream(handle, stream)); } hicfft_plan(hipfftHandle handle_, int offset_) From 7c1746cf02931064509af3792f1262c50c0f1160 Mon Sep 17 00:00:00 2001 From: Sam Hatfield Date: Thu, 19 Sep 2024 12:19:03 +0100 Subject: [PATCH 75/86] Fix typo in TRGTOL --- src/trans/gpu/internal/trgtol_mod.F90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/trans/gpu/internal/trgtol_mod.F90 b/src/trans/gpu/internal/trgtol_mod.F90 index e9375fb77..6ec388d71 100755 --- a/src/trans/gpu/internal/trgtol_mod.F90 +++ b/src/trans/gpu/internal/trgtol_mod.F90 @@ -582,7 +582,7 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, #endif #else !! this is safe-but-slow fallback for running without GPU-aware MPI - !$ACC UPDATE HOST(ZCOMBUFS) IF(ISEND_COUNT > 0) + !$ACC UPDATE HOST(ZCOMBUFS) IF(ISEND_COUNTS > 0) #endif ! Receive loop......................................................... DO INR=1,IRECV_COUNTS From 629848408b9ecba99269a019a52db6c742270d30 Mon Sep 17 00:00:00 2001 From: lukasm91 Date: Thu, 19 Sep 2024 14:42:41 +0200 Subject: [PATCH 76/86] Fix tabs Co-authored-by: Sam Hatfield --- src/trans/gpu/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/trans/gpu/CMakeLists.txt b/src/trans/gpu/CMakeLists.txt index da652f16f..a9b16ba8b 100644 --- a/src/trans/gpu/CMakeLists.txt +++ b/src/trans/gpu/CMakeLists.txt @@ -61,7 +61,7 @@ ecbuild_add_library( $<${HAVE_CUTLASS}:USE_CUTLASS> $<${HAVE_CUTLASS_3XTF32}:USE_CUTLASS_3XTF32> $<${HAVE_GPU_GRAPHS_GEMM}:USE_GRAPHS_GEMM> - $<${HAVE_GPU_GRAPHS_FFT}:USE_GRAPHS_FFT> + $<${HAVE_GPU_GRAPHS_FFT}:USE_GRAPHS_FFT> ) ectrans_target_fortran_module_directory( @@ -151,7 +151,7 @@ foreach( prec dp sp ) $<${HAVE_CUTLASS}:USE_CUTLASS> $<${HAVE_CUTLASS_3XTF32}:USE_CUTLASS_3XTF32> $<${HAVE_GPU_GRAPHS_GEMM}:USE_GRAPHS_GEMM> - $<${HAVE_GPU_GRAPHS_FFT}:USE_GRAPHS_FFT> + $<${HAVE_GPU_GRAPHS_FFT}:USE_GRAPHS_FFT> $<${HAVE_GPU_AWARE_MPI}:USE_GPU_AWARE_MPI> ECTRANS_HAVE_MPI=${ectrans_HAVE_MPI} ) From db24ae0d05b1676eca3ea76d20b5540d9a1eed11 Mon Sep 17 00:00:00 2001 From: lukasm91 Date: Fri, 20 Sep 2024 08:30:54 +0200 Subject: [PATCH 77/86] Update CMakeLists.txt --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9a8ff71aa..1da7151ab 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -143,7 +143,7 @@ ecbuild_add_option( FEATURE GPU_GRAPHS_GEMM ecbuild_add_option( FEATURE GPU_GRAPHS_FFT DEFAULT ON CONDITION HAVE_GPU - DESCRIPTION "Enable graph-based optimisation of Legendre transform FFT kernel" ) + DESCRIPTION "Enable graph-based optimisation of FFT kernels" ) if( BUILD_SHARED_LIBS ) set( GPU_STATIC_DEFAULT OFF ) From 81bd1f4951e1557a687f1c416b355c5a36aa5982 Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Tue, 24 Sep 2024 11:37:34 +0200 Subject: [PATCH 78/86] Use Fortran linker for ectrans_test_setup_trans0 --- tests/CMakeLists.txt | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 6fa0e061e..9ba9cfe51 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -32,7 +32,7 @@ if( HAVE_TESTS ) endif() if( NOT HAVE_DOUBLE_PRECISION ) list( APPEND _test_args "-DCOMPONENTS=single" ) - endif() + endif() add_test( NAME ectrans_test_install COMMAND ${CMAKE_CURRENT_BINARY_DIR}/test-install.sh ${_test_args} ) @@ -46,10 +46,11 @@ if( HAVE_TESTS ) endif() ecbuild_add_executable( - TARGET ectrans_test_setup_trans0 - SOURCES trans/test_setup_trans0.F90 - LIBS ectrans_common - NOINSTALL) + TARGET ectrans_test_setup_trans0 + SOURCES trans/test_setup_trans0.F90 + LIBS ectrans_common + LINKER_LANGUAGE Fortran + NOINSTALL) set( ntasks 0 ) if( HAVE_MPI ) list( APPEND ntasks 1 2 ) From c3db3f6ab32bc484a96199934515b604ab63ac26 Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Tue, 24 Sep 2024 11:37:57 +0200 Subject: [PATCH 79/86] Search for OpenACC only if explicitly enabled --- CMakeLists.txt | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1da7151ab..e2c5b8e81 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -34,14 +34,16 @@ ecbuild_add_option( FEATURE OMP DESCRIPTION "Support for OpenMP shared memory parallelism" REQUIRED_PACKAGES "OpenMP COMPONENTS Fortran" ) -if( ${CMAKE_VERSION} VERSION_LESS "3.25" AND (NOT DEFINED ENABLE_ACC OR ENABLE_ACC ) ) - # See https://gitlab.kitware.com/cmake/cmake/-/issues/23691, fixed in CMake 3.25 - # (TL;DR: FindOpenACC sets OpenACC__FOUND correctly but does not set - # OpenACC_FOUND unless all three C, CXX, and Fortran have been found - even if - # only one language has been requested via COMPONENTS) - find_package( OpenACC COMPONENTS Fortran ) - if( OpenACC_Fortran_FOUND ) - set( OpenACC_FOUND ON ) +if( ${CMAKE_VERSION} VERSION_LESS "3.25" ) + if ( ECTRANS_ENABLE_ACC OR (NOT DEFINED ECTRANS_ENABLE_ACC AND ENABLE_ACC) ) + # See https://gitlab.kitware.com/cmake/cmake/-/issues/23691, fixed in CMake 3.25 + # (TL;DR: FindOpenACC sets OpenACC__FOUND correctly but does not set + # OpenACC_FOUND unless all three C, CXX, and Fortran have been found - even if + # only one language has been requested via COMPONENTS) + find_package( OpenACC COMPONENTS Fortran ) + if( OpenACC_Fortran_FOUND ) + set( OpenACC_FOUND ON ) + endif() endif() endif() From 697684d8680d9616afa46b631d8ded4f94cc4d98 Mon Sep 17 00:00:00 2001 From: Sam Hatfield Date: Thu, 19 Sep 2024 12:27:40 +0300 Subject: [PATCH 80/86] Add option to not do in-place FFT This is currently disabled for cuFFT but enabled for hipFFT. In-place FFTs seem to be an issue for ROCm at the moment. This is a temporary workaround. --- src/trans/gpu/CMakeLists.txt | 7 ++++ src/trans/gpu/internal/dir_trans_ctl_mod.F90 | 2 +- src/trans/gpu/internal/ftdir_mod.F90 | 30 +++++++++++++--- src/trans/gpu/internal/ftinv_mod.F90 | 38 +++++++++++++++----- src/trans/gpu/internal/inv_trans_ctl_mod.F90 | 2 +- 5 files changed, 64 insertions(+), 15 deletions(-) diff --git a/src/trans/gpu/CMakeLists.txt b/src/trans/gpu/CMakeLists.txt index a9b16ba8b..3fcdfb9c5 100644 --- a/src/trans/gpu/CMakeLists.txt +++ b/src/trans/gpu/CMakeLists.txt @@ -27,6 +27,7 @@ if( HAVE_HIP ) algor/hicblas_gemm.hip.cpp algor/hicfft.hip.cpp ) + ecbuild_info("warn: IN_PLACE_FFT not defined for hipFFT") elseif( HAVE_CUDA ) set( GPU_RUNTIME "CUDA" ) set( ECTRANS_GPU_HIP_LIBRARIES CUDA::cufft CUDA::cublas nvhpcwrapnvtx CUDA::cudart ) @@ -34,6 +35,7 @@ elseif( HAVE_CUDA ) algor/hicblas_gemm.cuda.cu algor/hicfft.cuda.cu ) + ecbuild_info("warn: IN_PLACE_FFT defined for cuFFT") else() ecbuild_info("warn: HIP and CUDA not found") endif() @@ -166,6 +168,11 @@ foreach( prec dp sp ) target_compile_definitions( ectrans_gpu_${prec} PRIVATE TRANS_SINGLE PARKINDTRANS_SINGLE ) endif() + # cuFFT can do in-place FFT, hipFFT cannot + if( HAVE_CUDA ) + target_compile_definitions( ectrans_gpu_${prec} PRIVATE IN_PLACE_FFT ) + endif() + if( HAVE_OMP AND CMAKE_Fortran_COMPILER_ID MATCHES Cray ) # Propagate flags as link options for downstream targets. Only required for Cray target_link_options( ectrans_gpu_${prec} INTERFACE diff --git a/src/trans/gpu/internal/dir_trans_ctl_mod.F90 b/src/trans/gpu/internal/dir_trans_ctl_mod.F90 index af873d76d..954251074 100755 --- a/src/trans/gpu/internal/dir_trans_ctl_mod.F90 +++ b/src/trans/gpu/internal/dir_trans_ctl_mod.F90 @@ -146,7 +146,7 @@ SUBROUTINE DIR_TRANS_CTL(KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS,KF_UV,KF_SCALARS,& ALLOCATOR = MAKE_BUFFERED_ALLOCATOR() HTRGTOL = PREPARE_TRGTOL(ALLOCATOR,KF_GP,KF_FS) IF (KF_FS > 0) THEN - HFTDIR = PREPARE_FTDIR() + HFTDIR = PREPARE_FTDIR(ALLOCATOR,KF_FS) HTRLTOM_PACK = PREPARE_TRLTOM_PACK(ALLOCATOR, KF_FS) HTRLTOM = PREPARE_TRLTOM(ALLOCATOR, KF_FS) HTRLTOM_UNPACK = PREPARE_TRLTOM_UNPACK(ALLOCATOR, KF_FS) diff --git a/src/trans/gpu/internal/ftdir_mod.F90 b/src/trans/gpu/internal/ftdir_mod.F90 index 65e50138a..e8801ac9d 100755 --- a/src/trans/gpu/internal/ftdir_mod.F90 +++ b/src/trans/gpu/internal/ftdir_mod.F90 @@ -10,19 +10,35 @@ ! MODULE FTDIR_MOD + USE BUFFERED_ALLOCATOR_MOD ,ONLY : ALLOCATION_RESERVATION_HANDLE IMPLICIT NONE PRIVATE PUBLIC :: FTDIR, FTDIR_HANDLE, PREPARE_FTDIR TYPE FTDIR_HANDLE + TYPE(ALLOCATION_RESERVATION_HANDLE) :: HREEL_COMPLEX END TYPE CONTAINS - FUNCTION PREPARE_FTDIR() RESULT(HFTDIR) + FUNCTION PREPARE_FTDIR(ALLOCATOR,KF_FS) RESULT(HFTDIR) + USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT + USE TPM_DISTR, ONLY: D + USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, RESERVE + USE ISO_C_BINDING, ONLY: C_SIZE_T + IMPLICIT NONE + + TYPE(BUFFERED_ALLOCATOR), INTENT(INOUT) :: ALLOCATOR + INTEGER(KIND=JPIM), INTENT(IN) :: KF_FS TYPE(FTDIR_HANDLE) :: HFTDIR - END FUNCTION + + REAL(KIND=JPRBT) :: DUMMY + +#ifndef IN_PLACE_FFT + HFTDIR%HREEL_COMPLEX = RESERVE(ALLOCATOR, INT(KF_FS*D%NLENGTF*SIZEOF(DUMMY), KIND=C_SIZE_T)) +#endif + END FUNCTION PREPARE_FTDIR SUBROUTINE FTDIR(ALLOCATOR,HFTDIR,PREEL_REAL,PREEL_COMPLEX,KFIELD) !**** *FTDIR - Direct Fourier transform @@ -60,12 +76,13 @@ SUBROUTINE FTDIR(ALLOCATOR,HFTDIR,PREEL_REAL,PREEL_COMPLEX,KFIELD) USE TPM_GEN, ONLY: LSYNC_TRANS USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT USE TPM_DISTR, ONLY: MYSETW, MYPROC, NPROC, D_NSTAGT0B, D_NSTAGTF,D_NPTRLS, & - & D_NPNTGTB0, D_NPROCM, D_NDGL_FS + & D_NPNTGTB0, D_NPROCM, D_NDGL_FS, D USE TPM_GEOMETRY, ONLY: G_NMEN, G_NLOEN - USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR + USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, ASSIGN_PTR, GET_ALLOCATION USE TPM_HICFFT, ONLY: EXECUTE_DIR_FFT USE MPL_MODULE, ONLY: MPL_BARRIER,MPL_ALL_MS_COMM USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX + USE ISO_C_BINDING, ONLY: C_SIZE_T IMPLICIT NONE @@ -77,7 +94,12 @@ SUBROUTINE FTDIR(ALLOCATOR,HFTDIR,PREEL_REAL,PREEL_COMPLEX,KFIELD) INTEGER(KIND=JPIM) :: KGL +#ifdef IN_PLACE_FFT PREEL_COMPLEX => PREEL_REAL +#else + CALL ASSIGN_PTR(PREEL_COMPLEX, GET_ALLOCATION(ALLOCATOR, HFTDIR%HREEL_COMPLEX),& + & 1_C_SIZE_T, int(KFIELD*D%NLENGTF*SIZEOF(PREEL_COMPLEX(1)),kind=c_size_t)) +#endif #ifdef ACCGPU !$ACC DATA PRESENT(PREEL_REAL, PREEL_COMPLEX, & diff --git a/src/trans/gpu/internal/ftinv_mod.F90 b/src/trans/gpu/internal/ftinv_mod.F90 index e939f42fc..bf59b384f 100755 --- a/src/trans/gpu/internal/ftinv_mod.F90 +++ b/src/trans/gpu/internal/ftinv_mod.F90 @@ -10,20 +10,33 @@ ! MODULE FTINV_MOD - USE BUFFERED_ALLOCATOR_MOD ,ONLY : BUFFERED_ALLOCATOR + USE BUFFERED_ALLOCATOR_MOD ,ONLY : BUFFERED_ALLOCATOR, ALLOCATION_RESERVATION_HANDLE IMPLICIT NONE PRIVATE PUBLIC :: FTINV, FTINV_HANDLE, PREPARE_FTINV TYPE FTINV_HANDLE + TYPE(ALLOCATION_RESERVATION_HANDLE) :: HREEL_REAL END TYPE CONTAINS - FUNCTION PREPARE_FTINV(ALLOCATOR) RESULT(HFTINV) + FUNCTION PREPARE_FTINV(ALLOCATOR,KF_FS) RESULT(HFTINV) + USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT + USE TPM_DISTR, ONLY: D + USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, RESERVE + USE ISO_C_BINDING, ONLY: C_SIZE_T + IMPLICIT NONE TYPE(BUFFERED_ALLOCATOR), INTENT(INOUT) :: ALLOCATOR + INTEGER(KIND=JPIM), INTENT(IN) :: KF_FS TYPE(FTINV_HANDLE) :: HFTINV + + REAL(KIND=JPRBT) :: DUMMY + +#ifndef IN_PLACE_FFT + HFTINV%HREEL_REAL = RESERVE(ALLOCATOR, int(D%NLENGTF*KF_FS*SIZEOF(DUMMY),kind=c_size_t)) +#endif END FUNCTION SUBROUTINE FTINV(ALLOCATOR,HFTINV,PREEL_COMPLEX,PREEL_REAL,KFIELD) @@ -59,13 +72,15 @@ SUBROUTINE FTINV(ALLOCATOR,HFTINV,PREEL_COMPLEX,PREEL_REAL,KFIELD) ! G. Mozdzynski (Jun 2015): Support alternative FFTs to FFTW ! ------------------------------------------------------------------ - USE TPM_GEN, ONLY: LSYNC_TRANS - USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT - USE TPM_DISTR, ONLY: MYSETW, D_NPTRLS, D_NDGL_FS, D_NSTAGTF - USE TPM_GEOMETRY, ONLY: G_NLOEN - USE TPM_HICFFT, ONLY: EXECUTE_INV_FFT - USE MPL_MODULE, ONLY: MPL_BARRIER,MPL_ALL_MS_COMM - USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX + USE TPM_GEN, ONLY: LSYNC_TRANS + USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT + USE TPM_DISTR, ONLY: MYSETW, D_NPTRLS, D_NDGL_FS, D_NSTAGTF, D + USE TPM_GEOMETRY, ONLY: G_NLOEN + USE TPM_HICFFT, ONLY: EXECUTE_INV_FFT + USE MPL_MODULE, ONLY: MPL_BARRIER,MPL_ALL_MS_COMM + USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX + USE BUFFERED_ALLOCATOR_MOD, ONLY: ASSIGN_PTR, GET_ALLOCATION + USE ISO_C_BINDING, ONLY: C_SIZE_T IMPLICIT NONE @@ -77,7 +92,12 @@ SUBROUTINE FTINV(ALLOCATOR,HFTINV,PREEL_COMPLEX,PREEL_REAL,KFIELD) INTEGER(KIND=JPIM) :: KGL +#ifdef IN_PLACE_FFT PREEL_REAL => PREEL_COMPLEX +#else + CALL ASSIGN_PTR(PREEL_REAL, GET_ALLOCATION(ALLOCATOR, HFTINV%HREEL_REAL),& + & 1_C_SIZE_T, int(KFIELD*D%NLENGTF*SIZEOF(PREEL_REAL(1)),kind=c_size_t)) +#endif #ifdef OMPGPU #endif diff --git a/src/trans/gpu/internal/inv_trans_ctl_mod.F90 b/src/trans/gpu/internal/inv_trans_ctl_mod.F90 index c928feb70..804e47106 100644 --- a/src/trans/gpu/internal/inv_trans_ctl_mod.F90 +++ b/src/trans/gpu/internal/inv_trans_ctl_mod.F90 @@ -202,7 +202,7 @@ SUBROUTINE INV_TRANS_CTL(KF_UV_G,KF_SCALARS_G,KF_GP,KF_FS,KF_OUT_LT,& HTRMTOL = PREPARE_TRMTOL(ALLOCATOR,IF_LEG) HTRMTOL_UNPACK = PREPARE_TRMTOL_UNPACK(ALLOCATOR,IF_FOURIER) HFSC = PREPARE_FSC(ALLOCATOR) - HFTINV = PREPARE_FTINV(ALLOCATOR) + HFTINV = PREPARE_FTINV(ALLOCATOR,IF_FOURIER) ENDIF HTRLTOG = PREPARE_TRLTOG(ALLOCATOR,IF_FOURIER,KF_GP) From 89def359af096521f87d7cd4be956cd32bdc7a32 Mon Sep 17 00:00:00 2001 From: Sam Hatfield Date: Fri, 20 Sep 2024 11:44:27 +0100 Subject: [PATCH 81/86] Fix case inconsistencies Co-authored-by: lukasm91 --- src/trans/gpu/internal/ftdir_mod.F90 | 2 +- src/trans/gpu/internal/ftinv_mod.F90 | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/trans/gpu/internal/ftdir_mod.F90 b/src/trans/gpu/internal/ftdir_mod.F90 index e8801ac9d..b9ffe4477 100755 --- a/src/trans/gpu/internal/ftdir_mod.F90 +++ b/src/trans/gpu/internal/ftdir_mod.F90 @@ -98,7 +98,7 @@ SUBROUTINE FTDIR(ALLOCATOR,HFTDIR,PREEL_REAL,PREEL_COMPLEX,KFIELD) PREEL_COMPLEX => PREEL_REAL #else CALL ASSIGN_PTR(PREEL_COMPLEX, GET_ALLOCATION(ALLOCATOR, HFTDIR%HREEL_COMPLEX),& - & 1_C_SIZE_T, int(KFIELD*D%NLENGTF*SIZEOF(PREEL_COMPLEX(1)),kind=c_size_t)) + & 1_C_SIZE_T, INT(KFIELD*D%NLENGTF*SIZEOF(PREEL_COMPLEX(1)),KIND=C_SIZE_T)) #endif #ifdef ACCGPU diff --git a/src/trans/gpu/internal/ftinv_mod.F90 b/src/trans/gpu/internal/ftinv_mod.F90 index bf59b384f..49e43dd72 100755 --- a/src/trans/gpu/internal/ftinv_mod.F90 +++ b/src/trans/gpu/internal/ftinv_mod.F90 @@ -35,7 +35,7 @@ FUNCTION PREPARE_FTINV(ALLOCATOR,KF_FS) RESULT(HFTINV) REAL(KIND=JPRBT) :: DUMMY #ifndef IN_PLACE_FFT - HFTINV%HREEL_REAL = RESERVE(ALLOCATOR, int(D%NLENGTF*KF_FS*SIZEOF(DUMMY),kind=c_size_t)) + HFTINV%HREEL_REAL = RESERVE(ALLOCATOR, INT(D%NLENGTF*KF_FS*SIZEOF(DUMMY),KIND=C_SIZE_T)) #endif END FUNCTION @@ -96,7 +96,7 @@ SUBROUTINE FTINV(ALLOCATOR,HFTINV,PREEL_COMPLEX,PREEL_REAL,KFIELD) PREEL_REAL => PREEL_COMPLEX #else CALL ASSIGN_PTR(PREEL_REAL, GET_ALLOCATION(ALLOCATOR, HFTINV%HREEL_REAL),& - & 1_C_SIZE_T, int(KFIELD*D%NLENGTF*SIZEOF(PREEL_REAL(1)),kind=c_size_t)) + & 1_C_SIZE_T, INT(KFIELD*D%NLENGTF*SIZEOF(PREEL_REAL(1)),KIND=C_SIZE_T)) #endif #ifdef OMPGPU From eb1302cd91d4b5d27461efc1d6ef39c58044600b Mon Sep 17 00:00:00 2001 From: Sam Hatfield Date: Fri, 4 Oct 2024 11:11:56 +0100 Subject: [PATCH 82/86] Remove default values for TPM_GEN variables (#159) These gave the impression that TPM_GEN is the appropriate place to change the default values of these variables. This is not the case, because whatever value they take here is overwritten in SETUP_TRANS0. The latter is where default values should be set. --- src/trans/common/internal/tpm_gen.F90 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/trans/common/internal/tpm_gen.F90 b/src/trans/common/internal/tpm_gen.F90 index 3ea42afc8..f153f4b5d 100644 --- a/src/trans/common/internal/tpm_gen.F90 +++ b/src/trans/common/internal/tpm_gen.F90 @@ -37,12 +37,12 @@ MODULE TPM_GEN ! 0 = Post IRECVs up-front, use ISENDs, use WAITANY to recv data (default) ! 1 = Use ISENDs, use blocking RECVs, add barrier at the end of each cycle ! 2 = Use buffered SENDs, use blocking RECVs, add barrier at the end of each cycle -INTEGER(KIND=JPIM) :: NTRANS_SYNC_LEVEL = 0 +INTEGER(KIND=JPIM) :: NTRANS_SYNC_LEVEL ! NSTACK_MEMORY_TR : optional memory strategy in gridpoint transpositions ! = 0 : prefer heap (slower but less memory consuming) ! > 0 : prefer stack (faster but more memory consuming) -INTEGER(KIND=JPIM) :: NSTACK_MEMORY_TR = 0 +INTEGER(KIND=JPIM) :: NSTACK_MEMORY_TR LOGICAL, ALLOCATABLE :: LENABLED(:) ! true: the resolution is enabled (it has been ! initialised and has not been released afterward) From 7b0928edeb15ac38fd1297ebe640a9f8831b50c9 Mon Sep 17 00:00:00 2001 From: lukasm91 Date: Fri, 4 Oct 2024 13:05:12 +0200 Subject: [PATCH 83/86] Pin buffers in benchmark (#158) Co-authored-by: Olivier Marsden --- src/programs/CMakeLists.txt | 14 +++++++++++--- src/programs/ectrans-benchmark.F90 | 14 ++++++++++---- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/src/programs/CMakeLists.txt b/src/programs/CMakeLists.txt index f0db3334b..d7f6013cf 100644 --- a/src/programs/CMakeLists.txt +++ b/src/programs/CMakeLists.txt @@ -36,9 +36,17 @@ foreach( program ectrans-benchmark ) parkind_${prec} trans_gpu_${prec} OpenACC::OpenACC_Fortran - ) - target_compile_definitions(${program}-gpu-${prec} PRIVATE VERSION="gpu") - endif() + DEFINITIONS + VERSION="gpu" + $<$:USE_PINNED> + ) + target_compile_options(${program}-gpu-${prec} PUBLIC + $<$:-cuda> + ) + target_link_options(${program}-gpu-${prec} PUBLIC + $<$:-cuda> + ) + endif() endforeach( prec ) endif( HAVE_GPU ) endforeach( program ) diff --git a/src/programs/ectrans-benchmark.F90 b/src/programs/ectrans-benchmark.F90 index aaff0f448..f9a12a8b3 100644 --- a/src/programs/ectrans-benchmark.F90 +++ b/src/programs/ectrans-benchmark.F90 @@ -9,6 +9,12 @@ program ectrans_benchmark +#ifdef USE_PINNED +#define PINNED_TAG , pinned +#else +#define PINNED_TAG +#endif + ! ! Spectral transform test ! @@ -95,18 +101,18 @@ program ectrans_benchmark real(kind=jprd) :: zaveave(0:jpmaxstat) ! Grid-point space data structures -real(kind=jprb), allocatable, target :: zgmv (:,:,:,:) ! Multilevel fields at t and t-dt -real(kind=jprb), allocatable, target :: zgmvs (:,:,:) ! Single level fields at t and t-dt +real(kind=jprb), allocatable, target PINNED_TAG :: zgmv (:,:,:,:) ! Multilevel fields at t and t-dt +real(kind=jprb), allocatable, target PINNED_TAG :: zgmvs (:,:,:) ! Single level fields at t and t-dt real(kind=jprb), pointer :: zgp3a (:,:,:,:) ! Multilevel fields at t and t-dt real(kind=jprb), pointer :: zgpuv (:,:,:,:) ! Multilevel fields at t and t-dt real(kind=jprb), pointer :: zgp2 (:,:,:) ! Single level fields at t and t-dt ! Spectral space data structures -real(kind=jprb), allocatable, target :: sp3d(:,:,:) +real(kind=jprb), allocatable, target PINNED_TAG :: sp3d(:,:,:) real(kind=jprb), pointer :: zspvor(:,:) => null() real(kind=jprb), pointer :: zspdiv(:,:) => null() real(kind=jprb), pointer :: zspsc3a(:,:,:) => null() -real(kind=jprb), allocatable :: zspsc2(:,:) +real(kind=jprb), allocatable PINNED_TAG :: zspsc2(:,:) logical :: lstack = .false. ! Output stack info logical :: luserpnm = .false. From 234c6a96bfb4ad36e2478b4c8f6545d82b85ed8e Mon Sep 17 00:00:00 2001 From: Sam Hatfield Date: Tue, 1 Oct 2024 16:55:45 +0000 Subject: [PATCH 84/86] Workaround for ASYNC statements in TRMTOL for AMD GPUs Co-authored-by: Paul Mullowney --- src/trans/gpu/internal/trmtol_mod.F90 | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/trans/gpu/internal/trmtol_mod.F90 b/src/trans/gpu/internal/trmtol_mod.F90 index 4fc67d556..1ccc9ab3f 100755 --- a/src/trans/gpu/internal/trmtol_mod.F90 +++ b/src/trans/gpu/internal/trmtol_mod.F90 @@ -158,7 +158,11 @@ SUBROUTINE TRMTOL(ALLOCATOR,HTRMTOL,PFBUF_IN,PFBUF,KF_LEG) #ifdef OMPGPU #endif #ifdef ACCGPU +#ifdef __HIP_PLATFORM_AMD__ # Workaround for AMD GPUs - ASYNC execution of this kernel gives numerical errors + !$ACC KERNELS DEFAULT(NONE) PRESENT(PFBUF,PFBUF_IN) COPYIN(FROM_RECV,TO_RECV,FROM_SEND,TO_SEND) +#else !$ACC KERNELS ASYNC(1) DEFAULT(NONE) PRESENT(PFBUF,PFBUF_IN) COPYIN(FROM_RECV,TO_RECV,FROM_SEND,TO_SEND) +#endif #endif PFBUF(FROM_RECV:TO_RECV) = PFBUF_IN(FROM_SEND:TO_SEND) #ifdef OMPGPU @@ -213,7 +217,9 @@ SUBROUTINE TRMTOL(ALLOCATOR,HTRMTOL,PFBUF_IN,PFBUF,KF_LEG) CALL GSTATS(421,1) #ifdef ACCGPU +#ifndef __HIP_PLATFORM_AMD__ # Workaround for AMD GPUs - ASYNC execution of this kernel gives numerical errors !$ACC WAIT(1) +#endif #endif CALL GSTATS(807,1) ELSE From 6840754f2c9e8cd003da391012ee8ffa177c0bf5 Mon Sep 17 00:00:00 2001 From: Sam Hatfield Date: Wed, 2 Oct 2024 12:43:38 +0300 Subject: [PATCH 85/86] Workaround for supporting GPU-aware MPI on Cray/AMD platforms Co-authored-by: Paul Mullowney Co-authored-by: Willem Deconinck --- src/trans/gpu/internal/trgtol_mod.F90 | 1 + src/trans/gpu/internal/trltog_mod.F90 | 1 + src/trans/gpu/internal/trltom_mod.F90 | 3 ++- src/trans/gpu/internal/trmtol_mod.F90 | 7 +++++-- 4 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/trans/gpu/internal/trgtol_mod.F90 b/src/trans/gpu/internal/trgtol_mod.F90 index 6ec388d71..7ec495ef8 100755 --- a/src/trans/gpu/internal/trgtol_mod.F90 +++ b/src/trans/gpu/internal/trgtol_mod.F90 @@ -115,6 +115,7 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, USE OML_MOD, ONLY: OML_MY_THREAD #if ECTRANS_HAVE_MPI USE MPI_F08, ONLY: MPI_COMM, MPI_REQUEST, MPI_FLOAT, MPI_DOUBLE + ! Missing: MPI_ISEND, MPI_IRECV on purpose due to cray-mpi bug (see https://github.com/ecmwf-ifs/ectrans/pull/157) #endif USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX USE TPM_TRANS, ONLY: NPROMA diff --git a/src/trans/gpu/internal/trltog_mod.F90 b/src/trans/gpu/internal/trltog_mod.F90 index 1c0a1990e..46dec8747 100755 --- a/src/trans/gpu/internal/trltog_mod.F90 +++ b/src/trans/gpu/internal/trltog_mod.F90 @@ -116,6 +116,7 @@ SUBROUTINE TRLTOG(ALLOCATOR,HTRLTOG,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS #if ECTRANS_HAVE_MPI USE MPI_F08, ONLY: MPI_COMM, MPI_REQUEST, MPI_FLOAT, MPI_DOUBLE + ! Missing: MPI_ISEND, MPI_IRECV on purpose due to cray-mpi bug (see https://github.com/ecmwf-ifs/ectrans/pull/157) #endif USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX USE TPM_TRANS, ONLY: LDIVGP, LSCDERS, LUVDER, LVORGP, NPROMA diff --git a/src/trans/gpu/internal/trltom_mod.F90 b/src/trans/gpu/internal/trltom_mod.F90 index ab3556138..09596eba2 100755 --- a/src/trans/gpu/internal/trltom_mod.F90 +++ b/src/trans/gpu/internal/trltom_mod.F90 @@ -94,7 +94,8 @@ SUBROUTINE TRLTOM(ALLOCATOR,HTRLTOM,PFBUF_IN,PFBUF,KF_FS) USE TPM_DISTR, ONLY: D, NPRTRW, NPROC, MYPROC, MYSETW USE TPM_GEN, ONLY: LSYNC_TRANS, NERR #if ECTRANS_HAVE_MPI - USE MPI_F08, ONLY: MPI_COMM, MPI_FLOAT, MPI_DOUBLE, MPI_ALLTOALLV + USE MPI_F08, ONLY: MPI_COMM, MPI_FLOAT, MPI_DOUBLE + ! Missing: MPI_ALLTOALLV on purpose due to cray-mpi bug (see https://github.com/ecmwf-ifs/ectrans/pull/157) #endif USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, ASSIGN_PTR, GET_ALLOCATION diff --git a/src/trans/gpu/internal/trmtol_mod.F90 b/src/trans/gpu/internal/trmtol_mod.F90 index 1ccc9ab3f..2b8ca8978 100755 --- a/src/trans/gpu/internal/trmtol_mod.F90 +++ b/src/trans/gpu/internal/trmtol_mod.F90 @@ -95,6 +95,7 @@ SUBROUTINE TRMTOL(ALLOCATOR,HTRMTOL,PFBUF_IN,PFBUF,KF_LEG) USE TPM_GEN, ONLY: LSYNC_TRANS, NERR #if ECTRANS_HAVE_MPI USE MPI_F08, ONLY: MPI_COMM, MPI_FLOAT, MPI_DOUBLE + ! Missing: MPI_ALLTOALLV on purpose due to cray-mpi bug (see https://github.com/ecmwf-ifs/ectrans/pull/157) #endif USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, ASSIGN_PTR, GET_ALLOCATION USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX @@ -158,7 +159,8 @@ SUBROUTINE TRMTOL(ALLOCATOR,HTRMTOL,PFBUF_IN,PFBUF,KF_LEG) #ifdef OMPGPU #endif #ifdef ACCGPU -#ifdef __HIP_PLATFORM_AMD__ # Workaround for AMD GPUs - ASYNC execution of this kernel gives numerical errors +#ifdef __HIP_PLATFORM_AMD__ + ! Workaround for AMD GPUs - ASYNC execution of this kernel gives numerical errors !$ACC KERNELS DEFAULT(NONE) PRESENT(PFBUF,PFBUF_IN) COPYIN(FROM_RECV,TO_RECV,FROM_SEND,TO_SEND) #else !$ACC KERNELS ASYNC(1) DEFAULT(NONE) PRESENT(PFBUF,PFBUF_IN) COPYIN(FROM_RECV,TO_RECV,FROM_SEND,TO_SEND) @@ -217,7 +219,8 @@ SUBROUTINE TRMTOL(ALLOCATOR,HTRMTOL,PFBUF_IN,PFBUF,KF_LEG) CALL GSTATS(421,1) #ifdef ACCGPU -#ifndef __HIP_PLATFORM_AMD__ # Workaround for AMD GPUs - ASYNC execution of this kernel gives numerical errors +#ifndef __HIP_PLATFORM_AMD__ + ! Workaround for AMD GPUs - ASYNC execution of this kernel gives numerical errors !$ACC WAIT(1) #endif #endif From 6381f11d96d58c69fc3d7930faae2ed6229559e5 Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Fri, 11 Oct 2024 13:57:38 +0200 Subject: [PATCH 86/86] Version 1.5.0 --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 88c5fb891..bc80560fa 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.4.0 +1.5.0