Transposition with SHMEM vs. send/recv
! SHMEM variant of the transposition: one-sided puts bracketed by two
! global barriers.
! For each of the lmtot entries, len elements starting at
! x(1,ksnding(kk)) are written into y on PE ipsndto(kk), beginning at
! offset 1+(ktag-1)*len, where ktag = ksendto(kk).
call shmem_barrier_all
do 150 kk=1,lmtot
  ktag=ksendto(kk)
  ! Arguments are (target, source, length, destination PE).
  call shmem_put8( y(1+(ktag-1)*len), x(1,ksnding(kk)), len, ipsndto(kk) )
150 continue
! Second barrier follows the puts; presumably y is only read after
! it -- confirm against the surrounding code.
call shmem_barrier_all
! MPI variant of the transposition: a non-blocking send of len elements
! starting at x(1,ksnding(kk)) to rank ipsndto(kk), a non-blocking
! receive of len elements into y(1,krcving(kk)) from rank iprcvfr(kk),
! then a wait on the first ltag requests stored in iss.
! NOTE(review): both calls store their request handle in iss(ltag).
! The enclosing loop over kk and any increment of ltag are not visible
! in this fragment -- confirm ltag is advanced between the two calls,
! otherwise the send request is overwritten by the receive request.
call mpi_isend(x(1,ksnding(kk)), len, mpireal, ipsndto(kk), ktag, mpicomm, iss(ltag), istat)
call mpi_irecv(y(1,krcving(kk)), len, mpireal, iprcvfr(kk), ktag, mpicomm, iss(ltag), istat)
! mpi_waitall is the standard routine name:
! (count, array_of_requests, array_of_statuses, ierror).
call mpi_waitall(ltag, iss, istatm, istat)