@@ -833,27 +833,36 @@ MPL_STATIC_INLINE_PREFIX void MPIDI_OFI_gpu_rma_register(const void *buffer, siz
833833#undef CQ_D_HEAD
834834#undef CQ_D_TAIL
835835
836- MPL_STATIC_INLINE_PREFIX int MPIDI_OFI_gpu_malloc_pack_buffer (void * * ptr , size_t pack_size )
837- {
838- if (MPIDI_OFI_ENABLE_HMEM ) {
839- return MPL_gpu_malloc_host (ptr , pack_size );
840- } else {
841- #ifdef MPL_DEFINE_ALIGNED_ALLOC
842- * ptr = MPL_aligned_alloc (256 , pack_size , MPL_MEM_BUFFER );
843- #else
844- * ptr = MPL_malloc (pack_size , MPL_MEM_BUFFER );
845- #endif
846- return 0 ;
836+ MPL_STATIC_INLINE_PREFIX void * MPIDI_OFI_malloc_pack_buffer (MPIR_Request * req , MPI_Aint pack_size )
837+ {
838+ void * pack_buf ;
839+ bool is_genq ;
840+ if (pack_size <= MPIR_CVAR_CH4_OFI_PIPELINE_CHUNK_SZ ) {
841+ int vci = MPIR_REQUEST_POOL_FROM_HANDLE (req -> handle );
842+ MPIDU_genq_private_pool_alloc_cell (MPIDI_OFI_global .per_vci [vci ].pipeline_pool , & pack_buf );
843+ is_genq = true;
844+ }
845+ if (!pack_buf ) {
846+ pack_buf = MPL_aligned_alloc (64 , pack_size , MPL_MEM_OTHER );
847+ is_genq = false;
847848 }
849+ if (pack_buf ) {
850+ MPIDI_OFI_REQUEST (req , noncontig .pack .pack_buffer ) = pack_buf ;
851+ MPIDI_OFI_REQUEST (req , noncontig .pack .is_genq ) = is_genq ;
852+ }
853+ return pack_buf ;
848854}
849855
850- MPL_STATIC_INLINE_PREFIX int MPIDI_OFI_gpu_free_pack_buffer ( void * ptr )
856+ MPL_STATIC_INLINE_PREFIX void MPIDI_OFI_free_pack_buffer ( MPIR_Request * req )
851857{
852- if (MPIDI_OFI_ENABLE_HMEM ) {
853- return MPL_gpu_free_host (ptr );
854- } else {
855- MPL_free (ptr );
856- return 0 ;
858+ if (MPIDI_OFI_REQUEST (req , noncontig .pack .pack_buffer )) {
859+ if (MPIDI_OFI_REQUEST (req , noncontig .pack .is_genq )) {
860+ int vci = MPIR_REQUEST_POOL_FROM_HANDLE (req -> handle );
861+ MPIDU_genq_private_pool_free_cell (MPIDI_OFI_global .per_vci [vci ].pipeline_pool ,
862+ MPIDI_OFI_REQUEST (req , noncontig .pack .pack_buffer ));
863+ } else {
864+ MPL_free (MPIDI_OFI_REQUEST (req , noncontig .pack .pack_buffer ));
865+ }
857866 }
858867}
859868
0 commit comments