@@ -834,27 +834,36 @@ MPL_STATIC_INLINE_PREFIX void MPIDI_OFI_gpu_rma_register(const void *buffer, siz
834834#undef CQ_D_HEAD
835835#undef CQ_D_TAIL
836836
837- MPL_STATIC_INLINE_PREFIX int MPIDI_OFI_gpu_malloc_pack_buffer (void * * ptr , size_t pack_size )
838- {
839- if (MPIDI_OFI_ENABLE_HMEM ) {
840- return MPL_gpu_malloc_host (ptr , pack_size );
841- } else {
842- #ifdef MPL_DEFINE_ALIGNED_ALLOC
843- * ptr = MPL_aligned_alloc (256 , pack_size , MPL_MEM_BUFFER );
844- #else
845- * ptr = MPL_malloc (pack_size , MPL_MEM_BUFFER );
846- #endif
847- return 0 ;
837+ MPL_STATIC_INLINE_PREFIX void * MPIDI_OFI_malloc_pack_buffer (MPIR_Request * req , MPI_Aint pack_size )
838+ {
839+ void * pack_buf ;
840+ bool is_genq ;
841+ if (pack_size <= MPIR_CVAR_CH4_OFI_PIPELINE_CHUNK_SZ ) {
842+ int vci = MPIR_REQUEST_POOL_FROM_HANDLE (req -> handle );
843+ MPIDU_genq_private_pool_alloc_cell (MPIDI_OFI_global .per_vci [vci ].pipeline_pool , & pack_buf );
844+ is_genq = true;
845+ }
846+ if (!pack_buf ) {
847+ pack_buf = MPL_aligned_alloc (64 , pack_size , MPL_MEM_OTHER );
848+ is_genq = false;
848849 }
850+ if (pack_buf ) {
851+ MPIDI_OFI_REQUEST (req , noncontig .pack .pack_buffer ) = pack_buf ;
852+ MPIDI_OFI_REQUEST (req , noncontig .pack .is_genq ) = is_genq ;
853+ }
854+ return pack_buf ;
849855}
850856
851- MPL_STATIC_INLINE_PREFIX int MPIDI_OFI_gpu_free_pack_buffer ( void * ptr )
857+ MPL_STATIC_INLINE_PREFIX void MPIDI_OFI_free_pack_buffer ( MPIR_Request * req )
852858{
853- if (MPIDI_OFI_ENABLE_HMEM ) {
854- return MPL_gpu_free_host (ptr );
855- } else {
856- MPL_free (ptr );
857- return 0 ;
859+ if (MPIDI_OFI_REQUEST (req , noncontig .pack .pack_buffer )) {
860+ if (MPIDI_OFI_REQUEST (req , noncontig .pack .is_genq )) {
861+ int vci = MPIR_REQUEST_POOL_FROM_HANDLE (req -> handle );
862+ MPIDU_genq_private_pool_free_cell (MPIDI_OFI_global .per_vci [vci ].pipeline_pool ,
863+ MPIDI_OFI_REQUEST (req , noncontig .pack .pack_buffer ));
864+ } else {
865+ MPL_free (MPIDI_OFI_REQUEST (req , noncontig .pack .pack_buffer ));
866+ }
858867 }
859868}
860869
0 commit comments