@@ -528,32 +528,6 @@ Value emitPadding(Location loc, RewriterBase &rewriter,
528528                  triton::gpu::PaddedSharedEncodingAttr layout,
529529                  unsigned  bitwidth, Value smemOffset, bool  offsetInBytes);
530530
531- //  Emits IR to load data from shared memory into registers, or to store data
532- //  from registers into shared memory.
533- // 
534- //  You supply perVectorCallback, which is called once per group of register
535- //  elements to transfer.  You can use this callback to emit IR to load or store
536- //  data from or to shared memory.
537- // 
538- //  elemLlvmTy should be dstTy's element type converted to an LLVM-dialect type.
539- // 
540- //  If maxVecElems is provided, we won't vectorize more than this many elements.
541- // 
542- //  Returns true on success.
543- [[nodiscard]] bool  emitTransferBetweenRegistersAndShared (
544-     RankedTensorType registerTy, triton::gpu::MemDescType sharedTy,
545-     Type elemLlvmTy, std::optional<int32_t > maxVecElems,
546-     const  SharedMemoryObject &smemObj, Location loc, RewriterBase &rewriter,
547-     const  TargetInfoBase &target,
548-     std::function<void (VectorType, Value /* shmemAddr*/  )> perVectorCallback);
549- 
550- [[nodiscard]] bool  emitTransferBetweenRegistersAndShared (
551-     LinearLayout &regLayout, triton::gpu::MemDescType sharedTy, Type elemLlvmTy,
552-     std::optional<int32_t > maxVecElems, const  SharedMemoryObject &smemObj,
553-     Location loc, RewriterBase &rewriter, const  TargetInfoBase &target,
554-     Value laneId, Value warpId,
555-     std::function<void (VectorType, Value /* shmemAddr*/  )> perVectorCallback);
556- 
557531//  Close cousin of lowerLdStMatrix in MemoryOpToLLVM.cpp
558532//  We might want to merge them at some point, but having to support
559533//  ldmatrix.trans makes the code in lowerLdStMatrix a bit specific
0 commit comments