Changes from all commits
30 commits
ba8af53
Enable xattention on xe1
WeldonWangwang Mar 26, 2023
5f9b15e
Update 1st token
WeldonWangwang Mar 26, 2023
799d619
Configure BLOCK_SG_M/N for different platforms
WeldonWangwang Mar 26, 2023
82a95c7
support xe2
WeldonWangwang Mar 26, 2023
e0f7b57
Remove debug messages
WeldonWangwang Dec 19, 2025
f3f8f51
fix WG_N_size=128 mismatch issue
WeldonWangwang Dec 29, 2025
80e4c13
Fix cm_ptr_store failure by stabilizing quantization path on A770
WeldonWangwang Jan 16, 2026
62c4327
pass KV cache quant mode into paged attention JIT constants
WeldonWangwang Jan 16, 2026
52d16ca
Fix 2nd token with by-token mode
WeldonWangwang Jan 16, 2026
c97de30
Fix in M tail of smallest query on xe1
WeldonWangwang Jan 16, 2026
f49d7aa
Update 1st token
WeldonWangwang Jan 16, 2026
451fc2a
Fix 32k input out-of-resource on xe1
WeldonWangwang Jan 16, 2026
447c20f
remove debug messages
WeldonWangwang Jan 16, 2026
1d1f4d2
Standardized API
WeldonWangwang Jan 16, 2026
ca62a33
Merge branch 'master' into ww/enable_xattention_xe1
WeldonWangwang Jan 21, 2026
3c642ef
modify API
WeldonWangwang Jan 21, 2026
bee93ee
Fix merge_q_num
WeldonWangwang Jan 22, 2026
337c868
Fix build error
WeldonWangwang Jan 22, 2026
a525ed5
Merge branch 'master' into ww/enable_xattention_xe1
WeldonWangwang Jan 26, 2026
73f6ddc
Update src/plugins/intel_gpu/src/graph/impls/cm/include/cm_pa_common.hpp
WeldonWangwang Jan 27, 2026
86499d1
Fix kv cache update on A770
WeldonWangwang Jan 28, 2026
ea39043
Some updates
WeldonWangwang Jan 28, 2026
be5632c
xe1 non-lsc by-token reduce SVM reads via 4-way K/V tile loads
WeldonWangwang Jan 28, 2026
a53f55a
Merge branch 'master' into ww/enable_xattention_xe1
ceciliapeng2011 Feb 2, 2026
087f050
Apply suggestion from @ceciliapeng2011
ceciliapeng2011 Feb 2, 2026
4bab916
Apply suggestions from code review
ceciliapeng2011 Feb 2, 2026
3e0ed5f
Apply stateful api to access v_cache for u8
WeldonWangwang Feb 3, 2026
9ee5662
Refactor sparse mask logic with unified shift-based check for 128/256…
WeldonWangwang Feb 3, 2026
70dae17
use if constexpr instead of macro guards for better readability and s…
WeldonWangwang Feb 3, 2026
9e18fe6
Some updates
WeldonWangwang Feb 3, 2026
@@ -338,65 +338,64 @@ vector<float, cols> online_softmax_update(matrix_ref<T, rows, cols> St, vector_r
#define cm_load_transpose cm_load<lsc::Transpose>
#define cm_load_vnni cm_load<lsc::VNNI>
#define cm_store_normal cm_store
#else
// simulation of LSC API using SVM API
template <typename T = int, unsigned NBlocks = 1, unsigned BlockH = 1, unsigned BlockW = 1>
inline void cm_load_normal(vector_ref<T, NBlocks*BlockH*BlockW> Res, const lsc::block_2d_desc<T, NBlocks, BlockH, BlockW> &Desc, int16_t Pred = 1) {
static_assert(NBlocks == 1);
auto pitch = Desc.get_pitch() + 1;
auto base = reinterpret_cast<svmptr_t>(Desc.get_base() + Desc.get_block_y()*pitch + Desc.get_block_x() * sizeof(T));
#pragma unroll
for(int i = 0; i < BlockH; i++) {
cm_svm_block_read(base + i * pitch, Res.select<BlockW, 1>(i*BlockW));
}
}

template <typename T = int, unsigned NBlocks = 1, unsigned BlockH = 1, unsigned BlockW = 1>
inline void cm_load_transpose(vector_ref<T, NBlocks*BlockW*BlockH> Res, const lsc::block_2d_desc<T, NBlocks, BlockH, BlockW> &Desc, int16_t Pred = 1) {
static_assert(NBlocks == 1);
auto pitch = Desc.get_pitch() + 1;
auto base = reinterpret_cast<svmptr_t>(Desc.get_base() + Desc.get_block_y()*pitch + Desc.get_block_x() * sizeof(T));
matrix<T, BlockH, BlockW> temp;
#pragma unroll
for(int i = 0; i < BlockH; i++) {
cm_svm_block_read(base + i * pitch, temp[i]);
}
Transpose2DMatrix(temp, Res.format<T, BlockW, BlockH>());
}
// // simulation of LSC API using SVM API
// template <typename T = int, unsigned NBlocks = 1, unsigned BlockH = 1, unsigned BlockW = 1>
// inline void cm_load_normal(vector_ref<T, NBlocks*BlockH*BlockW> Res, const lsc::block_2d_desc<T, NBlocks, BlockH, BlockW> &Desc, int16_t Pred = 1) {
// static_assert(NBlocks == 1);
// auto pitch = Desc.get_pitch() + 1;
// auto base = reinterpret_cast<svmptr_t>(Desc.get_base() + Desc.get_block_y()*pitch + Desc.get_block_x() * sizeof(T));
// #pragma unroll
// for(int i = 0; i < BlockH; i++) {
// cm_svm_block_read(base + i * pitch, Res.select<BlockW, 1>(i*BlockW));
// }
// }

// in VNNI case, NBlocks is increasing along X dimension (increase cache-line usage)
template <typename T = int, unsigned NBlocks = 1, unsigned BlockH = 1, unsigned BlockW = 1>
inline void cm_load_vnni(vector_ref<T, NBlocks*BlockW*BlockH> Res, const lsc::block_2d_desc<T, NBlocks, BlockH, BlockW> &Desc, int16_t Pred = 1) {
static_assert(NBlocks == 1 || NBlocks == 2);
// each block must be a full XMX B matrix
static_assert(BlockH == REG_K);
static_assert(BlockW == REG_N);
auto pitch = Desc.get_pitch() + 1;
auto base = reinterpret_cast<svmptr_t>(Desc.get_base() + Desc.get_block_y()*pitch + Desc.get_block_x() * sizeof(T));
matrix<T, BlockH, NBlocks * BlockW> temp;
#pragma unroll
for(int i = 0; i < BlockH; i++) {
cm_svm_block_read(base + i * pitch, temp[i]);
}
// template <typename T = int, unsigned NBlocks = 1, unsigned BlockH = 1, unsigned BlockW = 1>
// inline void cm_load_transpose(vector_ref<T, NBlocks*BlockW*BlockH> Res, const lsc::block_2d_desc<T, NBlocks, BlockH, BlockW> &Desc, int16_t Pred = 1) {
// static_assert(NBlocks == 1);
// auto pitch = Desc.get_pitch() + 1;
// auto base = reinterpret_cast<svmptr_t>(Desc.get_base() + Desc.get_block_y()*pitch + Desc.get_block_x() * sizeof(T));
// matrix<T, BlockH, BlockW> temp;
// #pragma unroll
// for(int i = 0; i < BlockH; i++) {
// cm_svm_block_read(base + i * pitch, temp[i]);
// }
// Transpose2DMatrix(temp, Res.format<T, BlockW, BlockH>());
// }

auto out_vnni = Res.format<T, NBlocks * (BlockH/2), 2*BlockW>();
#pragma unroll
for(int i = 0; i < NBlocks; i ++) {
out_vnni.select<BlockH/2, 1, BlockW, 2>(i*(BlockH/2), 0) = temp.select<BlockH/2, 2, BlockW, 1>(0, i*BlockW);
out_vnni.select<BlockH/2, 1, BlockW, 2>(i*(BlockH/2), 1) = temp.select<BlockH/2, 2, BlockW, 1>(1, i*BlockW);
}
}
// // in VNNI case, NBlocks is increasing along X dimension (increase cache-line usage)
// template <typename T = int, unsigned NBlocks = 1, unsigned BlockH = 1, unsigned BlockW = 1>
// inline void cm_load_vnni(vector_ref<T, NBlocks*BlockW*BlockH> Res, const lsc::block_2d_desc<T, NBlocks, BlockH, BlockW> &Desc, int16_t Pred = 1) {
// static_assert(NBlocks == 1 || NBlocks == 2);
// // each block must be a full XMX B matrix
// static_assert(BlockH == REG_K);
// static_assert(BlockW == REG_N);
// auto pitch = Desc.get_pitch() + 1;
// auto base = reinterpret_cast<svmptr_t>(Desc.get_base() + Desc.get_block_y()*pitch + Desc.get_block_x() * sizeof(T));
// matrix<T, BlockH, NBlocks * BlockW> temp;
// #pragma unroll
// for(int i = 0; i < BlockH; i++) {
// cm_svm_block_read(base + i * pitch, temp[i]);
// }

// auto out_vnni = Res.format<T, NBlocks * (BlockH/2), 2*BlockW>();
// #pragma unroll
// for(int i = 0; i < NBlocks; i ++) {
// out_vnni.select<BlockH/2, 1, BlockW, 2>(i*(BlockH/2), 0) = temp.select<BlockH/2, 2, BlockW, 1>(0, i*BlockW);
// out_vnni.select<BlockH/2, 1, BlockW, 2>(i*(BlockH/2), 1) = temp.select<BlockH/2, 2, BlockW, 1>(1, i*BlockW);
// }
// }

template <typename T = int, unsigned NBlocks = 1, unsigned BlockH = 1, unsigned BlockW = 1>
inline void cm_store_normal(const lsc::block_2d_desc<T, NBlocks, BlockH, BlockW> &Desc, vector_ref<T, NBlocks*BlockW*BlockH> Res) {
static_assert(NBlocks == 1);
auto pitch = Desc.get_pitch() + 1;
auto base = reinterpret_cast<svmptr_t>(Desc.get_base() + Desc.get_block_y()*pitch + Desc.get_block_x() * sizeof(T));
#pragma unroll
for(int i = 0; i < BlockH; i++) {
cm_svm_block_write(base + i * pitch, Res.select<BlockW, 1>(i*BlockW));
}
}
// template <typename T = int, unsigned NBlocks = 1, unsigned BlockH = 1, unsigned BlockW = 1>
// inline void cm_store_normal(const lsc::block_2d_desc<T, NBlocks, BlockH, BlockW> &Desc, vector_ref<T, NBlocks*BlockW*BlockH> Res) {
// static_assert(NBlocks == 1);
// auto pitch = Desc.get_pitch() + 1;
// auto base = reinterpret_cast<svmptr_t>(Desc.get_base() + Desc.get_block_y()*pitch + Desc.get_block_x() * sizeof(T));
// #pragma unroll
// for(int i = 0; i < BlockH; i++) {
// cm_svm_block_write(base + i * pitch, Res.select<BlockW, 1>(i*BlockW));
// }
// }
#endif
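// Illustration (not part of the diff): the SVM fallback above addresses each
// row of a 2D block as base + (block_y + r) * pitch + block_x * sizeof(T),
// where pitch is Desc.get_pitch() + 1 bytes. A plain C++ model of that
// row-by-row load over a row-major surface; the element copy stands in for
// cm_svm_block_read:
template <typename T, int BlockH, int BlockW>
inline void load_block_2d_model(T (&out)[BlockH][BlockW],
                                const unsigned char* base, int pitch_bytes,
                                int block_x, int block_y) {
    const unsigned char* row = base + block_y * pitch_bytes + block_x * (int)sizeof(T);
    for (int r = 0; r < BlockH; ++r) {
        const T* src = reinterpret_cast<const T*>(row);
        for (int c = 0; c < BlockW; ++c)
            out[r][c] = src[c];      // one "block read" of BlockW elements
        row += pitch_bytes;          // advance to the next surface row
    }
}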

//===============================================================================================
@@ -416,4 +415,4 @@ inline void prepackAsVNNIWidth2(matrix_ref<T1, K, N> input, matrix_ref<T2, K/2,
out.row(r).select<N, 2>(0) = input.row(r*2);
out.row(r).select<N, 2>(1) = input.row(r*2+1);
}
}
}
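The prepackAsVNNIWidth2 loop above interleaves consecutive pairs of input rows into the width-2 VNNI layout used for the XMX B operand: out[r][2*c] comes from in[2*r][c] and out[r][2*c+1] from in[2*r+1][c]. A scalar C++ model of that index mapping, purely illustrative (the real code operates on matrix_ref with select<> views):

template <typename T, int K, int N>
inline void prepack_vnni_w2_model(const T (&in)[K][N], T (&out)[K / 2][2 * N]) {
    static_assert(K % 2 == 0, "width-2 VNNI packing pairs up rows, so K must be even");
    for (int r = 0; r < K / 2; ++r) {
        for (int c = 0; c < N; ++c) {
            out[r][2 * c]     = in[2 * r][c];      // even lanes take row 2r
            out[r][2 * c + 1] = in[2 * r + 1][c];  // odd lanes take row 2r + 1
        }
    }
}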