Skip to content

Commit 16caa4b

Browse files
committed
update readme.md
1 parent 0bb3fae commit 16caa4b

25 files changed

+98
-85
lines changed

.gitignore

+1
Original file line number | Diff line number | Diff line change
@@ -8,3 +8,4 @@
88
/AmxMulWin/GPUCache/**
99
/AmxMulWin/Intel® VTune™ Profiler Results/**
1010
/AmxMulWin/Intel_Inspector/**
11+
/AmxMulWin/AmxMulWin/x64/**

AmxMulWin/AmxMulWin.vcxproj

+5-1
Original file line number | Diff line number | Diff line change
@@ -171,8 +171,8 @@
171171
<ClInclude Include="..\vecmul_example.h" />
172172
</ItemGroup>
173173
<ItemGroup>
174-
<None Include="..\benchmark.csv" />
175174
<None Include="..\CMakePresets.json" />
175+
<None Include="..\experiments\benchmark.csv" />
176176
<None Include="..\generated\asm\run-nasm.bat" />
177177
<CustomBuild Include="..\generated\asm\tdpbssd_N16_M16_K64.asm">
178178
<DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</DeploymentContent>
@@ -327,13 +327,17 @@
327327
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj</Outputs>
328328
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj</Outputs>
329329
</CustomBuild>
330+
<None Include="..\README.md" />
330331
<None Include="..\run-sde.bat" />
331332
</ItemGroup>
332333
<ItemGroup>
333334
<Text Include="..\CMakeLists.txt" />
334335
<Text Include="..\generated\asm\howto.txt" />
335336
<Text Include="..\readme.txt" />
336337
</ItemGroup>
338+
<ItemGroup>
339+
<Image Include="..\experiments\Matrix Multiplication BF16.png" />
340+
</ItemGroup>
337341
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
338342
<ImportGroup Label="ExtensionTargets">
339343
</ImportGroup>

AmxMulWin/AmxMulWin.vcxproj.filters

+9-1
Original file line number | Diff line number | Diff line change
@@ -106,7 +106,10 @@
106106
<None Include="..\run-sde.bat">
107107
<Filter>Resource Files</Filter>
108108
</None>
109-
<None Include="..\benchmark.csv">
109+
<None Include="..\experiments\benchmark.csv">
110+
<Filter>Resource Files</Filter>
111+
</None>
112+
<None Include="..\README.md">
110113
<Filter>Resource Files</Filter>
111114
</None>
112115
</ItemGroup>
@@ -165,4 +168,9 @@
165168
<Filter>Header Files\asm</Filter>
166169
</CustomBuild>
167170
</ItemGroup>
171+
<ItemGroup>
172+
<Image Include="..\experiments\Matrix Multiplication BF16.png">
173+
<Filter>Resource Files</Filter>
174+
</Image>
175+
</ItemGroup>
168176
</Project>

AmxMulWin/nasm.exe

1.56 MB
Binary file not shown.

AmxMulWin/ndisasm.exe

1.07 MB
Binary file not shown.

amx.amx_matrix.h

+8-8
Original file line number | Diff line number | Diff line change
@@ -1,22 +1,22 @@
11
#pragma once
2-
#include <string>
3-
#include <ios> //hex
4-
#include <iomanip> //setw
5-
#include <iostream> //std::cout
2+
#include <bit>
63
#include <cstdint>
74
#include <cstdlib>
5+
#include <intrin.h>
6+
#include <iomanip> //setw
7+
#include <ios> //hex
88
#include <iosfwd>
9+
#include <iostream> //std::cout
910
#include <random>
1011
#include <sstream>
12+
#include <string>
1113
#include <utility>
12-
#include <intrin.h>
13-
#include <bit>
1414

15-
#include "amx.tile.h"
1615
#include "amx.print.h"
17-
#include "amx.types.h"
16+
#include "amx.tile.h"
1817
#include "amx.tile_array.h"
1918
#include "amx.tools.h"
19+
#include "amx.types.h"
2020

2121
namespace amx {
2222

amx.benchmark.h

+6-6
Original file line number | Diff line number | Diff line change
@@ -1,23 +1,23 @@
11
#pragma once
22
#include <array>
33
#include <cstdint>
4-
#include <iostream>
5-
#include <ostream>
6-
#include <string>
74
#include <fstream>
8-
#include <iosfwd>
95
#include <iomanip>
106
#include <ios>
7+
#include <iosfwd>
8+
#include <iostream>
9+
#include <ostream>
10+
#include <string>
1111
#include <vector>
1212

1313
#include "amx.amx_matrix.h"
1414
#include "amx.tile.h"
1515
#include "amx.tmul.ref.h"
16-
#include "amx.types.h"
1716
#include "amx.tmul.spr.h"
17+
#include "amx.types.h"
1818

19-
#include "tools.timing.h"
2019
#include "generated/asm/amx.asm.h"
20+
#include "tools.timing.h"
2121

2222

2323
namespace amx::benchmark {

amx.gen.h

+8-8
Original file line number | Diff line number | Diff line change
@@ -1,17 +1,17 @@
11
#pragma once
2-
#include <string>
2+
#include <array>
3+
#include <cstdint>
4+
#include <intrin.h>
5+
#include <iosfwd>
36
#include <iostream>
7+
#include <memory>
8+
#include <regex>
49
#include <sstream>
10+
#include <string>
511
#include <tuple>
6-
#include <array>
7-
#include <iosfwd>
8-
#include <vector>
9-
#include <cstdint>
1012
#include <unordered_map>
11-
#include <regex>
12-
#include <memory>
13-
#include <intrin.h>
1413
#include <utility>
14+
#include <vector>
1515

1616
#include "amx.amx_matrix.h"
1717
#include "amx.tools.h"

amx.matrix.h

+3-3
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,12 @@
11
#pragma once
2-
#include <vector>
3-
#include <string>
42
#include <iostream>
53
#include <sstream>
4+
#include <string>
5+
#include <vector>
66

77
#include "amx.print.h"
8-
#include "amx.types.h"
98
#include "amx.tools.h"
9+
#include "amx.types.h"
1010

1111
namespace amx {
1212

amx.print.h

+4-4
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,11 @@
11
#pragma once
2-
#include <sstream>
3-
#include <ios> //hex
4-
#include <iomanip> //setw
5-
#include <type_traits>
62
#include <cstdint>
3+
#include <iomanip> //setw
4+
#include <ios> //hex
75
#include <iosfwd>
86
#include <ostream>
7+
#include <sstream>
8+
#include <type_traits>
99

1010
#include "amx.types.h"
1111

amx.test.h

+7-7
Original file line number | Diff line number | Diff line change
@@ -1,17 +1,17 @@
11
#pragma once
2-
#include <iostream> //std::cout
32
#include <cstdint>
3+
#include <iostream> //std::cout
44
#include <string>
55

6-
#include "amx.tmul.ref.h"
7-
#include "amx.tmul.spr.h"
8-
#include "amx.tile.h"
9-
#include "amx.matrix.h"
10-
#include "amx.test_data.h"
116
#include "amx.amx_matrix.h"
7+
#include "amx.matrix.h"
128
#include "amx.print.h"
13-
#include "amx.types.h"
9+
#include "amx.test_data.h"
10+
#include "amx.tile.h"
11+
#include "amx.tmul.ref.h"
12+
#include "amx.tmul.spr.h"
1413
#include "amx.transpose.h"
14+
#include "amx.types.h"
1515

1616
#include "generated/asm/amx.asm.h"
1717

amx.test_data.h

+2-2
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
#pragma once
2-
#include <sstream>
3-
#include <iostream> //std::cout
42
#include <iosfwd>
3+
#include <iostream> //std::cout
4+
#include <sstream>
55
#include <string>
66

77
#include "amx.amx_matrix.h"

amx.tile.h

+6-6
Original file line number | Diff line number | Diff line change
@@ -1,18 +1,18 @@
11
#pragma once
2-
#include <string>
3-
#include <sstream>
4-
#include <iostream>
52
#include <cstdlib>
63
#include <iosfwd>
4+
#include <iostream>
5+
#include <sstream>
6+
#include <string>
77
#include <typeinfo>
88

9-
#include "amx.types.h"
109
#include "amx.print.h"
1110
#include "amx.tools.h"
12-
#include <utility>
11+
#include "amx.types.h"
12+
#include <bit>
1313
#include <cstdint>
1414
#include <type_traits>
15-
#include <bit>
15+
#include <utility>
1616

1717
namespace amx {
1818

amx.tile_config.h

+1-1
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
#pragma once
2-
#include <string>
32
#include <cstdint>
43
#include <sstream>
4+
#include <string>
55

66
namespace amx {
77

amx.tmul.ref.h

+3-3
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,11 @@
11
#pragma once
2-
#include <iostream> //std::cout
32
#include <cstdint>
3+
#include <iostream> //std::cout
44

5-
#include "amx.tile.h"
65
#include "amx.amx_matrix.h"
7-
#include "amx.types.h"
6+
#include "amx.tile.h"
87
#include "amx.tools.h"
8+
#include "amx.types.h"
99

1010

1111
namespace amx::tmul::ref {

amx.tmul.spr.h

+2-2
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,12 @@
11
#pragma once
22
#include <immintrin.h>
33

4+
#include "amx.amx_matrix.h"
45
#include "amx.tile.h"
56
#include "amx.tile_config.h"
6-
#include "amx.amx_matrix.h"
7-
#include "generated/asm/amx.asm.h"
87
#include "amx.tools.h"
98
#include "amx.types.h"
9+
#include "generated/asm/amx.asm.h"
1010
#include <iostream>
1111
#include <ostream>
1212

amx.tools.h

+1-1
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
#pragma once
2-
#include <iostream>
32
#include <fstream>
3+
#include <iostream>
44

55
//#define LINUX
66

amx.types.h

+1-1
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
#pragma once
2-
#include <cstdint>
32
#include <bit>
3+
#include <cstdint>
44
#include <immintrin.h>
55
#include <sstream>
66
#include <string>

amx.vecmul.h

+3-3
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,11 @@
11
#pragma once
2-
#include <vector>
3-
#include <immintrin.h>
42
#include <array>
3+
#include <immintrin.h>
4+
#include <vector>
55

66
#include "amx.tile_config.h"
7-
#include "amx.types.h"
87
#include "amx.transpose.h"
8+
#include "amx.types.h"
99

1010

1111
namespace amx {

example.vecmul.h

+8-8
Original file line number | Diff line number | Diff line change
@@ -1,22 +1,22 @@
11
#pragma once
22
#include <array>
3+
#include <cstddef>
34
#include <cstdint>
4-
#include <vector>
5-
#include <random>
6-
#include <iostream>
7-
#include <ostream>
8-
#include <tuple>
95
#include <immintrin.h>
10-
#include <cstddef>
116
#include <iosfwd>
7+
#include <iostream>
8+
#include <ostream>
9+
#include <random>
1210
#include <sstream>
1311
#include <string>
12+
#include <tuple>
13+
#include <vector>
1414

15+
#include "amx.print.h"
1516
#include "amx.tile_config.h"
17+
#include "amx.transpose.h"
1618
#include "amx.types.h"
1719
#include "tools.timing.h"
18-
#include "amx.print.h"
19-
#include "amx.transpose.h"
2020
#include <limits>
2121

2222

29.4 KB
Loading
File renamed without changes.

0 commit comments

Comments
 (0)