Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions README.txt
Original file line number Diff line number Diff line change
Expand Up @@ -367,6 +367,9 @@ Submitting results:

- the '-d c' option fails for some CPUs; this is under investigation

- certain 15-bit Barrett kernels are incompatible with RDNA 2 and RDNA 3 GPUs,
and need to be ported to 32 bits

- some have reported mfakto does not work on certain Nvidia hardware; this is
also being investigated

Expand Down
74 changes: 60 additions & 14 deletions src/mfaktc.c
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,9 @@ GPU_type gpu_types[]={
{GPU_GCN5, 64, "GCN5"},
{GPU_GCNF, 64, "GCNF"},
{GPU_RDNA, 64, "RDNA"},
{GPU_RDNA2, 64, "RDNA2"},
{GPU_RDNA3, 64, "RDNA3"},
{GPU_RDNA4, 64, "RDNA4"},
{GPU_APU, 80, "APU"},
{GPU_CPU, 1, "CPU"},
{GPU_NVIDIA, 8, "NVIDIA"},
Expand Down Expand Up @@ -289,7 +291,7 @@ GPUKernels find_fastest_kernel(mystuff_t *mystuff, cl_uint do_test)
UNKNOWN_KERNEL,
UNKNOWN_KERNEL },
{
/* GPU_GCN4 (Ellesmere/Lexa/Baffin) (only barrett tested) */
/* GPU_GCN4 (Ellesmere/Lexa/Baffin) (only Barrett kernels tested) */
BARRETT69_MUL15,
BARRETT70_MUL15,
BARRETT71_MUL15,
Expand All @@ -311,7 +313,7 @@ GPUKernels find_fastest_kernel(mystuff_t *mystuff, cl_uint do_test)
UNKNOWN_KERNEL,
UNKNOWN_KERNEL },
{
/* GPU_GCN5 (Vega 56/Vega 64/"Vega" Ryzen 2xxx-3xxx iGPU) (only barrett tested) */
/* GPU_GCN5 (Vega 56/Vega 64/"Vega" Ryzen 2xxx-3xxx iGPU) (only Barrett kernels tested) */
BARRETT69_MUL15,
BARRETT70_MUL15,
BARRETT71_MUL15,
Expand All @@ -333,7 +335,7 @@ GPUKernels find_fastest_kernel(mystuff_t *mystuff, cl_uint do_test)
UNKNOWN_KERNEL,
UNKNOWN_KERNEL },
{
/* GPU_GCNF (Last GCN - Radeon VII) (only barrett tested) */
/* GPU_GCNF (Last GCN - Radeon VII) (only Barrett kernels tested) */
BARRETT76_MUL32,
BARRETT77_MUL32,
BARRETT87_MUL32,
Expand All @@ -354,30 +356,74 @@ GPUKernels find_fastest_kernel(mystuff_t *mystuff, cl_uint do_test)
MG88,
UNKNOWN_KERNEL,
UNKNOWN_KERNEL },
{
/* GPU_RDNA (1st/2nd gen RDNA) (only Barrett kernels tested) */
{
/* GPU_RDNA (1st gen RDNA) (does not like all 15-bit Barrett kernels) */
BARRETT76_MUL32,
BARRETT77_MUL32,
BARRETT87_MUL32,
BARRETT88_MUL32,
BARRETT73_MUL15,
BARRETT74_MUL15,
BARRETT79_MUL32,
BARRETT92_MUL32,
MG62,
_63BIT_MUL24,
_71BIT_MUL24,
MG88,
UNKNOWN_KERNEL,
UNKNOWN_KERNEL,
UNKNOWN_KERNEL,
UNKNOWN_KERNEL,
UNKNOWN_KERNEL,
UNKNOWN_KERNEL,
UNKNOWN_KERNEL,
UNKNOWN_KERNEL },
{
/* GPU_RDNA2 (2nd gen RDNA) */
BARRETT69_MUL15,
BARRETT70_MUL15,
BARRETT71_MUL15,
BARRETT76_MUL32,
BARRETT77_MUL32,
BARRETT87_MUL32,
BARRETT88_MUL32,
BARRETT73_MUL15,
BARRETT74_MUL15,
BARRETT79_MUL32,
BARRETT92_MUL32,
MG62,
_63BIT_MUL24,
_71BIT_MUL24,
MG88,
UNKNOWN_KERNEL,
UNKNOWN_KERNEL,
UNKNOWN_KERNEL,
UNKNOWN_KERNEL,
UNKNOWN_KERNEL }, // TODO fix failures in kernels: {'cl_barrett15_83_gs': 33323, 'cl_barrett15_88_gs': 33764, 'cl_barrett15_82_gs': 33225}
{
/* GPU_RDNA3 (3rd gen RDNA) (only Barrett kernels tested) */
BARRETT69_MUL15,
BARRETT70_MUL15,
BARRETT71_MUL15,
BARRETT76_MUL32,
BARRETT77_MUL32,
BARRETT82_MUL15,
BARRETT83_MUL15,
BARRETT87_MUL32,
BARRETT88_MUL32,
BARRETT73_MUL15,
BARRETT74_MUL15,
BARRETT79_MUL32,
BARRETT88_MUL15,
BARRETT92_MUL32,
MG62,
_63BIT_MUL24,
_71BIT_MUL24,
MG88,
UNKNOWN_KERNEL,
UNKNOWN_KERNEL },
{
/* GPU_RDNA3 (3rd gen RDNA) (only Barrett kernels tested) */
UNKNOWN_KERNEL,
UNKNOWN_KERNEL,
UNKNOWN_KERNEL,
UNKNOWN_KERNEL }, // TODO fix failures in kernels: {'cl_barrett15_83_gs': 33323, 'cl_barrett15_88_gs': 33764, 'cl_barrett15_82_gs': 33225}
{
/* GPU_RDNA4 (4th gen RDNA) (only Barrett kernels tested) */
BARRETT69_MUL15,
BARRETT70_MUL15,
BARRETT71_MUL15,
Expand All @@ -398,7 +444,7 @@ GPUKernels find_fastest_kernel(mystuff_t *mystuff, cl_uint do_test)
UNKNOWN_KERNEL,
UNKNOWN_KERNEL,
UNKNOWN_KERNEL }, // TODO fix failures in kernels: {'cl_barrett15_83_gs': 33323, 'cl_barrett15_88_gs': 33764, 'cl_barrett15_82_gs': 33225}
{
{
/* GPU_APU, (BeaverCreek=???, v=4) */
BARRETT70_MUL15, // "cl_barrett15_70" (79.66 M/s)
BARRETT69_MUL15, // "cl_barrett15_69" (78.40 M/s)
Expand All @@ -421,7 +467,7 @@ GPUKernels find_fastest_kernel(mystuff_t *mystuff, cl_uint do_test)
UNKNOWN_KERNEL,
UNKNOWN_KERNEL,
UNKNOWN_KERNEL },
{
{
/* GPU_CPU, i7 620M @ 3.06GHz */
MG62, // "cl_mg_62" (9.60 M/s)
BARRETT77_MUL32, // "cl_barrett32_77" (5.54 M/s)
Expand All @@ -433,7 +479,7 @@ GPUKernels find_fastest_kernel(mystuff_t *mystuff, cl_uint do_test)
BARRETT70_MUL15, // "cl_barrett15_70" (3.60 M/s)
BARRETT92_MUL32, // "cl_barrett32_92" (3.56 M/s)
BARRETT71_MUL15, // "cl_barrett15_71" (3.43 M/s)
// BARRETT70_MUL24, // "cl_barrett24_70" (3.40 M/s)
// BARRETT70_MUL24, // "cl_barrett24_70" (3.40 M/s)
BARRETT73_MUL15, // "cl_barrett15_73" (3.07 M/s)
BARRETT74_MUL15, // "cl_barrett15_74"
BARRETT82_MUL15, // "cl_barrett15_82" (2.72 M/s)
Expand Down
11 changes: 9 additions & 2 deletions src/mfakto.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -749,17 +749,19 @@ void set_gpu_type()
mystuff.gpu_type = GPU_GCNF;
}
else if (STM("gfx101") || // RDNA1
STM("gfx103") || // RDNA2

PAT("RX [56][0-9][0-9][0-9]") // Model
// Also known as 6[0-9]0M, but might be too vague to match
)
{
mystuff.gpu_type = GPU_RDNA;
}
else if (STM("gfx103")) // RDNA2
{
mystuff.gpu_type = GPU_RDNA2;
}
else if (STM("gfx110") || // Catch-all RDNA3
STM("gfx115") || // Catch-all RDNA3.5
STM("gfx120") || // Catch-all RDNA4

PAT("RX [79][0-9][0-9][0-9]") || // Model
PAT("80[456]0S") // Strix Halo, huge APU
Expand All @@ -768,6 +770,11 @@ void set_gpu_type()
{
mystuff.gpu_type = GPU_RDNA3;
}
else if (STM("gfx120")) // Catch-all RDNA4
{
mystuff.gpu_type = GPU_RDNA4;
}

else if (STM("Cayman") || // 6950, 6970
STM("Devastator") || // 7xx0D (iGPUs of A4/6/8/10)
STM("Scrapper") || // 7xx0G (iGPUs of A4/6/8/10)
Expand Down
2 changes: 2 additions & 0 deletions src/my_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,9 @@ enum GPU_types
GPU_GCN5,
GPU_GCNF, // R VII
GPU_RDNA,
GPU_RDNA2,
GPU_RDNA3,
GPU_RDNA4,
GPU_APU,
GPU_CPU,
GPU_NVIDIA,
Expand Down
Loading