Skip to content

Commit 7eee899

Browse files
authored
Merge pull request #31 from ixfd64/0.24.0-backport
Backport changes from mfaktc 0.24.0 to 0.23 branch
2 parents da80c50 + 4296147 commit 7eee899

File tree

18 files changed

+661
-735
lines changed

18 files changed

+661
-735
lines changed

.github/workflows/build.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,7 @@ jobs:
208208
cuda: ${{ matrix.sys.cuda_version }}
209209
sub-packages: ${{ startsWith(matrix.sys.cuda_version, '8.') && '[]' || '[ "nvcc", "cudart" ]' }}
210210
use-local-cache: false
211+
use-github-cache: false
211212

212213
- name: Configure path to CUDA
213214
shell: powershell
@@ -307,6 +308,7 @@ jobs:
307308
printf '%s\n' ${{ env.base_name }}.zip | sort -Vr
308309
echo 'EOF'
309310
} > $GITHUB_OUTPUT
311+
( echo "${{ github.ref_name }}" | grep -qsP "v?\d+(?:\.\d+(?:\.\d+)?(?:-\d+)?|\b)(-(?:alpha|beta|pre))" && echo "PRERELEASE=true" || echo "PRERELEASE=false" ) >> $GITHUB_OUTPUT
310312
311313
- name: Create and upload release package
312314
uses: softprops/action-gh-release@v2.2.1
@@ -315,6 +317,7 @@ jobs:
315317
files: |
316318
${{ steps.makeinfo.outputs.RELEASE_FILES }}
317319
preserve_order: true
320+
prerelease: ${{ steps.makeinfo.outputs.PRERELEASE }}
318321
generate_release_notes: true
319322
body_path: RELEASE_NOTES.txt
320323
make_latest: true

.github/workflows/scripts/build_helper.sh

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -37,28 +37,31 @@ export GSORT='/usr/bin/sort'
3737

3838
CUDA_VERSION_FULL="$(echo "$1" | head -n1 | grep -Eom1 -e '^[1-9]([0-9])?\.[0-9]{1,2}(\.[0-9]{1,3})?$')"
3939
declare -a CUDA_VERSION
40-
CUDA_VERSION=( $(echo "$CUDA_VERSION_FULL" | tr '.' ' ') )
40+
IFS=" " read -r -a CUDA_VERSION <<< "$(echo "$CUDA_VERSION_FULL" | tr '.' ' ')"
4141
if [[ -z "${CUDA_VERSION[*]}" ]]; then
42-
echo "ERROR! Can't parse CUDA version $1" >&2
42+
echo "Error: unexpected CUDA version $1" >&2
4343
exit 2
4444
fi
4545

4646
CUDA_VER_MAJOR=${CUDA_VERSION[0]}
4747
CUDA_VER_MINOR=${CUDA_VERSION[1]}
48-
CUDA_VER="${CUDA_VER_MAJOR}${CUDA_VER_MINOR}"
4948
echo -e "CUDA_VER_MAJOR=${CUDA_VER_MAJOR}\nCUDA_VER_MINOR=${CUDA_VER_MINOR}" > "$0.out"
5049

50+
# Format CUDA_VER as a single integer: the major version followed by the minor
51+
# version zero-padded to two digits (e.g. 11.0 -> 1100), so that CUDA versions can be compared numerically in this script.
52+
printf -v CUDA_VER %d%02d "${CUDA_VER_MAJOR}" "${CUDA_VER_MINOR}";
53+
5154
# CUDA supports the --list-gpu-arch flag from 11.0.0 onwards.
5255
# For older CUDA versions, use grep to parse the supported architectures from
5356
# the output of --help
54-
[ $CUDA_VER -gt 110 ] && NVCC_OPTS='--list-gpu-arch' || NVCC_OPTS='--help'
57+
[ "$CUDA_VER" -gt 1100 ] && NVCC_OPTS='--list-gpu-arch' || NVCC_OPTS='--help'
5558
NVCC_REGEX='compute_[1-9][0-9]{1,2}'
5659
# CUDA 11.0.x is a special case. Its --help output lists compute_32 and higher,
5760
# but only compute capability 3.5 and later are supported.
58-
[ $CUDA_VER -eq 110 ] && NVCC_REGEX='compute_(3[5-9]|[4-9][0-9])'
61+
[ "$CUDA_VER" -eq 1100 ] && NVCC_REGEX='compute_(3[5-9]|[4-9][0-9])'
5962

6063
declare -a CC_LIST
61-
CC_LIST=( $(nvcc $NVCC_OPTS | grep -Eoe "$NVCC_REGEX" | cut -d '_' -f2 | $GSORT -un | xargs) )
64+
IFS=" " read -r -a CC_LIST <<< "$(nvcc "$NVCC_OPTS" | grep -Eoe "$NVCC_REGEX" | cut -d '_' -f2 | $GSORT -un | xargs)"
6265
if [ ${#CC_LIST[*]} -eq 0 ]; then
6366
echo "Error: could not parse list of supported compute capabilities" >&2
6467
exit 3
@@ -75,11 +78,11 @@ for CC in "${CC_LIST[@]}"; do
7578
sed -i "/^NVCCFLAGS = .*\$/a NVCCFLAGS += --generate-code arch=compute_${CC},code=sm_${CC}" src/Makefile src/Makefile.win
7679
done
7780

78-
if [ $CUDA_VER -ge 110 ]; then
81+
if [ "$CUDA_VER" -ge 1100 ]; then
7982
echo 'Adding NVCCFLAGS to allow unsupported MSVC versions...'
8083
sed -i '/^NVCCFLAGS = .*/a NVCCFLAGS += -allow-unsupported-compiler -D_ALLOW_COMPILER_AND_STL_VERSION_MISMATCH' src/Makefile.win
8184
fi
82-
if [ $CUDA_VER -lt 120 ]; then
85+
if [ "$CUDA_VER" -lt 1200 ]; then
8386
echo "Adding libraries to LDFLAGS to support static build on older Ubuntu versions..."
8487
sed -i -E 's/^(LDFLAGS = .*? -lcudart_static) (.*)/\1 -ldl -lrt -lpthread \2/' src/Makefile
8588
fi
@@ -94,6 +97,7 @@ elif [[ -x "$(command -v powershell.exe)" ]]; then
9497
COMPILER_VER="${CC_VSPROD}, $(echo "$CC_VSINFO" | grep InstallationVersion | cut -d':' -f2 | xargs)"
9598
else
9699
COMPILER_VER="$(gcc --version | head -n1)"
100+
# shellcheck source=/dev/null
97101
source /etc/os-release
98102
OS_VER="${PRETTY_NAME}"
99103
OS_TYPE="linux64"
@@ -106,8 +110,8 @@ fi
106110

107111
NVCC_VER="$(nvcc --version | tail -n1 | sed -E 's/^Build //')"
108112

109-
# Version from src/params.h
110-
# Match semver: https://semver.org/#is-there-a-suggested-regular-expression-regex-to-check-a-semver-string
113+
# get mfaktc version from src/params.h
114+
# match SemVer and GIMPS version strings: https://regex101.com/r/m38d3i/2
111115
MFAKTC_VER="$(LC_ALL=en_US.utf8 grep -iPo '#define[\s\t]+MFAKTC_VERSION[\s\t]+"v?\d+(?:\.\d+(?:\.\d+)?(?:-\d+)?|\b)(?:-?(?:alpha|beta|pre)\.?(?:\d+)?\b)?' src/params.h | cut -d '"' -f 2)"
112116

113117
# Git-formatted version

Changelog.txt

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,20 @@
1+
version 0.23.4
2+
==============================
3+
- includes changes backported from mfaktc 0.24.0
4+
5+
bug fixes:
6+
- fixed typos and formatting in output
7+
- fixed some compilation warnings
8+
- the maximum number of threads per SM should no longer exceed the limit for
9+
certain architectures
10+
- CUDA versions are now correctly detected
11+
12+
build:
13+
- made the build helper script more robust
14+
15+
other changes:
16+
- replaced deprecated CUDA function calls
17+
118
version 0.23.3
219
==============================
320
bug fixes:

README.txt

Lines changed: 35 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -77,18 +77,15 @@ Some compile-time settings in the file src/params.h can be changed:
7777
- the last part contains defines which should *not* be changed unless you
7878
fully understand them. It is possible to easily screw something up.
7979

80-
Be aware that 32-bit applications are not supported in CUDA Toolkit 12.2 and
81-
later. You will need to use an older CUDA Toolkit to build mfaktc for 32 bits.
82-
See this thread for details:
80+
Be aware that mfaktc 0.24.0 and CUDA Toolkit 12.2 drop support for 32-bit
81+
builds. You will need to use the '0.23' branch and an older CUDA Toolkit to
82+
compile mfaktc for 32 bits. See this thread for details:
8383
https://forums.developer.nvidia.com/t/whats-the-last-version-of-the-cuda-toolkit-to-support-32-bit-applications/323106/4
8484

8585
In any case, a 64-bit build is preferred except on some old low-end GPUs.
8686
Testing on an Intel Core i7 CPU has shown that the performance-critical CPU
8787
code runs about 33% faster compared to 32 bits.
8888

89-
Important note: mfaktc will no longer officially support 32-bit builds in
90-
version 0.24 onwards.
91-
9289
#############
9390
# 2.1 Linux #
9491
#############
@@ -258,6 +255,9 @@ A note on extending assignments:
258255

259256
---
260257

258+
You must use mfaktc 0.24.0 or above starting in 2026, because CRC32 checksums
259+
will be used to validate results.
260+
261261
Once you have your assignments, create an empty file called worktodo.txt
262262
and copy all the "Factor=..." lines into that file. Start mfaktc, sit back
263263
and let it do its job. Running mfaktc is also a great way to stress test
@@ -276,17 +276,16 @@ Submitting results:
276276

277277
Step 1) log in to the GIMPS website with your username and password
278278
Step 2) on the menu bar, select Manual Testing > Results
279-
Step 3) upload the results.json.txt file produced by mfaktc. You may
280-
archive or delete the file after it has been processed.
279+
Step 3) upload the results.json.txt file produced by mfaktc. Do not submit
280+
the results.txt file as it is no longer accepted by the PrimeNet
281+
server. You may archive or delete the results.json.txt file after
282+
it has been processed.
281283

282284
To prevent abuse, admin approval is required for manual submissions. You
283285
can request approval by contacting George Woltman at woltman@alum.mit.edu
284286
or posting on the GIMPS forum:
285287
https://mersenneforum.org/forumdisplay.php?f=38
286288

287-
Important note: the results.txt file is deprecated and will no longer be
288-
accepted from 2025 onwards.
289-
290289
##################
291290
# 5 Known issues #
292291
##################
@@ -309,7 +308,7 @@ Submitting results:
309308
lower speed. Performance-wise, this is not recommended on GPUs which can
310309
handle more than 100 million candidates per second.
311310
- the debug options CHECKS_MODBASECASE and USE_DEVICE_PRINTF might report 'qi'
312-
values that are too high while using the Barrett kernels. They are caused by
311+
values that are too high while using the Barrett kernels; this is caused by
313312
factor candidates out of the specified range.
314313

315314

@@ -358,24 +357,23 @@ A: Yes. In most cases, this is required to make full use of a GPU when sieving
358357
You will need a separate directory for each mfaktc instance.
359358

360359
Q: Are checkpoint files compatible between different mfaktc versions?
361-
A: Save files are compatible between 32-bit and 64-bit executables. mfaktc can
362-
also load a checkpoint from either a Linux or Windows version on either OS.
363-
However, the executable and checkpoint file must have the same version
364-
number. Complete any active assignments before you upgrade.
360+
A: Save files are compatible between different platforms and architectures. For
361+
example, the 32-bit Windows version can read a checkpoint from 64-bit Linux
362+
and vice versa.
363+
364+
However, mfaktc 0.23.x and below can only load checkpoints with the same
365+
version number as the executable. Complete any active assignments before you
366+
upgrade.
365367

366368
Q: What do the version numbers mean?
367-
A: Stable releases are usually named 0.x where "x" is incremented for each
368-
release. Some versions include a patch such as a bug fix or other small
369-
change. You can see in the change log that mfaktc 0.13p1 is one such
370-
example. Such releases are intended for general use. Please note that patch
371-
releases after 0.16p1 use the major.minor.patch naming scheme.
369+
A: mfaktc 0.23.0 and above use the semantic versioning scheme. You can learn
370+
more about semantic versioning here: https://semver.org
372371

373372
You may come across pre-release versions that are not publicly available.
374-
Such versions usually *not* intended for productive usage; sometimes they
375-
have the computational code disabled or don't even compile. Please don't use
376-
them for production work as they have usually had minimal to zero QA and may
377-
contain critical issues.
378-
373+
Such versions are *not* intended for general use; sometimes they have the
374+
computational code disabled or don't even compile. Please don't use them for
375+
production work as they have usually had minimal to zero QA and may contain
376+
critical issues.
379377

380378
###########
381379
# 8 .plan #
@@ -387,23 +385,24 @@ A: Stable releases are usually named 0.x where "x" is incremented for each
387385
- CRC32 checksums to reduce invalid results
388386
- improved performance on Pascal devices
389387
- metadata in checkpoint file names
390-
- replace deprecated cudaThreadSynchronize() calls
388+
- replace deprecated cudaThreadSynchronize() with cudaDeviceSynchronize()
391389

392-
not planned for a specific release yet, no particular order!
393-
- performance improvements whenever I find them ;)
394-
- change compiletime options to runtime options (if feasible and useful)
390+
ongoing improvements
391+
- performance improvements whenever they are found ;-)
392+
- fix bugs as they are discovered
393+
- change compile-time options to runtime options, if applicable
395394
- documentation and comments in code
396-
- try to use double precision for the long integer divisions <-- unsure
397-
- json output for wagstaff numbers https://www.mersenneforum.org/showpost.php?p=662680&postcount=3769
395+
- try to use double precision for the long integer divisions
396+
- unsure, may or may not be useful
397+
398+
requested features; no particular order and not planned for a specific release
399+
- JSON output for Wagstaff numbers https://www.mersenneforum.org/showpost.php?p=662680&postcount=3769
398400
- factors-meta.<factor>.timestamp https://www.mersenneforum.org/showpost.php?p=662603&postcount=3750
399401
- factors-meta.<factor>.class https://www.mersenneforum.org/showpost.php?p=662720&postcount=3781
400402
- found factors support https://www.mersenneforum.org/showpost.php?p=662682&postcount=3770
401-
- os info https://www.mersenneforum.org/showpost.php?p=662648&postcount=3757
402-
- security checksum https://www.mersenneforum.org/showpost.php?p=662658&postcount=3761
403-
- detailed runtime logging https://www.mersenneforum.org/showpost.php?p=662953&postcount=3845
404403
- begink and endk logging https://www.mersenneforum.org/showpost.php?p=662953&postcount=3845
405404
- only log every n seconds https://www.mersenneforum.org/showpost.php?p=662795&postcount=3826
406405
- catch HUP https://www.mersenneforum.org/showpost.php?p=662777&postcount=3815
407406
- non-prime exponents https://www.mersenneforum.org/showpost.php?p=663442&postcount=3873
408407
- TF10G support https://www.mersenneforum.org/showpost.php?p=663442&postcount=3873
409-
- Remove CPU Sieving support https://www.mersenneforum.org/showpost.php?p=663517&postcount=3894
408+
- drop CPU sieving support https://www.mersenneforum.org/showpost.php?p=663517&postcount=3894

src/Makefile

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,14 @@ CFLAGS_EXTRA_SIEVE = -funroll-all-loops
1010

1111
# compiler settings for .cu files (GPU)
1212
NVCC = nvcc
13-
NVCCFLAGS = $(CUDA_INCLUDE) --ptxas-options=-v
14-
15-
# generate code for various compute capabilities
16-
# NVCCFLAGS += --generate-code arch=compute_11,code=sm_11 # CC 1.1, 1.2 and 1.3 GPUs will use this code (1.0 is not possible for mfaktc)
17-
# NVCCFLAGS += --generate-code arch=compute_20,code=sm_20 # CC 2.x GPUs will use this code, one code fits all!
18-
# NVCCFLAGS += --generate-code arch=compute_30,code=sm_30 # all CC 3.x GPUs _COULD_ use this code
19-
# NVCCFLAGS += --generate-code arch=compute_35,code=sm_35 # but CC 3.2 _CAN_ use funnel shift which is useful for mfaktc
20-
NVCCFLAGS += --generate-code arch=compute_50,code=sm_50 # CC 5.x GPUs will use this code
13+
NVCCFLAGS = $(CUDA_INCLUDE) --ptxas-options=-v -Wno-deprecated-gpu-targets
14+
15+
# generate code for supported compute capabilities
16+
# NVCCFLAGS += --generate-code arch=compute_11,code=sm_11 # mfaktc cannot use 1.0 but supports 1.1 and above
17+
# NVCCFLAGS += --generate-code arch=compute_20,code=sm_20 # Fermi GPUs
18+
# NVCCFLAGS += --generate-code arch=compute_30,code=sm_30 # early Kepler GPUs
19+
# NVCCFLAGS += --generate-code arch=compute_35,code=sm_35 # later Kepler GPUs; funnel shift added in 3.2
20+
NVCCFLAGS += --generate-code arch=compute_50,code=sm_50 # Maxwell GPUs
2121
NVCCFLAGS += --generate-code arch=compute_60,code=sm_60
2222
NVCCFLAGS += --generate-code arch=compute_61,code=sm_61
2323
NVCCFLAGS += --generate-code arch=compute_62,code=sm_62
@@ -31,7 +31,7 @@ NVCCFLAGS += --generate-code arch=compute_89,code=sm_89
3131
NVCCFLAGS += --generate-code arch=compute_90,code=sm_90
3232
NVCCFLAGS += --generate-code arch=compute_120,code=sm_120
3333

34-
# pass some options to the C host compiler (e.g. gcc on Linux)
34+
# pass some options to the C host compiler
3535
NVCCFLAGS += --compiler-options=-Wall
3636

3737
# Linker

src/Makefile.win

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
CC = cl
22
CFLAGS = /Ox /Oy /W2 /fp:fast /I"$(CUDA_PATH)\include" /I"$(CUDA_PATH)\include\cudart" /nologo
33

4-
NVCCFLAGS = -m64 --ptxas-options=-v
4+
NVCCFLAGS = -m64 --ptxas-options=-v -Wno-deprecated-gpu-targets
55
CUFLAGS = -DWIN64 -Xcompiler "/EHsc /W3 /nologo /Ox" $(NVCCFLAGS)
66

77
############################################################

src/Makefile.win32

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,15 @@ CFLAGS = /Ox /Oy /GL /W2 /fp:fast /I"$(CUDA_PATH)\include" /I"$(CUDA_PATH)\inclu
33

44
CC_PATH = $(shell where cl)
55

6-
NVCCFLAGS = -m32 --ptxas-options=-v
6+
NVCCFLAGS = -m32 --ptxas-options=-v -Wno-deprecated-gpu-targets
77
CUFLAGS = -ccbin="$(CC_PATH)" -Xcompiler "/EHsc /W3 /nologo /Ox /GL" $(NVCCFLAGS)
88

9-
# generate code for various compute capabilities
10-
NVCCFLAGS += --generate-code arch=compute_11,code=sm_11 # CC 1.1, 1.2 and 1.3 GPUs will use this code (1.0 is not possible for mfaktc)
11-
NVCCFLAGS += --generate-code arch=compute_20,code=sm_20 # CC 2.x GPUs will use this code, one code fits all!
12-
NVCCFLAGS += --generate-code arch=compute_30,code=sm_30 # all CC 3.x GPUs _COULD_ use this code
13-
NVCCFLAGS += --generate-code arch=compute_35,code=sm_35 # but CC 3.2 _CAN_ use funnel shift which is useful for mfaktc
14-
NVCCFLAGS += --generate-code arch=compute_50,code=sm_50 # CC 5.x GPUs will use this code
9+
# generate code for supported compute capabilities
10+
NVCCFLAGS += --generate-code arch=compute_11,code=sm_11 # mfaktc cannot use 1.0 but supports 1.1 and above
11+
NVCCFLAGS += --generate-code arch=compute_20,code=sm_20 # Fermi GPUs
12+
NVCCFLAGS += --generate-code arch=compute_30,code=sm_30 # early Kepler GPUs
13+
NVCCFLAGS += --generate-code arch=compute_35,code=sm_35 # later Kepler GPUs; funnel shift added in 3.2
14+
NVCCFLAGS += --generate-code arch=compute_50,code=sm_50 # Maxwell GPUs
1515

1616
LINK = link
1717
LFLAGS = /nologo /LTCG #/ltcg:pgo

src/compatibility.h

Lines changed: 39 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ mfaktc is distributed in the hope that it will be useful,
1111
but WITHOUT ANY WARRANTY; without even the implied warranty of
1212
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1313
GNU General Public License for more details.
14-
14+
1515
You should have received a copy of the GNU General Public License
1616
along with mfaktc. If not, see <http://www.gnu.org/licenses/>.
1717
*/
@@ -26,7 +26,7 @@ along with mfaktc. If not, see <http://www.gnu.org/licenses/>.
2626
#define PRId64 "lld"
2727
#define PRIu64 "llu"
2828
#define PRIx64 "llx"
29-
29+
3030
#define strncasecmp _strnicmp
3131
#else
3232
#define PRId64 "Ld"
@@ -38,7 +38,43 @@ along with mfaktc. If not, see <http://www.gnu.org/licenses/>.
3838
#ifdef _MSC_VER
3939
#define my_usleep(A) Sleep((A) / 1000)
4040
#define srandom(A) srand(A)
41-
#define random(A) rand(A)
41+
#define random() rand()
4242
#else
4343
#define my_usleep(A) usleep(A)
4444
#endif
45+
46+
/* snprintf for VS 2005-2013 https://stackoverflow.com/a/8712996 */
47+
#if defined(_MSC_VER) && _MSC_VER < 1900
48+
49+
#if !(defined(_INC_STDARG) || defined(_STDARG_H))
50+
#include <stdarg.h>
51+
#endif
52+
53+
#define snprintf c99_snprintf
54+
#define vsnprintf c99_vsnprintf
55+
56+
/*
 * vsnprintf replacement for the old MSVC runtimes selected by the enclosing
 * "_MSC_VER < 1900" guard.
 *
 * outBuf: destination buffer; only written when size != 0
 * size:   capacity of outBuf, passed through to _vsnprintf_s
 * format: printf-style format string
 * ap:     arguments for format
 *
 * Returns the value of _vsnprintf_s when that is not -1; otherwise returns
 * the length computed by _vscprintf for the same format/arguments.
 */
__inline int c99_vsnprintf(char* outBuf, size_t size, const char* format, va_list ap)
57+
{
58+
int count = -1;
59+
60+
/* with size == 0 nothing is written and count stays -1, so only the length is computed below */
if (size != 0)
61+
count = _vsnprintf_s(outBuf, size, _TRUNCATE, format, ap);
62+
/* -1 here means "not formatted" or, per the Microsoft CRT docs, truncation under _TRUNCATE */
/* NOTE(review): ap is consumed a second time without va_copy; presumably safe with MSVC's va_list - confirm */
if (count == -1)
63+
count = _vscprintf(format, ap);
64+
65+
return count;
66+
}
67+
68+
/*
 * snprintf replacement for the old MSVC runtimes selected by the enclosing
 * "_MSC_VER < 1900" guard; a variadic front end for c99_vsnprintf.
 *
 * outBuf: destination buffer
 * size:   capacity of outBuf
 * format: printf-style format string, followed by its arguments
 *
 * Returns whatever c99_vsnprintf returns for the same arguments.
 */
__inline int c99_snprintf(char* outBuf, size_t size, const char* format, ...)
69+
{
70+
int count;
71+
va_list ap;
72+
73+
/* collect the variadic arguments and forward them to the va_list variant */
va_start(ap, format);
74+
count = c99_vsnprintf(outBuf, size, format, ap);
75+
va_end(ap);
76+
77+
return count;
78+
}
79+
80+
#endif

0 commit comments

Comments
 (0)