primesearch
diff --git a/‎.github/workflows/build.yml‎
Lines changed: 3 additions & 0 deletions b/‎.github/workflows/build.yml‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎.github/workflows/scripts/build_helper.sh‎
Lines changed: 14 additions & 10 deletions b/‎.github/workflows/scripts/build_helper.sh‎
Lines changed: 14 additions & 10 deletions
diff --git a/‎Changelog.txt‎
Lines changed: 17 additions & 0 deletions b/‎Changelog.txt‎
Lines changed: 17 additions & 0 deletions
diff --git a/‎README.txt‎
Lines changed: 35 additions & 36 deletions b/‎README.txt‎
Lines changed: 35 additions & 36 deletions
diff --git a/‎src/Makefile‎
Lines changed: 9 additions & 9 deletions b/‎src/Makefile‎
Lines changed: 9 additions & 9 deletions
diff --git a/‎src/Makefile.win‎
Lines changed: 1 addition & 1 deletion b/‎src/Makefile.win‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/Makefile.win32‎
Lines changed: 7 additions & 7 deletions b/‎src/Makefile.win32‎
Lines changed: 7 additions & 7 deletions
diff --git a/‎src/compatibility.h‎
Lines changed: 39 additions & 3 deletions b/‎src/compatibility.h‎
Lines changed: 39 additions & 3 deletions
@@ -208,6 +208,7 @@ jobs:
           cuda: ${{ matrix.sys.cuda_version }}
           sub-packages: ${{ startsWith(matrix.sys.cuda_version, '8.') && '[]' || '[ "nvcc", "cudart" ]' }}
           use-local-cache: false
+          use-github-cache: false
 
       - name: Configure path to CUDA
         shell: powershell
@@ -307,6 +308,7 @@ jobs:
             printf '%s\n' ${{ env.base_name }}.zip | sort -Vr
             echo 'EOF'
           } > $GITHUB_OUTPUT
+          ( echo "${{ github.ref_name }}" | grep -qsP "v?\d+(?:\.\d+(?:\.\d+)?(?:-\d+)?|\b)(-(?:alpha|beta|pre))" && echo "PRERELEASE=true" || echo "PRERELEASE=false" ) >> $GITHUB_OUTPUT
 
       - name: Create and upload release package
         uses: softprops/action-gh-release@v2.2.1
@@ -315,6 +317,7 @@ jobs:
           files: |
             ${{ steps.makeinfo.outputs.RELEASE_FILES }}
           preserve_order: true
+          prerelease: ${{ steps.makeinfo.outputs.PRERELEASE }}
           generate_release_notes: true
           body_path: RELEASE_NOTES.txt
           make_latest: true
 
@@ -37,28 +37,31 @@ export GSORT='/usr/bin/sort'
 
 CUDA_VERSION_FULL="$(echo "$1" | head -n1 | grep -Eom1 -e '^[1-9]([0-9])?\.[0-9]{1,2}(\.[0-9]{1,3})?$')"
 declare -a CUDA_VERSION
-CUDA_VERSION=( $(echo "$CUDA_VERSION_FULL" | tr '.' ' ') )
+IFS=" " read -r -a CUDA_VERSION <<< "$(echo "$CUDA_VERSION_FULL" | tr '.' ' ')"
 if [[ -z "${CUDA_VERSION[*]}" ]]; then
-  echo "ERROR! Can't parse CUDA version $1" >&2
+  echo "Error: unexpected CUDA version $1" >&2
   exit 2
 fi
 
 CUDA_VER_MAJOR=${CUDA_VERSION[0]}
 CUDA_VER_MINOR=${CUDA_VERSION[1]}
-CUDA_VER="${CUDA_VER_MAJOR}${CUDA_VER_MINOR}"
 echo -e "CUDA_VER_MAJOR=${CUDA_VER_MAJOR}\nCUDA_VER_MINOR=${CUDA_VER_MINOR}" > "$0.out"
 
+# Format CUDA_VER as one integer: major, then zero-padded minor (11.0 -> 1100).
+# Used for simple comparison of CUDA versions internally in this script.
+printf -v CUDA_VER %d%02d "${CUDA_VER_MAJOR}" "${CUDA_VER_MINOR}";
+
 # CUDA supports the --list-gpu-arch flag from 11.0.0 onwards.
 # For older CUDA versions, use grep to parse the supported architectures from
 # the output of --help
-[ $CUDA_VER -gt 110 ] && NVCC_OPTS='--list-gpu-arch' || NVCC_OPTS='--help'
+[ "$CUDA_VER" -gt 1100 ] && NVCC_OPTS='--list-gpu-arch' || NVCC_OPTS='--help'
 NVCC_REGEX='compute_[1-9][0-9]{1,2}'
 # CUDA 11.0.x is a special case. Its --help output lists compute_32 and higher,
 # but only compute capability 3.5 and later are supported.
-[ $CUDA_VER -eq 110 ] && NVCC_REGEX='compute_(3[5-9]|[4-9][0-9])'
+[ "$CUDA_VER" -eq 1100 ] && NVCC_REGEX='compute_(3[5-9]|[4-9][0-9])'
 
 declare -a CC_LIST
-CC_LIST=( $(nvcc $NVCC_OPTS | grep -Eoe "$NVCC_REGEX" | cut -d '_' -f2 | $GSORT -un | xargs) )
+IFS=" " read -r -a CC_LIST <<< "$(nvcc "$NVCC_OPTS" | grep -Eoe "$NVCC_REGEX" | cut -d '_' -f2 | $GSORT -un | xargs)"
 if [ ${#CC_LIST[*]} -eq 0 ]; then
   echo "Error: could not parse list of supported compute capabilities" >&2
   exit 3
@@ -75,11 +78,11 @@ for CC in "${CC_LIST[@]}"; do
   sed -i "/^NVCCFLAGS = .*\$/a NVCCFLAGS += --generate-code arch=compute_${CC},code=sm_${CC}" src/Makefile src/Makefile.win
 done
 
-if [ $CUDA_VER -ge 110 ]; then
+if [ "$CUDA_VER" -ge 1100 ]; then
   echo 'Adding NVCCFLAGS to allow unsupported MSVC versions...'
   sed -i '/^NVCCFLAGS = .*/a NVCCFLAGS += -allow-unsupported-compiler -D_ALLOW_COMPILER_AND_STL_VERSION_MISMATCH' src/Makefile.win
 fi
-if [ $CUDA_VER -lt 120 ]; then
+if [ "$CUDA_VER" -lt 1200 ]; then
   echo "Adding libraries to LDFLAGS to support static build on older Ubuntu versions..."
   sed -i -E 's/^(LDFLAGS = .*? -lcudart_static) (.*)/\1 -ldl -lrt -lpthread \2/' src/Makefile
 fi
@@ -94,6 +97,7 @@ elif [[ -x "$(command -v powershell.exe)" ]]; then
   COMPILER_VER="${CC_VSPROD}, $(echo "$CC_VSINFO" | grep InstallationVersion | cut -d':' -f2 | xargs)"
 else
   COMPILER_VER="$(gcc --version | head -n1)"
+  # shellcheck source=/dev/null
   source /etc/os-release
   OS_VER="${PRETTY_NAME}"
   OS_TYPE="linux64"
@@ -106,8 +110,8 @@ fi
 
 NVCC_VER="$(nvcc --version | tail -n1 | sed -E 's/^Build //')"
 
-# Version from src/params.h
-# Match semver: https://semver.org/#is-there-a-suggested-regular-expression-regex-to-check-a-semver-string
+# get mfaktc version from src/params.h
+# match SemVer and GIMPS version strings: https://regex101.com/r/m38d3i/2
 MFAKTC_VER="$(LC_ALL=en_US.utf8 grep -iPo '#define[\s\t]+MFAKTC_VERSION[\s\t]+"v?\d+(?:\.\d+(?:\.\d+)?(?:-\d+)?|\b)(?:-?(?:alpha|beta|pre)\.?(?:\d+)?\b)?' src/params.h | cut -d '"' -f 2)"
 
 # Git-formatted version
 
@@ -1,3 +1,20 @@
+version 0.23.4
+==============================
+- includes changes backported from mfaktc 0.24.0
+
+bug fixes:
+- fixed typos and formatting in output
+- fixed some compilation warnings
+- the maximum number of threads per SM should no longer exceed the limit for
+  certain architectures
+- CUDA versions are now correctly detected
+
+build:
+- made the build helper script more robust
+
+other changes:
+- replaced deprecated CUDA function calls
+
 version 0.23.3
 ==============================
 bug fixes:
 
@@ -77,18 +77,15 @@ Some compile-time settings in the file src/params.h can be changed:
 - the last part contains defines which should *not* be changed unless you
   fully understand them. It is possible to easily screw something up.
 
-Be aware that 32-bit applications are not supported in CUDA Toolkit 12.2 and
-later. You will need to use an older CUDA Toolkit to build mfaktc for 32 bits.
-See this thread for details:
+Be aware that mfaktc 0.24.0 and CUDA Toolkit 12.2 onwards no longer support
+32-bit builds. You will need to use the '0.23' branch and an older CUDA Toolkit
+to compile mfaktc for 32 bits. See this thread for details:
 https://forums.developer.nvidia.com/t/whats-the-last-version-of-the-cuda-toolkit-to-support-32-bit-applications/323106/4
 
 In any case, a 64-bit build is preferred except on some old low-end GPUs.
 Testing on an Intel Core i7 CPU has shown that the performance-critical CPU
 code runs about 33% faster compared to 32 bits.
 
-Important note: mfaktc will no longer officially support 32-bit builds in
-version 0.24 onwards.
-
 #############
 # 2.1 Linux #
 #############
@@ -258,6 +255,9 @@ A note on extending assignments:
 
 ---
 
+    Starting in 2026, you must use mfaktc 0.24.0 or above because CRC32
+    checksums will be used to validate results.
+
     Once you have your assignments, create an empty file called worktodo.txt
     and copy all the "Factor=..." lines into that file. Start mfaktc, sit back
     and let it do its job. Running mfaktc is also a great way to stress test
@@ -276,17 +276,16 @@ Submitting results:
 
     Step 1) log in to the GIMPS website with your username and password
     Step 2) on the menu bar, select Manual Testing > Results
-    Step 3) upload the results.json.txt file produced by mfaktc. You may
-            archive or delete the file after it has been processed.
+    Step 3) upload the results.json.txt file produced by mfaktc. Do not submit
+            the results.txt file as it is no longer accepted by the PrimeNet
+            server. You may archive or delete the results.json.txt file after
+            it has been processed.
 
     To prevent abuse, admin approval is required for manual submissions. You
     can request approval by contacting George Woltman at woltman@alum.mit.edu
     or posting on the GIMPS forum:
     https://mersenneforum.org/forumdisplay.php?f=38
 
-    Important note: the results.txt file is deprecated and will no longer be
-    accepted from 2025 onwards.
-
 ##################
 # 5 Known issues #
 ##################
@@ -309,7 +308,7 @@ Submitting results:
   lower speed. Performance-wise, this is not recommended on GPUs which can
   handle more than 100 million candidates per second.
 - the debug options CHECKS_MODBASECASE and USE_DEVICE_PRINTF might report 'qi'
-  values that are too high while using the Barrett kernels. They are caused by
+  values that are too high while using the Barrett kernels; this is caused by
   factor candidates out of the specified range.
 
 
@@ -358,24 +357,23 @@ A: Yes. In most cases, this is required to make full use of a GPU when sieving
    You will need a separate directory for each mfaktc instance.
 
 Q: Are checkpoint files compatible between different mfaktc versions?
-A: Save files are compatible between 32-bit and 64-bit executables. mfaktc can
-   also load a checkpoint from either a Linux or Windows version on either OS.
-   However, the executable and checkpoint file must have the same version
-   number. Complete any active assignments before you upgrade.
+A: Save files are compatible between different platforms and architectures. For
+   example, the 32-bit Windows version can read a checkpoint from 64-bit Linux
+   and vice versa.
+
+   However, mfaktc 0.23.x and below can only load checkpoints with the same
+   version number as the executable. Complete any active assignments before you
+   upgrade.
 
 Q: What do the version numbers mean?
-A: Stable releases are usually named 0.x where "x" is incremented for each
-   release. Some versions include a patch such as a bug fix or other small
-   change. You can see in the change log that mfaktc 0.13p1 is one such
-   example. Such releases are intended for general use. Please note that patch
-   releases after 0.16p1 use the major.minor.patch naming scheme.
+A: mfaktc 0.23.0 and above use the semantic versioning scheme. You can learn
+   more about semantic versioning here: https://semver.org
 
    You may come across pre-release versions that are not publicly available.
-   Such versions usually *not* intended for productive usage; sometimes they
-   have the computational code disabled or don't even compile. Please don't use
-   them for production work as they have usually had minimal to zero QA and may
-   contain critical issues.
-
+   Such versions are *not* intended for general use; sometimes they have the
+   computational code disabled or don't even compile. Please don't use them for
+   production work as they have usually had minimal to zero QA and may contain
+   critical issues.
 
 ###########
 # 8 .plan #
@@ -387,23 +385,24 @@ A: Stable releases are usually named 0.x where "x" is incremented for each
   - CRC32 checksums to reduce invalid results
   - improved performance on Pascal devices
   - metadata in checkpoint file names
-  - replace deprecated cudaThreadSynchronize() calls
+  - replace deprecated cudaThreadSynchronize() with cudaDeviceSynchronize()
 
-not planned for a specific release yet, no particular order!
-- performance improvements whenever I find them ;)
-- change compiletime options to runtime options (if feasible and useful)
+ongoing improvements
+- performance improvements whenever they are found ;-)
+- fix bugs as they are discovered
+- change compile-time options to runtime options, if applicable
 - documentation and comments in code
-- try to use double precision for the long integer divisions                  <-- unsure
-- json output for wagstaff numbers https://www.mersenneforum.org/showpost.php?p=662680&postcount=3769
+- try to use double precision for the long integer divisions
+  - unsure, may or may not be useful
+
+requested features; no particular order and not planned for a specific release
+- JSON output for Wagstaff numbers https://www.mersenneforum.org/showpost.php?p=662680&postcount=3769
 - factors-meta.<factor>.timestamp https://www.mersenneforum.org/showpost.php?p=662603&postcount=3750
 - factors-meta.<factor>.class https://www.mersenneforum.org/showpost.php?p=662720&postcount=3781
 - found factors support https://www.mersenneforum.org/showpost.php?p=662682&postcount=3770
-- os info https://www.mersenneforum.org/showpost.php?p=662648&postcount=3757
-- security checksum https://www.mersenneforum.org/showpost.php?p=662658&postcount=3761
-- detailed runtime logging https://www.mersenneforum.org/showpost.php?p=662953&postcount=3845
 - begink and endk logging https://www.mersenneforum.org/showpost.php?p=662953&postcount=3845
 - only log every n seconds https://www.mersenneforum.org/showpost.php?p=662795&postcount=3826
 - catch HUP https://www.mersenneforum.org/showpost.php?p=662777&postcount=3815
 - non-prime exponents https://www.mersenneforum.org/showpost.php?p=663442&postcount=3873
 - TF10G support https://www.mersenneforum.org/showpost.php?p=663442&postcount=3873
-- Remove CPU Sieving support https://www.mersenneforum.org/showpost.php?p=663517&postcount=3894
+- drop CPU sieving support https://www.mersenneforum.org/showpost.php?p=663517&postcount=3894
@@ -10,14 +10,14 @@ CFLAGS_EXTRA_SIEVE = -funroll-all-loops
 
 # compiler settings for .cu files (GPU)
 NVCC = nvcc
-NVCCFLAGS = $(CUDA_INCLUDE) --ptxas-options=-v
-
-# generate code for various compute capabilities
-# NVCCFLAGS += --generate-code arch=compute_11,code=sm_11 # CC 1.1, 1.2 and 1.3 GPUs will use this code (1.0 is not possible for mfaktc)
-# NVCCFLAGS += --generate-code arch=compute_20,code=sm_20 # CC 2.x GPUs will use this code, one code fits all!
-# NVCCFLAGS += --generate-code arch=compute_30,code=sm_30 # all CC 3.x GPUs _COULD_ use this code
-# NVCCFLAGS += --generate-code arch=compute_35,code=sm_35 # but CC 3.2 _CAN_ use funnel shift which is useful for mfaktc
-NVCCFLAGS += --generate-code arch=compute_50,code=sm_50 # CC 5.x GPUs will use this code
+NVCCFLAGS = $(CUDA_INCLUDE) --ptxas-options=-v -Wno-deprecated-gpu-targets
+
+# generate code for supported compute capabilities
+# NVCCFLAGS += --generate-code arch=compute_11,code=sm_11 # mfaktc cannot use 1.0 but supports 1.1 and above
+# NVCCFLAGS += --generate-code arch=compute_20,code=sm_20 # Fermi GPUs
+# NVCCFLAGS += --generate-code arch=compute_30,code=sm_30 # early Kepler GPUs
+# NVCCFLAGS += --generate-code arch=compute_35,code=sm_35 # later Kepler GPUs; funnel shift added in 3.2
+NVCCFLAGS += --generate-code arch=compute_50,code=sm_50 # Maxwell GPUs
 NVCCFLAGS += --generate-code arch=compute_60,code=sm_60
 NVCCFLAGS += --generate-code arch=compute_61,code=sm_61
 NVCCFLAGS += --generate-code arch=compute_62,code=sm_62
@@ -31,7 +31,7 @@ NVCCFLAGS += --generate-code arch=compute_89,code=sm_89
 NVCCFLAGS += --generate-code arch=compute_90,code=sm_90
 NVCCFLAGS += --generate-code arch=compute_120,code=sm_120
 
-# pass some options to the C host compiler (e.g. gcc on Linux)
+# pass some options to the C host compiler
 NVCCFLAGS += --compiler-options=-Wall
 
 # Linker
 
@@ -1,7 +1,7 @@
 CC = cl
 CFLAGS = /Ox /Oy /W2 /fp:fast /I"$(CUDA_PATH)\include" /I"$(CUDA_PATH)\include\cudart" /nologo
 
-NVCCFLAGS = -m64 --ptxas-options=-v
+NVCCFLAGS = -m64 --ptxas-options=-v -Wno-deprecated-gpu-targets
 CUFLAGS = -DWIN64 -Xcompiler "/EHsc /W3 /nologo /Ox" $(NVCCFLAGS)
 
 ############################################################
 
@@ -3,15 +3,15 @@ CFLAGS = /Ox /Oy /GL /W2 /fp:fast /I"$(CUDA_PATH)\include" /I"$(CUDA_PATH)\inclu
 
 CC_PATH = $(shell where cl)
 
-NVCCFLAGS = -m32 --ptxas-options=-v
+NVCCFLAGS = -m32 --ptxas-options=-v -Wno-deprecated-gpu-targets
 CUFLAGS = -ccbin="$(CC_PATH)" -Xcompiler "/EHsc /W3 /nologo /Ox /GL" $(NVCCFLAGS)
 
-# generate code for various compute capabilities
-NVCCFLAGS += --generate-code arch=compute_11,code=sm_11 # CC 1.1, 1.2 and 1.3 GPUs will use this code (1.0 is not possible for mfaktc)
-NVCCFLAGS += --generate-code arch=compute_20,code=sm_20 # CC 2.x GPUs will use this code, one code fits all!
-NVCCFLAGS += --generate-code arch=compute_30,code=sm_30 # all CC 3.x GPUs _COULD_ use this code
-NVCCFLAGS += --generate-code arch=compute_35,code=sm_35 # but CC 3.2 _CAN_ use funnel shift which is useful for mfaktc
-NVCCFLAGS += --generate-code arch=compute_50,code=sm_50 # CC 5.x GPUs will use this code
+# generate code for supported compute capabilities
+NVCCFLAGS += --generate-code arch=compute_11,code=sm_11 # mfaktc cannot use 1.0 but supports 1.1 and above
+NVCCFLAGS += --generate-code arch=compute_20,code=sm_20 # Fermi GPUs
+NVCCFLAGS += --generate-code arch=compute_30,code=sm_30 # early Kepler GPUs
+NVCCFLAGS += --generate-code arch=compute_35,code=sm_35 # later Kepler GPUs; funnel shift added in 3.2
+NVCCFLAGS += --generate-code arch=compute_50,code=sm_50 # Maxwell GPUs
 
 LINK = link
 LFLAGS = /nologo /LTCG #/ltcg:pgo
 
@@ -11,7 +11,7 @@ mfaktc is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.
-                                
+
 You should have received a copy of the GNU General Public License
 along with mfaktc.  If not, see <http://www.gnu.org/licenses/>.
 */
@@ -26,7 +26,7 @@ along with mfaktc.  If not, see <http://www.gnu.org/licenses/>.
   #define PRId64 "lld"
   #define PRIu64 "llu"
   #define PRIx64 "llx"
-  
+
   #define strncasecmp _strnicmp
 #else
   #define PRId64 "Ld"
@@ -38,7 +38,43 @@ along with mfaktc.  If not, see <http://www.gnu.org/licenses/>.
 #ifdef _MSC_VER
   #define my_usleep(A) Sleep((A) / 1000)
   #define srandom(A) srand(A)
-  #define random(A) rand(A)
+  #define random() rand()
 #else
   #define my_usleep(A) usleep(A)
 #endif
+
+/* snprintf for VS 2005-2013 https://stackoverflow.com/a/8712996 */
+#if defined(_MSC_VER) && _MSC_VER < 1900
+
+#if !(defined(_INC_STDARG) || defined(_STDARG_H))
+#include <stdarg.h>
+#endif
+
+#define snprintf c99_snprintf
+#define vsnprintf c99_vsnprintf
+
+__inline int c99_vsnprintf(char* outBuf, size_t size, const char* format, va_list ap)
+{   /* vsnprintf stand-in, compiled only when _MSC_VER < 1900. Formats into
+     * outBuf (capacity: size) and returns the count reported by
+     * _vsnprintf_s or, when that is -1 or was skipped, by _vscprintf.
+     * NOTE(review): ap is handed to two formatting calls without va_copy --
+     * presumably safe on these MSVC versions; confirm.
+     */
+    int count = -1; /* stays -1 when size is 0, so no write is attempted */
+
+    if (size != 0)
+        count = _vsnprintf_s(outBuf, size, _TRUNCATE, format, ap); /* presumably -1 on truncation; see CRT docs */
+    if (count == -1)
+        count = _vscprintf(format, ap); /* takes no buffer; presumably yields the full formatted length */
+
+    return count;
+}
+
+__inline int c99_snprintf(char* outBuf, size_t size, const char* format, ...)
+{   /* Variadic front end: collects the trailing arguments into a va_list,
+     * delegates to c99_vsnprintf and returns its result unchanged.
+     */
+    int count;
+    va_list ap;
+
+    va_start(ap, format);
+    count = c99_vsnprintf(outBuf, size, format, ap);
+    va_end(ap); /* pairs with the va_start above */
+
+    return count;
+}
+
+#endif