Skip to content

Commit 1530f71

Browse files
authored
Merge pull request #50 from ixfd64/0.23.5-backport
Backport changes to 0.23 branch and bump to version 0.23.5
2 parents 45bb438 + 887aa93 commit 1530f71

File tree

8 files changed

+251
-138
lines changed

8 files changed

+251
-138
lines changed

.github/workflows/build.yml

Lines changed: 87 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,13 @@ on:
2323
push:
2424
paths-ignore:
2525
- '**/*.txt'
26+
- '**/*.ini'
2627
- 'COPYING'
2728
- '.gitignore'
2829
pull_request:
2930
paths-ignore:
3031
- '**/*.txt'
32+
- '**/*.ini'
3133
- 'COPYING'
3234
- '.gitignore'
3335
types:
@@ -51,6 +53,7 @@ jobs:
5153
sys:
5254
# Specified version combination must exist as CUDA container image from Nvidia: nvcr.io/nvidia/cuda:${{ matrix.sys.cuda_version }}-devel-${{ matrix.sys.ct_os }}
5355
# Available versions can be found here: https://catalog.ngc.nvidia.com/orgs/nvidia/containers/cuda/tags (note that only Ubuntu distros are supported by this action)
56+
- { cuda_version: '12.9.0', ct_os: 'ubuntu24.04' }
5457
- { cuda_version: '12.8.1', ct_os: 'ubuntu24.04' }
5558
# - { cuda_version: '12.8.0', ct_os: 'ubuntu24.04' }
5659
- { cuda_version: '12.6.3', ct_os: 'ubuntu22.04' }
@@ -101,7 +104,7 @@ jobs:
101104
- name: Start Docker container
102105
run: |
103106
docker pull $CONTAINER
104-
docker run --name build-container -d -v ${{ github.workspace }}:/workspace $CONTAINER tail -f /dev/null
107+
docker run --name build-container -d -v ${GITHUB_WORKSPACE}:/workspace $CONTAINER tail -f /dev/null
105108
106109
- name: Update GPG keys for CUDA repo on Ubuntu 16.04
107110
if: matrix.sys.ct_os == 'ubuntu16.04'
@@ -146,8 +149,8 @@ jobs:
146149
env:
147150
SCRIPT: |
148151
cd /workspace
149-
zip -9 -j ${{ steps.prepare.outputs.BASE_NAME }}.zip *
150-
echo "[${{ steps.prepare.outputs.BASE_NAME }}.zip](https://github.com/${{ github.repository }}/releases/download/${{ github.ref_name }}/${{ steps.prepare.outputs.BASE_NAME }}.zip) | \
152+
zip -9 -j ${{ steps.prepare.outputs.BASE_NAME }}.zip Changelog.txt COPYING mfaktc mfaktc.ini README.txt
153+
echo "[${{ steps.prepare.outputs.BASE_NAME }}.zip](https://github.com/${GITHUB_REPOSITORY}/releases/download/${GITHUB_REF_NAME}/${{ steps.prepare.outputs.BASE_NAME }}.zip) | \
151154
${{ matrix.sys.cuda_version }} | ${{ steps.prepare.outputs.CC_MIN }}-${{ steps.prepare.outputs.CC_MAX }} | ${{ steps.prepare.outputs.OS_VER }} | \
152155
${{ steps.prepare.outputs.COMPILER_VER }} | ${{ steps.prepare.outputs.NVCC_VER }}" > ${{ steps.prepare.outputs.BASE_NAME }}.txt
153156
run: docker exec build-container bash -c "$SCRIPT"
@@ -161,37 +164,46 @@ jobs:
161164

162165
# Begin job "build-win"
163166
build-win:
164-
runs-on: ${{ matrix.sys.os }}
167+
# windows-2022 also works and produces nearly identical binaries, so let's
168+
# target the image that will be supported in the longer term. windows-2025
169+
# is in beta as of June 2025, but testing has shown it is reliable.
170+
runs-on: windows-2025
165171

166172
strategy:
167173
# If set to true, all jobs within the same matrix (such as Linux or
168174
# Windows builds) will be aborted at the same time if any one job fails.
169175
fail-fast: false
170176

171177
matrix:
172-
# Available versions can be viewed at the Jimver/cuda-toolkit action sources:
173-
# https://github.com/Jimver/cuda-toolkit/blob/v0.2.21/src/links/windows-links.ts
178+
# Available CUDA versions can be viewed at the Jimver/cuda-toolkit action sources:
179+
# https://github.com/Jimver/cuda-toolkit/blob/v0.2.24/src/links/windows-links.ts
174180
sys:
175-
- { cuda_version: '12.8.1', os: 'windows-2022' }
176-
- { cuda_version: '12.6.3', os: 'windows-2022' }
177-
- { cuda_version: '12.5.1', os: 'windows-2022' }
178-
- { cuda_version: '12.4.1', os: 'windows-2022' }
179-
- { cuda_version: '12.3.2', os: 'windows-2022' }
180-
- { cuda_version: '12.2.2', os: 'windows-2022' }
181-
- { cuda_version: '12.1.1', os: 'windows-2022' }
182-
- { cuda_version: '12.0.1', os: 'windows-2022' }
183-
- { cuda_version: '11.8.0', os: 'windows-2022' }
184-
- { cuda_version: '11.7.1', os: 'windows-2022' }
185-
- { cuda_version: '11.6.2', os: 'windows-2022' }
186-
- { cuda_version: '11.5.2', os: 'windows-2022' }
187-
- { cuda_version: '11.4.4', os: 'windows-2022' }
188-
- { cuda_version: '11.3.1', os: 'windows-2022' }
189-
- { cuda_version: '11.2.2', os: 'windows-2019' }
190-
- { cuda_version: '11.1.1', os: 'windows-2019' }
191-
- { cuda_version: '11.0.1', os: 'windows-2019' }
192-
- { cuda_version: '10.0.130', os: 'windows-2019' }
193-
- { cuda_version: '9.2.148', os: 'windows-2019' }
194-
- { cuda_version: '8.0.61', os: 'windows-2019' }
181+
- { cuda_version: '12.9.0' }
182+
- { cuda_version: '12.8.1' }
183+
- { cuda_version: '12.6.3' }
184+
- { cuda_version: '12.5.1' }
185+
- { cuda_version: '12.4.1' }
186+
- { cuda_version: '12.3.2' }
187+
- { cuda_version: '12.2.2' }
188+
- { cuda_version: '12.1.1' }
189+
- { cuda_version: '12.0.1' }
190+
- { cuda_version: '11.8.0' }
191+
- { cuda_version: '11.7.1' }
192+
- { cuda_version: '11.6.2' }
193+
- { cuda_version: '11.5.2' }
194+
- { cuda_version: '11.4.4' }
195+
- { cuda_version: '11.3.1' }
196+
- { cuda_version: '11.2.2' }
197+
- { cuda_version: '11.1.1' }
198+
- { cuda_version: '11.0.1' }
199+
- { cuda_version: '10.0.130' }
200+
- { cuda_version: '9.2.148' }
201+
- { cuda_version: '8.0.61' }
202+
203+
env:
204+
MSVC_PKG_VC140: Microsoft.VisualStudio.Component.VC.140
205+
MSVC_PKG_VC141: Microsoft.VisualStudio.Component.VC.v141.x86.x64
206+
MSVC_SETUP_CMD: '"C:\Program Files (x86)\Microsoft Visual Studio\Installer\setup.exe" modify --passive --norestart --installPath "C:\Program Files\Microsoft Visual Studio\2022\Enterprise"'
195207

196208
steps:
197209

@@ -203,7 +215,7 @@ jobs:
203215

204216
- name: Install CUDA Toolkit
205217
id: cuda-toolkit
206-
uses: Jimver/cuda-toolkit@v0.2.22
218+
uses: Jimver/cuda-toolkit@v0.2.24
207219
with:
208220
cuda: ${{ matrix.sys.cuda_version }}
209221
sub-packages: ${{ startsWith(matrix.sys.cuda_version, '8.') && '[]' || '[ "nvcc", "cudart" ]' }}
@@ -222,41 +234,52 @@ jobs:
222234
bash .github/workflows/scripts/build_helper.sh ${{ matrix.sys.cuda_version }}
223235
cat .github/workflows/scripts/build_helper.sh.out >> $GITHUB_OUTPUT
224236
225-
- name: Build from sources with MSVC 2022 using PowerShell
226-
if: ${{ matrix.sys.os == 'windows-2022' }}
227-
shell: powershell
228-
# MSVC 2022 on the Windows 2022 Server runner has a PowerShell script to
229-
# launch a development shell.
230-
run: |
231-
& 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise\Common7\Tools\Launch-VsDevShell.ps1' -Arch amd64 -HostArch amd64
232-
cd "${{ github.workspace }}\src"
233-
Copy-Item mfaktc.ini ..
234-
make SHELL="powershell.exe" -f Makefile.win
235-
236-
- name: Build from sources with MSVC 2019 using cmd.exe
237-
if: ${{ matrix.sys.os == 'windows-2019' }}
237+
- name: Build from sources with MSVC
238238
shell: cmd
239-
# MSVC 2019 has a similar script on the Windows 2019 Server runner, but
240-
# that only supports a 32-bit (x86) environment and doesn't allow
241-
# setting the architecture.
242-
# So we have to run a batch file to configure a 64-bit environment and
243-
# then launch PowerShell from make afterwards. PowerShell is much better
244-
# at handling long paths and quotes when invoked from make.
245239
run: |
246-
"C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars64.bat" x64 ${{ env.VCVARS_VER }} & cd src & copy mfaktc.ini .. & make SHELL="powershell.exe" -f Makefile.win
240+
${{ env.MSVC_ADD_PKG_CMD }}
241+
"${{ env.VCVARS_PATH }}" x64 ${{ env.VCVARS_VER }} & cd src & copy mfaktc.ini .. & make SHELL="powershell.exe" -j%NUMBER_OF_PROCESSORS% -f Makefile.win & cl /? 2>&1 | findstr /C:"Version" > clversion.log & echo Build finished
247242
env:
248-
# -vcvars_ver=14.0 enables the MSVC 14.0 (2015) build environment -
249-
# this is an MSVC 2019 component and not a complete MSVC instance.
250-
VCVARS_VER: ${{ steps.prepare.outputs.CUDA_VER_MAJOR <= 10 && '-vcvars_ver=14.0' || '' }}
243+
VCVARS_PATH: 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat'
244+
# vcvars_ver=14.x enables Build Tools for previous MSVC versions:
245+
# - 14.0 is for Visual Studio 2015
246+
# - 14.16 is for Visual Studio 2017 version 15.9.x
247+
# - 14.29 is for Visual Studio 2019 version 16.11.x
248+
VCVARS_VER: >-
249+
${{ (
250+
steps.prepare.outputs.CUDA_VER_MAJOR <= 9 && '-vcvars_ver=14.0'
251+
|| steps.prepare.outputs.CUDA_VER_MAJOR == 10 && '-vcvars_ver=14.16'
252+
|| steps.prepare.outputs.CUDA_VER_MAJOR == 11 && steps.prepare.outputs.CUDA_VER_MINOR <= 2 && '-vcvars_ver=14.29'
253+
|| ''
254+
) }}
255+
# Build Tools for Visual Studio 2019 (named VC142 internally) are
256+
# included with windows-2022 and later. However, the Build Tools for
257+
# older MSVC versions need to be installed by running '${MSVC_SETUP_CMD} --add <package name>'
258+
#
259+
# - ${MSVC_PKG_VC140} is required for 14.0 / MSVC 2015
260+
# - ${MSVC_PKG_VC141} is required for 14.16 / MSVC 2017 version 15.9.x
261+
MSVC_ADD_PKG_CMD: >-
262+
${{ (
263+
steps.prepare.outputs.CUDA_VER_MAJOR <= 9 && format('{0} --add {1}', env.MSVC_SETUP_CMD, env.MSVC_PKG_VC140)
264+
|| steps.prepare.outputs.CUDA_VER_MAJOR == 10 && format('{0} --add {1}', env.MSVC_SETUP_CMD, env.MSVC_PKG_VC141)
265+
|| 'echo Build Tools already exist - installation not needed'
266+
) }}
251267
252268
- name: Prepare build archive with description
253269
shell: bash
270+
# We can't get the actual compiler (cl.exe) version with the helper
271+
# script because cl might not be installed during the 'Prepare sources
272+
# and gather info' step. COMPILER_VER is set to the MSVC rather than
273+
# the cl version number, so we added a hack to log the cl version
274+
# during the build step. Here, the cl version is extracted and added to
275+
# the table along with the MSVC version.
254276
run: |
255-
choco install -y --no-progress zip
256-
zip -9 -j "${{ steps.prepare.outputs.BASE_NAME }}.zip" *
257-
echo "[${{ steps.prepare.outputs.BASE_NAME }}.zip](https://github.com/${{ github.repository }}/releases/download/${{ github.ref_name }}/${{ steps.prepare.outputs.BASE_NAME }}.zip) | \
277+
[ -f src/clversion.log ] && CL_VER="$(grep -Eoe 'Version [\.0-9]+ ' src/clversion.log | cut -d' ' -f2)"
278+
[ -z "$CL_VER" ] && CL_VER='Unknown'
279+
tar -a -c -f "${{ steps.prepare.outputs.BASE_NAME }}.zip" Changelog.txt COPYING mfaktc-win-64.exe mfaktc.ini README.txt
280+
echo "[${{ steps.prepare.outputs.BASE_NAME }}.zip](https://github.com/${GITHUB_REPOSITORY}/releases/download/${GITHUB_REF_NAME}/${{ steps.prepare.outputs.BASE_NAME }}.zip) | \
258281
${{ matrix.sys.cuda_version }} | ${{ steps.prepare.outputs.CC_MIN }}-${{ steps.prepare.outputs.CC_MAX }} | ${{ steps.prepare.outputs.OS_VER }} | \
259-
${{ steps.prepare.outputs.COMPILER_VER }} | ${{ steps.prepare.outputs.NVCC_VER }}" > ${{ steps.prepare.outputs.BASE_NAME }}.txt
282+
${CL_VER} (${{ steps.prepare.outputs.COMPILER_VER }}) | ${{ steps.prepare.outputs.NVCC_VER }}" > ${{ steps.prepare.outputs.BASE_NAME }}.txt
260283
261284
- name: Upload build artifacts
262285
uses: actions/upload-artifact@v4
@@ -267,10 +290,8 @@ jobs:
267290

268291
# Begin job "upload-release"
269292
upload-release:
270-
# This job expects the Git tag name to begin with the version specified by
271-
# MFAKTC_VERSION in params.h
272-
# Otherwise, the job will fail because there is a version conflict between Git
273-
# and params.h that must be resolved.
293+
# Git tag name must begin with the version specified by MFAKTC_VERSION in
294+
# params.h or else the job will fail
274295
if: github.ref_type == 'tag' && startsWith(github.ref, 'refs/tags/')
275296
needs: [ build-linux, build-win ]
276297
runs-on: ubuntu-latest
@@ -289,26 +310,25 @@ jobs:
289310
- name: Prepare asset list and release notes
290311
id: makeinfo
291312
run: |
292-
if ! compgen -G "${{ env.base_name }}.txt" > /dev/null 2>&1; then
293-
echo "::error ::Could not find release notes with mask ${{ env.base_name }}.txt"
313+
if ! compgen -G "${base_name}.txt" > /dev/null 2>&1; then
314+
echo "::error ::Could not find release notes with mask ${base_name}.txt"
294315
echo "::error ::Ensure the Git tag name begins with the version specified by MFAKTC_VERSION in src/params.h"
295316
exit 1
296317
fi
297318
{
298-
echo "Binary releases (automated builds) as follows."
299-
echo "Compute Capability (CC) in the table means minimum and maximum versions supported."
300-
echo "CC versions are listed without the separator. For example, '90' means devices with compute capability 9.0 can run that build."
319+
echo "Pre-compiled binaries (automated builds) as follows."
320+
echo "In this table, the Compute Capability column means the minimum and maximum versions supported."
301321
echo
302-
echo "File | CUDA version | Compute Capability | Build OS | Compiler version | NVCC version"
322+
echo "File | CUDA version | Compute Capability | Build OS | Host compiler | NVCC version"
303323
echo "--- | --- | --- | --- | --- | ---"
304-
sort -Vr ${{ env.base_name }}.txt
324+
sort -Vr ${base_name}.txt
305325
} > RELEASE_NOTES.txt
306326
{
307327
echo 'RELEASE_FILES<<EOF'
308-
printf '%s\n' ${{ env.base_name }}.zip | sort -Vr
328+
printf '%s\n' "${base_name}.zip" | sort -Vr
309329
echo 'EOF'
310330
} > $GITHUB_OUTPUT
311-
( echo "${{ github.ref_name }}" | grep -qsP "v?\d+(?:\.\d+(?:\.\d+)?(?:-\d+)?|\b)(-(?:alpha|beta|pre))" && echo "PRERELEASE=true" || echo "PRERELEASE=false" ) >> $GITHUB_OUTPUT
331+
( echo "$GITHUB_REF_NAME" | grep -qsP "v?\d+(?:\.\d+(?:\.\d+)?(?:-\d+)?|\b)(-(?:alpha|beta|pre))" && echo "PRERELEASE=true" || echo "PRERELEASE=false" ) >> $GITHUB_OUTPUT
312332
313333
- name: Create and upload release package
314334
uses: softprops/[email protected]

.github/workflows/codeql.yml

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
name: "CodeQL (advanced setup)"
2+
3+
on:
4+
push:
5+
branches: [ "main" ]
6+
pull_request:
7+
branches: [ "main" ]
8+
schedule:
9+
- cron: '16 12 * * 1'
10+
11+
jobs:
12+
analyze:
13+
name: Analyze (${{ matrix.language }})
14+
runs-on: 'ubuntu-latest'
15+
container: 'nvcr.io/nvidia/cuda:12.9.0-devel-ubuntu24.04'
16+
permissions:
17+
# required for all workflows
18+
security-events: write
19+
20+
# required to fetch internal or private CodeQL packs
21+
packages: read
22+
23+
# only required for workflows in private repositories
24+
actions: read
25+
contents: read
26+
27+
strategy:
28+
fail-fast: false
29+
matrix:
30+
include:
31+
- language: c-cpp
32+
build-mode: autobuild
33+
steps:
34+
- name: Checkout repository
35+
uses: actions/checkout@v4
36+
37+
- name: Initialize CodeQL
38+
uses: github/codeql-action/init@v3
39+
with:
40+
languages: ${{ matrix.language }}
41+
build-mode: ${{ matrix.build-mode }}
42+
43+
- name: Perform CodeQL analysis
44+
uses: github/codeql-action/analyze@v3
45+
with:
46+
category: "/language:${{matrix.language}}"

.github/workflows/scripts/build_helper.sh

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -69,8 +69,10 @@ elif [ ${#CC_LIST[*]} -lt 3 ]; then
6969
echo "Warning: less than three (3) supported compute capabilities" >&2
7070
fi
7171

72-
echo "All supported CCs: ${CC_LIST[*]}, CC_MIN=${CC_LIST[0]}, CC_MAX=${CC_LIST[-1]}"
73-
echo -e "CC_LIST=\"${CC_LIST[*]}\"\nCC_MIN=${CC_LIST[0]}\nCC_MAX=${CC_LIST[-1]}" >> "$0.out"
72+
CC_MIN="${CC_LIST[0]:0:(-1)}.${CC_LIST[0]:(-1)}"
73+
CC_MAX="${CC_LIST[-1]:0:(-1)}.${CC_LIST[-1]:(-1)}"
74+
echo "All supported CCs: ${CC_LIST[*]}, CC_MIN=${CC_MIN}, CC_MAX=${CC_MAX}"
75+
echo -e "CC_LIST=\"${CC_LIST[*]}\"\nCC_MIN=${CC_MIN}\nCC_MAX=${CC_MAX}" >> "$0.out"
7476

7577
echo 'Removing NVCCFLAGS strings with "arch=..." entries from makefiles and populating them with discovered supported values.'
7678
sed -i '/^NVCCFLAGS += --generate-code arch=compute.*/d' src/Makefile.win src/Makefile
@@ -87,13 +89,15 @@ if [ "$CUDA_VER" -lt 1200 ]; then
8789
sed -i -E 's/^(LDFLAGS = .*? -lcudart_static) (.*)/\1 -ldl -lrt -lpthread \2/' src/Makefile
8890
fi
8991

90-
echo 'Gathering version info on generic compiler and NVCC...'
92+
echo 'Gathering host compiler and NVCC version info...'
93+
# COMPILER_VER for Windows builds is actually set to the MSVC product version.
94+
# We retrieve the cl.exe version during the build step and add it to the table.
9195
if [[ -x "$(command -v vswhere.exe)" ]]; then
92-
CC_VSPROD="$(vswhere -latest -products '*' -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property displayName)"
96+
CC_VSPROD="$(vswhere -latest -products '*' -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property displayName | sed -e 's/Visual Studio/MSVC/')"
9397
COMPILER_VER="${CC_VSPROD}, $(vswhere -latest -products '*' -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationVersion)"
9498
elif [[ -x "$(command -v powershell.exe)" ]]; then
9599
CC_VSINFO="$(powershell -Command Get-VSSetupInstance)"
96-
CC_VSPROD="$(echo "$CC_VSINFO" | grep DisplayName | cut -d':' -f2 | xargs)"
100+
CC_VSPROD="$(echo "$CC_VSINFO" | grep DisplayName | cut -d':' -f2 | xargs | sed -e 's/Visual Studio/MSVC/')"
97101
COMPILER_VER="${CC_VSPROD}, $(echo "$CC_VSINFO" | grep InstallationVersion | cut -d':' -f2 | xargs)"
98102
else
99103
COMPILER_VER="$(gcc --version | head -n1)"
@@ -104,11 +108,11 @@ else
104108
fi
105109

106110
if [[ -x "$(command -v powershell.exe)" ]]; then
107-
OS_VER="$(powershell -Command "[System.Environment]::OSVersion.VersionString")"
111+
OS_VER="$(powershell -Command "[System.Environment]::OSVersion.VersionString" | cut -d ' ' -f2-)"
108112
OS_TYPE="win64"
109113
fi
110114

111-
NVCC_VER="$(nvcc --version | tail -n1 | sed -E 's/^Build //')"
115+
NVCC_VER="$(nvcc --version | tail -n1 | sed -E 's/^Build //;s/^Cuda compilation tools, //')"
112116

113117
# get mfaktc version from src/params.h
114118
# match SemVer and GIMPS version strings: https://regex101.com/r/m38d3i/2

Changelog.txt

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,24 @@
1+
version 0.23.5
2+
==============================
3+
- includes changes backported from mfaktc 0.24.0
4+
5+
bug fixes:
6+
- Blackwell devices are now correctly supported on Windows (thanks to
7+
yellowbeeblackbee)
8+
9+
build:
10+
- enabled more compiler optimizations by default for a slight performance gain
11+
(thanks to NStorm)
12+
- various improvements to GitHub Actions workflows (thanks to NStorm)
13+
- support for additional CUDA versions
14+
- removed dependency on deprecated windows-2019 images
15+
- code analysis with CodeQL
16+
- improved visual appearance and clarity of release information (thanks to
17+
Danny Chia)
18+
19+
other changes:
20+
- re-organized mfaktc.ini to be more user-friendly (thanks to James Heinrich)
21+
122
version 0.23.4
223
==============================
324
- includes changes backported from mfaktc 0.24.0

src/Makefile

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,12 @@ CUDA_LIB = -L$(CUDA_DIR)/lib64/
55

66
# compiler settings for .c files (CPU)
77
CC = gcc
8-
CFLAGS = -Wall -Wextra -O2 $(CUDA_INCLUDE) -malign-double
8+
CFLAGS = -Wall -Wextra -O3 -flto -malign-double -ffunction-sections -fdata-sections -Wl,--gc-sections $(CUDA_INCLUDE)
99
CFLAGS_EXTRA_SIEVE = -funroll-all-loops
1010

1111
# compiler settings for .cu files (GPU)
1212
NVCC = nvcc
13-
NVCCFLAGS = $(CUDA_INCLUDE) --ptxas-options=-v -Wno-deprecated-gpu-targets
13+
NVCCFLAGS = $(CUDA_INCLUDE) --ptxas-options=-v -O3 -Wno-deprecated-gpu-targets
1414

1515
# generate code for supported compute capabilities
1616
# NVCCFLAGS += --generate-code arch=compute_11,code=sm_11 # mfaktc cannot use 1.0 but supports 1.1 and above
@@ -36,7 +36,7 @@ NVCCFLAGS += --compiler-options=-Wall
3636

3737
# Linker
3838
LD = gcc
39-
LDFLAGS = -fPIC $(CUDA_LIB) -lcudart_static -lm -lstdc++
39+
LDFLAGS = -flto -fPIC -Wl,--gc-sections $(CUDA_LIB) -lcudart_static -lm -lstdc++
4040

4141
INSTALL = install
4242

0 commit comments

Comments
 (0)