Skip to content

Commit a3575fe

Browse files
authored
Merge pull request #352 from IntelPython/fix/l2_norm_diff
Address difference in l2_norm implementations
2 parents ab6ecb5 + ff2ddf3 commit a3575fe

File tree

5 files changed

+5
-1
lines changed

5 files changed

+5
-1
lines changed

.github/workflows/conda-package.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -33,7 +33,7 @@ jobs:
3333
fail-fast: false
3434
matrix:
3535
python: ['3.9', '3.10', '3.11']
36-
os: [ubuntu-latest, windows-latest]
36+
os: [ubuntu-latest, windows-2019]
3737

3838
runs-on: ${{ matrix.os }}
3939

dpbench/benchmarks/default/l2_norm/l2_norm_numba_dpex_p.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@
1010
@dpjit
1111
def l2_norm(a, d):
1212
for i in nb.prange(a.shape[0]):
13+
d[i] = 0.0
1314
for k in range(a.shape[1]):
1415
d[i] += np.square(a[i, k])
1516
d[i] = np.sqrt(d[i])

dpbench/benchmarks/default/l2_norm/l2_norm_numba_mlir_p.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@
1010
@nb.njit(parallel=True, fastmath=True)
1111
def _l2_norm(a, d):
1212
for i in numba.prange(a.shape[0]):
13+
d[i] = 0.0
1314
for k in range(a.shape[1]):
1415
d[i] += np.square(a[i, k])
1516
d[i] = np.sqrt(d[i])

dpbench/benchmarks/default/l2_norm/l2_norm_numba_npr.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99
@nb.njit(parallel=True, fastmath=True)
1010
def l2_norm(a, d):
1111
for i in nb.prange(a.shape[0]):
12+
d[i] = 0.0
1213
for k in range(a.shape[1]):
1314
d[i] += np.square(a[i, k])
1415
d[i] = np.sqrt(d[i])

dpbench/benchmarks/default/l2_norm/l2_norm_sycl_native_ext/l2_norm_sycl/_l2_norm_kernel.hpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@ void l2_norm_impl(queue Queue,
2222
.submit([&](handler &h) {
2323
h.parallel_for<theKernel<FpTy>>(range<1>{npoints}, [=](id<1> myID) {
2424
size_t i = myID[0];
25+
d[i] = 0.0;
2526
for (size_t k = 0; k < dims; k++) {
2627
d[i] += a[i * dims + k] * a[i * dims + k];
2728
}

0 commit comments

Comments
 (0)