Merge branch 'pyscf:master' into libqcschema

ajjenkins1 · web-flow · commit 6aa731d2923f · 2024-11-27T21:17:04.000-08:00
diff --git a/pyscf/dft/radi.py b/pyscf/dft/radi.py
@@ -47,14 +47,21 @@
 def murray(n, *args, **kwargs):
     raise RuntimeError('Not implemented')
 
-# Gauss-Chebyshev of the first kind,  and the transformed interval [0,\infty)
 def becke(n, charge, *args, **kwargs):
     '''Becke, JCP 88, 2547 (1988); DOI:10.1063/1.454033'''
     if charge == 1:
         rm = BRAGG_RADII[charge]
     else:
         rm = BRAGG_RADII[charge] * .5
-    t, w = numpy.polynomial.chebyshev.chebgauss(n)
+
+    # Points and weights for Gauss-Chebyshev quadrature points of the second kind
+    # The weights are adjusted to integrate a function on the interval [-1, 1] with uniform weighting
+    # instead of weighted by sqrt(1 - t^2) = sin(i pi / (n+1)).
+    i = numpy.arange(n) + 1
+    t = numpy.cos(i * numpy.pi / (n + 1))
+    w = numpy.pi / (n + 1) * numpy.sin(i * numpy.pi / (n + 1))
+
+    # Change of variables to map the domain to [0, inf)
     r = (1+t)/(1-t) * rm
     w *= 2/(1-t)**2 * rm
     return r, w
diff --git a/pyscf/dft/test/test_grids.py b/pyscf/dft/test/test_grids.py
@@ -91,7 +91,7 @@ def test_radi(self):
 
         grid.radi_method = radi.becke
         grid.build(with_non0tab=False)
-        self.assertAlmostEqual(lib.fp(grid.weights), 818061.875131255, 7)
+        self.assertAlmostEqual(lib.fp(grid.weights), 780.7183109298, 9)
 
     def test_prune(self):
         grid = gen_grid.Grids(h2o)
diff --git a/pyscf/lib/np_helper/np_helper.c b/pyscf/lib/np_helper/np_helper.c
@@ -14,6 +14,7 @@
  */
 
 #include <stdlib.h>
+#include <complex.h>
 #include "np_helper/np_helper.h"
 
 void NPdset0(double *p, const size_t n)
@@ -47,3 +48,96 @@ void NPzcopy(double complex *out, const double complex *in, const size_t n)
                 out[i] = in[i];
         }
 }
+
+/*
+ * These are mostly useful for first-touch array allocation on NUMA systems.
+ * Use with numpy.empty.
+ */
+void NPomp_dset0(const size_t n, double *out)
+{
+#pragma omp parallel for schedule(static)
+        for (size_t i = 0; i < n; i++) {
+                out[i] = 0.0;
+        }
+}
+
+void NPomp_zset0(const size_t n, double complex *out)
+{
+#pragma omp parallel for schedule(static)
+        for (size_t i = 0; i < n; i++) {
+                out[i] = 0.0;
+        }
+}
+
+
+/*
+ * Copy a double precision matrix with multithreading.
+ */
+void NPomp_dcopy(const size_t m,
+                 const size_t n,
+                 const double *__restrict in, const size_t in_stride,
+                 double *__restrict out, const size_t out_stride)
+{
+#pragma omp parallel for schedule(static)
+        for (size_t i = 0; i < m; i++) {
+#pragma omp simd
+                for (size_t j = 0; j < n; j++) {
+                        out[i * out_stride + j] = in[i * in_stride + j];
+                }
+        }
+}
+
+/*
+ * Copy a complex double precision matrix with multithreading.
+ */
+void NPomp_zcopy(const size_t m,
+                 const size_t n,
+                 const double complex *__restrict in, const size_t in_stride,
+                 double complex *__restrict out, const size_t out_stride)
+{
+#pragma omp parallel for schedule(static)
+        for (size_t i = 0; i < m; i++) {
+#pragma omp simd
+                for (size_t j = 0; j < n; j++) {
+                        out[i * out_stride + j] = in[i * in_stride + j];
+                }
+        }
+}
+
+/*
+ * Elementwise multiplication of two double matrices.
+ * B <- A \circ B
+ */
+void NPomp_dmul(const size_t m,
+                const size_t n,
+                const double *__restrict a, const size_t a_stride,
+                double *__restrict b, const size_t b_stride,
+                double *__restrict out, const size_t out_stride)
+{
+#pragma omp parallel for schedule(static)
+        for (size_t i = 0; i < m; i++) {
+#pragma omp simd
+                for (size_t j = 0; j < n; j++) {
+                        out[i * out_stride + j] = b[i * b_stride + j] * a[i * a_stride + j];
+                }
+        }
+}
+
+/*
+ * Elementwise multiplication of two complex double matrices.
+ * B <- A \circ B
+ */
+void NPomp_zmul(const size_t m,
+                const size_t n,
+                const double complex *__restrict a, const size_t a_stride,
+                double complex *__restrict b, const size_t b_stride,
+                double complex *__restrict out, const size_t out_stride)
+{
+#pragma omp parallel for schedule(static)
+        for (size_t i = 0; i < m; i++) {
+#pragma omp simd
+                for (size_t j = 0; j < n; j++) {
+                        out[i * out_stride + j] = b[i * b_stride + j] * a[i * a_stride + j];
+                }
+        }
+}
diff --git a/pyscf/lib/np_helper/np_helper.h b/pyscf/lib/np_helper/np_helper.h
@@ -70,6 +70,24 @@ void NPzset0(double complex *p, const size_t n);
 void NPdcopy(double *out, const double *in, const size_t n);
 void NPzcopy(double complex *out, const double complex *in, const size_t n);
 
+void NPomp_dset0(const size_t n, double *out);
+void NPomp_zset0(const size_t n, double complex *out);
+
+void NPomp_dcopy(const size_t m, const size_t n,
+                 const double *in, const size_t in_stride,
+                 double *out, const size_t out_stride);
+void NPomp_zcopy(const size_t m, const size_t n,
+                 const double complex *in, const size_t in_stride,
+                 double complex *out, const size_t out_stride);
+void NPomp_dmul(const size_t m, const size_t n,
+                const double *a, const size_t a_stride,
+                double *b, const size_t b_stride,
+                double *out, const size_t out_stride);
+void NPomp_zmul(const size_t m, const size_t n,
+                const double complex *a, const size_t a_stride,
+                double complex *b, const size_t b_stride,
+                double complex *out, const size_t out_stride);
+
 void NPdgemm(const char trans_a, const char trans_b,
              const int m, const int n, const int k,
              const int lda, const int ldb, const int ldc,
diff --git a/pyscf/lib/numpy_helper.py b/pyscf/lib/numpy_helper.py
@@ -552,12 +552,10 @@ def inplace_transpose_scale(a, alpha=1.0):
     alpha : float, optional
         scaling factor, by default 1.0
     """
-    assert a.ndim == 2
+    lda, order, _ = leading_dimension_order(a)
     assert a.shape[0] == a.shape[1]
     n = a.shape[0]
-    astrides = [s // a.itemsize for s in a.strides]
-    lda = max(astrides)
-    assert min(astrides) == 1
+    assert order in ('C', 'F')
     if a.dtype == numpy.double:
         _np_helper.NPomp_d_itranspose_scale(
             ctypes.c_int(n), ctypes.c_double(alpha),
@@ -974,6 +972,37 @@ def frompointer(pointer, count, dtype=float):
     a = numpy.ndarray(count, dtype=numpy.int8, buffer=buf)
     return a.view(dtype)
 
+def leading_dimension_order(a):
+    """Return the leading dimension and the order of a matrix.
+
+    Parameters
+    ----------
+    a : ndarray
+        2D array.
+
+    Returns
+    -------
+    lda : int
+        Leading dimension of the array -- the stride between rows or columns.
+    order : str
+        'F' for col major, 'C' for row major, 'G' for neither.
+    a_cshape : tuple
+        If a is row major, a.shape; if a is col major, a.T.shape; otherwise None.
+    """
+    assert a.ndim == 2
+    astrides = [s//a.itemsize for s in a.strides]
+    lda = max(astrides)
+    if astrides[0] == 1:
+        order = 'F'
+        a_cshape = a.T.shape
+    elif astrides[1] == 1:
+        order = 'C'
+        a_cshape = a.shape
+    else:
+        order = 'G'
+        a_cshape = None
+    return lda, order, a_cshape
+
 norm = numpy.linalg.norm
 
 def cond(x, p=None):
@@ -1177,6 +1206,99 @@ def expm(a):
         y, buf = buf, y
     return y
 
+def omatcopy(a, out=None):
+    """Copies a matrix.
+
+    Parameters
+    ----------
+    a : ndarray
+        Matrix to be copied. The order of the matrix is preserved.
+        a can be either row or column major.
+    out : ndarray, optional
+        Matrix to be overwritten. A new one is allocated if not provided.
+
+    Returns
+    -------
+    out : ndarray
+        Copy of a with the same order.
+    """
+    lda, _, a_cshape = leading_dimension_order(a)
+    if out is None:
+        out = numpy.empty_like(a)
+    ld_out, _, out_cshape = leading_dimension_order(out)
+    assert out_cshape == a_cshape and a_cshape is not None
+    if a.dtype == numpy.double:
+        fn = _np_helper.NPomp_dcopy
+    elif a.dtype == numpy.complex128:
+        fn = _np_helper.NPomp_zcopy
+    else:
+        raise NotImplementedError
+    fn(ctypes.c_size_t(a_cshape[0]),
+       ctypes.c_size_t(a_cshape[1]),
+       a.ctypes.data_as(ctypes.c_void_p),
+       ctypes.c_size_t(lda),
+       out.ctypes.data_as(ctypes.c_void_p),
+       ctypes.c_size_t(ld_out))
+    return out
+
+def zeros(shape, dtype=numpy.double, order='C'):
+    """Allocate and zero an array in parallel. Useful for multi-socket systems
+       due to the first touch policy. On most systems np.zeros does not count
+       as first touch. Arrays returned by this function will (ideally) have
+       pages backing them that are distributed across the sockets.
+    """
+    dtype = numpy.dtype(dtype)
+    if dtype == numpy.double:
+        out = numpy.empty(shape, dtype=dtype, order=order)
+        _np_helper.NPomp_dset0(ctypes.c_size_t(out.size),
+                              out.ctypes.data_as(ctypes.c_void_p))
+    elif dtype == numpy.complex128:
+        out = numpy.empty(shape, dtype=dtype, order=order)
+        _np_helper.NPomp_zset0(ctypes.c_size_t(out.size),
+                              out.ctypes.data_as(ctypes.c_void_p))
+    else: # fallback
+        out = numpy.zeros(shape, dtype=dtype, order=order)
+    return out
+
+def entrywise_mul(a, b, out=None):
+    """Entrywise multiplication of two matrices.
+
+    Parameters
+    ----------
+    a : ndarray
+    b : ndarray
+    out : ndarray, optional
+        Output matrix. A new one is allocated if not provided.
+
+    Returns
+    -------
+    ndarray
+        a * b
+    """
+    assert a.ndim == 2 and b.ndim == 2
+    assert a.shape == b.shape and a.dtype == b.dtype
+    lda, _, a_cshape = leading_dimension_order(a)
+    ldb, _, b_cshape = leading_dimension_order(b)
+    if out is None:
+        out = numpy.empty_like(b)
+    ld_out, _, out_cshape = leading_dimension_order(out)
+    assert a_cshape == b_cshape and b_cshape == out_cshape and a_cshape is not None
+    if a.dtype == numpy.double:
+        fn = _np_helper.NPomp_dmul
+    elif a.dtype == numpy.complex128:
+        fn = _np_helper.NPomp_zmul
+    else:
+        return numpy.multiply(a, b, out=out)
+    fn(ctypes.c_size_t(a_cshape[0]),
+       ctypes.c_size_t(a_cshape[1]),
+       a.ctypes.data_as(ctypes.c_void_p),
+       ctypes.c_size_t(lda),
+       b.ctypes.data_as(ctypes.c_void_p),
+       ctypes.c_size_t(ldb),
+       out.ctypes.data_as(ctypes.c_void_p),
+       ctypes.c_size_t(ld_out))
+    return out
+
 def ndarray_pointer_2d(array):
     '''Return an array that contains the addresses of the first element in each
     row of the input 2d array.
diff --git a/pyscf/lib/test/test_numpy_helper.py b/pyscf/lib/test/test_numpy_helper.py
@@ -254,6 +254,42 @@ def test_ndarray_pointer_2d(self):
         addr = lib.ndarray_pointer_2d(a)
         self.assertTrue(all(addr == a.ctypes.data + numpy.array([0, 24, 48])))
 
+    def test_omatcopy(self):
+        a = numpy.random.random((5,5))
+        b = numpy.empty_like(a)
+        lib.omatcopy(a, out=b)
+        self.assertTrue(numpy.all(a == b))
+        a = numpy.random.random((403,410)).T
+        b = numpy.empty_like(a)
+        lib.omatcopy(a, out=b)
+        self.assertTrue(numpy.all(a == b))
+
+    def test_zeros(self):
+        a = lib.zeros((100,100), dtype=numpy.double)
+        self.assertTrue(numpy.all(a == 0))
+        self.assertTrue(a.dtype == numpy.double)
+        a = lib.zeros((100,100), dtype=numpy.complex128)
+        self.assertTrue(numpy.all(a == 0))
+        self.assertTrue(a.dtype == numpy.complex128)
+        a = lib.zeros((100,100), dtype=numpy.int32)
+        self.assertTrue(numpy.all(a == 0))
+        self.assertTrue(a.dtype == numpy.int32)
+
+    def test_entrywise_mul(self):
+        a = numpy.random.random((101,100))
+        b = numpy.random.random((101,100))
+        prod = lib.entrywise_mul(a, b)
+        self.assertTrue(numpy.allclose(prod, a * b))
+        a = numpy.random.random((101,100))
+        b = numpy.random.random((101,100))
+        a = a + a*1j
+        b = b + b*1j
+        prod = lib.entrywise_mul(a, b)
+        self.assertTrue(numpy.allclose(prod, a * b))
+        # inplace test
+        lib.entrywise_mul(a, b, out=b)
+        self.assertTrue(numpy.allclose(prod, b))
+
 if __name__ == "__main__":
     print("Full Tests for numpy_helper")
     unittest.main()
diff --git a/pyscf/scf/dispersion.py b/pyscf/scf/dispersion.py
@@ -50,7 +50,7 @@
 
 # These xc functionals are not supported yet
 _black_list = {
-    'wb97x-d', 'wb97x-d3',
+    'wb97x-d', 'wb97x-d3', 'wb97x_d', 'wb97x_d3',
     'wb97m-d3bj2b', 'wb97m-d3bjatm',
     'b97m-d3bj2b', 'b97m-d3bjatm',
 }
diff --git a/tools/verify_examples.py b/tools/verify_examples.py

Original file line number	Diff line number	Diff line change
`@@ -50,7 +50,7 @@`
`50`	`50`
`51`	`51`	`# These xc functionals are not supported yet`
`52`	`52`	`_black_list = {`
`53`		`- 'wb97x-d', 'wb97x-d3',`
	`53`	`+ 'wb97x-d', 'wb97x-d3', 'wb97x_d', 'wb97x_d3',`
`54`	`54`	`'wb97m-d3bj2b', 'wb97m-d3bjatm',`
`55`	`55`	`'b97m-d3bj2b', 'b97m-d3bjatm',`
`56`	`56`	`}`