@@ -89,6 +89,13 @@ void name##_int(const int i,const int j,const int k,doublecomplex result[static
8989 vCopyIntReal(i,j,k,qvec); \
9090 name(qvec,result,true); }
9191
92+ // same as above, but calling function is different from name and accepts additional argument
93+ # define INT_WRAPPER_INTER_3 (name ,func ,arg ) \
94+ void name##_int(const int i,const int j,const int k,doublecomplex result[static restrict 6]) { \
95+ double qvec[3]; \
96+ vCopyIntReal(i,j,k,qvec); \
97+ func(qvec,result,true,arg); }
98+
9299// wrapper for <name> (reflected interaction), based on integer input; arguments are described in .h file
93100# define INT_WRAPPER_REFL (name ) \
94101void name##_int(const int i,const int j,const int k,doublecomplex result[static restrict 6]) { \
@@ -105,13 +112,21 @@ void name##_real(const double qvec_in[restrict 3],doublecomplex result[static re
105112 vCopy(qvec_in,qvec); \
106113 name(qvec,result,false); }
107114
115+ // same as above, but calling function is different from name and accepts additional argument
116+ # define REAL_WRAPPER_INTER_3 (name ,func ,arg ) \
117+ void name##_real(const double qvec_in[restrict 3],doublecomplex result[static restrict 6]) { \
118+ double qvec[3]; \
119+ vCopy(qvec_in,qvec); \
120+ func(qvec,result,true,arg); }
121+
108122// wrapper for <name>, based on real input; arguments are described in .h file
109123# define REAL_WRAPPER_REFL (name ) \
110124void name##_real(const double qvec[restrict 3],doublecomplex result[static restrict 6]) \
111125 { name(qvec,result,false); }
112126
113127// aggregate defines
114128#define WRAPPERS_INTER (name ) INT_WRAPPER_INTER(name) REAL_WRAPPER_INTER(name)
129+ #define WRAPPERS_INTER_3 (name ,func ,arg ) INT_WRAPPER_INTER_3(name,func,arg) REAL_WRAPPER_INTER_3(name,func,arg)
115130#define WRAPPERS_REFL (name ) INT_WRAPPER_REFL(name) REAL_WRAPPER_REFL(name)
116131
117132/* this macro defines a void (error generating) real-input wrapper for Green's tensor formulations, which are, for
@@ -141,7 +156,10 @@ static inline void vCopyIntReal(const int i,const int j,const int k,double qvec[
141156
142157static inline void InterParams (double qvec [static 3 ],double qmunu [static 6 ],double * rr ,double * rn ,double * invr3 ,
143158 double * kr ,double * kr2 ,const bool unitsGrid )
144- // some common variables needed by the interaction functions - needed for all except IGT
159+ /* some common variables needed by the interaction functions - needed for all except IGT
160+ * It will probably break down for qvec=0, so if any interaction term is required for zero argument, a new function need
161+ * to be created, which will avoid this singularity
162+ */
145163{
146164 double invrn ;
147165
@@ -323,6 +341,7 @@ static inline void InterTerm_fcd(double qvec[static 3],doublecomplex result[stat
323341 * speed of FCD can be improved by using faster version of sici routine, using predefined tables, etc (e.g. as is
324342 * done in GSL library). But currently extra time for this computation is already smaller than one main iteration.
325343 */
344+ // If needed, it can be updated to work fine for qvec==0
326345{
327346 // standard variable definitions used for functions InterParams and InterTerm_core
328347 double qmunu [6 ]; // normalized outer-product {qxx,qxy,qxz,qyy,qyz,qzz}
@@ -364,6 +383,7 @@ WRAPPERS_INTER(InterTerm_fcd)
364383
365384static inline void InterTerm_fcd_st (double qvec [static 3 ],doublecomplex result [static 6 ],const bool unitsGrid )
366385// Interaction term between two dipoles for static FCD (in the limit of k->inf). See InterTerm_fcd for more details.
386+ // If needed, it can be updated to work fine for qvec==0
367387{
368388 // standard variable definitions used for functions InterParams and InterTerm_core
369389 double qmunu [6 ]; // normalized outer-product {qxx,qxy,qxz,qyy,qyz,qzz}
@@ -560,96 +580,112 @@ NO_REAL_WRAPPER(InterTerm_igt_so)
560580
561581//=====================================================================================================================
562582
563- static inline void lower_gamma (const double x ,double res [static 3 ])
564- /* computes the values of lower incomplete gamma function of orders 1/2,3/2, and 5/2 at x^2 by upward recursion
583+ static inline double lower_gamma52 (const double sx ,const double expMx )
584+ /* computes the values of lower incomplete gamma function of order 5/2 at sx^2 by upward recursion from erf(x),
585+ * i.e. sx = sqrt(x) is given together with exp(-sx^2)=exp(-x)
565586 * it is numerically stable (up to all digits) for x>1
566587 * if extension to complex input is required, cerf can be obtained from http://apps.jcns.fz-juelich.de/doku/sc/libcerf
567588 */
568589{
569- double te = exp (- x * x );
570- res [0 ]= SQRT_PI * erf (x );
571- res [1 ]= 0.5 * res [0 ]- x * te ;
572- res [2 ]= 1.5 * res [1 ]- x * x * x * te ;
590+ return 0.75 * SQRT_PI * erf (sx ) - sx * (1.5 + sx * sx )* expMx ;
573591}
574592
575593//=====================================================================================================================
576594
577- static inline void gamma_scaled (const double x , double res [ static 3 ] )
578- /* computes the values of scaled lower incomplete gamma function of orders 1/2,3/2, and 5/2 at x by downward recursion
595+ static inline double gamma_scaled (const double s , const double x , const double expMx )
596+ /* computes the values of scaled lower incomplete gamma function, using given value of exp(-x)
579597 * f[s-1/2,x] = g[s,x]/x^s = M(s,s+1,-x)/s = exp(-x)M(1,s+1,x)/s, where M is Kummer's confluent hypergeometric function
598+ *
599+ * The code is based on Numerical Recipes, 3rd ed. There it is mentioned that such series are more efficient than
600+ * continuous fraction for x < s. Series is f[s-1/2,x] = exp(-x)*Sum{k=0->inf,Gamma(s)*x^k/Gamma(s+k+1)}.
601+ * For s=5/2 to reach double precision it requires from 9 to 16 iterations when 0.1<=x<=1 (and even smaller -
602+ * for smaller x)
580603 */
581604{
582- /* First compute f[2,x]*exp(x) by series summation, the code is based on Numerical Recipes, 3rd ed. There it is
583- * mentioned that such series are more efficient than continuous fraction for x < 5/2.
584- * Series is f[s-1/2,x]*exp(x) = Sum{k=0->inf,Gamma(s)*x^k/Gamma(s+k+1)}, we use s=5/2
585- * To reach double precision it requires from 9 to 16 iterations when 0.1<=x<=1 (and even smaller - for smaller x)
586- */
587- #define SVAL 2.5
588605 double ap ,del ,sum ;
589- ap = SVAL ;
590- del = sum = 1.0 /SVAL ;
606+ ap = s ;
607+ del = sum = 1.0 /s ;
591608 do {
592609 ap ++ ;
593610 del *=x /ap ;
594611 sum += del ;
595612 } while (fabs (del ) > fabs (sum )* DBL_EPSILON );
596- #undef SVAL
597- // downward recursion
598- double te = exp (- x );
599- res [2 ]= sum * te ;
600- res [1 ]= (x * res [2 ]+ te )/1.5 ;
601- // currently res[0] is not used
602- // res[0]=(x*res[1]+te)/0.5;
613+ return sum * expMx ;
603614}
604615
605616//=====================================================================================================================
606617
607- static inline void InterTerm_nloc (double qvec [static 3 ],doublecomplex result [static 6 ],const bool unitsGrid )
618+ static inline double AverageGaussCube (double x )
619+ /* computes one component of Gaussian averaging over a cube: (multiplied by 2)
620+ * [2/(sqrt(2pi)*d*Rp)]*Integral[exp(-(x+t)^2/(2Rp^2)),{t,-d/2,d/2}]
621+ */
622+ {
623+ double dif ;
624+ if (x < 0 ) x = - x ; // for convenience work only with non-negative input
625+ const double y = x /(SQRT2 * nloc_Rp );
626+ const double z = gridspace /(2 * SQRT2 * nloc_Rp );
627+ // two branches not to lose precision
628+ if (x < 1 ) dif = erf (y + z ) - erf (y - z );
629+ else dif = erfc (y - z ) - erfc (y + z );
630+ return dif /gridspace ;
631+ }
632+
633+ //=====================================================================================================================
634+
635+ static inline void InterTerm_nloc_both (double qvec [static 3 ],doublecomplex result [static 6 ],const bool unitsGrid ,
636+ const bool averageH )
608637/* Interaction term between two dipoles using the non-local interaction;
609638 * qvec is a distance given by either integer-valued vector (in units of d) or arbitrary-valued in real units (um),
610639 * controlled by unitsGrid (true of false respectively), result is for produced output
640+ * averageH specifies if the h function should be averaged over the dipole (cube) volume
611641 *
612642 * !!! Currently only static version is implemented
613643 * !!! Mind the difference in sign with term in quantum-mechanical simulations, which defines the interaction energy
614- * G = 2/[sqrt(PI)*R^3] * {2*(RR/R^2)*g(5/2,x) - I*g(3/2,x)}, where x=R^2/(2*Rp^2) and g is lower incomplete
615- * gamma-function. For moderate x, those gamma functions can be easily expressed through erf by upward recursion, since
616- * g(1/2,y^2) = sqrt(pi)*erf(y) and g(s+1,x) = s*g(s,x) - exp(-x)*x^s
617- *
644+ * G = 4/[3sqrt(PI)R^3]g(5/2,x)[3(RR/R^2)-I] - (4pi/3)h(R), where x=R^2/(2Rp^2), g is lower incomplete gamma-function.
645+ * For moderate x, those gamma functions can be easily expressed through erf by upward recursion, since
646+ * g(1/2,y^2) = sqrt(pi)erf(y) and g(s+1,x) = s*g(s,x) - exp(-x)x^s
618647 * For small x to save significant digits, we define f(m,x) = g(m+1/2,x)/x^(m+1/2) [no additional coefficient 1/2]
619- * and compute them by downward recursion starting from f[2,x] (computed by series representation)
620- * Then we use the following expression for G (with R^3 replaced by Rp^3), which is regular for x->0
621- * G = 1/[sqrt(2*PI)*Rp^3] * {2*(RR/R^2)*x*f(2,x) - I*f(1,x)}
648+ * (computed by series representation), then g(5/2,x)/R^3 = f(2,x)/[sqrt(2)*Rp]^3
649+ *
650+ * (4pi/3)h(R)=exp(-x)*sqrt(2/pi)/(3*Rp^3) - is the non-locality function. If averageH, it is replaced by its integral
651+ * over cube, which can be easily expressed through erf.
652+ *
653+ * Currently this function is faster than FCD, so we should not worry about speed. But if needed, calculation of both
654+ * h(R) and its integrals can be optimized by tabulating 1D functions (either exp, or combinations of erf)
622655 */
656+ // If needed, it can be updated to work fine for qvec==0
623657{
624658 // standard variable definitions used for functions InterParams
625659 double qmunu [6 ]; // normalized outer-product {qxx,qxy,qxz,qyy,qyz,qzz}
626660 double rr ,rn ,invr3 ,kr ,kr2 ; // |R|, |R/d|, |R|^-3, kR, (kR)^2
627661
628- double x , y , ar [ 3 ], expval ;
662+ double sx , x , t1 , t2 , expMx , invRp3 ;
629663 InterParams (qvec ,qmunu ,& rr ,& rn ,& invr3 ,& kr ,& kr2 ,unitsGrid );
630- if (rr >=nloc_Rp ) {
631- if (nloc_Rp == 0 ) {
632- if (rr == 0 ) LogError (ALL_POS ,"Non-local interaction is not defined for both R and Rp equal to 0" );
633- // same as lower_gamma but explicitly using erf(inf)=1, exp(-inf)=0
634- ar [0 ]= SQRT_PI ;
635- ar [1 ]= 0.5 * ar [0 ];
636- ar [2 ]= 1.5 * ar [1 ];
637- }
638- else lower_gamma (SQRT1_2 * rr /nloc_Rp ,ar );
639- expval = TWO_OVER_SQRT_PI * invr3 ;
664+ if (nloc_Rp == 0 ) {
665+ if (rr == 0 ) LogError (ALL_POS ,"Non-local interaction is not defined for both R and Rp equal to 0" );
666+ t1 = invr3 ;
667+ // when averaging, we check if the point r is inside the cube around r0. Conforms with general formula below
668+ if (averageH && rn <=SQRT3 * 0.5 && fabs (qvec [0 ])* rn <=0.5 && fabs (qvec [1 ])* rn <=0.5 && fabs (qvec [2 ])* rn <=0.5 )
669+ t2 = FOUR_PI_OVER_THREE ;
670+ else t2 = 0 ;
640671 }
641672 else {
642- y = SQRT1_2 * rr /nloc_Rp ;
643- x = y * y ;
644- gamma_scaled (x ,ar );
645- ar [2 ]*=x ;
646- expval = SQRT1_2PI /(nloc_Rp * nloc_Rp * nloc_Rp );
673+ invRp3 = 1 /(nloc_Rp * nloc_Rp * nloc_Rp );
674+ sx = SQRT1_2 * rr /nloc_Rp ;
675+ x = sx * sx ;
676+ expMx = exp (- sx * sx );
677+ // the threshold for x is somewhat arbitrary
678+ if (x > 1 ) t1 = (4 /(3 * SQRT_PI ))* lower_gamma52 (sx ,expMx )* invr3 ;
679+ else t1 = SQRT2_9PI * x * gamma_scaled (2.5 ,x ,expMx )* invRp3 ;
680+ if (averageH )
681+ t2 = PI_OVER_SIX * AverageGaussCube (qvec [0 ]* rr )* AverageGaussCube (qvec [1 ]* rr )* AverageGaussCube (qvec [2 ]* rr );
682+ else t2 = expMx * invRp3 * SQRT2_9PI ;
647683 }
648684
649685//#define INTERACT_DIAG(ind) { result[ind] = ((t1*qmunu[ind]+t3) + I*(kr+t2*qmunu[ind]))*(*expval); }
650686//#define INTERACT_NONDIAG(ind) { result[ind] = (t1+I*t2)*qmunu[ind]*(*expval); }
651- #define INTERACT_DIAG (ind ) { result[ind] = (2*ar[2] *qmunu[ind] - ar[1])*expval ; }
652- #define INTERACT_NONDIAG (ind ) { result[ind] = 2*ar[2] *qmunu[ind]*expval ; }
687+ #define INTERACT_DIAG (ind ) { result[ind] = 3*t1 *qmunu[ind] - (t1+t2) ; }
688+ #define INTERACT_NONDIAG (ind ) { result[ind] = 3*t1 *qmunu[ind]; }
653689
654690 INTERACT_DIAG (0 ); // xx
655691 INTERACT_NONDIAG (1 ); // xy
@@ -661,17 +697,12 @@ static inline void InterTerm_nloc(double qvec[static 3],doublecomplex result[sta
661697#undef INTERACT_DIAG
662698#undef INTERACT_NONDIAG
663699
664- // double t1=-expval*ar[1];
665- // doubel t2=2*expval*ar[2];
666- // res = t1*I + t2*(RR/R^2)
667- // for (int i=0;i<6;i++) result[i]=t2*qmunu[i];
668- // result[0]+=t1;
669- // result[3]+=t1;
670- // result[5]+=t1;
671700 PRINT_GVAL ;
672701}
673702
674- WRAPPERS_INTER (InterTerm_nloc )
703+ // wrappers both for nloc and nloc0
704+ WRAPPERS_INTER_3 (InterTerm_nloc ,InterTerm_nloc_both ,true)
705+ WRAPPERS_INTER_3 (InterTerm_nloc0 ,InterTerm_nloc_both ,false)
675706
676707//=====================================================================================================================
677708
@@ -1326,6 +1357,7 @@ void InitInteraction(void)
13261357 SET_FUNC_POINTERS (InterTerm ,igt_so );
13271358 break ;
13281359 case G_NLOC : SET_FUNC_POINTERS (InterTerm ,nloc ); break ;
1360+ case G_NLOC0 : SET_FUNC_POINTERS (InterTerm ,nloc0 ); break ;
13291361 case G_SO :
13301362 if (InteractionRealArgs ) PrintError ("'-int so' does not support calculation of interaction tensor for "
13311363 "arbitrary real arguments" );
0 commit comments