adda-team
diff --git a/‎src/Makefile‎
Lines changed: 1 addition & 2 deletions b/‎src/Makefile‎
Lines changed: 1 addition & 2 deletions
diff --git a/‎src/comm.c‎
Lines changed: 2 additions & 3 deletions b/‎src/comm.c‎
Lines changed: 2 additions & 3 deletions
diff --git a/‎src/fft.c‎
Lines changed: 24 additions & 2 deletions b/‎src/fft.c‎
Lines changed: 24 additions & 2 deletions
diff --git a/‎src/fort/propaesplibreintadda.f‎
Lines changed: 5 additions & 8 deletions b/‎src/fort/propaesplibreintadda.f‎
Lines changed: 5 additions & 8 deletions
diff --git a/‎src/function.h‎
Lines changed: 19 additions & 2 deletions b/‎src/function.h‎
Lines changed: 19 additions & 2 deletions
diff --git a/‎src/interaction.c‎
Lines changed: 27 additions & 7 deletions b/‎src/interaction.c‎
Lines changed: 27 additions & 7 deletions
diff --git a/‎src/iterative.c‎
Lines changed: 5 additions & 1 deletion b/‎src/iterative.c‎
Lines changed: 5 additions & 1 deletion
@@ -329,8 +329,7 @@ ifeq ($(COMPILER),gnu)
   COPT1   := -O2
   COPT2   := -O3 -ffast-math -funroll-loops
   CWARN   := -Wall -Wextra -Wcast-qual -Wpointer-arith -Wwrite-strings -Wstrict-prototypes \
-             -Wstrict-aliasing=1 -Wshadow -Wcast-align -Wnested-externs -Wcomment -Wno-unknown-pragmas \
-             -Wno-overlength-strings
+             -Wstrict-aliasing=1 -Wshadow -Wcast-align -Wnested-externs -Wcomment -Wno-overlength-strings
   # gcc versions prior to 4.7.2 are affected by bug http://gcc.gnu.org/bugzilla/show_bug.cgi?id=7263 , which causes
   # -pedantic flag to generates warnings on every occurence of I (complex i)
   GCC_GTEQ_472 := $(shell expr `gcc -dumpversion | sed -e 's/\.\([0-9][0-9]\)/\1/g' -e 's/\.\([0-9]\)/0\1/g' \
 
@@ -154,9 +154,8 @@ static MPI_Datatype MPIVarType(var_type type,bool reduce,int *mult)
  * directly used. In this case we emulate more complex datatypes through multiplication of double, and additional
  * variable 'mult' is returned to account for this factor.
  *
- * Reduction of complex numbers is emulated if not supported; the emulation is not perfectly portable - depends on a
- * particular implementation of complex numbers. The good thing is that it is only used for old (less modern) MPI
- * implementations.
+ * Reduction of complex numbers is emulated if not supported; C99 implies that this emulation is portable. Anyway, it
+ * is only used for old (less modern) MPI implementations.
  */
 {
 	if (reduce) *mult=1; // default value when direct correspondence is possible
 
@@ -34,7 +34,9 @@
 #include <string.h>
 
 #ifdef CLFFT_AMD
+	IGNORE_WARNING(-Wstrict-prototypes) // no way to change the library header
 #	include <clAmdFft.h> //external library from AMD
+	STOP_IGNORE
 	// Defines precision of clAmdFft transforms. !!! CLFFT_DOUBLE_FAST should be tested when becomes operational
 #	define PRECISION_CLFFT CLFFT_DOUBLE
 #elif defined(CLFFT_APPLE)
@@ -318,8 +320,15 @@ void fftX(const int isign)
 #elif defined(FFT_TEMPERTON)
 	int nn=gridX,inc=1,jump=nn,lot=boxY;
 	size_t z;
-
+	/* Calls to Temperton FFT cause warnings for translation from doublecomplex to double pointers. However, such a cast
+	 * is perfectly valid in C99. So we set pragmas to remove these warnings.
+	 *
+	 * !!! TODO: Another (ultimate) solution is to remove this routine altogether, since FFTW is perfect in all
+	 * respects. This is also reasonable considering future switch to tgmath.h
+	 */
+	IGNORE_WARNING(-Wstrict-aliasing);
 	for (z=0;z<3*local_Nz;z++) cfft99_((double *)(Xmatrix+z*gridX*smallY),work,trigsX,ifaxX,&inc,&jump,&nn,&lot,&isign);
+	STOP_IGNORE;
 #endif
 }
 
@@ -343,7 +352,9 @@ void fftY(const int isign)
 #elif defined(FFT_TEMPERTON)
 	int nn=gridY,inc=1,jump=nn,lot=3*gridZ;
 
+	IGNORE_WARNING(-Wstrict-aliasing);
 	cfft99_((double *)(slices_tr),work,trigsY,ifaxY,&inc,&jump,&nn,&lot,&isign);
+	STOP_IGNORE;
 #endif
 }
 
@@ -367,7 +378,9 @@ void fftZ(const int isign)
 #elif defined(FFT_TEMPERTON)
 	int nn=gridZ,inc=1,jump=nn,lot=boxY,Xcomp;
 
+	IGNORE_WARNING(-Wstrict-aliasing);
 	for (Xcomp=0;Xcomp<3;Xcomp++) cfft99_((double *)(slices+gridYZ*Xcomp),work,trigsZ,ifaxZ,&inc,&jump,&nn,&lot,&isign);
+	STOP_IGNORE;
 #endif
 }
 
@@ -382,7 +395,9 @@ static void fftX_Dm(const size_t lengthZ ONLY_FOR_TEMPERTON)
 	int nn=gridX,inc=1,jump=nn,lot=D2sizeY,isign=FFT_FORWARD;
 	size_t z;
 
+	IGNORE_WARNING(-Wstrict-aliasing);
 	for (z=0;z<lengthZ;z++) cfft99_((double *)(D2matrix+z*gridX*D2sizeY),work,trigsX,ifaxX,&inc,&jump,&nn,&lot,&isign);
+	STOP_IGNORE;
 #endif
 }
 
@@ -396,7 +411,9 @@ static void fftY_Dm(void)
 #elif defined(FFT_TEMPERTON)
 	int nn=gridY,inc=1,jump=nn,lot=gridZ,isign=FFT_FORWARD;
 
+	IGNORE_WARNING(-Wstrict-aliasing);
 	cfft99_((double *)slice_tr,work,trigsY,ifaxY,&inc,&jump,&nn,&lot,&isign);
+	STOP_IGNORE;
 #endif
 }
 
@@ -410,7 +427,9 @@ static void fftZ_Dm(void)
 #elif defined(FFT_TEMPERTON)
 	int nn=gridZ,inc=1,jump=nn,lot=gridY,isign=FFT_FORWARD;
 
+	IGNORE_WARNING(-Wstrict-aliasing);
 	cfft99_((double *)slice,work,trigsZ,ifaxZ,&inc,&jump,&nn,&lot,&isign);
+	STOP_IGNORE;
 #endif
 }
 
@@ -531,6 +550,9 @@ static void fftInitAfterD(void)
  * completely separate code is used for OpenCL and FFTW3, because even precise-timing output is significantly different.
  * In particular, FFTW3 uses separate plans for forward and backward, while clFFT (by Apple or AMD) uses one plan for
  * both directions.
+ *
+ * clFft access the OpenCL buffers directly, so they are not anyhow affected by the definition of complex numbers in the
+ * main part of the code (although, it is consistent with it)
  */
 {
 #ifdef OPENCL
@@ -777,7 +799,7 @@ void InitDmatrix(void)
 	CREATE_CL_BUFFER(bufslices,CL_MEM_READ_WRITE,gridYZ*3*sizeof(doublecomplex),NULL);
 	CREATE_CL_BUFFER(bufslices_tr,CL_MEM_READ_WRITE,gridYZ*3*sizeof(doublecomplex),NULL);
 	/* The following are constant device buffers which are initialized with host data. But bufDmatrix is initialized in
-	 * the end of this function (to be compatible with prognosis. And bufcc_sqrt is initialized in InitCC, since it may
+	 * the end of this function (to be compatible with prognosis). And bufcc_sqrt is initialized in InitCC, since it may
 	 * change for every run of the iterative solver.
 	 */
 	CREATE_CL_BUFFER(bufcc_sqrt,CL_MEM_READ_ONLY,sizeof(cc_sqrt),NULL);
 
@@ -21,12 +21,14 @@ subroutine propaespacelibreintadda(Rij,k0a,arretecube,relreq,
       double precision k0a,arretecubem
       double precision x,y,z,arretecube,k0,xx0,yy0,zz0
       double precision Rij(3),result(12)
+c     The structure of the result is the following:
+c     Re(G11),Re(G12),Re(G13),Re(G22),Re(G23),Re(G33),Im(G11),...,Im(G33)
 
 c     Variables needs for the integration
       integer  KEY, N, NF, NDIM, MINCLS, MAXCLS, IFAIL, NEVAL, NW
       parameter (nw=4000000,ndim=3,nf=12)
       double precision A(NDIM), B(NDIM), WRKSTR(NW)
-      double precision  ABSEST(NF), FINEST(NF), ABSREQ, RELREQ,err
+      double precision  ABSEST(NF), ABSREQ, RELREQ,err
 
       double precision Id(3,3),Rab,Rvect(3)
 
@@ -68,21 +70,16 @@ subroutine propaespacelibreintadda(Rij,k0a,arretecube,relreq,
       endif
 
       call  DCUHRE(NDIM,NF,A,B, MINCLS, MAXCLS, fonctionigtadda,
-     $      ABSREQ,RELREQ,KEY,NW,0,finest,ABSEST,NEVAL,IFAIL, WRKSTR) 
+     $      ABSREQ,RELREQ,KEY,NW,0,result,ABSEST,NEVAL,IFAIL, WRKSTR) 
 
       do N = 1,NF
-         FINEST(N)=FINEST(N)/arretecube/arretecube/arretecube
+         result(N)=result(N)/arretecube/arretecube/arretecube
       enddo
 
       if (ifail.ne.0) then
          write(*,*) 'IFAIL in IGT routine',IFAIL
       endif
 
-      do i = 1,6
-        result(2*i-1)=finest(i)
-        result(2*i)=finest(i+6)
-      enddo
-    
       end
 c*************************************************************
       subroutine fonctionigtadda(ndim,zz,nfun,f)
 
@@ -1,6 +1,6 @@
 /* File: function.h
  * $Date::                            $
- * Descr: function attributes
+ * Descr: function attributes and compiler pragmas
  *
  * Copyright (C) 2006,2008,2010-2011,2013 ADDA contributors
  * This file is part of ADDA.
@@ -17,7 +17,6 @@
 #ifndef __function_h
 #define __function_h
 
-
 // attribute options for GCC compilers (Intel compiler may also recognize them)
 #ifdef __GNUC__
 	// sets a macro for testing GCC version (copied from _mingw.h)
@@ -26,6 +25,16 @@
 			(__GNUC__ > (major) || (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))
 #	else
 #		define GCC_PREREQ(major, minor)  0
+#	endif
+	// pragmas to ignore warnings
+#	if GCC_PREREQ(4,6)
+#		define DO_PRAGMA(x) _Pragma (#x)
+#		define IGNORE_WARNING(x) DO_PRAGMA(GCC diagnostic ignored #x)
+#		// assume that push is not used anywhere
+#		define STOP_IGNORE       _Pragma ("GCC diagnostic pop")
+#	else
+#		define IGNORE_WARNING(x)
+#		define STOP_IGNORE
 #	endif
 	// The following chooses between __printf__ and __gnu_printf__ attributes
 #	if GCC_PREREQ(4,4)
@@ -49,11 +58,19 @@
 #	define ATT_NORETURN __attribute__ ((__noreturn__))
 #	define ATT_UNUSED   __attribute__ ((__unused__))
 #else
+#	define IGNORE_WARNING(x)
+#	define STOP_IGNORE
 #	define ATT_PRINTF(a,b)
 #	define ATT_PURE
 #	define ATT_MALLOC
 #	define ATT_NORETURN
 #	define ATT_UNUSED
 #endif
 
+#ifdef __ICC
+#	define LARGE_LOOP _Pragma ("loop_count (10000)")
+#else
+#	define LARGE_LOOP
+#endif
+
 #endif // __function_h
@@ -138,8 +138,18 @@ static inline __m128d accImExp_pd(const double x)
 
 static inline doublecomplex accImExp(const double x)
 {
+	/* Here and further in the SSE3 part it is assumed that doublecomplex is equivalent to two doubles (that is
+	 * specified by the C99 standard). Explicit pointer casts have been put in place, and pragmas to ignore remaining
+	 * warnings from strict aliasing.
+	 *
+	 * !!! TODO: SSE3 code is a nice hack. But it should be considered carefully - is it worth it? In particular, it
+	 * seems that only parts of it are really beneficial (like tabulated evaluation of imaginary exponents), and those
+	 * can be incorporated into the main code (using standard C99 only).
+	 */
 	doublecomplex c;
-	_mm_store_pd(&c,accImExp_pd(x));
+	IGNORE_WARNING(-Wstrict-aliasing);
+	_mm_store_pd((double *)(&c),accImExp_pd(x));
+	STOP_IGNORE;
 	return c;
 }
 
@@ -155,26 +165,30 @@ static void CalcInterTerm_core(const double kr,const double kr2,const double inv
 	const __m128d v1 = _mm_set_pd(kr,t3);
 	const __m128d v2 = _mm_set_pd(t2,t1);
 	__m128d qff,im_re;
-	_mm_store_pd(expval,sc);
+	IGNORE_WARNING(-Wstrict-aliasing);
+	_mm_store_pd((double *)expval,sc);
+	STOP_IGNORE;
 
 #undef INTERACT_MUL
 #define INTERACT_DIAG(ind) { \
 	qff = _mm_set1_pd(qmunu[ind]); \
 	im_re = _mm_add_pd(v1,_mm_mul_pd(v2,qff)); \
 	im_re = cmul(sc,im_re); \
-	_mm_store_pd(result+ind,im_re); }
+	_mm_store_pd((double *)(result+ind),im_re); }
 #define INTERACT_NONDIAG(ind) { \
 	qff = _mm_set1_pd(qmunu[ind]); \
 	im_re = _mm_mul_pd(v2,qff); \
 	im_re = cmul(sc,im_re); \
-	_mm_store_pd(result+ind,im_re); }
+	_mm_store_pd((double *)(result+ind),im_re); }
 
+	IGNORE_WARNING(-Wstrict-aliasing);
 	INTERACT_DIAG(0);    // xx
 	INTERACT_NONDIAG(1); // xy
 	INTERACT_NONDIAG(2); // xz
 	INTERACT_DIAG(3);    // yy
 	INTERACT_NONDIAG(4); // yz
 	INTERACT_DIAG(5);    // zz
+	STOP_IGNORE;
 
 #undef INTERACT_DIAG
 #undef INTERACT_NONDIAG
@@ -716,12 +730,18 @@ void CalcInterTerm_igt(const int i,const int j,const int k,doublecomplex result[
 	double qvec[3],qmunu[6]; // unit directional vector {qx,qy,qz} and its outer-product {qxx,qxy,qxz,qyy,qyz,qzz}
 	double rn,invrn,invr3,kr,kr2; // |R/d|, 1/|R/d|, |R|^-3, kR, (kR)^2
 	doublecomplex expval; // exp(ikR)/|R|^3
-	double rtemp[3];
+	double rtemp[3],tmp[12];
+	int comp;
 
 	CalcInterParams1(i,j,k,qvec,&rn);
 	if (igt_lim==UNDEF || rn<=igt_lim) {
 		vMultScal(gridspace,qvec,rtemp);
-		propaespacelibreintadda_(rtemp,&WaveNum,&gridspace,&igt_eps,(double *)result);
+		/* passing complex vectors from Fortran to c is not necessarily portable (at least requires extra effort in
+		 * the Fortran code. So we do it through double. This is not bad for performance, since double is anyway used
+		 * internally for integration in this Fortran routine.
+		 */
+		propaespacelibreintadda_(rtemp,&WaveNum,&gridspace,&igt_eps,tmp);
+		for (comp=0;comp<6;comp++) result[comp] = tmp[comp] + I*tmp[comp+6];
 	}
 	else {
 		// The following is equivalent to CalcInterTerm_poi, except for the 1st part of initialization performed above
@@ -830,7 +850,7 @@ void InitInteraction(void)
 #ifndef NO_FORTRAN
 		case G_IGT: CalcInterTerm = &CalcInterTerm_igt; break;
 #endif
-		default: LogError(ONE_POS, "Invalid interaction term calculation method: %d",IntRelation);
+		default: LogError(ONE_POS, "Invalid interaction term calculation method: %d",(int)IntRelation);
 			// no break
 	}
 	// read tables if needed
 
@@ -92,7 +92,7 @@ struct iter_params_struct {
 	int vec_N;        // number of additional vectors to describe the state
 	void (*func)(const enum phase); // pointer to implementation of the iterative solver
 };
-static doublecomplex dumb; // dumb variable, used in workaround for issue 146
+static doublecomplex dumb ATT_UNUSED; // dumb variable, used in workaround for issue 146
 
 #define ITER_FUNC(name) static void name(const enum phase ph)
 
@@ -340,6 +340,10 @@ ITER_FUNC(BCGS2)
  * so we use l=2 here. In many cases one iteration of this method is similar to two iterations of BiCGStab, but overall
  * convergence is slightly better.
  * Breakdown tests were made to coincide with that for BiCGStab for l=1.
+ *
+ * !!! This iterative solver produces segmentation fault when compiled with icc 11.1. Probably that is related to
+ * issue 146. But we leave it be (assume that this is a compiler bug). Even if someone uses this compiler, he can
+ * live fine without this iterative solver.
  */
 {
 #define LL 2 // potentially the method will also work for l=1 (but memory allocation and freeing need to be adjusted)
Original file line number	Diff line number	Diff line change
`@@ -154,9 +154,8 @@ static MPI_Datatype MPIVarType(var_type type,bool reduce,int *mult)`
`154`	`154`	`* directly used. In this case we emulate more complex datatypes through multiplication of double, and additional`
`155`	`155`	`* variable 'mult' is returned to account for this factor.`
`156`	`156`	`*`
`157`		`- * Reduction of complex numbers is emulated if not supported; the emulation is not perfectly portable - depends on a`
`158`		`- * particular implementation of complex numbers. The good thing is that it is only used for old (less modern) MPI`
`159`		`- * implementations.`
	`157`	`+ * Reduction of complex numbers is emulated if not supported; C99 implies that this emulation is portable. Anyway, it`
	`158`	`+ * is only used for old (less modern) MPI implementations.`
`160`	`159`	`*/`
`161`	`160`	`{`
`162`	`161`	`if (reduce) *mult=1; // default value when direct correspondence is possible`