Skip to content

Commit 653fbf5

Browse files
committed
Finished the changes towards complex datatypes from C99. Fixes issue 70.
Added defines to simplify adding pragmas to ignore warnings in gcc and loop_count in icc. The ignore pragmas are used in several places to suppress warnings about conversion from doublecomplex* to complex*: - calls to Temperton FFT routines. - copying data in SSE3 optimizations (here explicit pointer casts were also added). Added comments in several places concerning the portability of such conversions. icc pragmas are now visible only when compiling with icc. Inclusion of clAmdFft.h is now enclosed in pragmas to ignore existent warning. Calling of IGT Fortran routines was slightly changed, conversion from double to complex has been moved inside the C file from Fortran routine. tests/2exec/comp2exec was improved by adding a common reference path (to search all reference binaries). Added '-pol lak' to suite.
1 parent 057298c commit 653fbf5

File tree

13 files changed

+143
-74
lines changed

13 files changed

+143
-74
lines changed

src/Makefile

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -329,8 +329,7 @@ ifeq ($(COMPILER),gnu)
329329
COPT1 := -O2
330330
COPT2 := -O3 -ffast-math -funroll-loops
331331
CWARN := -Wall -Wextra -Wcast-qual -Wpointer-arith -Wwrite-strings -Wstrict-prototypes \
332-
-Wstrict-aliasing=1 -Wshadow -Wcast-align -Wnested-externs -Wcomment -Wno-unknown-pragmas \
333-
-Wno-overlength-strings
332+
-Wstrict-aliasing=1 -Wshadow -Wcast-align -Wnested-externs -Wcomment -Wno-overlength-strings
334333
# gcc versions prior to 4.7.2 are affected by bug http://gcc.gnu.org/bugzilla/show_bug.cgi?id=7263 , which causes
335334
# -pedantic flag to generate warnings on every occurrence of I (complex i)
336335
GCC_GTEQ_472 := $(shell expr `gcc -dumpversion | sed -e 's/\.\([0-9][0-9]\)/\1/g' -e 's/\.\([0-9]\)/0\1/g' \

src/comm.c

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -154,9 +154,8 @@ static MPI_Datatype MPIVarType(var_type type,bool reduce,int *mult)
154154
* directly used. In this case we emulate more complex datatypes through multiplication of double, and additional
155155
* variable 'mult' is returned to account for this factor.
156156
*
157-
* Reduction of complex numbers is emulated if not supported; the emulation is not perfectly portable - depends on a
158-
* particular implementation of complex numbers. The good thing is that it is only used for old (less modern) MPI
159-
* implementations.
157+
* Reduction of complex numbers is emulated if not supported; C99 implies that this emulation is portable. Anyway, it
158+
* is only used for old (less modern) MPI implementations.
160159
*/
161160
{
162161
if (reduce) *mult=1; // default value when direct correspondence is possible

src/fft.c

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,9 @@
3434
#include <string.h>
3535

3636
#ifdef CLFFT_AMD
37+
IGNORE_WARNING(-Wstrict-prototypes) // no way to change the library header
3738
# include <clAmdFft.h> //external library from AMD
39+
STOP_IGNORE
3840
// Defines precision of clAmdFft transforms. !!! CLFFT_DOUBLE_FAST should be tested when becomes operational
3941
# define PRECISION_CLFFT CLFFT_DOUBLE
4042
#elif defined(CLFFT_APPLE)
@@ -318,8 +320,15 @@ void fftX(const int isign)
318320
#elif defined(FFT_TEMPERTON)
319321
int nn=gridX,inc=1,jump=nn,lot=boxY;
320322
size_t z;
321-
323+
/* Calls to Temperton FFT cause warnings for translation from doublecomplex to double pointers. However, such a cast
324+
* is perfectly valid in C99. So we set pragmas to remove these warnings.
325+
*
326+
* !!! TODO: Another (ultimate) solution is to remove this routine altogether, since FFTW is perfect in all
327+
* respects. This is also reasonable considering future switch to tgmath.h
328+
*/
329+
IGNORE_WARNING(-Wstrict-aliasing);
322330
for (z=0;z<3*local_Nz;z++) cfft99_((double *)(Xmatrix+z*gridX*smallY),work,trigsX,ifaxX,&inc,&jump,&nn,&lot,&isign);
331+
STOP_IGNORE;
323332
#endif
324333
}
325334

@@ -343,7 +352,9 @@ void fftY(const int isign)
343352
#elif defined(FFT_TEMPERTON)
344353
int nn=gridY,inc=1,jump=nn,lot=3*gridZ;
345354

355+
IGNORE_WARNING(-Wstrict-aliasing);
346356
cfft99_((double *)(slices_tr),work,trigsY,ifaxY,&inc,&jump,&nn,&lot,&isign);
357+
STOP_IGNORE;
347358
#endif
348359
}
349360

@@ -367,7 +378,9 @@ void fftZ(const int isign)
367378
#elif defined(FFT_TEMPERTON)
368379
int nn=gridZ,inc=1,jump=nn,lot=boxY,Xcomp;
369380

381+
IGNORE_WARNING(-Wstrict-aliasing);
370382
for (Xcomp=0;Xcomp<3;Xcomp++) cfft99_((double *)(slices+gridYZ*Xcomp),work,trigsZ,ifaxZ,&inc,&jump,&nn,&lot,&isign);
383+
STOP_IGNORE;
371384
#endif
372385
}
373386

@@ -382,7 +395,9 @@ static void fftX_Dm(const size_t lengthZ ONLY_FOR_TEMPERTON)
382395
int nn=gridX,inc=1,jump=nn,lot=D2sizeY,isign=FFT_FORWARD;
383396
size_t z;
384397

398+
IGNORE_WARNING(-Wstrict-aliasing);
385399
for (z=0;z<lengthZ;z++) cfft99_((double *)(D2matrix+z*gridX*D2sizeY),work,trigsX,ifaxX,&inc,&jump,&nn,&lot,&isign);
400+
STOP_IGNORE;
386401
#endif
387402
}
388403

@@ -396,7 +411,9 @@ static void fftY_Dm(void)
396411
#elif defined(FFT_TEMPERTON)
397412
int nn=gridY,inc=1,jump=nn,lot=gridZ,isign=FFT_FORWARD;
398413

414+
IGNORE_WARNING(-Wstrict-aliasing);
399415
cfft99_((double *)slice_tr,work,trigsY,ifaxY,&inc,&jump,&nn,&lot,&isign);
416+
STOP_IGNORE;
400417
#endif
401418
}
402419

@@ -410,7 +427,9 @@ static void fftZ_Dm(void)
410427
#elif defined(FFT_TEMPERTON)
411428
int nn=gridZ,inc=1,jump=nn,lot=gridY,isign=FFT_FORWARD;
412429

430+
IGNORE_WARNING(-Wstrict-aliasing);
413431
cfft99_((double *)slice,work,trigsZ,ifaxZ,&inc,&jump,&nn,&lot,&isign);
432+
STOP_IGNORE;
414433
#endif
415434
}
416435

@@ -531,6 +550,9 @@ static void fftInitAfterD(void)
531550
* completely separate code is used for OpenCL and FFTW3, because even precise-timing output is significantly different.
532551
* In particular, FFTW3 uses separate plans for forward and backward, while clFFT (by Apple or AMD) uses one plan for
533552
* both directions.
553+
*
554+
* clFft accesses the OpenCL buffers directly, so they are in no way affected by the definition of complex numbers in the
555+
* main part of the code (although, it is consistent with it)
534556
*/
535557
{
536558
#ifdef OPENCL
@@ -777,7 +799,7 @@ void InitDmatrix(void)
777799
CREATE_CL_BUFFER(bufslices,CL_MEM_READ_WRITE,gridYZ*3*sizeof(doublecomplex),NULL);
778800
CREATE_CL_BUFFER(bufslices_tr,CL_MEM_READ_WRITE,gridYZ*3*sizeof(doublecomplex),NULL);
779801
/* The following are constant device buffers which are initialized with host data. But bufDmatrix is initialized in
780-
* the end of this function (to be compatible with prognosis. And bufcc_sqrt is initialized in InitCC, since it may
802+
* the end of this function (to be compatible with prognosis). And bufcc_sqrt is initialized in InitCC, since it may
781803
* change for every run of the iterative solver.
782804
*/
783805
CREATE_CL_BUFFER(bufcc_sqrt,CL_MEM_READ_ONLY,sizeof(cc_sqrt),NULL);

src/fort/propaesplibreintadda.f

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,14 @@ subroutine propaespacelibreintadda(Rij,k0a,arretecube,relreq,
2121
double precision k0a,arretecubem
2222
double precision x,y,z,arretecube,k0,xx0,yy0,zz0
2323
double precision Rij(3),result(12)
24+
c The structure of the result is the following:
25+
c Re(G11),Re(G12),Re(G13),Re(G22),Re(G23),Re(G33),Im(G11),...,Im(G33)
2426

2527
c Variables needs for the integration
2628
integer KEY, N, NF, NDIM, MINCLS, MAXCLS, IFAIL, NEVAL, NW
2729
parameter (nw=4000000,ndim=3,nf=12)
2830
double precision A(NDIM), B(NDIM), WRKSTR(NW)
29-
double precision ABSEST(NF), FINEST(NF), ABSREQ, RELREQ,err
31+
double precision ABSEST(NF), ABSREQ, RELREQ,err
3032

3133
double precision Id(3,3),Rab,Rvect(3)
3234

@@ -68,21 +70,16 @@ subroutine propaespacelibreintadda(Rij,k0a,arretecube,relreq,
6870
endif
6971

7072
call DCUHRE(NDIM,NF,A,B, MINCLS, MAXCLS, fonctionigtadda,
71-
$ ABSREQ,RELREQ,KEY,NW,0,finest,ABSEST,NEVAL,IFAIL, WRKSTR)
73+
$ ABSREQ,RELREQ,KEY,NW,0,result,ABSEST,NEVAL,IFAIL, WRKSTR)
7274

7375
do N = 1,NF
74-
FINEST(N)=FINEST(N)/arretecube/arretecube/arretecube
76+
result(N)=result(N)/arretecube/arretecube/arretecube
7577
enddo
7678

7779
if (ifail.ne.0) then
7880
write(*,*) 'IFAIL in IGT routine',IFAIL
7981
endif
8082

81-
do i = 1,6
82-
result(2*i-1)=finest(i)
83-
result(2*i)=finest(i+6)
84-
enddo
85-
8683
end
8784
c*************************************************************
8885
subroutine fonctionigtadda(ndim,zz,nfun,f)

src/function.h

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/* File: function.h
22
* $Date:: $
3-
* Descr: function attributes
3+
* Descr: function attributes and compiler pragmas
44
*
55
* Copyright (C) 2006,2008,2010-2011,2013 ADDA contributors
66
* This file is part of ADDA.
@@ -17,7 +17,6 @@
1717
#ifndef __function_h
1818
#define __function_h
1919

20-
2120
// attribute options for GCC compilers (Intel compiler may also recognize them)
2221
#ifdef __GNUC__
2322
// sets a macro for testing GCC version (copied from _mingw.h)
@@ -26,6 +25,16 @@
2625
(__GNUC__ > (major) || (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))
2726
# else
2827
# define GCC_PREREQ(major, minor) 0
28+
# endif
29+
// pragmas to ignore warnings
30+
# if GCC_PREREQ(4,6)
31+
# define DO_PRAGMA(x) _Pragma (#x)
32+
# define IGNORE_WARNING(x) DO_PRAGMA(GCC diagnostic ignored #x)
33+
# // assume that push is not used anywhere
34+
# define STOP_IGNORE _Pragma ("GCC diagnostic pop")
35+
# else
36+
# define IGNORE_WARNING(x)
37+
# define STOP_IGNORE
2938
# endif
3039
// The following chooses between __printf__ and __gnu_printf__ attributes
3140
# if GCC_PREREQ(4,4)
@@ -49,11 +58,19 @@
4958
# define ATT_NORETURN __attribute__ ((__noreturn__))
5059
# define ATT_UNUSED __attribute__ ((__unused__))
5160
#else
61+
# define IGNORE_WARNING(x)
62+
# define STOP_IGNORE
5263
# define ATT_PRINTF(a,b)
5364
# define ATT_PURE
5465
# define ATT_MALLOC
5566
# define ATT_NORETURN
5667
# define ATT_UNUSED
5768
#endif
5869

70+
#ifdef __ICC
71+
# define LARGE_LOOP _Pragma ("loop_count (10000)")
72+
#else
73+
# define LARGE_LOOP
74+
#endif
75+
5976
#endif // __function_h

src/interaction.c

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -138,8 +138,18 @@ static inline __m128d accImExp_pd(const double x)
138138

139139
static inline doublecomplex accImExp(const double x)
140140
{
141+
/* Here and further in the SSE3 part it is assumed that doublecomplex is equivalent to two doubles (that is
142+
* specified by the C99 standard). Explicit pointer casts have been put in place, and pragmas to ignore remaining
143+
* warnings from strict aliasing.
144+
*
145+
* !!! TODO: SSE3 code is a nice hack. But it should be considered carefully - is it worth it? In particular, it
146+
* seems that only parts of it are really beneficial (like tabulated evaluation of imaginary exponents), and those
147+
* can be incorporated into the main code (using standard C99 only).
148+
*/
141149
doublecomplex c;
142-
_mm_store_pd(&c,accImExp_pd(x));
150+
IGNORE_WARNING(-Wstrict-aliasing);
151+
_mm_store_pd((double *)(&c),accImExp_pd(x));
152+
STOP_IGNORE;
143153
return c;
144154
}
145155

@@ -155,26 +165,30 @@ static void CalcInterTerm_core(const double kr,const double kr2,const double inv
155165
const __m128d v1 = _mm_set_pd(kr,t3);
156166
const __m128d v2 = _mm_set_pd(t2,t1);
157167
__m128d qff,im_re;
158-
_mm_store_pd(expval,sc);
168+
IGNORE_WARNING(-Wstrict-aliasing);
169+
_mm_store_pd((double *)expval,sc);
170+
STOP_IGNORE;
159171

160172
#undef INTERACT_MUL
161173
#define INTERACT_DIAG(ind) { \
162174
qff = _mm_set1_pd(qmunu[ind]); \
163175
im_re = _mm_add_pd(v1,_mm_mul_pd(v2,qff)); \
164176
im_re = cmul(sc,im_re); \
165-
_mm_store_pd(result+ind,im_re); }
177+
_mm_store_pd((double *)(result+ind),im_re); }
166178
#define INTERACT_NONDIAG(ind) { \
167179
qff = _mm_set1_pd(qmunu[ind]); \
168180
im_re = _mm_mul_pd(v2,qff); \
169181
im_re = cmul(sc,im_re); \
170-
_mm_store_pd(result+ind,im_re); }
182+
_mm_store_pd((double *)(result+ind),im_re); }
171183

184+
IGNORE_WARNING(-Wstrict-aliasing);
172185
INTERACT_DIAG(0); // xx
173186
INTERACT_NONDIAG(1); // xy
174187
INTERACT_NONDIAG(2); // xz
175188
INTERACT_DIAG(3); // yy
176189
INTERACT_NONDIAG(4); // yz
177190
INTERACT_DIAG(5); // zz
191+
STOP_IGNORE;
178192

179193
#undef INTERACT_DIAG
180194
#undef INTERACT_NONDIAG
@@ -716,12 +730,18 @@ void CalcInterTerm_igt(const int i,const int j,const int k,doublecomplex result[
716730
double qvec[3],qmunu[6]; // unit directional vector {qx,qy,qz} and its outer-product {qxx,qxy,qxz,qyy,qyz,qzz}
717731
double rn,invrn,invr3,kr,kr2; // |R/d|, 1/|R/d|, |R|^-3, kR, (kR)^2
718732
doublecomplex expval; // exp(ikR)/|R|^3
719-
double rtemp[3];
733+
double rtemp[3],tmp[12];
734+
int comp;
720735

721736
CalcInterParams1(i,j,k,qvec,&rn);
722737
if (igt_lim==UNDEF || rn<=igt_lim) {
723738
vMultScal(gridspace,qvec,rtemp);
724-
propaespacelibreintadda_(rtemp,&WaveNum,&gridspace,&igt_eps,(double *)result);
739+
/* passing complex vectors from Fortran to c is not necessarily portable (at least requires extra effort in
740+
* the Fortran code). So we do it through double. This is not bad for performance, since double is anyway used
741+
* internally for integration in this Fortran routine.
742+
*/
743+
propaespacelibreintadda_(rtemp,&WaveNum,&gridspace,&igt_eps,tmp);
744+
for (comp=0;comp<6;comp++) result[comp] = tmp[comp] + I*tmp[comp+6];
725745
}
726746
else {
727747
// The following is equivalent to CalcInterTerm_poi, except for the 1st part of initialization performed above
@@ -830,7 +850,7 @@ void InitInteraction(void)
830850
#ifndef NO_FORTRAN
831851
case G_IGT: CalcInterTerm = &CalcInterTerm_igt; break;
832852
#endif
833-
default: LogError(ONE_POS, "Invalid interaction term calculation method: %d",IntRelation);
853+
default: LogError(ONE_POS, "Invalid interaction term calculation method: %d",(int)IntRelation);
834854
// no break
835855
}
836856
// read tables if needed

src/iterative.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ struct iter_params_struct {
9292
int vec_N; // number of additional vectors to describe the state
9393
void (*func)(const enum phase); // pointer to implementation of the iterative solver
9494
};
95-
static doublecomplex dumb; // dumb variable, used in workaround for issue 146
95+
static doublecomplex dumb ATT_UNUSED; // dumb variable, used in workaround for issue 146
9696

9797
#define ITER_FUNC(name) static void name(const enum phase ph)
9898

@@ -340,6 +340,10 @@ ITER_FUNC(BCGS2)
340340
* so we use l=2 here. In many cases one iteration of this method is similar to two iterations of BiCGStab, but overall
341341
* convergence is slightly better.
342342
* Breakdown tests were made to coincide with that for BiCGStab for l=1.
343+
*
344+
* !!! This iterative solver produces segmentation fault when compiled with icc 11.1. Probably that is related to
345+
* issue 146. But we leave it be (assume that this is a compiler bug). Even if someone uses this compiler, he can
346+
* live fine without this iterative solver.
343347
*/
344348
{
345349
#define LL 2 // potentially the method will also work for l=1 (but memory allocation and freeing need to be adjusted)

0 commit comments

Comments
 (0)