adda-team
diff --git a/‎src/GenerateB.c‎
Lines changed: 1 addition & 1 deletion b/‎src/GenerateB.c‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/calculator.c‎
Lines changed: 8 additions & 3 deletions b/‎src/calculator.c‎
Lines changed: 8 additions & 3 deletions
diff --git a/‎src/comm.c‎
Lines changed: 4 additions & 4 deletions b/‎src/comm.c‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎src/comm.h‎
Lines changed: 1 addition & 1 deletion b/‎src/comm.h‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/const.h‎
Lines changed: 1 addition & 1 deletion b/‎src/const.h‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/crosssec.c‎
Lines changed: 9 additions & 2 deletions b/‎src/crosssec.c‎
Lines changed: 9 additions & 2 deletions
@@ -43,7 +43,7 @@ extern const int beam_Npars;
 extern const double beam_pars[];
 extern const char *beam_fnameY;
 extern const char *beam_fnameX;
-extern opt_index opt_beam;
+extern const opt_index opt_beam;
 
 // used in crosssec.c
 double beam_center_0[3]; // position of the beam center in laboratory reference frame
 
@@ -43,7 +43,7 @@ extern const angle_set beta_int,gamma_int,theta_int,phi_int;
 extern const int avg_inc_pol;
 extern const char *alldir_parms,*scat_grid_parms;
 // defined and initialized in timing.c
-extern TIME_TYPE Timing_Init;
+extern TIME_TYPE Timing_Init,Timing_Init_Int;
 #ifdef OPENCL
 extern TIME_TYPE Timing_OCL_Init;
 #endif
@@ -445,14 +445,17 @@ static void AllocateEverything(void)
 	/* estimate of the memory (only the fastest scaling part):
 	 * MatVec - (288+384nprocs/boxX [+192/nprocs])*Ndip
 	 *          more exactly: gridX*gridY*gridZ*(36+48nprocs/boxX [+24/nprocs]) value in [] is only for parallel mode.
-	 *          For OpenCL mode all MatVec part is allocated on GPU instead of main (CPU) memory
+	 * For surf additionally: gridX*gridY*gridZ*(48+48nprocs/boxX)
+	 * 			+ for Sommerfeld table: 64*boxZ*(boxX*boxY-(MIN(boxX,boxY))^2)
+	 *    For OpenCL mode all MatVec part is allocated on GPU instead of main (CPU) memory
 	 * others - nvoid_Ndip*{271(CGNR,BiCG), 367(CSYM,QMR2), 415(BiCGStab,QMR), or 463(BCGS2)}
 	 *          + additional 8*nvoid_Ndip for OpenCL mode and CGNR or Bi-CGSTAB
 	 * PARALLEL: above is total; division over processors of MatVec is uniform, others - according to local_nvoid_Ndip
 	 *
 	 * Sparse mode - each processor needs (265--457, depending on iterative solver)*local_nvoid_Ndip + 60*nvoid_Ndip
 	 *               and division is uniform, i.e. local_nvoid_Ndip = nvoid_Ndip/nprocs
-	 *               Part of the memory is currently not distributed among processors - see issue 160.
+	 *               Sommerfeld table - same as above, but it is not divided among processors.
+	 *               Part of the memory is currently not distributed among processors - see issues 160,175.
 	 */
 	MAXIMIZE(memPeak,memory);
 	double memSum=AccumulateMax(memPeak,&memmax);
@@ -592,7 +595,9 @@ void Calculator (void)
 	else dtheta_deg=dtheta_rad=block_theta=0;
 	finish_avg=false;
 	// Do preliminary setup for MatVec
+	TIME_TYPE startInitInt=GET_TIME();
 	InitInteraction();
+	Timing_Init_Int=GET_TIME()-startInitInt;
 #ifndef SPARSE
 	// initialize D matrix (for matrix-vector multiplication)
 	D("InitDmatrix started");
 
@@ -676,10 +676,10 @@ void BlockTranspose(doublecomplex * restrict X UOIP,TIME_TYPE *timing UOIP)
 
 //======================================================================================================================
 
-void BlockTranspose_Dm(doublecomplex * restrict X UOIP,const size_t lengthY UOIP,const size_t lengthZ UOIP)
-/* do the data-transposition, i.e. exchange, between fftX and fftY&fftZ; specialized for D matrix. It can be updated to
- * accept timing argument for generality. But, since this is a specialized function, we keep the timing variable
- * hard-wired in the code.
+void BlockTranspose_DRm(doublecomplex * restrict X UOIP,const size_t lengthY UOIP,const size_t lengthZ UOIP)
+/* do the data-transposition, i.e. exchange, between fftX and fftY&fftZ; specialized for D or R matrix. It can be
+ * updated to accept timing argument for generality. But, since this is a specialized function, we keep the timing
+ * variable hard-wired in the code.
  */
 {
 #ifdef ADDA_MPI
 
@@ -45,7 +45,7 @@ void ReadField(const char * restrict fname,doublecomplex *restrict field);
 
 #ifndef SPARSE
 void BlockTranspose(doublecomplex * restrict X,TIME_TYPE *timing);
-void BlockTranspose_Dm(doublecomplex * restrict X,size_t lengthY,size_t lengthZ);
+void BlockTranspose_DRm(doublecomplex * restrict X,size_t lengthY,size_t lengthZ);
 // used by granule generator
 void SetGranulComm(double z0,double z1,double gdZ,int gZ,size_t gXY,size_t buf_size,int *lz0,int *lz1,int sm_gr);
 void CollectDomainGranul(unsigned char * restrict dom,size_t gXY,int lz0,int locgZ,TIME_TYPE *timing);
 
@@ -18,7 +18,7 @@
 #define __const_h
 
 // version number (string)
-#define ADDA_VERSION "1.3a2"
+#define ADDA_VERSION "1.3b1"
 
 /* ADDA uses certain C99 extensions, which are widely supported by GNU and Intel compilers. However, they may be not
  * completely supported by e.g. Microsoft Visual Studio compiler. Therefore, we check the version of the standard here
 
@@ -631,6 +631,11 @@ static void CalcFieldSurf(doublecomplex ebuff[static restrict 3], // where to wr
 	if (above) { // simple reflection
 		vCopy(nF,nN);
 		nN[2]*=-1;
+		// no scattering at exactly 90 degrees for non-trivial surface (to avoid randomness for this case)
+		if (fabs(nN[2])<ROUND_ERR && cabs(msub-1)>ROUND_ERR) {
+			cvInit(ebuff);
+			return;
+		}
 	}
 	else { // transmission
 		if (msubInf) { // no transmission for perfectly reflecting substrate => zero result
@@ -738,11 +743,13 @@ static void CalcFieldSurf(doublecomplex ebuff[static restrict 3], // where to wr
 		kt=NAN; // redundant to remove warnings below
 	}
 	else {
-		if (cabs(msub-1)<ROUND_ERR && fabs(ki)<ROUND_ERR) kt=ki; // special case to avoid randomness due to round-off errors
+		// special case to avoid randomness due to round-off errors
+		if (cabs(msub-1)<ROUND_ERR && fabs(ki)<ROUND_ERR) kt=ki;
 		else kt=cSqrtCut(msub*msub - (nN[0]*nN[0]+nN[1]*nN[1]));
 		if (above) {
 			/* here we test only the exact zero, since for other cases (when msub=1, and very small values of ki,kt) the
-			 * above assignment kt=ki guarantees correct results through standard functions
+			 * above assignment kt=ki guarantees correct results through standard functions. Also the case of
+			 * 90deg-scattering (for msub!=1) is taken care of in the beginning of this function.
 			 */
 			if (ki==0 && kt==0) cs=cp=0;
 			else {