Skip to content

[ICE] GIMPLE full redundancy elimination pass #11

@nmnobre

Description

@nmnobre

When compiled with anything above -O0, the following code:

#include "../common/common.h"
#include "../common/sync.h"

#include <math.h>
#include <stdio.h>
#include <string.h>
#define ceild(n, d) ceil(((double)(n)) / ((double)(d)))
#define floord(n, d) floor(((double)(n)) / ((double)(d)))
#define max(x, y) ((x) > (y) ? (x) : (y))
#define min(x, y) ((x) < (y) ? (x) : (y))

#define N (10000 + 1)
#define I_MAX (10000)
#define Bi 125
#define Bj 113

#define boundary_left 10
#define boundary_right 0
#define bulk 5

void relaxGS(double *P)
{
	double stream[N] __attribute__((stream)); // a stream per matrix element
	
	double viewR, viewW;

	for (int j = 0; j < N; ++j)
	{
		#pragma omp task output (stream[j] << viewW)
		{
			viewW = P[j];
		}
	}

	#pragma scop
	for (int J = 0; J <= floord(I_MAX - 1 + N - 2, Bj); ++J)
	{
		for (int I = max(0, ceild(Bj * J - (Bi - 1) - (N - 2), Bi)); I <= min(floord(I_MAX - 1, Bi), floord(Bj * J + (Bj - 1) - 1, Bi)); ++I)
		{
			int lbs = max(max(1, (J * Bj - I * Bi) - (Bi - 1)), J * Bj - (I_MAX - 1));
			int ubs = min(N - 2, (J * Bj - I * Bi) + (Bj - 1));

			double stream_refL[1] __attribute__((stream_ref));
			memcpy(stream_refL, ((void**)stream) + lbs - 1, 1 * sizeof(void*));

			double stream_refC[ubs - lbs + 1] __attribute__((stream_ref));
			memcpy(stream_refC, ((void**)stream) + lbs, (ubs - lbs + 1) * sizeof(void*));

			double stream_refR[1] __attribute__((stream_ref));
			memcpy(stream_refR, ((void**)stream) + ubs + 1, 1 * sizeof(void*));
			
			double viewRC[ubs - lbs + 1][1], viewWC[ubs - lbs + 1][1];
			double viewRL[1][1];
			double viewRR[1][1];

			#pragma omp task \
			input(stream_refL >> viewRL[1][0], \
				  stream_refC >> viewRC[ubs - lbs + 1][1], \
				  stream_refR >> viewRR[1][0]) \
			output(stream_refC << viewWC[ubs - lbs + 1][1])
			{
				int counterRL[1];
				memset(counterRL, 0, sizeof(counterRL));

				int counterRC[ubs - lbs + 1];
				memset(counterRC, 0, sizeof(counterRC));

				int counterRR[1];
				memset(counterRR, 0, sizeof(counterRR));

				int counterWC[ubs - lbs + 1];
				memset(counterWC, 0, sizeof(counterWC));

				int lbr = max(Bi * I, Bj * J - (N - 2));
				int ubw = min(min(Bj * J + (Bj - 1) - 1 + 1, I_MAX - 1 + 1), Bi * I + (Bi - 1) + 1);

				double data[(ubs + 1) - (lbs - 1) + 1][ubw - lbr + 1];

				int lbj = max(1 + Bi * I, Bj * J);
				int ubj = min(I_MAX - 1 + N - 2, Bj * J + (Bj - 1));

				// unloading streams
				for (int j = lbj; j <= ubj; j++)
					for (int i = max(Bi * I, j - (N - 2)); i <= min(min(j - 1, I_MAX - 1), Bi * I + (Bi - 1)); i++)
					{
						if (j - i - 1 == lbs - 1 && (j - i == 1 || floord(j, Bj) - floord(j - 1, Bj)))
							data[j - i  - 1 - (lbs - 1)][i + 1 - lbr] = viewRL[j - i - 1 - (lbs - 1)][counterRL[j - i - 1 - (lbs - 1)]]; // peek
						if (floord(i, Bi) - floord(i - 1, Bi) || floord(j, Bj) - floord(j - 1, Bj))
							data[j - i - (lbs - 1)][i - lbr] = viewRC[j - i - lbs][counterRC[j - i - lbs]++]; // read
						if (j - i + 1 == ubs + 1 && (j - i == N - 2 || floord(i, Bi) - floord(i - 1, Bi)))
							data[j - i  + 1 - (lbs - 1)][i - lbr] = viewRR[j - i + 1 - (ubs + 1)][counterRR[j - i + 1 - (ubs + 1)]]; //peek
					}		
				
				// computational body
				for (int j = lbj; j <= ubj; j++)
					for (int i = max(Bi * I, j - (N - 2)); i <= min(min(j - 1, I_MAX - 1), Bi * I + (Bi - 1)); i++)
						data[j - i - (lbs - 1)][i + 1 - lbr] = (data[j - i - 1 - (lbs - 1)][i + 1 - lbr] + data[j - i + 1 - (lbs - 1)][i - lbr]) / 2;

				// loading streams
				for (int j = lbj; j <= ubj; j++)
					for (int i = max(Bi * I, j - (N - 2)); i <= min(min(j - 1, I_MAX - 1), Bi * I + (Bi - 1)); i++)
						if (i == I_MAX - 1 || floord(i + 1, Bi) - floord(i, Bi) || floord(j + 1, Bj) - floord(j, Bj))
							viewWC[j - i - lbs][counterWC[j - i - lbs]++] = data[j - i - (lbs - 1)][i + 1 - lbr]; // write
			}
		}
	}
	#pragma endscop

	for (int j = 0; j < N; ++j)
	{
		#pragma omp task input (stream[j] >> viewR)
		{
			P[j]=viewR;
		}
	}

	#pragma omp taskwait // to prevent the control program to finish and thus to execute unmatched producers
}

double *buildMatrix()
{
	double *P = malloc(sizeof(double[N]));

	P[0] = boundary_left;
	for (int i = 1; i < N - 1; ++i)
		P[i] = bulk;
	P[N - 1] = boundary_right;

	return P;
}

void printMatrix(double *P)
{
	for (int i = 0; i < N; i++)
		printf("%.15f ", P[i]);

	printf("\n");
}

int main(int argc, char const *argv[])
{
	double *P = buildMatrix();

	struct timeval start;
	struct timeval end;
	gettimeofday(&start, NULL);

	relaxGS(P);

	gettimeofday(&end, NULL);
	printf("%.5f\n", tdiff(&end, &start));

	free(P);

	return 0;
}

produces the following error:

during GIMPLE pass: fre
GS_1D_OS_ATT.c: In function ‘relaxGS._wstream_df_workfn.1’:
GS_1D_OS_ATT.c:156:1: internal compiler error: in eliminate_stmt, at tree-ssa-sccvn.c:5147
  156 | }
      | ^
0xbeee6e eliminate_dom_walker::eliminate_stmt(basic_block_def*, gimple_stmt_iterator*)
	../../gcc/tree-ssa-sccvn.c:5147
0xbef157 eliminate_dom_walker::before_dom_children(basic_block_def*)
	../../gcc/tree-ssa-sccvn.c:5523
0x11988a4 dom_walker::walk(basic_block_def*)
	../../gcc/domwalk.c:312
0xbe73aa eliminate_with_rpo_vn(bitmap_head*)
	../../gcc/tree-ssa-sccvn.c:5701
0xbf4ecd do_rpo_vn
	../../gcc/tree-ssa-sccvn.c:6842
0xbf5f3c execute
	../../gcc/tree-ssa-sccvn.c:6913

When compiled with -O0, compilation succeeds but the program seems to crash with a multitude of errors depending on the number of execution threads.

Current workaround: use commit 5b68e14 or older, as these use GCC5.2.0.

Metadata

Metadata

Assignees

Labels

bugSomething isn't workingcompilerCompiler related issue

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions