HPC Magazine February 2014 - Discovering OpenACC 2.0 - part II
Listing 10: A possible "no-tile" translation of the code in Listing 9.
#pragma acc loop gang collapse(2) for (ii=0 ; ii<1024; ii+=16) for (jj=0 ; ji<512; jj+=8) #pragma acc loop worker num_workers(64) collapse(2) for (i=ii ; i<ii+16; i++) for (j=jj ; j<jj+8; j++) { X[i][j] = c11*Y[i-1][j-1] + c12*Y[i-1][j-1] + c13*Y[i-1][j-1] + c21*Y[i][j-1] + c22*Y[i][j] + c23*Y[i][j-1] + c31*Y[i+1][j-1] + c32*Y[i-1][j-1] + c33*Y[i+1][j-1] ; }