HPC Magazine February 2014 - Discovering OpenACC 2.0 - part II
Listing 4: Corrected packing function using an atomic capture.
/* Number of elements in the input array handed to pack(). */
#define NB 1000000

/*
 * Copy the non-zero elements of X into Y.
 *
 * Each non-zero X[i] reserves an output slot by atomically reading and
 * incrementing the shared counter n; the value is stored in Y only when
 * the reserved slot index is below 1000, so Y is never written out of
 * bounds.
 *
 * X: input array of NB floats (read only).
 * Y: output array with room for 1000 floats.
 *
 * Returns the total number of non-zero elements found in X.  This can be
 * larger than 1000, in which case only 1000 values were stored; callers
 * that walk Y should not read past min(return value, 1000) entries.
 *
 * Note: when the loop is executed in parallel, slots are handed out in the
 * order the atomic updates happen, so the order of values in Y is not
 * necessarily the order they appear in X.
 */
int pack(const float X[NB], float Y[1000])
{
    int n = 0;  /* shared slot counter, copied to and from the device */

#pragma acc kernels copy(n) copyin(X[0:NB]) copyout(Y[0:1000])
    {
#pragma acc loop independent
        for (int i = 0; i < NB; ++i) {
            if (X[i] != 0.0f) {
                int oldn;

                /* Read the current slot and bump the counter as one
                 * indivisible step so two iterations never share a slot. */
#pragma acc atomic capture
                {
                    oldn = n;
                    n = n + 1;
                }

                /* Keep counting past the capacity, but stop storing. */
                if (oldn < 1000) {
                    Y[oldn] = X[i];
                }
            }
        }
    }

    return n;
}