HPC Magazine February 2014 - Discovering OpenACC 2.0 - part II

Listing 4: Corrected packing function using an atomic capture.


#define NB 1000000

/*
 * Pack the non-zero elements of X into the front of Y.
 *
 * X  input array of NB values; only read.
 * Y  output array with room for 1000 values. Each non-zero element of X
 *    is stored in its own slot of Y as long as a slot below 1000 is
 *    available; further non-zero elements are counted but not stored.
 *
 * Returns the total number of non-zero elements found in X. The value may
 * be larger than 1000, in which case only 1000 elements were stored and
 * the caller can detect the truncation. Only the first min(result, 1000)
 * entries of Y are written by this function.
 *
 * When the loop is executed in parallel by an OpenACC compiler, the slot
 * assigned to a given element depends on execution order, so the order of
 * the values in Y is not guaranteed to follow their order in X.
 */
int pack( const float X[NB] , float Y[1000] )
{
  int i ;
  int n = 0 ;   /* running count of non-zero elements, shared by all iterations */
  #pragma acc kernels copy(n) copyin(X[0:NB]) copyout(Y[0:1000])
  {
    #pragma acc loop independent
    for (i=0;i<NB;++i) {
      if ( X[i] != 0.0f ) {
        int oldn ;
        /* Read n and increment it as one atomic capture so that every
           non-zero element obtains a distinct slot index in oldn. */
        #pragma acc atomic capture
        {
        oldn = n ;
        n = n + 1 ;
        }
        /* Keep counting past the capacity of Y, but never write beyond it. */
        if ( oldn < 1000 ) {
          Y[oldn] = X[i] ;
        }
      }
    }
  }
  return n ;
}