HPC Magazine February 2014 - Discovering OpenACC 2.0 - part II

Listing 3: Poor implementation of a sparse vector packing function.


#define NB 1000000
int pack(  const float X[NB] , float Y[1000] ) 
{
  int i ; 
  n = 0 ; 
  #pragma acc kernels copy(n) copyin(X[0:NB]) copyout(Y[0:1000])
  {
    #pragma acc loop independent 
    for (i=0;i<NB;++) {
      if ( X[i] != 0.0f ) {
        if ( n < 1000 ) {
          Y[n] = X[i] ;  
        }
        /*  bug: The n values above may actually be different   
         *  than the ones below.
         */             
        #pragma acc atomic update 
        n = n + 1 ;  
      }
    }
  }
  return n ;  
}