#version 450 core

/* Compile-time configuration.                                            */
/* HIRES selects the larger FFTXTSIZE (36 instead of 32).                 */
#define HIRES
/* Not referenced anywhere in the visible part of this shader.            */
/* NOTE(review): name suggests a primitive-restart index - confirm.       */
#define RESTART_IND_UINT 0xFFFFFFFF
#ifdef HIRES
#define FFTXTSIZE 36    
#else
#define FFTXTSIZE 32
#endif
/* Number of entries in one block: 3*FFTXTSIZE*FFTXTSIZE, i.e. 3888 with  */
/* HIRES and 3072 without.  Used below as the block stride in obuf.k and  */
/* as a fixed offset into wbuf.w.                                         */
/* NOTE(review): presumably FFTXTSIZE is a texture side length and the    */
/* factor 3 a per-pixel component count - confirm with the host code.     */
#define FFTXTPIX         (3*FFTXTSIZE*FFTXTSIZE)

/* Work group size: only local_size_x is given (1); y and z are left at   */
/* their defaults.                                                        */
layout(local_size_x=1) in;

/* Shader storage buffers, described by how this shader accesses them.    */

/* rc: read in stage 2 (rc[x] is added to a block of obuf.k); written in  */
/*     stage 3 at index ctl.H+1+x.                                        */
layout(std430,binding=1) buffer FFBlockRC { uint rc[]; } rc;
/* a: written in stage 3 with the accumulated weight for index x.         */
layout(std430,binding=2) buffer FFBlockA { float a[]; } a;
/* wbuf: weights, read in stage 3 at index FFTXTPIX+n.                    */
layout(std430,binding=3) buffer FFWeightBuf { float w[]; } wbuf;
/* obuf: read in stages 0 and 3 and by BinSearch; incremented in stage 2. */
layout(std430,binding=4) buffer FFOnesBuf { uint k[]; } obuf;
/* csbuf: filled in stage 0, updated in place by PrefixSum in stage 1.    */
layout(std430,binding=5) buffer FFCSBuf { uint s[]; } csbuf;
/* pxbuf: read in stage 3; the value at the found index is copied to rc.  */
layout(std430,binding=7) buffer FFPixBuf { uint pix[]; } pxbuf;

/* Control parameters set by the host.  Fields used in the visible code:  */
/*   stage - selects the branch executed by main                          */
/*   step  - shift count of the current PrefixSum pass                    */
/*   H     - upper bound on the destination index in PrefixSum, and part  */
/*           of the rc write offset in stage 3                            */
/*   N     - initial upper bound of the search range in BinSearch         */
/* The remaining fields are not referenced in the visible part of this    */
/* shader.                                                                */
uniform CtlBlock {
    int   stage, step, width, height, N, H, nrows, ncols, first, txts;
    uint  nelem, p0, mi, nnz;
    bool  reverse;
    float C;
    vec3  colour;
  } ctl;

/* One pass of an in-place parallel prefix sum over csbuf.s.              */
/* In pass ctl.step the data are viewed as blocks of 2^(step+1) entries.  */
/* Invocation i selects one entry in the upper half of a block and adds   */
/* to it the last entry of the lower half of the same block.  All buffer  */
/* accesses are shifted by one position (index+1).  Nothing is done if    */
/* the destination index is not less than ctl.H.                          */
/*   i - index of the invocation within the pass                          */
void PrefixSum ( uint i )
{
  uint span, lowMask, srcIdx, dstIdx;

  span    = 0x01 << ctl.step;            /* half of the block length      */
  lowMask = span-1;
  /* last entry of the lower half of the block handled by i               */
  srcIdx  = ((i+i) & ~span) | lowMask;
  /* entry number (i mod span) of the upper half of that block            */
  dstIdx  = srcIdx + (i & lowMask) + 1;

  if ( dstIdx >= ctl.H )
    return;
  csbuf.s[dstIdx+1] += csbuf.s[srcIdx+1];
} /*PrefixSum*/

/* Bisection search in obuf.k for an entry e with e-1 >= i.               */
/* Returns 0 if entry 0 already satisfies the condition.  Otherwise the   */
/* range (lo,hi), initially (0,ctl.N), is halved until hi-lo == 1 and hi  */
/* is returned; entry lo never satisfies the condition, and hi stays      */
/* equal to ctl.N if no probed entry does.                                */
/* The subtraction is unsigned, so an entry equal to 0 wraps around to    */
/* 0xFFFFFFFF and satisfies the condition for every i.                    */
/* NOTE(review): the result is the first matching index only if e-1 is    */
/* non-decreasing along the buffer - confirm with the host code.          */
/*   i - the value searched for                                           */
uint BinSearch ( uint i )
{
  uint lo, hi, mid;

  if ( obuf.k[0]-1 >= i )
    return 0;

  lo = 0;
  hi = ctl.N;
  while ( hi-lo > 1 ) {
    mid = lo + (hi-lo)/2;
    if ( obuf.k[mid]-1 >= i )
      hi = mid;
    else
      lo = mid;
  }
  return hi;
} /*BinSearch*/

/* Entry point.  ctl.stage selects one of four operations performed by    */
/* the invocation identified by gl_GlobalInvocationID; any other stage    */
/* value does nothing.                                                    */
void main ( void )
{
  uvec3 gid;
  uint  head, j;
  float acc;

  gid = gl_GlobalInvocationID;

  if ( ctl.stage == 0 ) {
        /* copying the numbers of nonzero coefficients in the rows:       */
        /* entry x receives the last entry of block x-1 of obuf.k,        */
        /* entry 0 receives 0                                             */
    if ( gid.x > 0 )
      csbuf.s[gid.x] = obuf.k[gid.x*FFTXTPIX-1];
    else
      csbuf.s[gid.x] = 0;
  }
  else if ( ctl.stage == 1 ) {
        /* prefix sums */
    PrefixSum ( gid.x );
  }
  else if ( ctl.stage == 2 ) {
        /* add rc[x] to entry y of block x of obuf.k */
    obuf.k[gid.x*FFTXTPIX+gid.y] += rc.rc[gid.x];
  }
  else if ( ctl.stage == 3 ) {
        /* finding the beginning of the subsequence of weights to be summed */
    head = BinSearch ( gid.x );
    if ( obuf.k[head]-1 != gid.x )
      return;
        /* summing the weights: the run ends at a block boundary          */
        /* (index divisible by FFTXTPIX) or at the first entry of obuf.k  */
        /* that does not match x                                          */
    acc = wbuf.w[FFTXTPIX+head];
    j = head+1;
    while ( j % FFTXTPIX > 0 && obuf.k[j]-1 == gid.x ) {
      acc += wbuf.w[FFTXTPIX+j];
      j++;
    }
    a.a[gid.x] = acc;
    rc.rc[ctl.H+1+gid.x] = pxbuf.pix[head];
  }
} /*main*/

