#version 450 core

/* Resolution switch.  HIRES is defined unconditionally on the next line, so
   HW evaluates to 18; deleting that define selects HW = 16 instead.
   HW is the size unit of the weight image used below: WPIX addresses rows of
   6*HW floats, and ComputeFFWeights writes rows 0 .. 2*HW-1. */
#define HIRES
#ifdef HIRES
#define HW 18
#else
#define HW 16
#endif

/* Work-group size: only local_size_x is given (1); local_size_y and
   local_size_z keep their default of 1, so every work group consists of a
   single invocation. */
layout(local_size_x=1) in;

/* Shader storage buffer bound at binding point 3 (std430 layout) holding an
   unsized array of floats.  The code visible here uses it both for the
   weight image (through WPIX and direct indexing from 0) and for a second
   area starting at index ctl.H that stages 1 and 2 of main read and write. */
layout(std430,binding=3) buffer FFWeightBuf { float w[]; } wbuf;

/* Control parameters supplied by the host.  The code visible here reads only
   four members:
     stage - selects the branch executed by main (0 .. 3, anything else is a
             no-op),
     N     - element count bounding the pairwise sum in stage 2, and index of
             the divisor in stage 3,
     H     - offset added to buffer indices in stages 1 and 2,
     C     - scalar used in the weight formulas of ComputeFFWeights.
   The remaining members are not referenced here; presumably the block is
   shared with other shaders -- TODO confirm.  The member order determines
   the block's memory layout seen by the host, so it must not be changed
   without updating the host side. */
uniform CtlBlock {
    int   stage, step, width, height, N, H, nrows, ncols, first, txts;
    uint  nelem, p0, mi, nnz;
    bool  reverse;
    float C;
    vec3  colour;
  } ctl;

/* WPIX(I,J): element of wbuf.w at column I, row J of an image stored row by
   row with 6*HW floats per row. */
#define WPIX(I,J) wbuf.w[(J)*6*HW+(I)]

/* ComputeFFWeights - store the unnormalised weights belonging to the pixel
   offset (u.x, u.y) in the weight image addressed by WPIX.

   Parameter:
     u - invocation index; u.x and u.y must lie in 0 .. HW-1, u.z is not
         used.  Invocations outside that range do nothing.

   Two groups of pixels are written:
   - columns 0 .. 2*HW-1, rows 0 .. 2*HW-1: the four pixels obtained by
     mirroring (u.x, u.y) about the centre of that square all receive
     C/(d*d), where d = x*x + y*y + C*C, x = (u.x+0.5)/HW,
     y = (u.y+0.5)/HW and C = ctl.C;
   - columns 2*HW .. 6*HW-1: eight pixels (four column positions, mirrored
     about columns 3*HW and 5*HW, in each of the row bands 0 .. HW-1 and
     HW .. 2*HW-1) all receive z/(d*d), where z = C*(u.y+0.5)/HW and
     d = 1 + x*x + z*z.
   NOTE(review): this layout looks like the top face and the four side faces
   of a hemicube used for form-factor weights -- confirm.

   Both formulas leave out the common factor ctl.C/float(HW*HW) that an
   earlier version of this code multiplied in; presumably this is harmless
   because stage 3 of main rescales all weights by a common divisor --
   confirm. */
void ComputeFFWeights ( uvec3 u )
{
  vec3  p;
  float d, f;
  uint  i0, i1, i2, i3, j0, j1;

  /* The index expressions below are unsigned: for u.x or u.y >= HW,
     HW-1-u.x and HW-1-u.y would wrap around and the writes would land
     outside the weight image (or on pixels owned by other invocations). */
  if ( u.x >= uint(HW) || u.y >= uint(HW) )
    return;

  /* first group: square of 2*HW x 2*HW pixels, mirrored in both directions */
  i0 = HW-1-u.x;  i1 = HW+u.x;
  j0 = HW-1-u.y;  j1 = HW+u.y;
  p = vec3 ( (float(u.x)+0.5)/float(HW),
             (float(u.y)+0.5)/float(HW), ctl.C );
  d = dot ( p, p );
  f = ctl.C/(d*d);
  WPIX(i0,j0) = WPIX(i0,j1) = WPIX(i1,j0) = WPIX(i1,j1) = f;

  /* second group: columns mirrored about 3*HW (i0, i1) and about 5*HW
     (i2, i3); the same value goes to row u.y and to row HW+u.y */
  i0 = 3*HW-1-u.x;  i1 = 3*HW+u.x;
  i2 = i0+2*HW;  i3 = i1+2*HW;
  j0 = u.y;  j1 = HW+u.y;
  p = vec3 ( 1.0, (float(u.x)+0.5)/float(HW),
             ctl.C*(float(u.y)+0.5)/float(HW) );
  d = dot ( p, p );
  f = p.z/(d*d);
  WPIX(i0,j0) = WPIX(i1,j0) = WPIX(i2,j0) = WPIX(i3,j0) =
  WPIX(i0,j1) = WPIX(i1,j1) = WPIX(i2,j1) = WPIX(i3,j1) = f;
} /*ComputeFFWeights*/

/* main - entry point; performs the single step selected by ctl.stage:
     0 - compute the unnormalised weights (ComputeFFWeights),
     1 - copy element x of wbuf.w to position ctl.H+x, in preparation
         for the summation,
     2 - one pass of pairwise summation over the ctl.N elements that start
         at index ctl.H: element x accumulates element x + (ctl.N+1)/2
         when that partner index is below ctl.N,
     3 - normalisation: divide element x by wbuf.w[ctl.N].
   Any other stage value does nothing.  Here x is gl_GlobalInvocationID.x.
   NOTE(review): stage 3 takes its divisor from index ctl.N, whereas stage 2
   accumulates towards index ctl.H; presumably the host sets ctl.N
   accordingly before running stage 3 -- confirm. */
void main ( void )
{
  uint x, partner;

  x = gl_GlobalInvocationID.x;
  if ( ctl.stage == 0 )
    ComputeFFWeights ( gl_GlobalInvocationID );
  else if ( ctl.stage == 1 )
    wbuf.w[ctl.H+x] = wbuf.w[x];
  else if ( ctl.stage == 2 ) {
    /* partner lies half of the (rounded up) element count further on */
    partner = x+(ctl.N+1)/2;
    if ( partner < ctl.N )
      wbuf.w[ctl.H+x] += wbuf.w[ctl.H+partner];
  }
  else if ( ctl.stage == 3 )
    wbuf.w[x] /= wbuf.w[ctl.N];
} /*main*/

