#version 450 core

/* Compute shader which writes per-element vec4 values into the image */
/* irradtxt and then fills pixels having no element of their own with */
/* the average of their neighbours; the pass is chosen by ctl.stage.  */

/* Sentinel stored in vmap.px[].x for a pixel with no associated entry */
/* of l.l[]; main() tests for it before indexing l.l[].                */
#define RESTART_IND_UINT 0xFFFFFFFF

/* One invocation per work group; main() takes gl_GlobalInvocationID.xy */
/* as the pixel coordinates, so presumably the dispatch is one group    */
/* per pixel -- confirm against the host code.                          */
layout(local_size_x=1) in;

/* l.l[]: vec4 values indexed by the element number found in vmap. */
layout(std430,binding=2) buffer Lbuf { vec4 l[]; } l;
/* vmap.px[]: one uvec2 per pixel, addressed as y*ctl.width + x;     */
/* .x is an index into l.l[] or RESTART_IND_UINT. The .y component   */
/* is not used in this shader.                                       */
layout(std430,binding=9) buffer VMap { uvec2 px[]; } vmap;

/* Image written in stage 0 and both read and written in stage 1. */
layout(rgba32f,binding=1) uniform image2D irradtxt;

/* Control parameters supplied by the host. In the visible shader code */
/* only two members are referenced:                                    */
/*   stage - selects the pass executed by main() (0 or 1),             */
/*   width - the row stride used to address vmap.px[].                 */
/* The remaining members are not referenced here; presumably the block */
/* is shared with other shaders -- confirm against the host code.      */
/* NOTE(review): no layout qualifier is given, so unless a default is  */
/* set elsewhere the member offsets are implementation-defined and     */
/* have to be queried by the host.                                     */
uniform CtlBlock {
    int   stage, step, width, height, N, H, nrows, ncols, first, txts;
    uint  nelem, p0, mi, nnz;
    bool  reverse;
    float C;
    vec3  colour;
  } ctl;

/* Offsets from a pixel to its eight neighbours, listed in order of */
/* rotation around the pixel, beginning with the neighbour at +x.   */
const ivec2 d[8] = ivec2[8] (
    ivec2( 1, 0),
    ivec2( 1, 1),
    ivec2( 0, 1),
    ivec2(-1, 1),
    ivec2(-1, 0),
    ivec2(-1,-1),
    ivec2( 0,-1),
    ivec2( 1,-1) );

/* Entry point; processes the pixel given by gl_GlobalInvocationID.xy.   */
/*                                                                       */
/* ctl.stage == 0: a pixel whose vmap entry holds an element index gets  */
/*   the value l.l[index]; a pixel marked with RESTART_IND_UINT is       */
/*   cleared to zero.                                                    */
/* ctl.stage == 1: a pixel marked with RESTART_IND_UINT is replaced by   */
/*   the average of those of its eight neighbours, which lie inside the  */
/*   image and have an element index; if there are none, the pixel is    */
/*   left unchanged.                                                     */
/* Any other value of ctl.stage does nothing.                            */
/*                                                                       */
/* In stage 1 texels are read only at pixels having an element index     */
/* and written only at pixels without one, so the invocations of one     */
/* dispatch never read a texel written by another one. The stage 0       */
/* results have to be visible before stage 1 starts; presumably the host */
/* issues a memory barrier between the two dispatches -- confirm.        */
/*                                                                       */
/* Assumes ctl.width and ctl.height are the dimensions of the pixel grid */
/* described by vmap.px[] (ctl.width is its row stride) -- TODO confirm  */
/* that the host sets ctl.height accordingly.                            */
void main ( void )
{
  ivec2 xy, nb;
  uint  z, el;
  vec4  tx;
  int   i, j;

  xy = ivec2 ( gl_GlobalInvocationID.xy );
        /* an invocation outside the pixel grid must not address vmap */
  if ( xy.x >= ctl.width || xy.y >= ctl.height )
    return;
  z = xy.y*ctl.width + xy.x;
  el = vmap.px[z].x;
  switch ( ctl.stage ) {
case 0:
    if ( el != RESTART_IND_UINT )
      imageStore ( irradtxt, xy, l.l[el] );
    else
      imageStore ( irradtxt, xy, vec4(0.0) );
    return;
case 1:
    if ( el == RESTART_IND_UINT ) {
      for ( i = j = 0, tx = vec4(0.0);  i < 8;  i++ ) {
        nb = xy + d[i];
        /* Skip the neighbours outside the grid: without this test the  */
        /* linear index wraps into an adjacent row at the left and      */
        /* right edges and leaves the buffer at the top and bottom ones */
        if ( nb.x < 0 || nb.x >= ctl.width ||
             nb.y < 0 || nb.y >= ctl.height )
          continue;
        if ( vmap.px[nb.y*ctl.width + nb.x].x != RESTART_IND_UINT ) {
          tx += imageLoad ( irradtxt, nb );
          j ++;
        }
      }
      if ( j > 0 )
        imageStore ( irradtxt, xy, tx/float(j) );
    }
    return;
  }
} /*main*/

