
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <math.h>

#include "openglheader.h"

#include "utilities.h"
#include "meshes.h"
#include "meshespriv.h"
#include "GPUsparsemat.h"

/* Name of the linked compute program; set by                              */
/* LoadMeshRefinementMatrixProgram, reset to 0 when the program is deleted. */
static GLuint progid = 0;
/* rbuf - uniform buffer object generated in LoadMeshRefinementMatrixProgram, */
/* rbbp - value filled in by GetAccessToUniformBlock and then used as the    */
/*        GL_UNIFORM_BUFFER binding index for rbuf.                          */
static GLuint rbuf, rbbp;
/* Table filled in by GetAccessToUniformBlock and consumed by the SETUVAR   */
/* macro with indices 0..15 (presumably byte offsets of the uniform block   */
/* members - confirm against the macro definition).                         */
static GLint  uvofs[16]; 

/* ////////////////////////////////////////////////////////////////////////// */
void LoadMeshRefinementMatrixProgram ( void )
{
  const GLchar *filename[] = { "mdm.comp.glsl" };
  const GLchar *uvnames[] =
    { "RefineBlock", "stage", "nsattr", "inv", "inhe",  "infac", "outnv",
      "outnhe", "outnfac", "invb", "inei", "fvf", "maxonv", "fvhe", "prN0",
      "prN", "prStep" };
  GLuint shader_id;
  GLint  size;

  shader_id = CompileShaderFiles ( GL_COMPUTE_SHADER, 1, &filename[0] );
  progid = LinkShaderProgram ( 1, &shader_id, "meshes refinement 0" );
  glDeleteShader ( shader_id );
/*PrintProgramResources ( prog->progid, "meshes refinement 0" );*/
  GetAccessToUniformBlock ( progid, 16, uvnames, &size, uvofs, &rbbp );
  glGenBuffers ( 1, &rbuf );
  glBindBufferBase ( GL_UNIFORM_BUFFER, rbbp, rbuf );
  glBufferData ( GL_UNIFORM_BUFFER, size, NULL, GL_DYNAMIC_DRAW );
  ExitIfGLError ( "LoadMeshRefinementMatrixProgram" );
} /*LoadMeshRefinementMatrixProgram*/

/* Release the compute program and the uniform buffer created by           */
/* LoadMeshRefinementMatrixProgram.  Both names are reset to 0 afterwards, */
/* so calling this function again is harmless: glDeleteProgram is skipped  */
/* and glDeleteBuffers ignores the name 0.                                 */
void DeleteMeshRefinementMatrixProgram ( void )
{
  glUseProgram ( 0 );
  if ( progid ) { glDeleteProgram ( progid );  progid = 0; }
  glDeleteBuffers ( 1, &rbuf );
    /* forget the name - once deleted it may be handed out again by      */
    /* glGenBuffers, and a repeated call must not delete that new buffer */
  rbuf = 0;
  ExitIfGLError ( "DeleteMeshRefinementMatrixProgram" );
} /*DeleteMeshRefinementMatrixProgram*/

/* ////////////////////////////////////////////////////////////////////////// */
/* Drive stage 0 of the compute program: uniform 0 is set to 0, uniform 13 */
/* to N0 and uniform 14 to N, then one pass of N/2 work groups is          */
/* dispatched for every significant bit of N-1, with uniform 15 holding    */
/* the pass number.  Each pass is followed by a shader storage barrier.    */
/* Presumably this computes prefix sums of N entries starting at position  */
/* N0 of the bound work buffer - the shader is not visible here.           */
/* Note: for N == 0 the expression N-1 wraps around, so one (empty,        */
/* zero-group) pass is issued for every bit of a GLuint.                   */
/* The parameter must be called uvofs, as in every other SETUVAR user of   */
/* this file (the macro definition is not visible here).                   */
static void PrefixSum ( GLint *uvofs, GLuint N0, GLuint N )
{
  const GLuint groups = N/2;
  GLuint       pass = 0, span = N-1;
  GLint        stage = 0;

  SETUVAR ( 0, GLint, stage )
  SETUVAR ( 13, GLuint, N0 )
  SETUVAR ( 14, GLuint, N )
  while ( span > 0 ) {
    SETUVAR ( 15, GLuint, pass )
    glDispatchCompute ( groups, 1, 1 );
    glMemoryBarrier ( GL_SHADER_STORAGE_BARRIER_BIT );
    pass ++;
    span >>= 1;
  }
  ExitIfGLError ( "PrefixSum" );
} /*PrefixSum*/

/* Drive stage 1 of the compute program: uniform 0 is set to 1 and uniform */
/* 13 to n0; then, while more than one entry is left, uniform 14 receives  */
/* the current count n, n/2 work groups are dispatched and n is replaced   */
/* by (n+1)/2.  Each pass is followed by a shader storage barrier.         */
/* The callers read the result back from position n0 of the work buffer    */
/* (presumably the sum of the n entries - the shader is not visible here). */
static void SumUp ( GLint *uvofs, GLuint n0, GLuint n )
{
  GLint stage = 1;

  SETUVAR ( 0, GLint, stage )
  SETUVAR ( 13, GLuint, n0 )
  for ( ; n > 1; n = (n+1)/2 ) {
    SETUVAR ( 14, GLuint, n )
    glDispatchCompute ( n/2, 1, 1 );
    glMemoryBarrier ( GL_SHADER_STORAGE_BARRIER_BIT );
  }
  ExitIfGLError ( "SumUp" );
} /*SumUp*/

/* Run one stage of the compute program: store the stage number in uniform */
/* 0, dispatch gsize work groups (gsize x 1 x 1) and issue a shader        */
/* storage barrier so that the next dispatch sees the buffer writes.       */
/* The program and the uniform buffer must already be bound by the caller. */
static void ExecStage ( GLint *uvofs, int stage, int gsize )
{
  SETUVAR ( 0, GLint, stage )
  glDispatchCompute ( gsize, 1, 1 );
  glMemoryBarrier ( GL_SHADER_STORAGE_BARRIER_BIT );
} /*ExecStage*/

/* ////////////////////////////////////////////////////////////////////////// */
/* Run the stages 2..21 of the refinement compute program on inmesh.       */
/* The output mesh buffers are (re)allocated with ReallocGPUmesh and       */
/* filled by the shader; two new buffers, bound at the shader storage      */
/* bindings 3 and 6, are returned in mm->rc and mm->a.                     */
/* Parameters:                                                             */
/*   inmesh  - input mesh; its MVFBUF and MHEBUF are bound at 1 and 2,     */
/*   outmesh - mesh to be reallocated; MVFBUF and MHEBUF bound at 4 and 5, */
/*   mm      - receives cm = inmesh, fm = outmesh, m = nnz = outmesh->nv,  */
/*             n = inmesh->nv, rc, a and lmax = 0.                         */
/* Return value: true on success; false if ReallocGPUmesh fails, in which  */
/* case all buffers generated here are deleted and *mm is zeroed.          */
/* On success the caller owns mm->rc and mm->a; they are released by       */
/* GPUDeleteMeshRefinementMatrix.                                          */
char GPUmeshDoublingMatrix ( GPUmesh *inmesh, GPUmesh *outmesh,
                             MeshRefineMatrix *mm )
{
  int    inv, inhe, infac, invb, inei, onv, onhe, onfac, fvf, maxonv, fvhe;
  GLint  bufsize;
  GLuint auxbuf = 0, drc = 0, da = 0;

  glUseProgram ( progid );
  glBindBufferBase ( GL_UNIFORM_BUFFER, rbbp, rbuf );
  inv = inmesh->nv;  inhe = inmesh->nhe;  infac = inmesh->nfac;
  glBindBufferBase ( SSB, 1, inmesh->MVFBUF );
  glBindBufferBase ( SSB, 2, inmesh->MHEBUF );
  SETUVAR ( 1, GLint, inmesh->nsattr )
  SETUVAR ( 2, GLint, inv )
  SETUVAR ( 3, GLint, inhe )
  SETUVAR ( 4, GLint, infac )
  maxonv = inhe+2*inv;  SETUVAR ( 11, GLint, maxonv )
    /* temporary work buffer for the shader, bound at binding 0 */
  bufsize = (3*inv + 4*inhe + infac + 3)*sizeof(GLint);
  glGenBuffers ( 1, &auxbuf );
  glBindBufferBase ( SSB, 0, auxbuf );
  glBufferData ( SSB, bufsize, NULL, GL_DYNAMIC_DRAW );
  ExecStage ( uvofs, 2, inv );     /* TagVertex */
  SumUp ( uvofs, 0, inv );
    /* read back the first entry of the work buffer; judging by the name  */
    /* it is the number of boundary vertices.                             */
    /* NOTE(review): only a GL_SHADER_STORAGE_BARRIER_BIT barrier (issued */
    /* in SumUp) precedes this readback; the OpenGL specification names   */
    /* GL_BUFFER_UPDATE_BARRIER_BIT for glGetBufferSubData and            */
    /* glCopyBufferSubData after shader writes - confirm and add it.      */
  glGetBufferSubData ( SSB, 0, sizeof(GLint), &invb );
    /* sizes of the output mesh derived from the counts above */
  inei = (inhe-invb)/2;
  onv = inhe + 2*invb;     SETUVAR ( 5, GLint, onv )
  onhe = 8*(inei + invb);  SETUVAR ( 6, GLint, onhe )
  onfac = infac + inei + invb + inv;  SETUVAR ( 7, GLint, onfac )
  fvhe = onhe - 2*invb;    SETUVAR ( 12, GLint, fvhe )
  if ( !ReallocGPUmesh ( outmesh, onv, onhe, onfac, inmesh->nsattr,
                         inmesh->pdim, inmesh->pofs, inmesh->nvofs ) )
    goto failure;
  glBindBufferBase ( SSB, 4, outmesh->MVFBUF );
  glBindBufferBase ( SSB, 5, outmesh->MHEBUF );
    /* the two buffers returned in mm: 2*onv+1 integers and onv floats */
  glGenBuffers ( 1, &drc );
  glBindBufferBase ( SSB, 3, drc );
  glBufferData ( SSB, (onv+onv+1)*sizeof(GLint), NULL, GL_DYNAMIC_DRAW );
  glGenBuffers ( 1, &da );
  glBindBufferBase ( SSB, 6, da );
  glBufferData ( SSB, onv*sizeof(GLfloat), NULL, GL_DYNAMIC_DRAW );
  glBindBufferBase ( SSB, 0, auxbuf );
  ExecStage ( uvofs, 3, inhe );    /* DSetECN */
  PrefixSum ( uvofs, maxonv, inhe+1 );
  ExecStage ( uvofs, 4, inv );     /* DSetVCN */
  PrefixSum ( uvofs, maxonv+inhe+1, inv+1 );
  ExecStage ( uvofs, 5, inv );     /* DCopyVC */
  ExecStage ( uvofs, 6, onv-1 );   /* DSetOVdeg */
  PrefixSum ( uvofs, 0, onv );
  ExecStage ( uvofs, 7, onv );     /* DSetOVfhe */
  ExecStage ( uvofs, 8, infac );   /* DSetWLF */
  ExecStage ( uvofs, 9, inhe );    /* DSetEFN1 */
  PrefixSum ( uvofs, maxonv+inhe+inv+2, inhe );
  ExecStage ( uvofs, 10, inhe );   /* DSetEFN2 */
    /* copy infac integers from position inv of the input MVFBUF to */
    /* position onv of the output MVFBUF                            */
  glBindBuffer ( GL_COPY_READ_BUFFER, inmesh->MVFBUF );
  glBindBuffer ( GL_COPY_WRITE_BUFFER, outmesh->MVFBUF );
  glCopyBufferSubData ( GL_COPY_READ_BUFFER, GL_COPY_WRITE_BUFFER,
                 inv*sizeof(GLint), onv*sizeof(GLint), infac*sizeof(GLint) );
  SETUVAR ( 8, GLint, invb )
  SETUVAR ( 9, GLint, inei )
  fvf = infac+inei+invb;  SETUVAR ( 10, GLint, fvf )
  ExecStage ( uvofs, 11, inei+invb );  /* DSetOMfac1  */
  ExecStage ( uvofs, 12, inv );    /* DSetOMfac2 */
  PrefixSum ( uvofs, 0, fvf-infac+1 );
  ExecStage ( uvofs, 13, inv );    /* DSetOMfac3 */
  ExecStage ( uvofs, 14, inhe );   /* DBindNewhe1 */
  PrefixSum ( uvofs, 0, inhe );
  ExecStage ( uvofs, 15, inhe );   /* DBindNewhe2 */
  ExecStage ( uvofs, 16, inhe );   /* DBindNewhe3 */
  ExecStage ( uvofs, 17, infac );  /* DSetIFDeg */
  PrefixSum ( uvofs, maxonv+3*inhe+inv+3, infac );
  ExecStage ( uvofs, 18, infac );  /* DSetOMfhei1 */
  ExecStage ( uvofs, 19, inhe );   /* DSetOMfhei2 */
  ExecStage ( uvofs, 20, inv );    /* DSetTgv */
  PrefixSum ( uvofs, 0, inv );
  ExecStage ( uvofs, 21, inv );    /* DSetOMfhei3 */
  glDeleteBuffers ( 1, &auxbuf );
  glUseProgram ( 0 );
  mm->cm = inmesh;  mm->fm = outmesh;
  mm->m = mm->nnz = outmesh->nv;  mm->n = inmesh->nv;
  mm->rc = drc;  mm->a = da;
    /* GPUmeshRefinementMatrix resets lmax to 0 before returning a matrix */
    /* and GPUMatrixRefineMesh passes its address on; do the same here so */
    /* that the field is never left indeterminate                         */
  mm->lmax = 0;
  ExitIfGLError ( "GPUmeshDoublingMatrix" );
  return true;

failure:
    /* names equal to 0 are ignored by glDeleteBuffers */
  glDeleteBuffers ( 1, &auxbuf );
  glDeleteBuffers ( 1, &drc );
  glDeleteBuffers ( 1, &da );
  glUseProgram ( 0 );
  memset ( mm, 0, sizeof(MeshRefineMatrix) );
  return false;
} /*GPUmeshDoublingMatrix*/

/* Run the stages 2 and 22..40 of the refinement compute program on        */
/* inmesh.  The output mesh buffers are (re)allocated with ReallocGPUmesh  */
/* and filled by the shader; two new buffers, bound at the shader storage  */
/* bindings 3 and 6, are returned in mm->rc and mm->a.                     */
/* Parameters:                                                             */
/*   inmesh  - input mesh; its MVFBUF and MHEBUF are bound at 1 and 2,     */
/*   outmesh - mesh to be reallocated; MVFBUF and MHEBUF bound at 4 and 5, */
/*   mm      - receives cm = inmesh, fm = outmesh, m = outmesh->nv,        */
/*             n = inmesh->nv, nnz (read back from the GPU), rc, a and     */
/*             lmax = 0.                                                   */
/* Return value: true on success; false if ReallocGPUmesh fails, in which  */
/* case all buffers generated here are deleted and *mm is zeroed.          */
/* On success the caller owns mm->rc and mm->a; they are released by       */
/* GPUDeleteMeshRefinementMatrix.                                          */
/* NOTE(review): the values read back with glGetBufferSubData are preceded */
/* only by GL_SHADER_STORAGE_BARRIER_BIT barriers (issued in the helpers); */
/* the OpenGL specification names GL_BUFFER_UPDATE_BARRIER_BIT for such    */
/* reads after shader writes - confirm and add it.                         */
char GPUmeshAveragingMatrix ( GPUmesh *inmesh, GPUmesh *outmesh,
                              MeshRefineMatrix *mm )
{
  int    inv, inhe, infac, invb, onv, onhe, onfac, nnz;
  GLint  bufsize, bs;
  GLuint auxbuf = 0, arc = 0, aa = 0;

  glUseProgram ( progid );
  glBindBufferBase ( GL_UNIFORM_BUFFER, rbbp, rbuf );
  inv = inmesh->nv;  inhe = inmesh->nhe;  infac = inmesh->nfac;

  glBindBufferBase ( SSB, 1, inmesh->MVFBUF );
  glBindBufferBase ( SSB, 2, inmesh->MHEBUF );
  SETUVAR ( 1, GLint, inmesh->nsattr )
  SETUVAR ( 2, GLint, inv )
  SETUVAR ( 3, GLint, inhe )
  SETUVAR ( 4, GLint, infac )
    /* temporary work buffer for the shader, bound at binding 0 */
  bs = inv > infac ? inv : infac;
  bufsize = (2*inv+2*inhe+infac+bs)*sizeof(GLint);
  glGenBuffers ( 1, &auxbuf );
  glBindBufferBase ( SSB, 0, auxbuf );
  glBufferData ( SSB, bufsize, NULL, GL_DYNAMIC_DRAW );
  ExecStage ( uvofs, 2, inv );  /* TagVertex */
  SumUp ( uvofs, 0, inv );
    /* first entry of the work buffer; judging by the name it is the */
    /* number of boundary vertices                                   */
  glGetBufferSubData ( SSB, 0, sizeof(GLint), &invb );
  onfac = inv - invb;  SETUVAR ( 7, GLint, onfac )
  if ( invb == 0 ) {
      /* the output sizes follow directly from the input sizes */
    onv = infac;  onhe = inhe;
    ExecStage ( uvofs, 22, infac );  /* ASetNvi1 */
    ExecStage ( uvofs, 23, inhe );  /* ASetNhei1 */
    ExecStage ( uvofs, 24, inv );  /* ASetNfi1 */
  }
  else {
      /* the output sizes are computed on the GPU and read back */
    ExecStage ( uvofs, 25, infac );  /* ASetNvi2 */
    PrefixSum ( uvofs, 0, infac );
    SumUp ( uvofs, 2*infac+inhe+inv, infac );
    glGetBufferSubData ( SSB, (2*infac+inhe+inv)*sizeof(GLint),
                         sizeof(GLint), &onv );
    ExecStage ( uvofs, 26, inv );  /* ASetNfi2, true */
    PrefixSum ( uvofs, 2*infac+inhe, inv );
    ExecStage ( uvofs, 27, inhe );  /* ASetNhei2 */
    PrefixSum ( uvofs, 2*infac, inhe );
    glGetBufferSubData ( SSB, (2*infac+inhe-1)*sizeof(GLint),
                         sizeof(GLint), &onhe );
    ExecStage ( uvofs, 28, infac );  /* ASetNvi2, false */
    ExecStage ( uvofs, 29, inhe );  /* ASetNhei3 */
    ExecStage ( uvofs, 30, inv );  /* ASetNfi3 */
  }
  SETUVAR ( 5, GLint, onv )
  SETUVAR ( 6, GLint, onhe )
  if ( !ReallocGPUmesh ( outmesh, onv, onhe, onfac, inmesh->nsattr,
                         inmesh->pdim, inmesh->pofs, inmesh->nvofs ) )
    goto failure;
  glBindBufferBase ( SSB, 4, outmesh->MVFBUF );
  glBindBufferBase ( SSB, 5, outmesh->MHEBUF );
  ExecStage ( uvofs, 31, inv );    /* AClearFVd */
  ExecStage ( uvofs, 32, inv );    /* ASetFVd1 */
  PrefixSum ( uvofs, 2*infac+inhe+inv, inv );
  ExecStage ( uvofs, 33, inv );    /* ASetFVd2 */
  ExecStage ( uvofs, 34, infac );  /* AClearFvd */
  ExecStage ( uvofs, 35, infac );  /* ASetOMVert, true */
  PrefixSum ( uvofs, 2*infac+inhe+inv, infac );
  ExecStage ( uvofs, 36, infac );  /* ASetOMVert, false */
  ExecStage ( uvofs, 37, inhe );   /* ABindHe */
  ExecStage ( uvofs, 38, inv );    /* ASetOMfacHe */
  ExecStage ( uvofs, 39, infac );  /* Average0 */
  PrefixSum ( uvofs, 2*infac+inhe+inv, infac );
    /* the generic binding was changed by the glBindBufferBase calls */
    /* above, so the work buffer is bound again before the readback  */
  glBindBuffer ( SSB, auxbuf );
  glGetBufferSubData ( SSB, (3*infac+inhe+inv-1)*sizeof(GLuint),
                       sizeof(GLuint), &nnz );
    /* the two buffers returned in mm: onv+nnz+1 integers, nnz floats */
  glGenBuffers ( 1, &arc );
  glBindBufferBase ( SSB, 3, arc );
  glBufferData ( SSB, (onv+nnz+1)*sizeof(GLuint), NULL, GL_DYNAMIC_DRAW );
  glGenBuffers ( 1, &aa );
  glBindBufferBase ( SSB, 6, aa );
  glBufferData ( SSB, nnz*sizeof(GLfloat), NULL, GL_DYNAMIC_DRAW );
  ExecStage ( uvofs, 40, infac );  /* Average1 */
  glUseProgram ( 0 );
  glDeleteBuffers ( 1, &auxbuf );
  ExitIfGLError ( "GPUmeshAveragingMatrix" );
  mm->cm = inmesh;  mm->fm = outmesh;
  mm->m = outmesh->nv;  mm->n = inmesh->nv;  mm->nnz = nnz;
  mm->rc = arc;  mm->a = aa;
    /* GPUmeshRefinementMatrix resets lmax to 0 before returning a matrix */
    /* and GPUMatrixRefineMesh passes its address on; do the same here so */
    /* that the field is never left indeterminate                         */
  mm->lmax = 0;
  return true;

failure:
  glUseProgram ( 0 );
    /* names equal to 0 are ignored by glDeleteBuffers */
  glDeleteBuffers ( 1, &auxbuf );
  glDeleteBuffers ( 1, &arc );
  glDeleteBuffers ( 1, &aa );
  memset ( mm, 0, sizeof(MeshRefineMatrix) );
  return false;
} /*GPUmeshAveragingMatrix*/

/* Build the matrix of a refinement made of one doubling step followed by  */
/* n averaging steps, together with the refined mesh.                      */
/* Parameters:                                                             */
/*   n       - number of averaging steps; must be at least 1,              */
/*   inmesh  - input mesh,                                                 */
/*   outmesh - receives the mesh produced by the last averaging step,      */
/*   mm      - receives the product of the averaging matrices and the      */
/*             doubling matrix, with cm = inmesh, fm = outmesh, lmax = 0.  */
/* Return value: true on success, false if n < 1 or if any step fails.     */
/* On failure after the first step the intermediate matrices and the       */
/* buffers of both working meshes (outmesh included) are deleted, their    */
/* names are cleared and *mm is zeroed; for n < 1 nothing is touched.      */
/* The meshes alternate between outmesh and a local temporary; which one   */
/* receives the doubled mesh depends on the parity of n, so that the last  */
/* averaging step always writes to outmesh.                                */
char GPUmeshRefinementMatrix ( int n, GPUmesh *inmesh, GPUmesh *outmesh,
                               MeshRefineMatrix *mm )
{
  GPUmesh          mmesh, *am, *bm, *cm;
  MeshRefineMatrix md, ma;
  int              i;

  if ( n < 1 )
    return false;
  memset ( &mmesh, 0, sizeof(GPUmesh) );
    /* start from empty matrices, so that the failure path may release */
    /* them no matter how far the computation has got                  */
  memset ( &md, 0, sizeof(MeshRefineMatrix) );
  memset ( &ma, 0, sizeof(MeshRefineMatrix) );
  if ( n & 0x01 ) { am = &mmesh;  bm = outmesh; }
             else { am = outmesh;  bm = &mmesh; }
  if ( !GPUmeshDoublingMatrix ( inmesh, am, &md ) )
    goto failure;
  for ( i = 0; i < n; i++ ) {
    if ( !GPUmeshAveragingMatrix ( am, bm, &ma ) )
      goto failure;
      /* *mm := ma*md; md holds the product accumulated so far */
    if ( !GPUMultSparseMatricesf ( ma.m, ma.n, md.n,
                                   ma.nnz, ma.rc, ma.a, md.nnz, md.rc, md.a,
                                   &mm->nnz, &mm->rc, &mm->a ) )
      goto failure;
      /* the input mesh of this step is not needed any more */
    glDeleteBuffers ( 4, am->mbuf );
    memset ( am->mbuf, 0, 4*sizeof(GLuint) );
    mm->m = ma.m;  mm->n = md.n;
    cm = am;  am = bm;  bm = cm;
    GPUDeleteMeshRefinementMatrix ( &md );
    GPUDeleteMeshRefinementMatrix ( &ma );
      /* md becomes an alias of the buffers now owned by *mm */
    md = *mm;
  }
  mm->lmax = 0;
  mm->cm = inmesh;  mm->fm = outmesh;
  return true;

failure:
    /* md is either empty or owns the last complete product (the same   */
    /* buffers *mm refers to after the first pass), ma is either empty  */
    /* or the current averaging matrix; releasing both frees everything */
    /* this function still holds.  This assumes GPUMultSparseMatricesf  */
    /* leaves no buffers of its own behind when it fails - TODO confirm */
  GPUDeleteMeshRefinementMatrix ( &md );
  GPUDeleteMeshRefinementMatrix ( &ma );
  glDeleteBuffers ( 4, am->mbuf );
  memset ( am->mbuf, 0, 4*sizeof(GLuint) );
  glDeleteBuffers ( 4, bm->mbuf );
  memset ( bm->mbuf, 0, 4*sizeof(GLuint) );
    /* do not leave names of deleted buffers in the result */
  memset ( mm, 0, sizeof(MeshRefineMatrix) );
  return false;
} /*GPUmeshRefinementMatrix*/

/* Apply the refinement matrix: call GPUMultSparseMatrixVectorf with the   */
/* matrix stored in *mm, the attribute count (nsattr) of the mesh mm->cm,  */
/* the VCBUF buffer of mm->cm as the input and the VCBUF buffer of mm->fm  */
/* as the output.  The address of mm->lmax is passed as well, so that      */
/* field may be modified by the call.                                      */
void GPUMatrixRefineMesh ( MeshRefineMatrix *mm )
{
  GPUmesh *coarse = mm->cm;
  GPUmesh *fine   = mm->fm;

  GPUMultSparseMatrixVectorf ( mm->m, mm->n, mm->nnz, &mm->lmax,
                               mm->rc, mm->a,
                               coarse->nsattr, coarse->VCBUF, fine->VCBUF );
} /*GPUMatrixRefineMesh*/

/* Delete the two buffer objects of the matrix (mm->rc and mm->a) and      */
/* clear the entire structure, so that it may be released again safely     */
/* (glDeleteBuffers ignores names equal to 0).                             */
void GPUDeleteMeshRefinementMatrix ( MeshRefineMatrix *mm )
{
  GLuint bufs[2];

  bufs[0] = mm->rc;
  bufs[1] = mm->a;
  glDeleteBuffers ( 2, bufs );
  memset ( mm, 0, sizeof(*mm) );
} /*GPUDeleteMeshRefinementMatrix*/

