
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <math.h>
#include "openglheader.h"

#include "utilities.h"
#include "GPUsparsemat.h"

static GLuint program_id;
static GLuint uloc[5];

/* LoadGPUMultMVfShader - compile the compute shader source file          */
/* "multmvf.comp.glsl", link it into the program stored in program_id and */
/* look up the locations of the uniforms "stage", "m", "nnz", "dim", "t"  */
/* (stored in uloc[0..4], in that order). The shader object is deleted    */
/* once the program has been linked. Any pending OpenGL error is passed   */
/* to ExitIfGLError, labelled with this function's name.                  */
void LoadGPUMultMVfShader ( void )
{
  static const char *filename[] = { "multmvf.comp.glsl" };
  static const GLchar *uname[] = { "stage", "m", "nnz", "dim", "t" };
  GLuint shader_id;
  int i;

  shader_id = CompileShaderFiles ( GL_COMPUTE_SHADER, 1, &filename[0] );
  program_id = LinkShaderProgram ( 1, &shader_id, "GPUMultMVf" );
        /* uname and uloc have the same number of entries (5) */
  for ( i = 0; i < 5; i++ )
    uloc[i] = glGetUniformLocation ( program_id, uname[i] );
        /* the linked program does not need the shader object any more */
  glDeleteShader ( shader_id );
        /* the label must match the function name, so that an error */
        /* report points at the right place */
  ExitIfGLError ( "LoadGPUMultMVfShader" );
} /*LoadGPUMultMVfShader*/

/* DeleteGPUMultMVfProgram - delete the compute program created by        */
/* LoadGPUMultMVfShader. The stored name is reset to 0 afterwards, so a   */
/* repeated call passes 0 to glDeleteProgram (which OpenGL silently       */
/* ignores) instead of a name that has already been deleted.              */
void DeleteGPUMultMVfProgram ( void )
{
  glDeleteProgram ( program_id );
  program_id = 0;
} /*DeleteGPUMultMVfProgram*/

/* COMPUTE - dispatch the current compute program with SIZEX x SIZEY x    */
/* SIZEZ work groups, then issue a shader storage memory barrier, so that */
/* later shader accesses to storage buffers see the data written by this  */
/* dispatch. The macro expands to a braced compound statement and is      */
/* meant to be used without a trailing semicolon.                         */
#define COMPUTE(SIZEX,SIZEY,SIZEZ) \
  { glDispatchCompute ( SIZEX, SIZEY, SIZEZ ); \
    glMemoryBarrier ( GL_SHADER_STORAGE_BARRIER_BIT ); }
/* EXECSTAGE - set the uniform at uloc[0] (looked up under the name       */
/* "stage") to STAGE and then run COMPUTE with the given work group       */
/* counts. Also a braced compound statement, used without a semicolon.    */
#define EXECSTAGE(STAGE,SIZEX,SIZEY,SIZEZ) \
  { glUniform1i ( uloc[0], STAGE );  COMPUTE ( SIZEX, SIZEY, SIZEZ ) }

/* GPUMultSparseMatrixVectorf - run the stages of the compute program     */
/* loaded by LoadGPUMultMVfShader for the sparse matrix a.                */
/* Parameters:                                                            */
/*   ybuf - buffer bound to shader storage binding point 3 (presumably    */
/*          the result vector - confirm against multmvf.comp.glsl),       */
/*   a    - the matrix; its fields m and nnz are passed as uniforms,      */
/*          buf[0] and buf[1] are bound to binding points 0 and 1, and    */
/*          lmax is read, and filled in here if it is still zero,         */
/*   dim  - passed as the uniform "dim"; also a factor of the size of     */
/*          the auxiliary buffer of floats,                               */
/*   xbuf - buffer bound to binding point 2 (presumably the argument      */
/*          vector - confirm against the shader).                         */
/* Two auxiliary buffers are created for the duration of the call:        */
/* auxb[0] (a->m GLuints, binding point 4) and auxb[1] (a->nnz*dim        */
/* GLfloats, binding point 5). Pending OpenGL errors are passed to        */
/* ExitIfGLError at the end.                                              */
void GPUMultSparseMatrixVectorf ( GLuint ybuf,
                        GPUSparseMatrix *a, GLuint dim, GLuint xbuf )
{
  GLuint auxb[2], t;

  glUseProgram ( program_id );
  glUniform1ui ( uloc[1], a->m );
  glUniform1ui ( uloc[2], a->nnz );
  glUniform1ui ( uloc[3], dim );
  glBindBufferBase ( GL_SHADER_STORAGE_BUFFER, 0, a->buf[0] );
  glBindBufferBase ( GL_SHADER_STORAGE_BUFFER, 1, a->buf[1] );
  glBindBufferBase ( GL_SHADER_STORAGE_BUFFER, 2, xbuf );
  glBindBufferBase ( GL_SHADER_STORAGE_BUFFER, 3, ybuf );
  glGenBuffers ( 2, auxb );
        /* glBindBufferBase also sets the generic GL_SHADER_STORAGE_BUFFER */
        /* binding, which is what each glBufferData call below allocates.  */
        /* The sizes are computed in size_t, so that the product of the    */
        /* counts cannot wrap around in 32-bit arithmetic before it is     */
        /* multiplied by the element size.                                 */
  glBindBufferBase ( GL_SHADER_STORAGE_BUFFER, 5, auxb[1] );
  glBufferData ( GL_SHADER_STORAGE_BUFFER,
                 (size_t)a->nnz*(size_t)dim*sizeof(GLfloat),
                 NULL, GL_DYNAMIC_DRAW );
  glBindBufferBase ( GL_SHADER_STORAGE_BUFFER, 4, auxb[0] );
  glBufferData ( GL_SHADER_STORAGE_BUFFER, (size_t)a->m*sizeof(GLuint),
                 NULL, GL_DYNAMIC_DRAW );
  EXECSTAGE ( 0, a->m, 1, 1 )
  if ( !a->lmax ) {
        /* a->lmax is not known yet: stage 1 is dispatched repeatedly,   */
        /* with t roughly halved each time, and then the first GLuint of */
        /* auxb[0] is read back into a->lmax (presumably the maximum     */
        /* over the rows of the values produced by stage 0 - confirm     */
        /* against the shader). The value is kept in the matrix, so      */
        /* this is skipped in later calls.                               */
    glUniform1i ( uloc[0], 1 );  /* stage = 1 */
    for ( t = a->m; t > 1; t = (t+1)/2 ) {
      glUniform1ui ( uloc[4], t );
      COMPUTE ( t/2, 1, 1 )
    }
        /* the shader storage barrier issued by COMPUTE covers only      */
        /* later shader accesses; reading the buffer back through the    */
        /* buffer API requires the buffer update barrier                 */
    glMemoryBarrier ( GL_BUFFER_UPDATE_BARRIER_BIT );
        /* auxb[0] is still bound to the generic binding point */
    glGetBufferSubData ( GL_SHADER_STORAGE_BUFFER, 0,
                         sizeof(GLuint), &a->lmax );
        /* stage 0 is executed again after stage 1 has been run on auxb[0] */
    EXECSTAGE ( 0, a->m, 1, 1 )
  }
  EXECSTAGE ( 2, a->nnz, 1, 1 )
  glUniform1i ( uloc[0], 3 );  /* stage = 3 */
        /* same halving scheme as for stage 1, now starting from a->lmax */
        /* and with a->m work groups in the first dimension              */
  for ( t = a->lmax; t > 1; t = (t+1)/2 ) {
    glUniform1ui ( uloc[4], t );
    COMPUTE ( a->m, (t/2), 1 )
  }
  EXECSTAGE ( 4, a->m, 1, 1 )
  glUseProgram ( 0 );
  glDeleteBuffers ( 2, auxb );
  ExitIfGLError ( "GPUMultSparseMatrixVectorf" );
} /*GPUMultSparseMatrixVectorf*/
#undef EXECSTAGE
#undef COMPUTE
