
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <ctype.h>
#include <math.h>

#include "../utilities/openglheader.h"
#include "../utilities/utilities.h"
#include "trans.h"
#include "lights.h"
#include "balance.h"
#include "balanceprivate.h"

/* ////////////////////////////////////////////////////////////////////////// */
/* Number of shader source files compiled by LoadBalanceShaders. */
#define NBSHADERS  31
/* Number of shader programs linked by LoadBalanceShaders (size of bprog_id). */
#define NBPROGRAMS 19

/*#define NBTBUOFFS 2*/

/*#define DEBUG_CHECKFFW*/
/*#define DEBUG_VIEWFF*/
/*#define DEBUG_SUMFFW*/
/*#define DEBUG_BFF1*/
/*#define OPTIMISE_CF*/

/* ////////////////////////////////////////////////////////////////////////// */
/* The size of an array element could not be queried from the shader       */
/* program, but it appears that the size of the PatchRect structure and    */
/* the offsets of its fields are identical to those assigned by the C      */
/* compiler for the declaration below.                                     */
/* NOTE(review): this layout match is an observation, not a guarantee.     */
typedef struct {
    GLfloat w, h, x0, y0;   /* copied from the w, h, x0, y0 of a TriangPatch */
    GLint   melw, melh;     /* macroelement sizes chosen by FindMacroElSize  */
    GLint   objID;          /* copied from the objid of the TriangPatch      */
  } PatchRect;

/* ////////////////////////////////////////////////////////////////////////// */
/* Identifiers of the linked shader programs; filled by LoadBalanceShaders */
/* and released by DeleteBalancePrograms.                                  */
static GLuint bprog_id[NBPROGRAMS];

/* Binding point of the "CtlBlock" uniform block.  It stays at             */
/* GL_INVALID_INDEX until GetAccessToCTLBlockUniform is called for the     */
/* first time.                                                             */
static GLuint uctlbbp = GL_INVALID_INDEX;
/* Name passed to GetAccessToStorageBlock in LoadBalanceShaders. */
static const GLchar *UBPRNames[1] = { "Pr" };
/* Name of the control uniform block followed by the names of its fields. */
static const GLchar *UCTLNames[NCTLNAMES+1] =
  { "CtlBlock", "CtlBlock.stage", "CtlBlock.step", "CtlBlock.width",
    "CtlBlock.height", "CtlBlock.N", "CtlBlock.H", "CtlBlock.nrows",
    "CtlBlock.ncols", "CtlBlock.first", "CtlBlock.txts", "CtlBlock.p0",
    "CtlBlock.nelem", "CtlBlock.mi", "CtlBlock.nnz", "CtlBlock.reverse",
    "CtlBlock.C", "CtlBlock.colour" };
/* Passed as output arguments to GetAccessToUniformBlock; uctlbofs[n] is   */
/* then used by the CTLUniform* functions as the byte offset of field n    */
/* (presumably the block size and field offsets - confirm in utilities).   */
static GLint  uctlbsize, uctlbofs[NCTLNAMES];

/* The same set of variables for the "FFTransBlock" uniform block. */
static GLuint fftrbbp = GL_INVALID_INDEX;
static GLint  fftrbsize, fftrbofs[NFFTRUOFFS];
static const GLchar *UFFTBNames[NFFTRUOFFS+1] =
  { "FFTransBlock", "FFTransBlock.mm", "FFTransBlock.vpm" };

/* ////////////////////////////////////////////////////////////////////////// */
/* NCVECT is not used in the part of the file visible here; presumably it  */
/* is the number of leading entries of cvect actually used (6 = the axis   */
/* directions only, 26 = the whole table) - TODO confirm at the use site.  */
#define NCVECT 6
/*#define NCVECT  26*/
/* Approximations of 1/sqrt(2) and 1/sqrt(3). */
#define SQRT1_2 0.70710678
#define SQRT1_3 0.57735027

/* Table of 26 unit vectors: the 6 directions along the coordinate axes,   */
/* then the 12 diagonals of the coordinate planes, then the 8 diagonals of */
/* the cube.  Opposite directions are stored in adjacent entries.          */
static const float cvect[26][3] =
  {{1.0,0.0,0.0},{-1.0,0.0,0.0},{0.0,1.0,0.0},{0.0,-1.0,0.0},
   {0.0,0.0,1.0},{0.0,0.0,-1.0},
   {SQRT1_2,SQRT1_2,0.0},{-SQRT1_2,-SQRT1_2,0.0},
   {-SQRT1_2,SQRT1_2,0.0},{SQRT1_2,-SQRT1_2,0.0},
   {SQRT1_2,0.0,SQRT1_2},{-SQRT1_2,0.0,-SQRT1_2},
   {-SQRT1_2,0.0,SQRT1_2},{SQRT1_2,0.0,-SQRT1_2},
   {0.0,SQRT1_2,SQRT1_2},{0.0,-SQRT1_2,-SQRT1_2},
   {0.0,-SQRT1_2,SQRT1_2},{0.0,SQRT1_2,-SQRT1_2},
   {SQRT1_3,SQRT1_3,SQRT1_3},{-SQRT1_3,-SQRT1_3,-SQRT1_3},
   {-SQRT1_3,SQRT1_3,SQRT1_3},{SQRT1_3,-SQRT1_3,-SQRT1_3},
   {SQRT1_3,-SQRT1_3,SQRT1_3},{-SQRT1_3,SQRT1_3,-SQRT1_3},
   {SQRT1_3,SQRT1_3,-SQRT1_3},{-SQRT1_3,-SQRT1_3,SQRT1_3}};

/* ////////////////////////////////////////////////////////////////////////// */
/* Makes the "CtlBlock" uniform block of the given program use the shared  */
/* binding point.  The first call obtains the binding point (together with */
/* the block size and field offsets) through GetAccessToUniformBlock; each */
/* later call only attaches the program's block to the known binding point.*/
/* Returns the binding point.                                              */
static GLuint GetAccessToCTLBlockUniform ( GLuint program_id )
{
  if ( uctlbbp != GL_INVALID_INDEX ) {
        /* binding point already known - just attach this program */
    AttachUniformBlockToBP ( program_id, UCTLNames[0], uctlbbp );
    return uctlbbp;
  }
  GetAccessToUniformBlock ( program_id, NCTLNAMES, UCTLNames,
                            &uctlbsize, uctlbofs, &uctlbbp );
  return uctlbbp;
} /*GetAccessToCTLBlockUniform*/

/* Creates a uniform buffer of the size recorded for "CtlBlock", passing   */
/* the block's binding point to NewUniformBuffer; returns what that call   */
/* returns.  GetAccessToCTLBlockUniform must have set uctlbsize and        */
/* uctlbbp beforehand.                                                     */
static GLuint NewUniformCTLBlock ( void )
{
  GLuint buffer_id;

  buffer_id = NewUniformBuffer ( uctlbsize, uctlbbp );
  return buffer_id;
} /*NewUniformCTLBlock*/

/* Writes a signed integer into field n of the control uniform block,      */
/* i.e. at byte offset uctlbofs[n] of the buffer BUF_UCTL.                 */
/* NOTE(review): BUF_UCTL is a macro defined elsewhere; presumably it      */
/* refers to a buffer stored in belem - confirm.                           */
void CTLUniformi ( BalanceElements *belem, int n, GLint value )
{
  const GLint field_offset = uctlbofs[n];

  glNamedBufferSubData ( BUF_UCTL, field_offset, sizeof(GLint), &value );
} /*CTLUniformi*/

/* Writes an unsigned integer into field n of the control uniform block,   */
/* i.e. at byte offset uctlbofs[n] of the buffer BUF_UCTL.                 */
void CTLUniformui ( BalanceElements *belem, int n, GLuint value )
{
  const GLint field_offset = uctlbofs[n];

  glNamedBufferSubData ( BUF_UCTL, field_offset, sizeof(GLuint), &value );
} /*CTLUniformui*/

/* Writes a float into field n of the control uniform block, i.e. at byte  */
/* offset uctlbofs[n] of the buffer BUF_UCTL.                              */
void CTLUniformf ( BalanceElements *belem, int n, GLfloat value )
{
  const GLint field_offset = uctlbofs[n];

  glNamedBufferSubData ( BUF_UCTL, field_offset, sizeof(GLfloat), &value );
} /*CTLUniformf*/

/* Writes three consecutive floats into field n of the control uniform     */
/* block, i.e. at byte offset uctlbofs[n] of the buffer BUF_UCTL.          */
void CTLUniform3fv ( BalanceElements *belem, int n, const GLfloat value[3] )
{
  const GLint field_offset = uctlbofs[n];

  glNamedBufferSubData ( BUF_UCTL, field_offset, 3*sizeof(GLfloat), value );
} /*CTLUniform3fv*/

/* Makes the "FFTransBlock" uniform block of the given program use the     */
/* shared binding point.  The first call obtains the binding point, block  */
/* size and field offsets through GetAccessToUniformBlock; each later call */
/* only attaches the program's block.  Returns the binding point.          */
GLuint GetAccessToFFTransBlockUniform ( GLuint program_id )
{
  if ( fftrbbp != GL_INVALID_INDEX ) {
        /* binding point already known - just attach this program */
    AttachUniformBlockToBP ( program_id, UFFTBNames[0], fftrbbp );
    return fftrbbp;
  }
  GetAccessToUniformBlock ( program_id, NFFTRUOFFS, UFFTBNames,
                            &fftrbsize, fftrbofs, &fftrbbp );
  return fftrbbp;
} /*GetAccessToFFTransBlockUniform*/

/* Creates a uniform buffer of the size recorded for "FFTransBlock",       */
/* passing the block's binding point to NewUniformBuffer; returns what     */
/* that call returns.                                                      */
GLuint NewUniformFFTransBlock ( void )
{
  GLuint buffer_id;

  buffer_id = NewUniformBuffer ( fftrbsize, fftrbbp );
  return buffer_id;
} /*NewUniformFFTransBlock*/

/* Compiles all shader files of this module, links them into the programs  */
/* stored in bprog_id, attaches the uniform blocks used by the programs to */
/* their binding points and finally deletes the shader objects.            */
/* Program k is linked from nstages[k] consecutive shaders; the shaders of */
/* consecutive programs follow one another in the filename/shtype tables.  */
void LoadBalanceShaders ( void )
{
  static const char *filename[NBSHADERS] =
    { "b3.vert.glsl", "b3.geom.glsl", "b3.frag.glsl",      /* program 0 */
      "b3.comp.glsl",                                      /* program 1 */
      "b4.vert.glsl", "b4.frag.glsl",                      /* program 2 */
      "b4.comp.glsl",                                      /* program 3 */
      "b5.vert.glsl", "b5.frag.glsl",                      /* program 4 */
      "b5.comp.glsl",                                      /* program 5 */
      "bff0.comp.glsl",                                    /* program 6 */
      "bff1.vert.glsl", "bff1.geom.glsl", "bff1.frag.glsl",/* program 7 */
      "bff2.comp.glsl",                                    /* program 8 */
      "bff3.comp.glsl",                                    /* program 9 */
      "bff4.comp.glsl",                                    /* program 10 */
      "bff5.comp.glsl",                                    /* program 11 */
      "bc0.vert.glsl", "bc0.geom.glsl", "bc0.frag.glsl",   /* program 12 */
      "bc1.comp.glsl",                                     /* program 13 */
      "bc2.comp.glsl",                                     /* program 14 */
      "bc3.comp.glsl",                                     /* program 15 */
      "aux0.comp.glsl",                                    /* program 16 */
      "b0.vert.glsl", "b0.geom.glsl", "b0.frag.glsl",      /* program 17 */
      "b1.vert.glsl", "b1.geom.glsl", "b1.frag.glsl" };    /* program 18 */
  static const GLuint shtype[NBSHADERS] =
    { GL_VERTEX_SHADER, GL_GEOMETRY_SHADER, GL_FRAGMENT_SHADER,
      GL_COMPUTE_SHADER,
      GL_VERTEX_SHADER, GL_FRAGMENT_SHADER,
      GL_COMPUTE_SHADER,
      GL_VERTEX_SHADER, GL_FRAGMENT_SHADER,
      GL_COMPUTE_SHADER,
      GL_COMPUTE_SHADER,
      GL_VERTEX_SHADER, GL_GEOMETRY_SHADER, GL_FRAGMENT_SHADER,
      GL_COMPUTE_SHADER,
      GL_COMPUTE_SHADER,
      GL_COMPUTE_SHADER,
      GL_COMPUTE_SHADER,
      GL_VERTEX_SHADER, GL_GEOMETRY_SHADER, GL_FRAGMENT_SHADER,
      GL_COMPUTE_SHADER,
      GL_COMPUTE_SHADER,
      GL_COMPUTE_SHADER,
      GL_COMPUTE_SHADER,
      GL_VERTEX_SHADER, GL_GEOMETRY_SHADER, GL_FRAGMENT_SHADER,
      GL_VERTEX_SHADER, GL_GEOMETRY_SHADER, GL_FRAGMENT_SHADER };
  static const char *name[NBPROGRAMS] =
    { "b0", "b1", "b2", "b3", "b4", "b5",
      "bff0", "bff1", "bff2", "bff3", "bff4", "bff5",
      "bc0", "bc1", "bc2", "bc3", "aux0", "balance",
      "balance1" };
        /* number of shaders linked into each program; programs 6..16 are */
        /* the ones computing form factors, 17 and 18 render final images */
  static const int nstages[NBPROGRAMS] =
    { 3, 1, 2, 1, 2, 1,
      1, 3, 1, 1, 1, 1,
      3, 1, 1, 1, 1, 3,
      3 };
        /* programs (other than program 0) whose CtlBlock is attached */
  static const int ctluser[14] =
    { 1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 15, 17, 18 };
  GLuint sh_id[NBSHADERS];
  int    s, p, k, first, prsize;

  for ( s = 0; s < NBSHADERS; s++ )
    sh_id[s] = CompileShaderFiles ( shtype[s], 1, &filename[s] );
  for ( p = 0, first = 0;  p < NBPROGRAMS;  p++ ) {
    bprog_id[p] = LinkShaderProgram ( nstages[p], &sh_id[first], name[p] );
    first += nstages[p];
  }
        /* program 0 establishes the CtlBlock binding point, the other */
        /* users of the block are attached to it                       */
  GetAccessToCTLBlockUniform ( bprog_id[0] );
  for ( k = 0; k < 14; k++ )
    AttachUniformBlockToBP ( bprog_id[ctluser[k]], UCTLNames[0], uctlbbp );
  AttachUniformMatBlockToBP ( bprog_id[0] );
  AttachUniformLightBlockToBP ( bprog_id[12] );
  AttachUniformMatBlockToBP ( bprog_id[12] );
  AttachUniformTransBlockToBP ( bprog_id[4] );
  AttachUniformTransBlockToBP ( bprog_id[12] );
  AttachUniformTransBlockToBP ( bprog_id[17] );
  AttachUniformLightBlockToBP ( bprog_id[17] );
  AttachUniformMatBlockToBP ( bprog_id[17] );
  AttachUniformTransBlockToBP ( bprog_id[18] );
  AttachUniformLightBlockToBP ( bprog_id[18] );
  GetAccessToFFTransBlockUniform ( bprog_id[7] );
        /* the value written to prsize is not used */
  GetAccessToStorageBlock ( bprog_id[2], 0, &UBPRNames[0], &prsize, NULL, NULL );
  for ( s = 0; s < NBSHADERS; s++ )
    glDeleteShader ( sh_id[s] );
  ExitIfGLError ( "LoadBalanceShaders" );
} /*LoadBalanceShaders*/

void DeleteBalancePrograms ( void )
{
  int i;

  for ( i = 0; i < NBPROGRAMS; i++ )
    glDeleteProgram ( bprog_id[i] );
} /*DeleteBalancePrograms*/

/* ////////////////////////////////////////////////////////////////////////// */
/* Chooses a macroelement size for a patch side of ps pixels.              */
/* A side not longer than MACROEL_DSIZE is returned unchanged.  Otherwise  */
/* candidate sizes are tried alternately above (MACROEL_DSIZE, +1, ...)    */
/* and below (MACROEL_DSIZE-1, -1, ...) the default; the first candidate d */
/* that divides ps, or leaves a remainder greater than 3/4 of d, wins.     */
static int FindMacroElSize ( int ps )
{
  int below, above, rem;

  if ( ps <= MACROEL_DSIZE )
    return ps;
  below = MACROEL_DSIZE-1;
  above = MACROEL_DSIZE;
  for (;;) {
    rem = ps % above;
    if ( rem == 0 || 4*rem > 3*above )
      return above;
    rem = ps % below;
    if ( rem == 0 || 4*rem > 3*below )
      return below;
    below--;
    above++;
  }
} /*FindMacroElSize*/

/* Builds an array with one PatchRect per triangle patch of belem (size,   */
/* position, macroelement sizes and object id), stores it in a new storage */
/* buffer BUF_PRS created with NewStorageBuffer ( size, 3 ) and frees the  */
/* temporary array.                                                        */
void LoadPatchRectSizes ( BalanceElements *belem )
{
  TriangPatch *src;
  PatchRect   *rects, *dst;
  int         k, npatch, nbytes;

  npatch = belem->ntrpatch;
  nbytes = npatch*(int)sizeof(PatchRect);
  rects = malloc ( nbytes );
  if ( !rects )
    ExitOnError ( "LoadPatchRectSizes" );
  for ( k = 0; k < npatch; k++ ) {
    src = &belem->trpatch[k];
    dst = &rects[k];
    dst->w = src->w;
    dst->h = src->h;
    dst->x0 = src->x0;
    dst->y0 = src->y0;
        /* the macroelement size is chosen for the side length less 2 */
    dst->melw = FindMacroElSize ( (int)src->w-2 );
    dst->melh = FindMacroElSize ( (int)src->h-2 );
    dst->objID = src->objid;
  }
  BUF_PRS = NewStorageBuffer ( nbytes, 3 );
  glBufferSubData ( GL_SHADER_STORAGE_BUFFER, 0, nbytes, rects );
  free ( rects );
  ExitIfGLError ( "LoadPatchRectSizes" );
} /*LoadPatchRectSizes*/

/* ////////////////////////////////////////////////////////////////////////// */
/* Computes v = v1 + s*v2 for vectors with three float components.         */
/* Each component of v depends only on the same component of v1 and v2.    */
void V3AddMf ( float v[3], const float v1[3], float s, const float v2[3] )
{
  int c;

  for ( c = 0; c < 3; c++ )
    v[c] = v1[c]+s*v2[c];
} /*V3AddMf*/

/* Returns a pseudo-angle of the vector (x,y): 0 for the positive x        */
/* direction, 1 for positive y, 2 for negative x, 3 for negative y, with   */
/* intermediate values in between (the range is [0,4)).  For x = y = 0     */
/* the result is -1.                                                       */
static float SqAngle ( float x, float y )
{
  if ( x > 0.0 ) {
    if ( y >= 0.0 )
      return y/(x+y);           /* first quadrant: [0,1) */
    return 3.0+x/(x-y);         /* fourth quadrant: (3,4) */
  }
  if ( x < 0.0 ) {
    if ( y >= 0.0 )
      return 1.0+x/(x-y);       /* second quadrant: (1,2] */
    return 2.0+y/(x+y);         /* third quadrant: (2,3) */
  }
        /* x is zero: the vector lies on the y axis or is the null vector */
  if ( y > 0.0 )
    return 1.0;
  if ( y < 0.0 )
    return 3.0;
  return -1.0;
} /*SqAngle*/

static char _Less ( void *data, int i, int j )
{
  GLfloat (*p)[3];

  p = (GLfloat(*)[3])data;
  return p[i][2] < p[j][2];
} /*_Less*/

static void _Swap ( void *data, int i, int j )
{
  GLfloat (*p)[3];
  GLfloat q[3];

  if ( i != j ) {
    p = (GLfloat(*)[3])data;
    V3Copyf ( q, p[i] );  V3Copyf ( p[i], p[j] );  V3Copyf ( p[j], q );
  }
} /*_Swap*/

/* Finds the convex hull of n points in the xy plane with Graham's scan.   */
/*   n - number of points,                                                 */
/*   p - the points; p[i][0], p[i][1] are the coordinates, p[i][2] is      */
/*       overwritten and used as working storage for pseudo-angles.        */
/* The array is reordered in place; on return the hull vertices occupy     */
/* p[0], ..., p[k-1], where k is the return value.  For n < 2 nothing is   */
/* done and n is returned.                                                 */
/* NOTE(review): assumes QuickSort orders the elements ascending according */
/* to _Less - confirm in utilities.                                        */
static int FindConvexHullf ( int n, GLfloat p[][3] )
{
  int   i, j;
  float xmin, ymin, b, c;

  if ( n < 2 )
    return n;
        /* the pivot is the lowest point, the leftmost one among ties; */
        /* move its coordinates to p[0]                                */
  for ( j = 0, xmin = p[0][0], ymin = p[0][1], i = 1;  i < n;  i++ )
    if ( p[i][1] < ymin || (p[i][1] == ymin && p[i][0] < xmin) )
      { j = i, xmin = p[i][0], ymin = p[i][1]; }
  if ( j > 0 )
    { p[j][0] = p[0][0], p[j][1] = p[0][1], p[0][0] = xmin, p[0][1] = ymin; }
        /* sort the other points by the pseudo-angle seen from the pivot */
  for ( i = 1; i < n; i++ )
    p[i][2] = SqAngle ( p[i][0]-xmin, p[i][1]-ymin );
  QuickSort ( &p[1], n-1, _Less, _Swap );
        /* points coinciding with the pivot have the angle -1 and come */
        /* first after sorting; skip them                              */
  for ( i = 1; i < n && p[i][2] < 0.0 ; i++ )
    ;
  if ( i >= n )         /* all points coincide with the pivot */
    return 1;
        /* the first point with a valid angle is the second hull vertex; */
        /* if duplicates of the pivot precede it, move it to p[1] so     */
        /* that it is not lost and no duplicate enters the hull          */
  if ( i > 1 )
    V3Copyf ( p[1], p[i] );
        /* scan: j is the index of the last hull vertex found so far, */
        /* b is the direction of the last hull edge                   */
  for ( b = p[1][2], j = 1, i++;  i < n;  i++ ) {
    p[i][2] = c = SqAngle ( p[i][0]-p[j][0], p[i][1]-p[j][1] );
    if ( c < 0.0 )      /* the point coincides with the last hull vertex */
      continue;
        /* remove vertices at which the boundary would not turn left */
    while ( c <= b && j > 0 ) {
      j--;
      c = SqAngle ( p[i][0]-p[j][0], p[i][1]-p[j][1] );
      b = p[j][2];
    }
    if ( (++j) != i )
      V3Copyf ( p[j], p[i] );
    p[j][2] = b = c;
  }
        /* close the polygon: check the turn at the last vertices against */
        /* the edge leading back to the pivot                             */
  if ( (c = SqAngle ( xmin-p[j][0], ymin-p[j][1] )) >= 0.0 ) {
    while ( c <= b && j > 1 ) {
      j--;
      c = SqAngle ( xmin-p[j][0], ymin-p[j][1] );
      b = p[j][2];
    }
  }
  else
    j--;
  return j+1;
} /*FindConvexHullf*/

/* ////////////////////////////////////////////////////////////////////////// */
/* Rotates the n points of convh by the angle whose sine and cosine are sa */
/* and ca, and finds the axis-aligned bounding rectangle of the rotated    */
/* points.  The lower left corner is returned in *x, *y and the size in    */
/* *w, *h.  Only the first two coordinates of each point are used.         */
static void FindCHRect ( int n, GLfloat convh[][3], float sa, float ca,
                         float *x, float *y, float *w, float *h )
{
  const GLfloat *pt;
  float         xlo, xhi, ylo, yhi, rx, ry;
  int           k;

  pt = convh[0];
  xlo = xhi = ca*pt[0] - sa*pt[1];
  ylo = yhi = sa*pt[0] + ca*pt[1];
  for ( k = 1; k < n; k++ ) {
    pt = convh[k];
    rx = ca*pt[0] - sa*pt[1];
    ry = sa*pt[0] + ca*pt[1];
    if ( rx < xlo )
      xlo = rx;
    else if ( rx > xhi )
      xhi = rx;
    if ( ry < ylo )
      ylo = ry;
    else if ( ry > yhi )
      yhi = ry;
  }
  *x = xlo;
  *y = ylo;
  *w = xhi-xlo;
  *h = yhi-ylo;
} /*FindCHRect*/

static void FitTrSetInRect ( TriangPatch *trpatch, int n, GLfloat convh[][3],
                             float *sang, float *cang )
{
  float sa, ca, b, samin, camin, bmin;
  float x, y, w, h, xmin, ymin, wmin, hmin;
  int   i;

  samin = convh[n-1][1]-convh[0][1];  camin = convh[0][0]-convh[n-1][0];
  b = 1.0/sqrt ( samin*samin + camin*camin );
  samin *= b;  camin *= b;
  FindCHRect ( n, convh, samin, camin, &xmin, &ymin, &wmin, &hmin );
  bmin = wmin*hmin;
  for ( i = 0; i < n-1; i++ ) {
    sa = convh[i][1]-convh[i+1][1];  ca = convh[i+1][0]-convh[i][0];
    b = 1.0/sqrt ( sa*sa + ca*ca );
    sa *= b;  ca *= b;
    FindCHRect ( n, convh, sa, ca, &x, &y, &w, &h );
    if ( (b = w*h) < bmin )
      { bmin = b;  xmin = x;  ymin = y;  wmin = w;  hmin = h;
        samin = sa;  camin = ca; }
  }
  trpatch->x0 = xmin;  trpatch->y0 = ymin;  trpatch->w = wmin;  trpatch->h = hmin;
  *sang = samin;  *cang = camin;
} /*FitTrSetInRect*/

/* ////////////////////////////////////////////////////////////////////////// */
/* Flattens one patch of triangles onto a plane and fits it in a rectangle.*/
/*   trdesc  - triangle descriptions (vertex indices ind, normal nvect),   */
/*   tr      - number of the patch, used only to tag vertices,             */
/*   trpatch - the patch: triangles ftr, ..., ftr+ntr-1; on return its     */
/*             x0, y0, w, h fields are set,                                */
/*   vtag    - per-vertex tags; tr+1 marks a vertex reflected for this     */
/*             patch, -(tr+1) a vertex whose coordinates are final,        */
/*   vpos    - vertex positions,                                           */
/*   txb     - output: two coordinates per vertex within the rectangle,    */
/*   eld     - divisor converting the rectangle sides to counts (the       */
/*             side lengths are divided by it and rounded up),             */
/*   convh   - working array for the reflected points and their hull.      */
static void _ProcessObjTriangPatch ( TriangDesc *trdesc,
                  int tr, TriangPatch *trpatch,
                  int *vtag, GLfloat vpos[][3], GLfloat txb[][2],
                  float eld, GLfloat convh[][3] )
{
  float tsnv[3], hrv[3], gamma, s, sa, ca, w, h, sx, sy, x0, y0;
  int   i, j, k, p;

        /* construct the reflection: first sum the normal vectors of */
        /* the triangles of the patch                                */
  V3Copyf ( tsnv, trdesc[trpatch->ftr].nvect );
  for ( i = trpatch->ftr+1; i < trpatch->ftr+trpatch->ntr; i++ )
    V3Addf ( tsnv, tsnv, trdesc[i].nvect );
/* check: every triangle normal must have a positive dot product with */
/* the sum                                                             */
  for ( i = trpatch->ftr; i < trpatch->ftr+trpatch->ntr; i++ )
    if ( V3DotProductf ( tsnv, trdesc[i].nvect ) <= 0.0 )
      ExitOnError ( "_ProcessObjTriangPatch" );
/* construction of the Householder reflection: hrv is tsnv with its     */
/* length added to (or subtracted from) the z component, the sign being */
/* that of tsnv[2]; the reflection is v -> v + gamma*(hrv.v)*hrv        */
  gamma = sqrt ( V3DotProductf ( tsnv, tsnv ) );
  V3Copyf ( hrv, tsnv );
  hrv[2] += tsnv[2] > 0.0 ? gamma : -gamma;
  gamma = -2.0/V3DotProductf ( hrv, hrv );
        /* reflect the vertices; each vertex of the patch is processed */
        /* once (vtag), its first two coordinates are stored in txb    */
  for ( i = trpatch->ftr, k = 0;  i < trpatch->ftr+trpatch->ntr;  i++ )
    for ( j = 0; j < 3; j++ ) {
      p = trdesc[i].ind[j];
      if ( vtag[p] != tr+1 ) {
        vtag[p] = tr+1;
        s = V3DotProductf ( hrv, vpos[p] );
        V3AddMf ( convh[k], vpos[p], gamma*s, hrv );
        memcpy ( txb[p], convh[k++], 2*sizeof(GLfloat) );
      }
    }
        /* find the convex hull */
  k = FindConvexHullf ( k, convh );
        /* find the smallest rectangle for the hull */
  FitTrSetInRect ( trpatch, k, convh, &sa, &ca );
        /* round the sides up to whole multiples of eld; sx, sy scale the */
        /* coordinates to the rounded sizes; then add a margin of 2       */
  w = trpatch->w;  trpatch->w = (int)ceil ( w / eld );  sx = trpatch->w / w;
  h = trpatch->h;  trpatch->h = (int)ceil ( h / eld );  sy = trpatch->h / h;
  trpatch->w += 2;  trpatch->h += 2;
        /* transform the vertices: rotate, move the rectangle corner to */
        /* the origin, scale and shift by 1                             */
  x0 = trpatch->x0;    y0 = trpatch->y0;
  for ( i = trpatch->ftr;  i < trpatch->ftr+trpatch->ntr;  i++ )
    for ( j = 0; j < 3;  j++ ) {
      p = trdesc[i].ind[j];
      if ( vtag[p] == tr+1 ) {
        vtag[p] = -(tr+1);      /* mark as done */
        s = ca*txb[p][0] - sa*txb[p][1];
        txb[p][1] = sa*txb[p][0] + ca*txb[p][1];
        txb[p][0] = sx*(s - x0) + 1.0;
        txb[p][1] = sy*(txb[p][1] - y0) +1.0;
      }
    }
} /*_ProcessObjTriangPatch*/

/* Processes all triangle patches of the object bobj with                  */
/* _ProcessObjTriangPatch.  The array bobj->txb (two floats per vertex) is */
/* allocated here and left to the caller; the vertex tags and the hull     */
/* working array are temporary.  The parameter belem is not used.          */
void ProcessObjTriangPatches ( BalanceElements *belem, BalanceObject *bobj )
{
  int     *vtag;
  GLfloat *hull;
  GLfloat (*vpos)[3], (*txb)[2];
  int     nv, k, ok;

  nv = bobj->cnvert;
        /* allocate in turn; stop at the first failure */
  ok = (vtag = malloc ( nv*sizeof(int) )) != NULL;
  if ( ok )
    ok = (hull = malloc ( 3*nv*sizeof(GLfloat) )) != NULL;
  if ( ok )
    ok = (bobj->txb = malloc ( 2*nv*sizeof(GLfloat) )) != NULL;
  if ( !ok )
    ExitOnError ( "ProcessObjTriangPatchs 0" );
        /* no vertex is tagged yet */
  for ( k = nv; k-- > 0; )
    vtag[k] = 0;
  vpos = (GLfloat(*)[3])bobj->vertpos;
  txb = (GLfloat(*)[2])bobj->txb;
  for ( k = 0; k < bobj->ntrpatch; k++ )
    _ProcessObjTriangPatch ( bobj->trdesc, k, &bobj->trpatch[k],
                             vtag, vpos, txb, bobj->eld,
                             (GLfloat(*)[3])hull );
  free ( vtag );
  free ( hull );
} /*ProcessObjTriangPatches*/

/* ////////////////////////////////////////////////////////////////////////// */
/* Vertex of the binary tree built by _BuildTrSetTree.  A leaf represents  */
/* the rectangle of one triangle patch, an inner vertex a rectangle        */
/* holding the rectangles of its two subtrees.                             */
typedef struct {
    float       w, h, x0, y0;  /* size (stored so that w >= h when the     */
                               /* vertex is created) and position, the     */
                               /* latter set by _PositionTriangPatches     */
    int         left, right, height;  /* subtree indices (-1 in a leaf)    */
                               /* and height of the subtree (1 for a leaf) */
    TriangPatch *trpatch;      /* the patch of a leaf, NULL otherwise      */
  } TrSetTreeVertex;

/* Priority comparison of tree vertices: nonzero if v1 precedes v2 in the  */
/* lexicographic order of the pairs (w, h).                                */
static char _PrioCmp ( TrSetTreeVertex *v1, TrSetTreeVertex *v2 )
{
  if ( v1->w < v2->w )
    return 1;
  if ( v1->w == v2->w )
    return v1->h < v2->h;
  return 0;
} /*_PrioCmp*/

/* Restores the heap order in tsind[f..l] by sifting the element at        */
/* position f down.  tsind holds indices into tstv; the element preceding  */
/* the others according to _PrioCmp is kept at the top.                    */
static void _DownHeap ( int *tsind, TrSetTreeVertex *tstv, int f, int l )
{
  int child, tmp;

  while ( (child = 2*f+1) <= l ) {
        /* choose the child that precedes the other one */
    if ( child+1 <= l &&
         _PrioCmp ( &tstv[tsind[child+1]], &tstv[tsind[child]] ) )
      child++;
    if ( !_PrioCmp ( &tstv[tsind[child]], &tstv[tsind[f]] ) )
      return;
    tmp = tsind[child];
    tsind[child] = tsind[f];
    tsind[f] = tmp;
    f = child;
  }
} /*_DownHeap*/

/* Restores the heap order after placing a new element at position n of    */
/* tsind: the element is moved up as long as it precedes its parent        */
/* according to _PrioCmp.                                                  */
static void _UpHeap ( int *tsind, TrSetTreeVertex *tstv, int n )
{
  int parent, tmp;

  for ( ; n > 0; n = parent ) {
    parent = (n-1)/2;
    if ( !_PrioCmp ( &tstv[tsind[n]], &tstv[tsind[parent]] ) )
      break;
    tmp = tsind[parent];
    tsind[parent] = tsind[n];
    tsind[n] = tmp;
  }
} /*_UpHeap*/

/* Builds a binary tree of rectangles over the triangle patches.           */
/* The leaves tstv[0..ntrpatch-1] correspond to the patches; then the two  */
/* vertices at the top of a priority queue ordered by _PrioCmp (smallest   */
/* (w,h) first) are repeatedly joined under a new vertex, whose rectangle  */
/* has the greater of the two widths and the sum of the heights, stored    */
/* so that w >= h.  The new vertices are appended at tstv[ntrpatch...], so */
/* tstv must have room for 2*ntrpatch-1 elements.                          */
/* Returns the index of the root.  The parameters nvert, vertpos, ntr and  */
/* trdesc are not used.                                                    */
static int _BuildTrSetTree ( TrSetTreeVertex *tstv,
                             int nvert, float *vertpos,
                             int ntr, TriangDesc *trdesc,
                             int ntrpatch, TriangPatch *trpatch )
{
  int   *tsind;
  float w, h;
  int   i, j, k, l;

  if ( !(tsind = malloc ( ntrpatch*sizeof(int) )) )
    ExitOnError ( "_BuildTrSetTree" );
        /* create the leaves of the tree */
  for ( i = 0; i < ntrpatch; i++ ) {
    tstv[i].trpatch = &trpatch[i];
    if ( trpatch[i].w > trpatch[i].h )
      { tstv[i].w = trpatch[i].w;  tstv[i].h = trpatch[i].h; }
    else
      { tstv[i].w = trpatch[i].h;  tstv[i].h = trpatch[i].w; }
    tstv[i].left = tstv[i].right = -1;
    tstv[i].height = 1;
    tsind[i] = i;
  }
        /* order the index array tsind as a heap (the priority queue) */
  for ( i = ntrpatch/2-1; i >= 0; i-- )
    _DownHeap ( tsind, tstv, i, ntrpatch-1 );
        /* at the beginning of step i the queue occupies */
        /* tsind[0..ntrpatch-i]; l is the index of the new vertex */
  for ( i = 1, l = ntrpatch;  i < ntrpatch;  i++, l++ ) {
        /* take two vertices from the priority queue */
    j = tsind[0];  tsind[0] = tsind[ntrpatch-i];
    _DownHeap ( tsind, tstv, 0, ntrpatch-1-i );
    k = tsind[0];  tsind[0] = tsind[ntrpatch-i-1];
    _DownHeap ( tsind, tstv, 0, ntrpatch-2-i );
        /* create a new vertex */
    tstv[l].trpatch = NULL;
    tstv[l].left = j;  tstv[l].right = k;
    w = tstv[j].w > tstv[k].w ? tstv[j].w : tstv[k].w;
    h = tstv[j].h + tstv[k].h;
    if ( w > h ) { tstv[l].w = w;  tstv[l].h = h; }
            else { tstv[l].w = h;  tstv[l].h = w; }
    tstv[l].height = (tstv[j].height > tstv[k].height ?
                      tstv[j].height : tstv[k].height) + 1;
      /* insert it into the priority queue */
    tsind[ntrpatch-1-i] = l;
    _UpHeap ( tsind, tstv, ntrpatch-1-i );
  }
  free ( tsind );
  return l-1;
} /*_BuildTrSetTree*/

/* Exchanges the width and height of a triangle patch rectangle. */
static void _FlipTriangPatch ( TriangPatch *ts )
{
  float old_w;

  old_w = ts->w;
  ts->w = ts->h;
  ts->h = old_w;
} /*_FlipTriangPatch*/

/* Exchanges the width and height of the rectangle of a tree vertex. */
static void _FlipTreeVertex ( TrSetTreeVertex *tstv )
{
  float old_w;

  old_w = tstv->w;
  tstv->w = tstv->h;
  tstv->h = old_w;
} /*_FlipTreeVertex*/

/* Exchanges the two coordinates of a point. */
static void _FlipTriangVertex ( float txb[2] )
{
  const float first = txb[0];

  txb[0] = txb[1];
  txb[1] = first;
} /*_FlipTriangVertex*/

/* Assigns positions to the rectangles of the tree with the given root,    */
/* traversing the tree with an explicit stack.  The root is placed at      */
/* (0,0).  The two subtrees of an inner vertex start at the corner of      */
/* their parent; then the left one is moved past the right one, either     */
/* horizontally or vertically, after flipping both if necessary.  The      */
/* case is recognised by comparing sums of the sides of the subtrees with  */
/* the sides of the parent for exact equality (the parent sides were       */
/* computed from the same values in _BuildTrSetTree).                      */
/* A leaf copies its position to its patch; if the patch width differs     */
/* from the width of the leaf, the patch is flipped and flip[v] is set     */
/* (v being the index of the leaf).                                        */
/* NOTE(review): the stack has room for 100 entries and there is no        */
/* overflow check.                                                         */
static void _PositionTriangPatches ( TrSetTreeVertex *tstv, int root,
                                     char *flip )
{
  int       sp, stk[100];
  int       v, l, r;
  TriangPatch *ts;
  float     z;

  tstv[root].x0 = tstv[root].y0 = 0.0;
  for ( stk[sp = 0] = root; sp >= 0; ) {
    v = stk[sp--];
    if ( (ts = tstv[v].trpatch) ) {
        /* a leaf: pass the position to the patch */
      ts->x0 = tstv[v].x0;  ts->y0 = tstv[v].y0;
      if ( ts->w != tstv[v].w ) {
        _FlipTriangPatch ( ts );
        flip[v] = true;
      }
    }
    else {
        /* an inner vertex: place both subtrees at its corner, then */
        /* shift the left one                                       */
      l = tstv[v].left;  r = tstv[v].right;
      tstv[l].x0 = tstv[r].x0 = tstv[v].x0;
      tstv[l].y0 = tstv[r].y0 = tstv[v].y0;
      if ( (z = tstv[r].w + tstv[l].w) == tstv[v].w )
        tstv[l].x0 += tstv[r].w;
      else if ( z == tstv[v].h ) {
        _FlipTreeVertex ( &tstv[r] );  _FlipTreeVertex ( &tstv[l] );
        tstv[l].y0 += tstv[r].h;
      }
      else if ( (z = tstv[r].h + tstv[l].h) == tstv[v].w ) {
        _FlipTreeVertex ( &tstv[r] );  _FlipTreeVertex ( &tstv[l] );
        tstv[l].x0 += tstv[r].w;
      }
      else
        tstv[l].y0 += tstv[r].h;
        /* push the higher subtree first, so that the lower one is */
        /* popped and processed first                              */
      if ( tstv[l].height > tstv[r].height )
        { stk[++sp] = l;  stk[++sp] = r; }
      else
        { stk[++sp] = r;  stk[++sp] = l; }
    }
  }
} /*_PositionTriangPatches*/

/* Moves the texture coordinates (belem->txb) of the vertices of each      */
/* patch to the position of the patch: the coordinates are exchanged if    */
/* flip[i] is set for the patch i, and then the patch origin (x0, y0) is   */
/* added.  Each vertex is processed once, for the first patch in which it  */
/* is encountered.                                                         */
static void MapPatchVertices ( BalanceElements *belem, char *flip )
{
  TriangPatch *patch;
  float       *uv, dx, dy;
  char        *done;
  int         npts, pi, ti, tend, c, v;

  npts = belem->nvert;
  done = malloc ( npts*sizeof(char) );
  if ( !done )
    ExitOnError ( "MapTrSetVertices");
  memset ( done, 0, npts*sizeof(char) );
  for ( pi = 0; pi < belem->ntrpatch; pi++ ) {
    patch = &belem->trpatch[pi];
    dx = patch->x0;
    dy = patch->y0;
    tend = patch->ftr+patch->ntr;
    for ( ti = patch->ftr; ti < tend; ti++ )
      for ( c = 0; c < 3; c++ ) {
        v = belem->trdesc[ti].ind[c];
        if ( done[v] )
          continue;
        uv = &belem->txb[2*v];
        if ( flip[pi] )
          _FlipTriangVertex ( uv );
        uv[0] += dx;
        uv[1] += dy;
        done[v] = 1;
      }
  }
  free ( done );
} /*MapPatchVertices*/

/* ////////////////////////////////////////////////////////////////////////// */
static void SetupBElemVAO ( BalanceElements *belem )
{
  GLuint *ind;
  int    i, j, k, ntr;

  glGenVertexArrays ( 1, &belem->tvao );
  glBindVertexArray ( belem->tvao );
  glGenBuffers ( 4, belem->tvbo );
  glBindBuffer ( GL_ARRAY_BUFFER, BUF_VERTPOS );
  glBufferData ( GL_ARRAY_BUFFER, belem->nvert*3*sizeof(GLfloat),
                 belem->vertpos, GL_STATIC_DRAW );
  glEnableVertexAttribArray ( 0 );
  glVertexAttribPointer ( 0, 3, GL_FLOAT, GL_FALSE,
                          3*sizeof(GLfloat), (GLvoid*)0 );
  glBindBuffer ( GL_ARRAY_BUFFER, BUF_TXC );
  glBufferData ( GL_ARRAY_BUFFER, belem->nvert*2*sizeof(GLfloat),
                 belem->txc, GL_STATIC_DRAW );
  glEnableVertexAttribArray ( 1 );
  glVertexAttribPointer ( 1, 2, GL_FLOAT, GL_FALSE,
                          2*sizeof(GLfloat), (GLvoid*)0 );
  glBindBuffer ( GL_ARRAY_BUFFER, BUF_TXB );
  glBufferData ( GL_ARRAY_BUFFER, belem->nvert*2*sizeof(GLfloat),
                 belem->txb, GL_STATIC_DRAW );
  glEnableVertexAttribArray ( 2 );
  glVertexAttribPointer ( 2, 2, GL_FLOAT, GL_FALSE,
                          2*sizeof(GLfloat), (GLvoid*)0 );
  ntr = belem->ntr;
  if ( !(ind = malloc ( 3*ntr*sizeof(GLuint) )) )
    ExitOnError ( "SetupBElemVAO" );
  for ( i = k = 0;  i < ntr;  i++ )
    for ( j = 0; j < 3; j++ )
      ind[k++] = belem->trdesc[i].ind[j];
  glBindBuffer ( GL_ELEMENT_ARRAY_BUFFER, BUF_ELIND );
  glBufferData ( GL_ELEMENT_ARRAY_BUFFER, ntr*3*sizeof(GLuint),
                 ind, GL_STATIC_DRAW );
  free ( ind );
  ExitIfGLError ( "SetupBElemVAO" );
} /*_SetupBElemVAO*/

/* Executes stage 2 of the compute program in a sequence of passes, in     */
/* which the control variable N takes the values n, (n+1)/2, ... down to   */
/* 2, and then reads the first GLuint of BUF_VARBUF, which is returned.    */
/* The caller stores the result as the number of elements; presumably the  */
/* passes form a parallel reduction that counts them - the shader is not   */
/* visible here.                                                           */
/* NOTE(review): COMPUTE is a macro defined elsewhere (it is used here     */
/* without a terminating semicolon); the program is expected to have been  */
/* selected by the caller, see the commented-out glUseProgram.             */
static GLint FindNElem ( BalanceElements *belem, GLint n )
{
  GLint nelem;

  /*glUseProgram ( bprog_id[1] );*/  /* b3.comp */
  CTLUniformi ( belem, CTL_STAGE, 2 );
  for ( ; n > 1; n = (n+1)/2 ) {
    CTLUniformi ( belem, CTL_N, n );
    COMPUTE ( n/2, 1, 1 )
  }
  glGetNamedBufferSubData ( BUF_VARBUF, 0, sizeof(GLuint), &nelem );
  ExitIfGLError ( "FindNElem" );
  return nelem;
} /*FindNElem*/

/* Executes stage 3 of the compute program in the passes of a sorting      */
/* network (judging by the name) for n items.  The number of steps is the  */
/* number of significant bits of n-1 and each pass uses nn/2 work groups,  */
/* where nn = 2^steps.  In the step with h = 2, 4, 8, ... there is one     */
/* pass with the control variable REVERSE set and H = h, followed by       */
/* passes with REVERSE cleared and H = h/2, h/4, ..., 2.                   */
/* The program is expected to have been selected by the caller.            */
static void NetSort ( BalanceElements *belem, int n )
{
  GLuint steps, nn, h, h2, h4, gsize, i;

  /*glUseProgram ( bprog_id[1] );*/  /* b3.comp */
  CTLUniformi ( belem, CTL_STAGE, 3 );
  CTLUniformi ( belem, CTL_N, n );
        /* count the significant bits of n-1 */
  for ( nn = n-1, steps = 0;  nn;  nn >>= 1, steps ++ )
    ;
  nn = 1 << steps;  gsize = nn/2;
        /* on entry to each step h2 = h/2; the inner loop brings h2 down */
        /* to 1 and the outer loop header restores it to the current h   */
  for ( i = 0, h2 = 1, h = 2;  i < steps;  i++, h2 = h, h <<= 1 ) {
    CTLUniformi ( belem, CTL_REVERSE, GL_TRUE );
    CTLUniformi ( belem, CTL_H, h );
    COMPUTE ( gsize, 1, 1 );
    CTLUniformi ( belem, CTL_REVERSE, GL_FALSE );
    for ( h4 = h2 >> 1;  h2 > 1;  h2 = h4, h4 >>= 1 ) {
      CTLUniformi ( belem, CTL_H, h2 );
      COMPUTE ( gsize, 1, 1 );
    }
  }
  ExitIfGLError ( "NetSort" );
} /*NetSort*/

/* Executes stage 5 of the compute program in consecutive passes with the  */
/* control variable STEP equal to 0, 1, ...; the number of passes is the   */
/* number of significant bits of n-1 and each pass uses n/2 work groups.   */
/* Judging by the name the passes compute prefix sums of n items; the      */
/* shader is not visible here.  The program is expected to have been       */
/* selected by the caller.                                                 */
/* NOTE(review): COMPUTE is used here without a terminating semicolon.     */
static void PrefixSum ( BalanceElements *belem, int n )
{
  GLint k, m, d;

  /*glUseProgram ( bprog_id[1] );*/  /* b3.comp */
  CTLUniformi ( belem, CTL_STAGE, 5 );
  CTLUniformi ( belem, CTL_N, n );
  d = n/2;
  for ( k = 0, m = n-1;  m > 0;  k++, m >>= 1 ) {
    CTLUniformi ( belem, CTL_STEP, k );
    COMPUTE ( d, 1, 1 )
  }
  ExitIfGLError ( "PrefixSum" );
} /*PrefixSum*/

/* Draws the triangles of all objects and the patch rectangles on the      */
/* working (irradiance) texture, then uses the compute program b3.comp to  */
/* find, count, sort and number the elements.  On return belem->nelem and  */
/* belem->nmacroelem are set and belem->VarBuf is allocated; it holds a    */
/* copy of the first nelem*4 words of BUF_VARBUF followed by the contents  */
/* of BUF_OBJID (belem->ObjIdBuf); belem->VarMap points to the remaining   */
/* w*h*2 words, which are not filled here.                                 */
/* NOTE(review): the meaning of the individual shader stages is taken from */
/* the original comments; the shaders are not visible here.                */
static void TexturePatchVertices ( BalanceElements *belem )
{
  BalanceObject *bobj;
  GLuint        bfbo, mbuf;
  GLint         *buf;
  int           i, w, h, ntr, nelem, nmacroelem;

        /* prepare drawing the triangles on the working texture */
        /* prepare the working buffers and textures */
  w = belem->irrtxt_width;  h = belem->irrtxt_height;  ntr = belem->ntr;
  BUF_VARBUF = NewStorageBuffer ( w*h*4*sizeof(GLuint), 0 );
  BUF_VMAP = NewStorageBuffer ( w*h*2*sizeof(GLuint), 9 );
        /* prepare the framebuffer (it has no attachments, only the */
        /* default width and height)                                */
  glGenFramebuffers ( 1, &bfbo );
  glBindFramebuffer ( GL_DRAW_FRAMEBUFFER, bfbo );
  glFramebufferParameteri ( GL_DRAW_FRAMEBUFFER,
                            GL_FRAMEBUFFER_DEFAULT_WIDTH, w );
  glFramebufferParameteri ( GL_DRAW_FRAMEBUFFER,
                            GL_FRAMEBUFFER_DEFAULT_HEIGHT, h );
  if ( glCheckFramebufferStatus ( GL_DRAW_FRAMEBUFFER ) !=
         GL_FRAMEBUFFER_COMPLETE )
    ExitOnError ( "TexturePatchVertices 0" );
        /* draw the triangles */
  CTLUniformi ( belem, CTL_WIDTH, belem->irrtxt_width );
  CTLUniformi ( belem, CTL_HEIGHT, belem->irrtxt_height );
  glViewport ( 0, 0, w, h );
  glUseProgram ( bprog_id[1] );  /* b3.comp */
  CTLUniformi ( belem, CTL_STAGE, 0 );  /* clear BUF_VARBUF and BUF_VMAP */
  COMPUTE ( w*h, 1, 1 );
          /* send the material numbers of the triangles to a buffer */
  BUF_MATBUF = NewStorageBuffer ( ntr*sizeof(GLuint), 11 );
  if ( !(buf = malloc ( ntr*sizeof(GLint) )) )
    ExitOnError ( "TexturePatchVertices 1" );
  for ( i = 0; i < ntr; i++ )
    buf[i] = belem->trdesc[i].matnum;
  glBufferSubData ( GL_SHADER_STORAGE_BUFFER, 0, ntr*sizeof(GLint), buf );
  free ( buf );
  BUF_ABUF = NewStorageBuffer ( w*h*4*sizeof(GLfloat), 6 );
  glUseProgram ( bprog_id[0] );  /* b3.vert, b3.geom, b3.frag */
        /* the triangles of each object are drawn twice: as outlines */
        /* and filled                                                */
  for ( i = 0; i < belem->nobj; i++ ) {
    bobj = &belem->objtab[i];
    CTLUniformi ( belem, CTL_FIRST, bobj->ftrdesc );
    CTLUniformi ( belem, CTL_TXTS, bobj->txts );
    glPolygonMode ( GL_FRONT_AND_BACK, GL_LINE );
    glDrawElements ( GL_TRIANGLES, 3*bobj->cntr, GL_UNSIGNED_INT,
                     (GLvoid*)(bobj->ftrdesc*3*sizeof(GLuint)) );
    glFlush ();
    glPolygonMode ( GL_FRONT_AND_BACK, GL_FILL );
    glDrawElements ( GL_TRIANGLES, 3*bobj->cntr, GL_UNSIGNED_INT,
                     (GLvoid*)(bobj->ftrdesc*3*sizeof(GLuint)) );
    glFlush ();
  }
        /* draw one rectangle (a strip of 4 vertices) per patch */
  BUF_OBJID = NewStorageBuffer ( w*h*sizeof(GLuint), 8 );
  LoadPatchRectSizes ( belem );
  glUseProgram ( bprog_id[2] );  /* b4.vert, b4.frag */
  glBindVertexArray ( empty_vao );
  glDrawArraysInstanced ( GL_TRIANGLE_STRIP, 0, 4, belem->ntrpatch );
  glFlush ();
  ExitIfGLError ( "TexturePatchVertices 2" );
        /* clean up after drawing */
  glBindVertexArray ( 0 );
  glBindFramebuffer ( GL_DRAW_FRAMEBUFFER, 0 );
  glDeleteFramebuffers ( 1, &bfbo );
        /* filter the elements */
          /* vertices in the model coordinate systems */
  glBindBufferBase ( GL_SHADER_STORAGE_BUFFER, 1, BUF_VERTPOS );
          /* coordinates of the vertices on the texture */
  glBindBufferBase ( GL_SHADER_STORAGE_BUFFER, 2, BUF_TXB );
          /* indices of the triangle vertices */
  glBindBufferBase ( GL_SHADER_STORAGE_BUFFER, 3, BUF_ELIND );
          /* centre points of the elements in the model coordinate systems */
  BUF_TBUF = NewStorageBuffer ( w*h*4*sizeof(GLfloat), 4 );
  glUseProgram ( bprog_id[1] );  /* b3.comp */
  CTLUniformi ( belem, CTL_STAGE, 1 );  /* ProcessPixel */
  COMPUTE ( w, h, 1 );
        /* count the elements */
  belem->nelem = nelem = FindNElem ( belem, w*h );
        /* one allocation for VarBuf (nelem*4 words), ObjIdBuf (nelem */
        /* words) and VarMap (w*h*2 words)                            */
  if ( !(belem->VarBuf = malloc ( (nelem*5+w*h*2)*sizeof(GLuint) )) )
    ExitOnError ( "TexturePatchVertices 2" );
  belem->ObjIdBuf = &belem->VarBuf[nelem*4];
  belem->VarMap = &belem->ObjIdBuf[nelem];
  NetSort ( belem, w*h );
        /* number the macroelements */
  CTLUniformi ( belem, CTL_STAGE, 4 );
  COMPUTE ( nelem, 1, 1 );
  PrefixSum ( belem, nelem );
  glGetNamedBufferSubData ( BUF_VARBUF, 0, nelem*4*sizeof(GLuint),
                            belem->VarBuf );
        /* the first word of the last element gives the greatest */
        /* macroelement number; their count is greater by one    */
  glGetNamedBufferSubData ( BUF_VARBUF, (nelem-1)*4*sizeof(GLuint),
                            sizeof(GLuint), &nmacroelem );
  belem->nmacroelem = ++nmacroelem;

        /* stage 6 fills a new buffer of nelem words (binding point 7), */
        /* which then replaces the per-pixel BUF_OBJID                  */
  mbuf = NewStorageBuffer ( nelem*sizeof(GLuint), 7 );
  CTLUniformi ( belem, CTL_STAGE, 6 );
  COMPUTE ( nelem, 1, 1 );
  glBindBufferBase ( GL_SHADER_STORAGE_BUFFER, 8, 0 );
  glDeleteBuffers ( 1, &BUF_OBJID );
  BUF_OBJID = mbuf;
  glGetNamedBufferSubData ( BUF_VARBUF, 0, nelem*4*sizeof(GLuint), 
                            belem->VarBuf );
  glGetNamedBufferSubData ( BUF_OBJID, 0, nelem*sizeof(GLuint),
                            belem->ObjIdBuf );
/*
printf ( "irradiance texture size: %d x %d\n", w, h );
printf ( "\n#triangles = %d\n", ntr );
printf ( "#patches = %d\n", belem->ntrpatch );
printf ( "#macroelements = %d\n", nmacroelem );
printf ( "#elements = %d, %c wykorzystania %7.2f\n\n", nelem,
         '%', 100.0*(float)nelem/(float)(w*h) );
*/
  ExitIfGLError ( "TexturePatchVertices" );
} /*TexturePatchVertices*/

/* Creates the buffers BUF_ALBMAT and BUF_CP (four floats per element) and */
/* the two buffers of the averaging matrix belem->avgmat, and fills them   */
/* with the compute programs b5.comp and b4.comp.  The averaging matrix    */
/* has nmacroelem rows, nelem columns and nelem nonzero coefficients.      */
/* NOTE(review): the description of the buffer contents comes from the     */
/* original comments; the shaders are not visible here.                    */
static void ComputeMatricesDandA ( BalanceElements *belem )
{
  int nelem, nmacroelem;

  nelem = belem->nelem;  nmacroelem = belem->nmacroelem;
        /* create the albedo matrix of the elements */
  BUF_ALBMAT = NewStorageBuffer ( nelem*4*sizeof(GLfloat), 7 );
  BUF_CP = NewStorageBuffer ( nelem*4*sizeof(GLfloat), 5 );
  glUseProgram ( bprog_id[5] );    /* b5.comp, draw on vmap */
  COMPUTE ( nelem, 1, 1 );
        /* create the averaging matrix of the macroelements */
  belem->avgmat.m = nmacroelem;  belem->avgmat.n = belem->avgmat.nnz = nelem;
  belem->avgmat.lmax = 0;
        /* BUF_CP is moved to binding point 4, as binding point 5 is */
        /* taken by the first buffer of the matrix                   */
  glBindBufferBase ( GL_SHADER_STORAGE_BUFFER, 4, BUF_CP );
  belem->avgmat.buf[0] =
      NewStorageBuffer ( (nelem+nmacroelem+1)*sizeof(GLuint), 5 );
  belem->avgmat.buf[1] = NewStorageBuffer ( nelem*sizeof(GLfloat), 6 );
  glUseProgram ( bprog_id[3] );  /* b4.comp */
  CTLUniformi ( belem, CTL_NROWS, nmacroelem );
  CTLUniformi ( belem, CTL_NCOLS, nelem );
        /* stage 0 is executed once per element, stage 1 once per */
        /* macroelement                                           */
  CTLUniformi ( belem, CTL_STAGE, 0 );
  COMPUTE ( nelem, 1, 1 );
  CTLUniformi ( belem, CTL_STAGE, 1 );
  COMPUTE ( nmacroelem, 1, 1 );
  glBindBufferBase ( GL_SHADER_STORAGE_BUFFER, 8, BUF_OBJID );
  glUseProgram ( 0 );
  ExitIfGLError ( "ComputeMatricesDandA" );
} /*ComputeMatricesDandA*/

/* Merges the per-object vertex, triangle and triangle patch arrays of the  */
/* first nobj entries of belem->objtab into arrays shared by all objects,   */
/* builds the tree of triangle patches, places the patches on the working   */
/* textures and creates the GPU data derived from them (via SetupBElemVAO,  */
/* TexturePatchVertices, ComputeMatricesDandA and the BUF_TRNV buffer of    */
/* triangle normal vectors).                                                */
static void MapTrianglePatches ( BalanceElements *belem, int nobj )
{
  SceneObject     *obj, *inst;
  BalanceObject   *bobj, *binst;
  float           *vertpos, *tnv, *txb, *txc;
  TriangDesc      *trdesc;
  TriangPatch     *trpatch;
  TrSetTreeVertex *tstv;
  int             nvert, ntr, ntrpatch, root;
  int             i, j, k;
  char            *flip;

        /* count vertices, triangles and triangle patches */
  belem->nobj = nobj;
  for ( i = nvert = ntr = ntrpatch = 0; i < nobj; i++ ) {
    nvert += belem->objtab[i].cnvert;
    ntr += belem->objtab[i].cntr;
    ntrpatch += belem->objtab[i].ntrpatch;
  }
/*  printf ( "nobj = %d, nvert = %d, ntr = %d, ntrpatch = %d\n",
           nobj, nvert, ntr, ntrpatch );*/
        /* build common arrays for all objects; one allocation of 7 floats */
        /* per vertex holds positions (3), then txc (2), then txb (2) */
  if ( !(belem->vertpos = vertpos = malloc ( 7*nvert*sizeof(GLfloat) )) ||
       !(belem->trdesc = trdesc = malloc ( ntr*sizeof(TriangDesc) )) ||
       !(belem->trpatch = trpatch = malloc ( ntrpatch*sizeof(TriangPatch) )) )
    ExitOnError ( "MapTrianglePatches 0" );
  belem->txc = txc = &belem->vertpos[3*nvert];
  belem->txb = txb = &txc[2*nvert];
  nvert = ntr = ntrpatch = 0;
        /* objects are visited from the last to the first; an instance     */
        /* (obj->previnst != NULL) takes its geometry from the object at   */
        /* the end of its previnst chain, whose arrays are freed only when */
        /* that object itself is visited.                                  */
        /* NOTE(review): this presumably relies on instances being stored  */
        /* in objtab after the object they copy -- confirm.                */
  for ( k = nobj-1; k >= 0; k-- ) {
    bobj = &belem->objtab[k];  obj = bobj->obj;
    for ( inst = bobj->obj; inst->previnst; inst = inst->previnst )
      ;
    binst = inst->bobj;
    memcpy ( &vertpos[3*nvert], binst->vertpos, 3*bobj->cnvert*sizeof(GLfloat) );
    memcpy ( &txc[2*nvert], binst->txc, 2*bobj->cnvert*sizeof(GLfloat) );
    memcpy ( &txb[2*nvert], binst->txb, 2*bobj->cnvert*sizeof(GLfloat) );
    memcpy ( &trdesc[ntr], binst->trdesc, bobj->cntr*sizeof(TriangDesc) );
    memcpy ( &trpatch[ntrpatch], bobj->trpatch, bobj->ntrpatch*sizeof(TriangPatch) );
    if ( !obj->previnst ) {
      free ( bobj->vertpos );
      free ( bobj->trdesc );
      free ( bobj->txb );
    }
    free ( bobj->trpatch );
        /* from now on the object refers to the shared arrays; ftrdesc is */
        /* the number of its first triangle in the shared trdesc array    */
    bobj->ftrdesc = ntr;
    bobj->vertpos = vertpos;
    bobj->txc = txc;
    bobj->txb = txb;
    bobj->trdesc = trdesc;
    bobj->trpatch = &trpatch[ntrpatch];
        /* shift triangle and vertex numbers by the object's offsets */
    for ( i = 0; i < bobj->ntrpatch; i++ )
      bobj->trpatch[i].ftr += ntr;
    for ( i = 0; i < bobj->cntr; i++ )
      for ( j = 0; j < 3; j++ )
        trdesc[ntr+i].ind[j] += nvert;
    nvert += bobj->cnvert;  ntr += bobj->cntr;  ntrpatch += bobj->ntrpatch;
  }
  belem->ntrpatch = ntrpatch;
  belem->ntr = ntr;
  belem->nvert = nvert;
        /* build the tree of triangle patches */
  if ( !(tstv = malloc ( (2*ntrpatch-1)*sizeof(TrSetTreeVertex) )) ||
       !(flip = malloc ( ntrpatch*sizeof(char) )) )
    ExitOnError ( "MapTrianglePatches 1" );
  root = _BuildTrSetTree ( tstv, nvert, vertpos, ntr, trdesc, ntrpatch, trpatch );
        /* assign positions on the working textures to the triangle patches */
  memset ( flip, 0, ntrpatch*sizeof(char) );
  _PositionTriangPatches ( tstv, root, flip );
  belem->irrtxt_width = tstv[root].w;  belem->irrtxt_height = tstv[root].h;
  free ( tstv );
        /* compute the vertex coordinates on the working textures */
  MapPatchVertices ( belem, flip );
  free ( flip );
  SetupBElemVAO ( belem );
  TexturePatchVertices ( belem );
  ComputeMatricesDandA ( belem );
        /* create a buffer with the triangle normal vectors, */
        /* four floats per triangle, the fourth one being zero */
        /* NOTE(review): the tag below repeats "MapTrianglePatches 1" */
        /* used above, so the two failures cannot be told apart */
  if ( !(tnv = malloc ( ntr*4*sizeof(GLfloat) )) )
    ExitOnError ( "MapTrianglePatches 1" );
  BUF_TRNV = NewStorageBuffer ( ntr*4*sizeof(GLfloat), 10 );
  memset ( tnv, 0, ntr*4*sizeof(GLfloat) );
  for ( i = j = 0;  i < ntr;  i++, j += 4 )
    memcpy ( &tnv[j], belem->trdesc[i].nvect, 3*sizeof(GLfloat) );
  glBufferSubData ( GL_SHADER_STORAGE_BUFFER, 0, ntr*4*sizeof(GLfloat), tnv );
  free ( tnv );
} /*MapTrianglePatches*/

/* ////////////////////////////////////////////////////////////////////////// */
/* Stores in normal the cross product of the edges ind[0]->ind[1] and      */
/* ind[0]->ind[2] of a triangle given by three vertex numbers (the vector  */
/* is not normalised here).  The result is nonzero if the sum of absolute  */
/* values of its coordinates is greater than MNNVECT.                      */
static char CalcTrNormal ( GLfloat normal[3], GLfloat vpos[][3],
                           GLuint ind[3] )
{
  float  edge01[3], edge02[3];
  double norm1;

  V3Subtractf ( edge01, vpos[ind[1]], vpos[ind[0]] );
  V3Subtractf ( edge02, vpos[ind[2]], vpos[ind[0]] );
  V3CrossProductf ( normal, edge01, edge02 );
  norm1 = fabs ( normal[0] ) + fabs ( normal[1] ) + fabs ( normal[2] );
  return norm1 > MNNVECT;
} /*CalcTrNormal*/

/* Appends nvert vertices (positions and texture coordinates; the latter  */
/* are zeroed if txc is NULL) and up to ntr triangles to the arrays of    */
/* bobj.  index holds three vertex numbers per triangle.  A triangle for  */
/* which CalcTrNormal returns zero is dropped: its slot is reused by the  */
/* next triangle.  If onepatch is nonzero, all triangles are tagged with  */
/* the current bobj->ntrpatch, which is then incremented; otherwise they  */
/* are tagged with RESTART_IND_UINT.                                      */
static void _EnterTriangles ( BalanceElements *belem, BalanceObject *bobj,
                              int nvert, GLfloat vpos[][3], GLfloat txc[][2],
                              int ntr, GLuint *index, GLint matnum,
                              char onepatch )
{
  SceneObject *obj;
  TriangDesc  *td;
  int         vbase, tbase, t, kept;

  obj = bobj->obj;
        /* the space reserved for the object must not be exceeded */
  if ( bobj->cnvert + nvert > obj->nvert || bobj->cntr + ntr > obj->ntr )
    ExitOnError ( "_EnterTriagles" );
  vbase = bobj->cnvert;
  tbase = bobj->cntr;

  if ( nvert > 0 )
    memcpy ( &bobj->vertpos[3*vbase], vpos, 3*nvert*sizeof(GLfloat) );
  if ( txc )
    memcpy ( &bobj->txc[2*vbase], txc, 2*nvert*sizeof(GLfloat) );
  else
    memset ( &bobj->txc[2*vbase], 0, 2*nvert*sizeof(GLfloat) );

  kept = 0;
  for ( t = 0; t < ntr; t++ ) {
    td = &bobj->trdesc[tbase+kept];
    memcpy ( &td->ind, &index[3*t], 3*sizeof(GLuint) );
    td->matnum = matnum;
    if ( onepatch )
      td->trp = bobj->ntrpatch;
    else
      td->trp = RESTART_IND_UINT;
        /* keep the slot only if the triangle passed the normal test */
    if ( CalcTrNormal ( td->nvect, (GLfloat(*)[3])bobj->vertpos, td->ind ) )
      kept ++;
  }
  if ( onepatch )
    bobj->ntrpatch ++;
  bobj->cnvert += nvert;
  bobj->cntr += kept;
} /*_EnterTriangles*/

/* Returns a newly allocated array of nind indices of type GLuint, to be  */
/* released by the caller with free.  If ind is NULL, the array holds     */
/* 0,...,nind-1.  Otherwise ind is read as an array of itype elements     */
/* (GL_UNSIGNED_BYTE, GL_UNSIGNED_SHORT or GL_UNSIGNED_INT); for the two  */
/* narrower types the restart index is replaced by RESTART_IND_UINT.      */
static GLuint *_ExpandIndices ( int nind, GLenum itype, GLvoid *ind )
{
  GLuint   *out;
  GLubyte  *src8;
  GLushort *src16;
  int      n;

  out = malloc ( nind*sizeof(GLuint) );
  if ( !out ) {
    ExitOnError ( "_ExpandIndices 1" );
    return out;
  }
  if ( !ind ) {
        /* no index array: consecutive vertex numbers */
    for ( n = 0; n < nind; n++ )
      out[n] = n;
    return out;
  }
  if ( itype == GL_UNSIGNED_BYTE ) {
    src8 = (GLubyte*)ind;
    for ( n = 0; n < nind; n++ ) {
      if ( src8[n] != RESTART_IND_UBYTE )
        out[n] = src8[n];
      else
        out[n] = RESTART_IND_UINT;
    }
  }
  else if ( itype == GL_UNSIGNED_SHORT ) {
    src16 = (GLushort*)ind;
    for ( n = 0; n < nind; n++ ) {
      if ( src16[n] != RESTART_IND_USHORT )
        out[n] = src16[n];
      else
        out[n] = RESTART_IND_UINT;
    }
  }
  else if ( itype == GL_UNSIGNED_INT )
    memcpy ( out, ind, nind*sizeof(GLuint) );
  else
    ExitOnError ( "_ExpandIndices 0" );
  return out;
} /*_ExpandIndices*/

/* ////////////////////////////////////////////////////////////////////////// */
/* Returns the class number of a triangle.  A triangle assigned to a patch */
/* by the application (trp other than RESTART_IND_UINT) gets NCVECT+trp.   */
/* Any other triangle gets the number of the first of the NCVECT vectors   */
/* in cvect having the greatest scalar product with the triangle normal.   */
static int _TriangNClass ( TriangDesc *trd )
{
  float dot, best;
  int   c, bestc;

  if ( trd->trp != RESTART_IND_UINT )
    return NCVECT+trd->trp;

  bestc = 0;
  best = V3DotProductf ( trd->nvect, cvect[0] );
  for ( c = 1; c < NCVECT; c++ ) {
    dot = V3DotProductf ( trd->nvect, cvect[c] );
    if ( dot > best ) {
      best = dot;
      bestc = c;
    }
  }
  return bestc;
} /*_TriangNClass*/

/* Find operation of a disjoint-set forest stored in ident (ident[r] == r */
/* for a representative r).  Returns the representative of the set that   */
/* contains e and makes every element on the path from e point directly   */
/* to it.                                                                  */
static int _FindID ( int *ident, int e )
{
  int root, next;

        /* first pass: locate the representative */
  for ( root = e; ident[root] != root; root = ident[root] )
    ;
        /* second pass: attach the visited elements directly to it */
  while ( e != root ) {
    next = ident[e];
    ident[e] = root;
    e = next;
  }
  return root;
} /*_FindID*/

/* Union operation of the disjoint-set forest: merges the sets containing */
/* e1 and e2.  The representative with the smaller rank is attached to    */
/* the other one; if the ranks are equal, the representative of e2 is     */
/* attached to that of e1, whose rank is then incremented.                */
static void _Union ( int *ident, int *rank, int e1, int e2 )
{
  int r1, r2;

  r1 = _FindID ( ident, e1 );
  r2 = _FindID ( ident, e2 );
  if ( r1 == r2 )
    return;               /* already in the same set */
  if ( rank[r1] < rank[r2] ) {
    ident[r1] = r2;
    return;
  }
  ident[r2] = r1;
  if ( rank[r1] == rank[r2] )
    rank[r1] ++;
} /*_Union*/

/* Resets the disjoint-set forest of nv vertices (each vertex alone in   */
/* its set, rank 1) and then merges the sets of the three vertices of    */
/* every triangle of class j, i.e. of the triangles whose numbers are    */
/* tclass[tcount[j]],...,tclass[tcount[j+1]-1].  If tag is not NULL, the */
/* vertices of those triangles are marked with tag[...] = j.             */
static void _FindUnions ( int nv, int *ident, int *rank, int *tag,
                          TriangDesc *trdesc, int *tcount, int *tclass, int j )
{
  TriangDesc *td;
  int        v, pos;

  for ( v = 0; v < nv; v++ ) {
    ident[v] = v;
    rank[v] = 1;
  }
  for ( pos = tcount[j]; pos < tcount[j+1]; pos++ ) {
    td = &trdesc[tclass[pos]];
    _Union ( ident, rank, td->ind[0], td->ind[1] );
    _Union ( ident, rank, td->ind[0], td->ind[2] );
    if ( tag ) {
      tag[td->ind[2]] = j;
      tag[td->ind[1]] = j;
      tag[td->ind[0]] = j;
    }
  }
} /*_FindUnions*/

/* data passed through QuickSort to _TrLess and _TrSwap */
typedef struct {
    TriangDesc *trdesc;  /* the triangles being sorted */
    int        *ident;   /* sort key lookup: indexed by the number of */
                         /* the first vertex of a triangle */
  } _QSTrData;

/* QuickSort comparison: nonzero if triangle i precedes triangle j, i.e. */
/* if the ident entry of its first vertex is the smaller one.            */
static char _TrLess ( void *data, int i, int j )
{
  _QSTrData *qs;
  int       keyi, keyj;

  qs = (_QSTrData*)data;
  keyi = qs->ident[qs->trdesc[i].ind[0]];
  keyj = qs->ident[qs->trdesc[j].ind[0]];
  return keyi < keyj;
} /*_TrLess*/

/* QuickSort swap: exchanges the descriptions of triangles i and j. */
static void _TrSwap ( void *data, int i, int j )
{
  TriangDesc *tr, tmp;

  if ( i == j )
    return;
  tr = ((_QSTrData*)data)->trdesc;
  tmp = tr[i];
  tr[i] = tr[j];
  tr[j] = tmp;
} /*_TrSwap*/

/* Reorders the triangles of an object by class and splits them into       */
/* triangle patches.                                                        */
/*   Classes 0,...,NCVECT-1 are chosen by _TriangNClass from the normal     */
/* vector; classes NCVECT,...,NCVECT+obj->ntrpatch-1 are the patches the    */
/* application asked for.  Within each of the first NCVECT classes the      */
/* triangles are further divided into sets connected through shared         */
/* vertices (union-find), each set making one patch.  Vertices are copied   */
/* once for every class that uses them, so after the call obj->cnvert may   */
/* be greater than before.                                                  */
/*   On return obj->vertpos (with obj->txc inside the same allocation),     */
/* obj->trdesc and obj->trpatch are newly allocated arrays; the old vertex  */
/* and triangle arrays are freed.  obj->ntrpatch is the number of patches   */
/* created.  objid is stored in every patch.                                */
/*   tcount must have room for NCVECT+1+obj->ntrpatch integers and tclass   */
/* for obj->cntr integers; on return tclass[tcount[j]],...,                 */
/* tclass[tcount[j+1]-1] are the old numbers of the triangles of class j.   */
/*   A class requested by the application which received no triangles       */
/* produces no patch.                                                       */
static void _SortObjTriangles ( BalanceElements *belem, BalanceObject *obj,
                                int objid, int *tcount, int *tclass )
{
  GLfloat      *vertpos, *txc, *newvertpos, *newtxc;
  TriangDesc   *trdesc, *newtrdesc;
  TriangPatch  *trpatch;
  int          *tclass0, *ident, *rank, *tag, *newvertind, *newident, *nvclass;
  char         *setb;
  int          nv, ntr, newnv, ntrpatch;
  int          i, j, k, l, m, t, p;
  _QSTrData    d;

  nv = obj->cnvert;  ntr = obj->cntr;
  vertpos = obj->vertpos;  txc = obj->txc;  trdesc = obj->trdesc;

        /* counting sort of the triangle numbers by class; afterwards */
        /* tcount[j] is the position of the first triangle of class j */
        /* in tclass and tcount[NCVECT+obj->ntrpatch] == ntr */
  if ( !(tclass0 = malloc ( ntr*sizeof(int) )) )
    ExitOnError ( "_SortObjTriangles 0" );
  memset ( tcount, 0, (NCVECT+1+obj->ntrpatch)*sizeof(int) );
  for ( i = 0;  i < ntr;  i++ ) {
    tclass0[i] = _TriangNClass ( &trdesc[i] );
    tcount[tclass0[i]] ++;
  }
  for ( i = 1; i <= NCVECT+obj->ntrpatch; i++ )
    tcount[i] += tcount[i-1];
  for ( i = ntr-1; i >= 0; i-- )
    tclass[--tcount[tclass0[i]]] = i;
  free ( tclass0 );

        /* one allocation for ident, rank, tag, newvertind (nv integers */
        /* each) and nvclass (NCVECT+1+obj->ntrpatch integers) */
  if ( !(ident = malloc ( (4*nv+NCVECT+1+obj->ntrpatch)*sizeof(int) )) )
    ExitOnError ( "_SortObjTriangles 1" );
  rank = &ident[nv];  tag = &rank[nv];  newvertind = &tag[nv];
  nvclass = &newvertind[nv];
        /* first pass: count the vertices used by each class; */
        /* nvclass[j+1] is the total for classes 0,...,j */
  for ( i = 0; i < nv; i++ )
    tag[i] = -1;
  nvclass[0] = 0;
  for ( j = 0; j < NCVECT; j++ ) {
    nvclass[j+1] = nvclass[j];
    if ( tcount[j+1] > tcount[j] ) {
      _FindUnions ( nv, ident, rank, tag, trdesc, tcount, tclass, j );
      for ( i = 0; i < nv; i++ )
        if ( tag[i] == j )
          nvclass[j+1] ++;
    }
  }
  for ( ; j < NCVECT+obj->ntrpatch; j++ ) {
    nvclass[j+1] = nvclass[j];
    for ( i = tcount[j]; i < tcount[j+1]; i++ ) {
      k = tclass[i];
      for ( l = 0; l < 3; l++ ) {
        p = trdesc[k].ind[l];
        if ( tag[p] != j ) {
          tag[p] = j;
          nvclass[j+1] ++;
        }
      }
    }
  }
  obj->cnvert = newnv = nvclass[NCVECT+obj->ntrpatch];
        /* five floats per new vertex: position (3), then txc (2) */
  if ( !(newvertpos = malloc ( newnv*5*sizeof(GLfloat) )) ||
       !(newtrdesc = malloc ( ntr*sizeof(TriangDesc) )) ||
       !(newident = malloc ( newnv*sizeof(int) )) ||
       !(setb = malloc ( ntr*sizeof(char))) )
    ExitOnError ( "_SortObjTriangles 2" );
  obj->vertpos = newvertpos;
  obj->txc = newtxc = &newvertpos[3*newnv];
  obj->trdesc = newtrdesc;
        /* setb[i] marks the triangle at new position i as the first one */
        /* of a patch */
  memset ( setb, false, ntr*sizeof(char) );
  for ( i = 0; i < nv; i++ )
    tag[i] = -1;
        /* second pass: m counts the new vertices, t the new triangles */
  for ( j = m = t = ntrpatch = 0;  j < NCVECT;  j++ ) {
    if ( tcount[j+1] > tcount[j] ) {
      _FindUnions ( nv, ident, rank, NULL, trdesc, tcount, tclass, j );
        /* make ident[i] the representative of the set of vertex i */
      for ( i = 0; i < nv; i++ )
        ident[i] = _FindID ( ident, ident[i] );
      for ( i = tcount[j];  i < tcount[j+1];  i++, t ++ ) {
        newtrdesc[t] = trdesc[k = tclass[i]];
        for ( l = 0; l < 3; l++ ) {
          p = trdesc[k].ind[l];
          if ( tag[p] != j ) {
        /* first use of vertex p in class j: make a copy of it */
            tag[p] = j;
            memcpy ( &newvertpos[3*m], &vertpos[3*p], 3*sizeof(GLfloat) );
            memcpy ( &newtxc[2*m], &txc[2*p], 2*sizeof(GLfloat) );
            newident[m] = ident[p];
            newvertind[p] = m ++;
          }
          newtrdesc[t].ind[l] = newvertind[p];
        }
      }
        /* sort the triangles of the class by the set they belong to, */
        /* then start a new patch wherever the set changes */
      if ( tcount[j+1]-tcount[j] > 1 ) {
        d.trdesc = &newtrdesc[tcount[j]];
        d.ident = newident;
        QuickSort ( (void*)&d, tcount[j+1]-tcount[j], _TrLess, _TrSwap );
      }
      ntrpatch ++;
      setb[tcount[j]] = true;
      l = newident[newtrdesc[tcount[j]].ind[0]];
      for ( i = tcount[j]+1; i < tcount[j+1]; i++ ) {
        k = newident[newtrdesc[i].ind[0]];
        if ( k != l ) {
          ntrpatch ++;
          setb[i] = true;
        }
        l = k;
      }
    }
  }
  for ( ; j < NCVECT+obj->ntrpatch; j++ ) {
    for ( i = tcount[j];  i < tcount[j+1];  i++, t++ ) {
      newtrdesc[t] = trdesc[k = tclass[i]];
      for ( l = 0; l < 3; l++ ) {
        p = trdesc[k].ind[l];
        if ( tag[p] != j ) {
          tag[p] = j;
          memcpy ( &newvertpos[3*m], &vertpos[3*p], 3*sizeof(GLfloat) );
          memcpy ( &newtxc[2*m], &txc[2*p], 2*sizeof(GLfloat) );
          newident[m] = ident[p];
          newvertind[p] = m ++;
        }
        newtrdesc[t].ind[l] = newvertind[p];
      }
    }
        /* a class without triangles must not be counted as a patch:    */
        /* tcount[j] is then the start of the next class, or ntr (past  */
        /* the end of setb) for the last class, and the patch would     */
        /* never be filled in by the loop below */
    if ( tcount[j+1] > tcount[j] ) {
      ntrpatch ++;
      setb[tcount[j]] = true;
    }
  }
  if ( !(trpatch = malloc ( ntrpatch*sizeof(TriangPatch) )) )
    ExitOnError ( "_SortObjTriangles 3" );
  memset ( trpatch, 0, ntrpatch*sizeof(TriangPatch) );
  obj->trpatch = trpatch;
        /* describe the patches: class, first triangle, object number */
  for ( j = i = p = 0;  j < NCVECT+obj->ntrpatch;  j++ )
    if ( tcount[j+1] > tcount[j] ) {
      for ( i = tcount[j]; i < tcount[j+1]; i++ )
        if ( setb[i] ) {
          trpatch[p].nvclass = j;
          trpatch[p].ftr = i;
          trpatch[p].objid = objid;
          p ++;
        }
    }
  obj->ntrpatch = ntrpatch;
        /* a patch extends up to the first triangle of the next one */
  for ( i = 0; i < ntrpatch-1; i++ )
    trpatch[i].ntr = trpatch[i+1].ftr - trpatch[i].ftr;
  if ( ntrpatch > 0 )
    trpatch[ntrpatch-1].ntr = ntr - trpatch[ntrpatch-1].ftr;
  free ( vertpos );
  free ( trdesc );
  free ( ident );
  free ( newident );
  free ( setb );
} /*_SortObjTriangles*/

/* ////////////////////////////////////////////////////////////////////////// */
/* Enters a set of triangles of the object currently being defined        */
/* (belem->current).                                                      */
/*   mode     - GL_TRIANGLES, GL_TRIANGLE_STRIP or GL_TRIANGLE_FAN; any   */
/*              other value is reported with ExitOnError and nothing is   */
/*              entered,                                                  */
/*   nvert, vpos, txc - vertices to append (txc may be NULL),             */
/*   nind, itype, index - indices as for glDrawElements; index may be     */
/*              NULL, which stands for 0,...,nind-1,                      */
/*   matnum   - material number stored with each triangle,                */
/*   onepatch - nonzero if the triangles are to form one patch.           */
/* Strips and fans are converted to separate triangles.  In a strip the   */
/* restart index ends the current strip; the next one begins with the     */
/* following index and its first triangle has the vertices in the order   */
/* given.  Strips and fans with fewer than 3 indices yield no triangles.  */
/* NOTE(review): the restart index is not interpreted for GL_TRIANGLES    */
/* and GL_TRIANGLE_FAN.                                                   */
void EnterTriangles ( BalanceElements *belem, GLenum mode,
                      int nvert, GLfloat vpos[][3], GLfloat txc[][2],
                      int nind, GLenum itype, GLvoid *index, GLint matnum,
                      char onepatch )
{
  BalanceObject *bobj;
  GLuint        *idx0, *idx1;
  int           i, j, k, ntr, maxtr;

  bobj = belem->current;
        /* a strip or fan of nind indices has at most nind-2 triangles; */
        /* at least one slot is allocated so that the size is positive  */
  maxtr = nind > 2 ? nind-2 : 1;
  switch ( mode ) {
case GL_TRIANGLES:
    idx1 = _ExpandIndices ( nind, itype, index );
    ntr = nind/3;
    break;
case GL_TRIANGLE_STRIP:
    if ( !(idx1 = malloc ( 3*maxtr*sizeof(GLuint) )) )
      ExitOnError ( "EnterTriangles 0" );
    idx0 = _ExpandIndices ( nind, itype, index );
        /* k is the number of indices read so far in the current strip */
    for ( i = j = k = 0;  i < nind;  i++ ) {
      if ( idx0[i] == RESTART_IND_UINT ) {
        k = 0;
        continue;
      }
      if ( ++k < 3 )
        continue;
      idx1[j++] = idx0[i-2];
        /* every second triangle of a strip has two vertices swapped */
        /* to keep the orientation */
      if ( !((k-3) & 0x01) ) { idx1[j++] = idx0[i-1];  idx1[j++] = idx0[i]; }
                        else { idx1[j++] = idx0[i];  idx1[j++] = idx0[i-1]; }
    }
    free ( idx0 );
    ntr = j/3;
    break;
case GL_TRIANGLE_FAN:
    if ( !(idx1 = malloc ( 3*maxtr*sizeof(GLuint) )) )
      ExitOnError ( "EnterTriangles 1" );
    idx0 = _ExpandIndices ( nind, itype, index );
    for ( i = 2, j = 0;  i < nind;  i++ ) {
      idx1[j++] = idx0[0];  idx1[j++] = idx0[i-1];  idx1[j++] = idx0[i];
    }
    free ( idx0 );
    ntr = j/3;
    break;
default:
    ExitOnError ( "EnterTriangles 2" );
        /* there is no index array to pass on */
    return;
  }
  _EnterTriangles ( belem, bobj, nvert, vpos, txc, ntr, idx1, matnum, onepatch );
  free ( idx1 );
} /*EnterTriangles*/

/* Begins the definition of a new object: takes the next free entry of   */
/* belem->objtab, links it with obj and reserves room for at most nvert  */
/* vertices and ntr triangles.  txts and eld are stored in the entry.    */
/* Returns the entry, which is also made belem->current.  It is an error */
/* to call this when the table is full or another object is being        */
/* defined.                                                              */
BalanceObject *BeginEnteringObjTriangles ( BalanceElements *belem,
                                           SceneObject *obj, GLint txts,
                                           int nvert, int ntr, float eld )
{
  BalanceObject *bobj;

  if ( belem->nobj >= MAX_OBJECTS )
    ExitOnError ( "BeginEnteringObjTriangles 0" );
  if ( belem->current )
    ExitOnError ( "BeginEnteringObjTriangles 1");

  bobj = &belem->objtab[belem->nobj];
  belem->current = bobj;
  bobj->obj = obj;
  obj->bobj = bobj;
        /* one array of floats for the vertex data, one for the triangles */
  if ( !(bobj->vertpos = malloc ( (nvert*5+ntr*3)*sizeof(GLfloat) )) ||
       !(bobj->trdesc = malloc ( ntr*sizeof(TriangDesc) )) )
    ExitOnError ( "BeginEnteringObjTriangles 2" );
        /* texture coordinates follow the 3*nvert position coordinates */
  bobj->txc = &bobj->vertpos[3*nvert];

  obj->previnst = NULL;
  obj->nvert = nvert;
  obj->ntr = ntr;

  bobj->txts = txts;
  bobj->eld = eld;
  bobj->cnvert = 0;
  bobj->cntr = 0;
  bobj->ntrpatch = 0;
  return bobj;
} /*BeginEnteringObjTriangles*/

/* Ends the definition of the current object: its triangles are sorted   */
/* into patches (_SortObjTriangles), the patches are processed           */
/* (ProcessObjTriangPatches) and the object is counted in belem->nobj.   */
void EndEnteringObjTriangles ( BalanceElements *belem )
{
  BalanceObject *bobj;
  int           *tcount;
  int           nhead;

  bobj = belem->current;
  if ( !bobj )
    ExitOnError ( "EndEnteringObjTriangles 0" );
        /* work area: nhead class counters followed by one integer */
        /* per triangle */
  nhead = NCVECT+1+bobj->ntrpatch;
  tcount = malloc ( (nhead+bobj->cntr)*sizeof(int) );
  if ( !tcount )
    ExitOnError ( "EndEnteringObjTriangles 1" );
  _SortObjTriangles ( belem, bobj, belem->nobj, tcount, &tcount[nhead] );
  free ( tcount );
  ProcessObjTriangPatches ( belem, bobj );
  bobj->obj->previnst = NULL;
  belem->nobj ++;
  belem->current = NULL;
} /*EndEnteringObjTriangles*/

/* Enters a new instance of an object entered earlier.  inst->previnst   */
/* must point to that object.  The new entry of belem->objtab is a copy  */
/* of the entry of inst->previnst, except that it refers to inst and has */
/* its own copy of the triangle patch array, in which objid is the       */
/* number of the new entry.                                              */
void EnterNewObjElemInstance ( BalanceElements *belem, SceneObject *inst )
{
  SceneObject   *prev;
  BalanceObject *src, *dst;
  int           p, npatch, objid;

  if ( belem->nobj >= MAX_OBJECTS )
    ExitOnError ( "EnterNewObjElemInstance 0" );
  prev = inst->previnst;
  if ( !prev )
    ExitOnError ( "EnterNewObjElemInstance 1" );

  src = prev->bobj;
  objid = belem->nobj;
  dst = &belem->objtab[objid];
  *dst = *src;
  dst->obj = inst;

  npatch = src->ntrpatch;
  dst->trpatch = malloc ( npatch*sizeof(TriangPatch) );
  if ( !dst->trpatch )
    ExitOnError ( "EnterNewObjElemInstance 2" );
  memcpy ( dst->trpatch, src->trpatch, npatch*sizeof(TriangPatch) );
  for ( p = 0; p < npatch; p++ )
    dst->trpatch[p].objid = objid;
  belem->nobj = objid+1;
} /*EnterNewObjElemInstance*/

/* Makes *copy a copy of *object whose previnst field points to object */
/* and enters it as a new instance with EnterNewObjElemInstance.       */
void EnterObjectCopy ( BalanceElements *belem,
                       SceneObject *object, SceneObject *copy )
{
  *copy = *object;
        /* must be set after the assignment above, which overwrites it */
  copy->previnst = object;
  EnterNewObjElemInstance ( belem, copy );
} /*EnterObjectCopy*/

/* Deletes the vertex array object and the three buffers of obj.  Nothing */
/* is deleted for an object with previnst set; such an object is a copy   */
/* (see EnterObjectCopy) holding the same OpenGL names as its original.   */
void DestroySceneObject ( SceneObject *obj )
{
  if ( obj->previnst )
    return;
  glDeleteVertexArrays ( 1, &obj->vao );
  glDeleteBuffers ( 3, obj->vbo );
} /*DestroySceneObject*/

/* Begins the construction of *belem: clears the whole structure, creates */
/* the uniform blocks (NewUniformCTLBlock, NewUniformFFTransBlock) and    */
/* stores the ffnear and fffar parameters.                                */
void BeginEnteringBalanceElements ( BalanceElements *belem,
                                    float ffnear, float fffar )
{
  memset ( belem, 0, sizeof(BalanceElements) );
  belem->nobj = 0;
  BUF_UCTL = NewUniformCTLBlock ();
  belem->fftr.fftrbuf = NewUniformFFTransBlock ();
  belem->ffnear = ffnear;  belem->fffar = fffar;
} /*BeginEnteringBalanceElements*/

/* Ends the construction of *belem.  At least one object must have been  */
/* entered and no object may be in the middle of its definition.  The    */
/* triangle patches are mapped, the form factors are computed, and the   */
/* BUF_LE, BUF_L0, BUF_L1 buffers and the irradiance texture are created.*/
void EndEnteringBalanceElements ( BalanceElements *belem )
{
  if ( !belem->nobj || belem->current )
    ExitOnError ( "EndEnteringBalanceElements 0");
  MapTrianglePatches ( belem, belem->nobj );
  ComputeFormFactors ( belem );
        /* four floats per element (BUF_LE, BUF_L0) or per macroelement */
        /* (BUF_L1) */
        /* NOTE(review): BUF_L0 and BUF_L1 are both created with 1 as   */
        /* the second argument -- confirm this is intended              */
  BUF_LE = NewStorageBuffer ( 4*belem->nelem*sizeof(GLfloat), 2 );
  BUF_L0 = NewStorageBuffer ( 4*belem->nelem*sizeof(GLfloat), 1 );
  BUF_L1 = NewStorageBuffer ( 4*belem->nmacroelem*sizeof(GLfloat), 1 );
  glGenTextures ( 1, &belem->irrtxt );
  glBindTexture ( GL_TEXTURE_RECTANGLE, belem->irrtxt );
  glTexStorage2D ( GL_TEXTURE_RECTANGLE, 1, GL_RGBA32F,
                   belem->irrtxt_width, belem->irrtxt_height );
  ExitIfGLError ( "EndEnteringBalanceElements" );
} /*EndEnteringBalanceElements*/

/* Releases the arrays kept in main memory and deletes the OpenGL objects */
/* (vertex array, buffers, irradiance texture) held by *belem.  The       */
/* structure itself is not freed and its fields are left unchanged.       */
void DeleteBalanceElements ( BalanceElements *belem )
{
        /* free accepts NULL, so pointers never allocated are harmless */
  free ( belem->vertpos );
  free ( belem->trdesc );
  free ( belem->trpatch );
  free ( belem->VarBuf );

  glDeleteVertexArrays ( 1, &belem->tvao );
  glDeleteBuffers ( 1, &belem->fftr.fftrbuf );
  glDeleteBuffers ( NBEBUFFERS, belem->tvbo );
  glDeleteBuffers ( 2, belem->ffmat.buf );
  glDeleteBuffers ( 2, belem->avgmat.buf );
  glDeleteTextures ( 1, &belem->irrtxt );
} /*DeleteBalanceElements*/

/* ////////////////////////////////////////////////////////////////////////// */
/* fixed RGB colours used by DrawTrClass for the triangle classes chosen */
/* by the normal vector (indexed by the class number) */
static const GLfloat ccolour[26][3] =
  {{1.0,0.0,0.0},{0.7,0.0,0.0},{0.0,1.0,0.0},
   {0.0,0.7,0.0},{0.1,0.4,1.0},{0.05,0.3,0.7},
   {0.6,0.6,0.0},{0.8,0.8,0.0},{0.6,0.0,0.6},{0.8,0.0,0.8},
   {0.5,0.3,0.0},{0.3,0.5,0.0},{0.3,0.0,0.5},{0.5,0.0,0.3},
   {0.0,0.5,0.3},{0.0,0.3,0.5},{0.0,0.6,0.4},{0.0,0.4,0.6},
   {0.7,0.7,0.7},{0.6,0.6,0.6},{0.5,0.5,0.5},{0.4,0.4,0.4},
   {0.3,0.3,0.3},{0.2,0.2,0.2},{0.1,0.1,0.1},{0.8,0.8,0.8}};

/* number of pseudorandom colours in the trscolour table */
#define NTRSC 1000

/* table of NTRSC pseudorandom colours, allocated and filled by */
/* GenTRSColours when first needed */
static GLfloat (*trscolour)[3] = NULL;

/* Returns the next number of a fixed pseudorandom sequence (the seed is  */
/* a static variable, so the sequence is the same in every run).          */
/*   23 bits derived from the seed are used as the mantissa of a floating */
/* point number with the exponent bits 0x3F800000, from which 1 is then   */
/* subtracted.  This assumes that float is a 32-bit IEEE 754 number and   */
/* unsigned int has 32 bits; the result is then in [0,1).                 */
GLfloat RandomFloat ( void )
{
  float res;
  unsigned int tmp, bits;
  static unsigned int seed = 0xFFFF0C59;

  seed *= 16807;
  tmp = seed ^ (seed >> 4) ^ (seed << 15);
  bits = (tmp >> 9) | 0x3F800000;
        /* copy the bit pattern with memcpy; storing it through a pointer */
        /* cast to unsigned int* would break the aliasing rules of C */
  memcpy ( &res, &bits, sizeof(res) );
  return res - 1.0;
} /*RandomFloat*/

/* Allocates and fills the trscolour table on the first call; later calls */
/* do nothing.  Each colour component is 0.25+0.75*RandomFloat ().        */
static void GenTRSColours ( void )
{
  int i, c;

  if ( trscolour )
    return;
  trscolour = malloc ( NTRSC*3*sizeof(GLfloat) );
  if ( !trscolour )
    ExitOnError ( "GenTRSColours" );
  for ( i = 0; i < NTRSC; i++ )
    for ( c = 0; c < 3; c++ )
      trscolour[i][c] = 0.25+0.75*RandomFloat ();
} /*GenTRSColours*/

/* Draws, with the program bprog_id[4], one two-vertex line per element.  */
/* If el < 0, lines are drawn for all elements of every object; otherwise */
/* only for element el.  The model matrix of the object owning the        */
/* elements is loaded before drawing.                                     */
void DrawElemCPoints ( BalanceElements *belem, TransBl *trans, int el )
{
  int           i, k, l, nelem;
  BalanceObject *bobj;

  glUseProgram ( bprog_id[4] );  /* b5.vert, b5.frag */
  glBindVertexArray ( empty_vao );
  glBindBufferBase ( GL_SHADER_STORAGE_BUFFER, 0, BUF_VARBUF );
  glBindBufferBase ( GL_SHADER_STORAGE_BUFFER, 4, BUF_CP );
  glBindBufferBase ( GL_SHADER_STORAGE_BUFFER, 10, BUF_TRNV );
  glPointSize ( 3.0 );
  nelem = belem->nelem;
  for ( i = 0; i < belem->nobj; i++ ) {
        /* k is the first element with the object number i, l is one */
        /* past the last such element */
        /* NOTE(review): all elements k,...,l-1 are then drawn, which */
        /* presumably relies on the elements of one object being      */
        /* contiguous in ObjIdBuf -- confirm                          */
    for ( k = 0; k < nelem; k++ )
      if ( belem->ObjIdBuf[k] == i )
        break;
    for ( l = nelem; l > k; l-- )
      if ( belem->ObjIdBuf[l-1] == i )
        break;
    if ( k >= l )
      continue;        /* the object has no elements */
    bobj = &belem->objtab[i];
    LoadMMatrix ( trans, bobj->obj->mm );
    if ( el < 0 ) {
      CTLUniformi ( belem, CTL_FIRST, k );
      glDrawArraysInstanced ( GL_LINES, 0, 2, l-k );
    }
    else if ( el >= k && el < l ) {
      CTLUniformi ( belem, CTL_FIRST, el );
      glDrawArraysInstanced ( GL_LINES, 0, 2, 1 );
    }
  }
  glBindVertexArray ( 0 );
  ExitIfGLError ( "DrawElemCPoints" );
} /*DrawElemCPoints*/

/* Draws the triangle patches of all objects with the program            */
/* bprog_id[18], one draw call per patch, with the colour passed in the  */
/* CTL_COLOUR field of the control block.  ropt selects the mode         */
/* (ROPT_TRCLASS1, ROPT_TRCLASS2, ROPT_TRCLASS3 or ROPT_MACROEL); for    */
/* any other value nothing is done.                                      */
void DrawTrClass ( BalanceElements *belem, TransBl *trans, char ropt )
{
  int           i, j;
  BalanceObject *bobj;

        /* the mode is passed to the shaders as a number in CTL_TXTS */
  switch ( ropt ) {
case ROPT_TRCLASS1: i = 2;  break;
case ROPT_TRCLASS2: i = 3;  break;
case ROPT_TRCLASS3: i = 0;  break;
case ROPT_MACROEL:  i = 1;  break;
default: return;
  }
  CTLUniformi ( belem, CTL_TXTS, (GLint)i );
  glUseProgram ( bprog_id[18] );
  GenTRSColours ();
  glBindVertexArray ( belem->tvao );
  for ( j = 0; j < belem->nobj; j++ ) {
    bobj = &belem->objtab[j];
    LoadMMatrix ( trans, bobj->obj->mm );
    for ( i = 0; i < bobj->ntrpatch; i++ ) {
      switch ( ropt ) {
    case ROPT_TRCLASS1:
        /* patches of the classes chosen by the normal vector get the */
        /* fixed colour of the class, other patches a pseudorandom one */
        if ( bobj->trpatch[i].nvclass < NCVECT )
          CTLUniform3fv ( belem, CTL_COLOUR, ccolour[bobj->trpatch[i].nvclass] );
        else
          CTLUniform3fv ( belem, CTL_COLOUR, trscolour[i % NTRSC] );
        break;
    case ROPT_TRCLASS2:
    case ROPT_TRCLASS3:
        CTLUniform3fv ( belem, CTL_COLOUR, trscolour[i % NTRSC] );
        break;
    case ROPT_MACROEL:
        glBindBufferBase ( GL_SHADER_STORAGE_BUFFER, 0, BUF_VARBUF );
        glBindBufferBase ( GL_SHADER_STORAGE_BUFFER, 9, BUF_VMAP );
        CTLUniform3fv ( belem, CTL_COLOUR, trscolour[i % NTRSC] );
        break;
      }
        /* the triangles of the patch: ntr triangles beginning with ftr, */
        /* three indices of type GLuint per triangle */
      glDrawElements ( GL_TRIANGLES, 3*bobj->trpatch[i].ntr, GL_UNSIGNED_INT,
                       (GLvoid*)(3*sizeof(GLuint)*bobj->trpatch[i].ftr) );
    }
  }
  glUseProgram ( 0 );
  glBindVertexArray ( 0 );
  ExitIfGLError ( "DrawTrClass" );
} /*DrawTrClass*/

/* ////////////////////////////////////////////////////////////////////////// */
/* Gets the data of element i: three floats read from the BUF_CP buffer */
/* at the offset of 4*i floats are stored in cp, and the normal vector  */
/* of the triangle whose number is in belem->VarBuf[4*i+1] is stored in */
/* nv.                                                                  */
void _GetElCPNV ( BalanceElements *belem, int i,
                  GLfloat cp[3], GLfloat nv[3] )
{
  GLuint first, tr;

  first = 4*i;          /* four entries per element in both buffers */
  glGetNamedBufferSubData ( BUF_CP, first*sizeof(GLfloat),
                            3*sizeof(GLfloat), cp );
  tr = belem->VarBuf[first+1];
  memcpy ( nv, &belem->trdesc[tr].nvect, 3*sizeof(GLfloat) );
} /*_GetElCPNV*/

/* ////////////////////////////////////////////////////////////////////////// */
/* Sets the NVIEWPORTS viewports used for the form factor images.  Each */
/* viewport is given by four numbers: x, y, width, height, expressed    */
/* with the constants FS and HS.  The fftrans parameter is not used.    */
void LoadFFViewports ( FFTransBl *fftrans )
{
  static const GLfloat viewports[NVIEWPORTS*4] =
    { 0.0,0.0,FS,FS, FS,0.0,FS,HS, FS,HS,FS,HS, 2*FS,0.0,FS,HS, 2*FS,HS,FS,HS,
      0.0,FS,FS,FS, FS,FS,FS,HS, FS,FS+HS,FS,HS, 2*FS,FS,FS,HS, 2*FS,FS+HS,FS,HS };

  glViewportArrayv ( 0, NVIEWPORTS, viewports );
} /*LoadFFViewports*/

/* Constructs the ten perspective projection matrices fftrans->pm[0..9]. */
/* The depth range is split at nf = sqrt(near*far): matrices 0..4 cover  */
/* the range from near to nf and matrices 5..9 the range from nf to far. */
/* In each group the first matrix is a symmetric frustum and the other   */
/* four are products of an asymmetric frustum with rotm[0] and then,     */
/* successively, with rotm[1].                                           */
static void ConstructFFPMatrices ( FFTransBl *fftrans, float near, float far )
{
  GLfloat pm[16];
  static const GLfloat rotm[2][16] =
    {{1.0,0.0,0.0,0.0, 0.0,0.0,1.0,0.0, 0.0,-1.0,0.0,0.0, 0.0,0.0,0.0,1.0},
     {0.0,1.0,0.0,0.0, -1.0,0.0,0.0,0.0, 0.0,0.0,1.0,0.0, 0.0,0.0,0.0,1.0}};
  float nf, zn[2], zf[2], n, f;
  int   g, b, s;

  nf = sqrt ( near*far );
  zn[0] = near;  zf[0] = nf;
  zn[1] = nf;    zf[1] = far;
        /* construction of the perspective projection matrices, */
        /* one group of five for each depth range */
  for ( g = 0; g < 2; g++ ) {
    b = 5*g;  n = zn[g];  f = zf[g];
    M4x4Frustumf ( fftrans->pm[b], NULL,
                   -n/CF, n/CF, -n/CF, n/CF, n, f );
    M4x4Frustumf ( pm, NULL, -n, n, 0.0, n*CF, n, f );
    M4x4Multf ( fftrans->pm[b+1], pm, rotm[0] );
    for ( s = 2; s <= 4; s++ )
      M4x4Multf ( fftrans->pm[b+s], fftrans->pm[b+s-1], rotm[1] );
  }
} /*ConstructFFPMatrices*/

/* Constructs in vm (16 coefficients, translation in vm[12..14]) the     */
/* matrix of the transition to a coordinate system whose origin is p and */
/* in which v is directed opposite to the z axis.                        */
/* NOTE(review): v must be a nonzero vector, or there is a division by   */
/* zero below.                                                           */
void M4x4ViewPVf ( GLfloat vm[16], float p[3], float v[3] )
{
  float w1[3], w2[3], gamma, t;

        /* construction of the transition to the coordinate system in    */
        /* which p is the origin and v is directed opposite to the unit  */
        /* vector of the z axis; it is a composition of a translation    */
        /* with a Householder reflection */
  memset ( vm, 0, 16*sizeof(GLfloat) );
        /* w1 is the reflection vector: v with its length added to the */
        /* z coordinate with the sign of v[2] */
  gamma = sqrt ( V3DotProductf ( v, v ) );
  memcpy ( w1, v, 3*sizeof(float) );
  w1[2] += v[2] > 0.0 ? gamma : -gamma;
        /* the reflection is I - gamma*w1*w1^T with gamma = 2/(w1^T w1) */
  gamma = 2.0 / V3DotProductf ( w1, w1 );
  t = gamma * V3DotProductf ( w1, p );
        /* translation part: the reflection applied to -p */
  vm[12] = t*w1[0]-p[0];  vm[13] = t*w1[1]-p[1];  vm[14] = t*w1[2]-p[2];
  w2[0] = gamma*w1[0];  w2[1] = gamma*w1[1];  w2[2] = gamma*w1[2];
  vm[0] = 1.0-w1[0]*w2[0];  vm[5] = 1.0-w1[1]*w2[1];
  vm[10] = 1.0-w1[2]*w2[2];  vm[1] = vm[4] = -w1[1]*w2[0];
  vm[2] = vm[8] = -w1[2]*w2[0];  vm[6] = vm[9] = -w1[1]*w2[2];
  vm[15] = 1.0;
        /* for v[2] <= 0 the reflection maps v onto the positive z */
        /* half-axis, so the sign of the z coordinate is changed */
  if ( v[2] <= 0.0 )
    { vm[2] = -vm[2];  vm[6] = -vm[6];  vm[10] = -vm[10];  vm[14] = -vm[14]; }
} /*M4x4ViewPVf*/

/* Computes the products fftrans->vpm[i] of the projection matrices      */
/* fftrans->pm[i] and the view matrix made by M4x4ViewPVf for the point  */
/* cp and the vector nv, and sends them to the uniform buffer            */
/* fftrans->fftrbuf at the offset fftrbofs[1].                           */
void LoadFFVPMatrices ( FFTransBl *fftrans, GLfloat cp[3], GLfloat nv[3] )
{
  GLfloat vm[16];
  int     i;

  M4x4ViewPVf ( vm, cp, nv );
  for ( i = 0; i < NVIEWPORTS; i++ )
    M4x4Multf ( fftrans->vpm[i], fftrans->pm[i], vm );
  glBindBufferBase ( GL_UNIFORM_BUFFER, fftrbbp, fftrans->fftrbuf );
  glBufferSubData ( GL_UNIFORM_BUFFER, fftrbofs[1],
                    NVIEWPORTS*16*sizeof(GLfloat), fftrans->vpm );
  ExitIfGLError ( "LoadFFVPMatrices" );
} /*LoadFFVPMatrices*/

/* ////////////////////////////////////////////////////////////////////////// */
/* Creates the buffer ffbuf[0] and runs the four stages of the program   */
/* bprog_id[6] on it; the last two stages are commented in the code as   */
/* pairwise summation and division by the sum.  Only ffbuf[0] is         */
/* assigned here.                                                        */
static void ComputeFFWeights ( BalanceElements *belem, GLuint ffbuf[4] )
{
  int n;

  ffbuf[0] = NewStorageBuffer ( (FFBLOCKSIZE+1)*FFTXTPIX*sizeof(GLfloat), 3 );
  glUseProgram ( bprog_id[6] );  /* bff0.comp */
  CTLUniformf ( belem, CTL_C, CF );
  CTLUniformi ( belem, CTL_STAGE, 0 );
  COMPUTE ( FFTXTSIZE/2, FFTXTSIZE/2, 1 )
  CTLUniformi ( belem, CTL_STAGE, 1 );
  CTLUniformi ( belem, CTL_H, FFTXTPIX );
  COMPUTE ( FFTXTPIX, 1, 1 )
        /* pairwise summation; n is rounded up when halved, so that an */
        /* odd number of terms is handled as well */
  CTLUniformi ( belem, CTL_STAGE, 2 );
  for ( n = FFTXTPIX; n > 1; n = (n+1)/2 ) {
    CTLUniformi ( belem, CTL_N, n );
    COMPUTE ( n/2, 1, 1 )
  }
        /* division by the sum */
  CTLUniformi ( belem, CTL_STAGE, 3 );
  CTLUniformi ( belem, CTL_N, FFTXTPIX );
  COMPUTE ( FFTXTPIX, 1, 1 )
  ExitIfGLError ( "ComputeFFWeights" );
} /*ComputeFFWeights*/

/* ////////////////////////////////////////////////////////////////////////// */
/* Runs the programs bprog_id[9] and bprog_id[10] for a block of nseq    */
/* sequences and stores the result in the two buffers of *bldesc, whose  */
/* storage is allocated here.  The number of nonzero coefficients is     */
/* read back from ffbuf[3] and stored in bldesc->nnz.                    */
static void GPUComputeFF ( BalanceElements *belem, GLuint ffbuf[4],
                           int nseq, GPUSparseMatrix *bldesc )
{
  unsigned int k, m, d, nnz;

  glUseProgram ( bprog_id[9] );  /* bff3.comp */
  COMPUTE ( nseq, 1, 1 )
  glUseProgram ( bprog_id[10] );  /* bff4.comp */
  CTLUniformi ( belem, CTL_STAGE, 0 );
  COMPUTE ( nseq+1, 1, 1 );
  if ( nseq > 1 ) {
        /* stage 1 is run once for each bit of nseq, with the step */
        /* number k = 0,1,... */
        /* NOTE(review): this looks like a parallel prefix sum over */
        /* the nseq+1 numbers in ffbuf[3] -- confirm in bff4.comp */
    CTLUniformi ( belem, CTL_STAGE, 1 );
    CTLUniformi ( belem, CTL_H, (GLuint)nseq );
    d = nseq/2;
    for ( k = 0, m = nseq;  m > 0;  k++, m >>= 1 ) {
      CTLUniformi ( belem, CTL_STEP, (GLint)k );
      COMPUTE ( d, 1, 1 );
    }
  }
        /* the number at the position nseq in ffbuf[3] is taken as the */
        /* number of nonzero coefficients */
  glGetNamedBufferSubData ( ffbuf[3], nseq*sizeof(GLuint),
                            sizeof(GLuint), &nnz );
  bldesc->nnz = nnz;
/*  printf ( "nnz = %d\n", nnz );*/
        /* buf[0]: nseq+1+nnz integers, buf[1]: nnz floats */
  glBindBufferBase ( GL_SHADER_STORAGE_BUFFER, 1, bldesc->buf[0] );
  glBufferData ( GL_SHADER_STORAGE_BUFFER, (nseq+1+nnz)*sizeof(GLuint),
                 NULL, GL_DYNAMIC_DRAW );
  glBindBufferBase ( GL_SHADER_STORAGE_BUFFER, 2, bldesc->buf[1] );
  glBufferData ( GL_SHADER_STORAGE_BUFFER, nnz*sizeof(GLfloat),
                 NULL, GL_DYNAMIC_DRAW );
        /* the first nseq+1 integers of buf[0] are copied from ffbuf[3] */
  glCopyNamedBufferSubData ( ffbuf[3], bldesc->buf[0], 0, 0,
                             (nseq+1)*sizeof(GLuint) );
  CTLUniformi ( belem, CTL_STAGE, 2 );
  COMPUTE ( nseq+1, FFTXTPIX, 1 );
  CTLUniformi ( belem, CTL_STAGE, 3 );
  CTLUniformi ( belem, CTL_N, FFTXTPIX*nseq );
  COMPUTE ( nnz, 1, 1 );
  ExitIfGLError ( "GPUComputeFF" );
} /*GPUComputeFF*/

/* Creates the two textures belem->fftxt (a GL_R32UI colour texture and  */
/* a GL_DEPTH_COMPONENT32F depth texture, both 3*FFTXTSIZE by            */
/* 2*FFTXTSIZE) and a framebuffer object with these textures attached.   */
/* The framebuffer is left bound to GL_DRAW_FRAMEBUFFER and its name is  */
/* returned.                                                             */
static GLuint PrepareFFFramebuffer ( BalanceElements *belem )
{
  GLuint       fffbo;
  const GLenum buffers = GL_COLOR_ATTACHMENT0;

  glGenTextures ( 2, belem->fftxt );
        /* texture unit 47 is made active for the bindings below */
  glActiveTexture ( GL_TEXTURE0+47 );
  glBindTexture ( GL_TEXTURE_2D, belem->fftxt[0] );
  glTexStorage2D ( GL_TEXTURE_2D, 1, GL_R32UI,
                   3*FFTXTSIZE, 2*FFTXTSIZE );
  glBindTexture ( GL_TEXTURE_2D, belem->fftxt[1] );
  glTexStorage2D ( GL_TEXTURE_2D, 1, GL_DEPTH_COMPONENT32F,
                   3*FFTXTSIZE, 2*FFTXTSIZE );
  glGenFramebuffers ( 1, &fffbo );
  glBindFramebuffer ( GL_DRAW_FRAMEBUFFER, fffbo );
  glFramebufferTexture ( GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
                         belem->fftxt[0], 0 );
  glFramebufferTexture ( GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT,
                         belem->fftxt[1], 0 );
  glDrawBuffers ( 1, &buffers );
  glBindTexture ( GL_TEXTURE_2D, 0 );
  if ( glCheckFramebufferStatus ( GL_DRAW_FRAMEBUFFER ) !=
         GL_FRAMEBUFFER_COMPLETE )
    ExitOnError ( "PrepareFFFramebuffer" );
  ExitIfGLError ( "PrepareFFFramebuffer" );
  return fffbo;
} /*PrepareFFFramebuffer*/

/* Copies the model matrix mm to fftrans->mm and sends it to the uniform */
/* buffer fftrans->fftrbuf, bound at the binding point bp, at the offset */
/* ofs[0].                                                               */
static void LoadFFMMatrix ( GLuint bp, GLint *ofs,
                            FFTransBl *fftrans, GLfloat mm[16] )
{
  memcpy ( fftrans->mm, mm, 16*sizeof(GLfloat) );
  glBindBufferBase ( GL_UNIFORM_BUFFER, bp, fftrans->fftrbuf );
  glBufferSubData ( GL_UNIFORM_BUFFER, ofs[0], 16*sizeof(GLfloat), mm );
} /*LoadFFMMatrix*/

/* Draws all objects with the program prog_id0, using the matrices made  */
/* by LoadFFVPMatrices for the point elcp and the vector elnv, and then  */
/* runs the compute program prog_id1 with CTL_P0 set to j*FFTXTPIX.      */
/* bp and ofs are passed to LoadFFMMatrix to load the model matrices.    */
/* NOTE(review): the vertex array and the framebuffer are not bound      */
/* here, so the caller is presumably expected to have bound them.        */
void DrawViewFromElem ( BalanceElements *belem, GLuint prog_id0, GLuint prog_id1,
                        GLuint bp, GLint *ofs,
                        int j, GLfloat elcp[3], GLfloat elnv[3] )
{
  BalanceObject *bobj;
  int           i;
  GLuint        inval = RESTART_IND_UINT;

  glUseProgram ( prog_id0 );  /* bff1.vert, bff1.geom, bff1.frag */
        /* fill the colour texture with RESTART_IND_UINT and clear the */
        /* depth buffer */
  glClearTexImage ( belem->fftxt[0], 0, GL_RED_INTEGER, GL_UNSIGNED_INT, &inval );
  glClear ( GL_DEPTH_BUFFER_BIT );
  LoadFFVPMatrices ( &belem->fftr, elcp, elnv );
  for ( i = 0; i < belem->nobj; i++ ) {
    bobj = &belem->objtab[i];
    LoadFFMMatrix ( bp, ofs, &belem->fftr, bobj->obj->mm );
        /* the cntr triangles of the object, beginning with ftrdesc */
    glDrawElements ( GL_TRIANGLES, 3*bobj->cntr, GL_UNSIGNED_INT,
                     (GLvoid*)(bobj->ftrdesc*3*sizeof(GLuint)) );
  }
        /* wait until drawing is complete before running the compute */
        /* program */
  glFinish ();
  glUseProgram ( prog_id1 );  /* bff2.comp */
  CTLUniformui ( belem, CTL_P0, (GLuint)(j*FFTXTPIX) );
  COMPUTE ( 3*FFTXTSIZE, FFTXTSIZE, 1 );
  ExitIfGLError ( "DrawViewFromElem" );
} /*DrawViewFromElem*/

/* Joins nblocks sparse blocks, described by ffbldesc[0..nblocks-1], into   */
/* the single sparse matrix belem->ffmat, using compute program             */
/* bprog_id[11] in two stages.                                              */
/* Two new buffers are generated for belem->ffmat: buf[0] has room for      */
/* m+1+nnz GLuint values and buf[1] for nnz GLfloat values, where m and nnz */
/* are the sums of the blocks' m and nnz fields.  Judging by the sizes and  */
/* the original comments, buf[0] holds the arrays r and c and buf[1] the    */
/* array a of a compressed-row representation - confirm against the shader. */
static void AssembleFFMatrix ( BalanceElements *belem,
                        int nblocks, GPUSparseMatrix *ffbldesc )
{
  GLuint rowsum = 0, nzsum = 0;
  int    b;

  glUseProgram ( bprog_id[11] );
        /* compute the total sizes and allocate the buffers */
  for ( b = 0;  b < nblocks;  b++ ) {
    rowsum += ffbldesc[b].m;
    nzsum += ffbldesc[b].nnz;
  }
  belem->ffmat.m = rowsum;
  belem->ffmat.n = belem->nmacroelem;
  belem->ffmat.nnz = nzsum;
  belem->ffmat.lmax = 0;
  glGenBuffers ( 2, belem->ffmat.buf );
  glBindBufferBase ( GL_SHADER_STORAGE_BUFFER, 3, belem->ffmat.buf[0] );
  glBufferData ( GL_SHADER_STORAGE_BUFFER, (rowsum+1+nzsum)*sizeof(GLuint),
                 NULL, GL_DYNAMIC_DRAW );
  glBindBufferBase ( GL_SHADER_STORAGE_BUFFER, 4, belem->ffmat.buf[1] );
  glBufferData ( GL_SHADER_STORAGE_BUFFER, nzsum*sizeof(GLfloat),
                 NULL, GL_DYNAMIC_DRAW );

        /* stage 0: copy the r arrays; CTL_H and CTL_NNZ carry the number */
        /* of rows and of nonzeros contributed by the preceding blocks    */
  CTLUniformi ( belem, CTL_STAGE, 0 );
  rowsum = nzsum = 0;
  for ( b = 0;  b < nblocks;  b++ ) {
    glBindBufferBase ( GL_SHADER_STORAGE_BUFFER, 1, ffbldesc[b].buf[0] );
    CTLUniformi ( belem, CTL_H, rowsum );
    CTLUniformui ( belem, CTL_NNZ, nzsum );
    COMPUTE ( ffbldesc[b].m+1, 1, 1 );
    rowsum += ffbldesc[b].m;
    nzsum += ffbldesc[b].nnz;
  }

        /* stage 1: copy the c and a arrays; CTL_H now holds the total */
        /* number of rows                                              */
  CTLUniformi ( belem, CTL_STAGE, 1 );
  CTLUniformui ( belem, CTL_H, rowsum );
  nzsum = 0;
  for ( b = 0;  b < nblocks;  b++ ) {
    glBindBufferBase ( GL_SHADER_STORAGE_BUFFER, 1, ffbldesc[b].buf[0] );
    glBindBufferBase ( GL_SHADER_STORAGE_BUFFER, 2, ffbldesc[b].buf[1] );
    CTLUniformui ( belem, CTL_NNZ, nzsum );
    CTLUniformui ( belem, CTL_MI, ffbldesc[b].m );
    COMPUTE ( ffbldesc[b].nnz, 1, 1 );
    nzsum += ffbldesc[b].nnz;
  }
  ExitIfGLError ( "AssembleFFMatrix" );
} /*AssembleFFMatrix*/

/* ////////////////////////////////////////////////////////////////////////// */
/* For every object in belem->objtab, calls M4x4TInvertf to derive the      */
/* matrix mmti from the object's model matrix obj->mm (by its name the      */
/* transposed inverse, used for transforming normal vectors - confirm).     */
static void SetupObjMMti ( BalanceElements *belem )
{
  int n = 0;

  while ( n < belem->nobj ) {
    BalanceObject *o = &belem->objtab[n];

    M4x4TInvertf ( o->mmti, o->obj->mm );
    n++;
  }
} /*SetupObjMMti*/

/* Computes the form factor matrix belem->ffmat.                             */
/* The elements are processed in blocks of at most FFBLOCKSIZE.  For each    */
/* element the scene is rendered by DrawViewFromElem from the point read     */
/* from BUF_CP, transformed by the model matrix of the element's object,     */
/* with the vector trdesc[t].nvect transformed by the object's mmti matrix.  */
/* After each block GPUComputeFF fills the block descriptor ffbldesc[b];     */
/* finally AssembleFFMatrix merges all blocks into belem->ffmat.             */
/* All temporary GL objects and host arrays created here are released        */
/* before returning; the process is terminated via ExitOnError /             */
/* ExitIfGLError on allocation or OpenGL errors.                             */
void ComputeFormFactors ( BalanceElements *belem )
{
  GLuint          fffbo, ffbuf[4], *ffmatblbuf;
  GPUSparseMatrix *ffbldesc;
  GLfloat         *elcp, cp[4], nv[3], *mm, *mmti;
  int             nelem, nblocks, N, t, b, i, j, k;
  GLuint          *vardata, inval = RESTART_IND_UINT, on;

        /* NOTE(review): unindented glFinish calls at entry and exit look */
        /* like synchronisation added while debugging - confirm they are  */
        /* still needed */
glFinish ();
  SetupObjMMti ( belem );
        /* presumably creates ffbuf[0]; ffbuf[1..3] are created below and */
        /* all four are deleted at the end - TODO confirm */
  ComputeFFWeights ( belem, ffbuf );
  ConstructFFPMatrices ( &belem->fftr, belem->ffnear, belem->fffar );
        /* N pixels per element view: 3*FFTXTSIZE by FFTXTSIZE */
  N = 3*FFTXTSIZE*FFTXTSIZE;
  ffbuf[1] = NewStorageBuffer ( ((FFBLOCKSIZE+2)*N+1)*sizeof(GLuint), 7 );
  ffbuf[2] = NewStorageBuffer ( (FFBLOCKSIZE*N+1)*sizeof(GLuint), 4 );
  ffbuf[3] = NewStorageBuffer ( (FFBLOCKSIZE*N+1)*sizeof(GLuint), 5 );
  fffbo = PrepareFFFramebuffer ( belem );
        /* the integer colour attachment is read as an image by the */
        /* compute pass */
  glBindImageTexture ( 0, belem->fftxt[0], 0, GL_FALSE, 0, GL_READ_ONLY, GL_R32UI );
  LoadFFViewports ( &belem->fftr );
  ExitIfGLError ( "ComputeFormFactors 1" );

        /* read back four floats per element from BUF_CP */
  nelem = belem->nelem;
  if ( !(elcp = malloc ( nelem*4*sizeof(GLfloat) )) )
    ExitOnError ( "ComputeFormFactors 1" );
  glGetNamedBufferSubData ( BUF_CP, 0, nelem*4*sizeof(GLfloat), elcp );
  ExitIfGLError ( "ComputeFormFactors 2" );
  vardata = belem->VarBuf;
  glBindVertexArray ( belem->tvao );
  ExitIfGLError ( "ComputeFormFactors 2" );
        /* number of blocks, rounded up; each block gets two buffer names */
  nblocks = (nelem+FFBLOCKSIZE-1) / FFBLOCKSIZE;
  if ( !(ffmatblbuf = malloc ( 2*nblocks*sizeof(GLuint) )) ||
       !(ffbldesc = malloc ( nblocks*sizeof(GPUSparseMatrix) )) )
    ExitOnError ( "ComputeFormFactors 2" );
  memset ( ffbldesc, 0, nblocks*sizeof(GPUSparseMatrix) );
  glGenBuffers ( 2*nblocks, ffmatblbuf );
  for ( b = j = 0;  b < nblocks;  b++, j += 2 ) {
    ffbldesc[b].buf[0] = ffmatblbuf[j];
    ffbldesc[b].buf[1] = ffmatblbuf[j+1];
  }
        /* on caches the object number of the previous element, so that */
        /* mm and mmti are looked up only when the object changes       */
  on = RESTART_IND_UINT;
  mm = mmti = NULL;
  glEnable ( GL_DEPTH_TEST );
  glDisable ( GL_DEPTH_CLAMP );
        /* b - block number, i - element number, k = 4*i indexes both */
        /* elcp and vardata                                           */
  for ( b = i = k = 0;  i < nelem;  b++ ) {
/* Clear the buffer holding an array of uint variables; the intent is to    */
/* give them the value 2^32-1, i.e. to set all bits to 1.  When the second  */
/* parameter was GL_R32UI, this worked correctly on a desktop computer, but */
/* on a laptop the variables were assigned 2047, i.e. 11 bits were set to 1 */
/* and the rest to 0.  Apparently a bug in the OpenGL implementation; the   */
/* author could not explain it otherwise.                                   */
/* NOTE(review): the format argument here is GL_RED, whereas the integer    */
/* texture clear in DrawViewFromElem uses GL_RED_INTEGER; check whether     */
/* GL_R32UI with GL_RED_INTEGER removes the need for this workaround.       */
    glClearNamedBufferData ( ffbuf[1], GL_R8UI, GL_RED,
                             GL_UNSIGNED_INT, &inval );
    for ( j = 0;  j < FFBLOCKSIZE && i < nelem;  j++, i++, k += 4 ) {
        /* second of the four words kept per element in VarBuf; used as */
        /* an index into belem->trdesc                                  */
      t = vardata[k+1];
      if ( on != belem->ObjIdBuf[i] ) {
        on = belem->ObjIdBuf[i];
        mm = belem->objtab[on].obj->mm;
        mmti = belem->objtab[on].mmti;
      }
        /* transform the element's point and vector with the matrices */
        /* of its object                                               */
      M4x4MultMP3f ( cp, mm, &elcp[k] );
      M4x4MultMV3f ( nv, mmti, belem->trdesc[t].nvect );
      DrawViewFromElem ( belem, bprog_id[7], bprog_id[8],
                         fftrbbp, fftrbofs, j, cp, nv );
    }
        /* j is now the number of elements (rows) in this block */
    ffbldesc[b].m = j;
    ffbldesc[b].n = belem->nmacroelem;
    GPUComputeFF ( belem, ffbuf, j, &ffbldesc[b] );
  }
  AssembleFFMatrix ( belem, nblocks, ffbldesc );
        /* release everything that was created above */
  free ( elcp );
  glBindVertexArray ( 0 );
  glUseProgram ( 0 );
  glBindFramebuffer ( GL_DRAW_FRAMEBUFFER, 0 );
  glDeleteFramebuffers ( 1, &fffbo );
  glDeleteTextures ( 2, belem->fftxt );
  glDeleteBuffers ( 4, ffbuf );
  glDeleteBuffers ( 2*nblocks, ffmatblbuf );
  free ( ffmatblbuf );
  free ( ffbldesc );
glFinish ();
  printf ( "Form factors matrix: m = %d, n = %d, nnz = %d\n",
           belem->ffmat.m, belem->ffmat.n, belem->ffmat.nnz );
  ExitIfGLError ( "ComputeFormFactorsA" );
} /*ComputeFormFactors*/

/* ////////////////////////////////////////////////////////////////////////// */
/* magnification of the auxiliary texture relative to the irradiance texture */
#define VTXT_MAG 4

/* Renders all objects with program bprog_id[12] (bc0.vert, bc0.geom,       */
/* bc0.frag) into a temporary RGBA32F texture whose size is VTXT_MAG times  */
/* that of the irradiance texture, and then runs the compute program        */
/* bprog_id[13] (bc1.comp) over belem->nelem items with BUF_VARBUF and      */
/* BUF_LE bound to shader storage bindings 0 and 1 and the temporary        */
/* texture bound as a read-only image.  The temporary framebuffer and       */
/* texture are deleted before returning.                                    */
/* Side effects visible to the caller: the viewport, the clear colour, the  */
/* active texture unit and the polygon mode are changed and not restored.   */
static void ComputeElemEmission ( BalanceElements *belem, TransBl *trans )
{
  const GLsizei txw = VTXT_MAG*belem->irrtxt_width;
  const GLsizei txh = VTXT_MAG*belem->irrtxt_height;
  GLuint        fbo, tex;
  int           n;

  ExitIfGLError ( "ComputeElemEmission 0" );
        /* temporary render target */
  glGenTextures ( 1, &tex );
  glActiveTexture ( GL_TEXTURE0+49 );
  glBindTexture ( GL_TEXTURE_2D, tex );
  glTexStorage2D ( GL_TEXTURE_2D, 1, GL_RGBA32F, txw, txh );
  glBindTexture ( GL_TEXTURE_2D, 0 );
  glGenFramebuffers ( 1, &fbo );
  glBindFramebuffer ( GL_DRAW_FRAMEBUFFER, fbo );
  glFramebufferTexture ( GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, tex, 0 );
  if ( glCheckFramebufferStatus ( GL_DRAW_FRAMEBUFFER ) !=
       GL_FRAMEBUFFER_COMPLETE )
    ExitOnError ( "ComputeElemEmission" );
  glViewport ( 0, 0, txw, txh );
  glClearColor ( 0.0, 0.0, 0.0, 0.0 );
  glClear ( GL_COLOR_BUFFER_BIT );
  ExitIfGLError ( "ComputeElemEmission 1" );

        /* rendering pass: bc0.vert, bc0.geom, bc0.frag */
  glUseProgram ( bprog_id[12] );
  glBindVertexArray ( belem->tvao );
  ExitIfGLError ( "ComputeElemEmission 2" );
  for ( n = 0; n < belem->nobj; n++ ) {
    BalanceObject *o = &belem->objtab[n];

    LoadMMatrix ( trans, o->obj->mm );
        /* each object is drawn twice: first as outlines, then filled */
        /* (presumably so that texels along triangle edges are also   */
        /* covered - confirm)                                         */
    glPolygonMode ( GL_FRONT_AND_BACK, GL_LINE );
    CTLUniformi ( belem, CTL_FIRST, o->ftrdesc );
    glDrawElements ( GL_TRIANGLES, 3*o->cntr, GL_UNSIGNED_INT,
                     (GLvoid*)(o->ftrdesc*3*sizeof(GLuint)) );
    glFlush ();
    glPolygonMode ( GL_FRONT_AND_BACK, GL_FILL );
    glDrawElements ( GL_TRIANGLES, 3*o->cntr, GL_UNSIGNED_INT,
                     (GLvoid*)(o->ftrdesc*3*sizeof(GLuint)) );
    glFlush ();
    ExitIfGLError ( "ComputeElemEmission 3" );
  }
  glFinish ();
  glBindVertexArray ( 0 );
  glBindFramebuffer ( GL_DRAW_FRAMEBUFFER, 0 );
  glDeleteFramebuffers ( 1, &fbo );

        /* compute pass: bc1.comp reads the rendered texture as an image */
  glBindBufferBase ( GL_SHADER_STORAGE_BUFFER, 0, BUF_VARBUF );
  glBindBufferBase ( GL_SHADER_STORAGE_BUFFER, 1, BUF_LE );
  glBindImageTexture ( 0, tex, 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA32F );
  glUseProgram ( bprog_id[13] );
  COMPUTE ( belem->nelem, 1, 1 );
  glDeleteTextures ( 1, &tex );
  ExitIfGLError ( "ComputeElemEmission" );
} /*ComputeElemEmission*/

/* Runs the compute program bprog_id[14] (bc2.comp) over belem->nelem       */
/* items, with the buffers BUF_ALBMAT, LeBuf and LBuf bound to shader       */
/* storage binding points 0, 1 and 2 respectively.                          */
/*   LeBuf - buffer bound at binding 1 (the caller passes BUF_LE),          */
/*   LBuf  - buffer bound at binding 2 (the caller passes BUF_L0).          */
/* What the shader writes is not visible here; presumably it combines the   */
/* emission in LeBuf with the reflected light in LBuf - confirm in bc2.comp.*/
static void FinishBalanceIteration ( BalanceElements *belem,
                                     GLuint LeBuf, GLuint LBuf )
{
  glUseProgram ( bprog_id[14] );  /* bc2.comp */
  glBindBufferBase ( GL_SHADER_STORAGE_BUFFER, 0, BUF_ALBMAT );
  glBindBufferBase ( GL_SHADER_STORAGE_BUFFER, 1, LeBuf );
  glBindBufferBase ( GL_SHADER_STORAGE_BUFFER, 2, LBuf );
        /* terminated explicitly, like every other COMPUTE call in this */
        /* file, so the statement does not rely on how the macro's      */
        /* expansion happens to end                                     */
  COMPUTE ( belem->nelem, 1, 1 );
  ExitIfGLError ( "FinishBalanceIteration" );
} /*FinishBalanceIteration*/

/* Fills the irradiance texture belem->irrtxt from the buffer lb.           */
/* lb is bound to shader storage binding 2 and the texture is bound as a    */
/* read-write RGBA32F image at unit 1; the compute program bprog_id[15]     */
/* (bc3.comp) is then dispatched twice over irrtxt_width by irrtxt_height   */
/* work groups, with CTL_STAGE set to 0 and then to 1.  Each dispatch is    */
/* followed by an image access barrier, so that the second stage sees the   */
/* image writes of the first.                                               */
static void SetupEnvIrradianceTexture ( BalanceElements *belem, GLuint lb )
{
  int pass;

  glBindBufferBase ( GL_SHADER_STORAGE_BUFFER, 2, lb );
  glBindImageTexture ( 1, belem->irrtxt, 0, GL_FALSE, 0, GL_READ_WRITE,
                       GL_RGBA32F );
  glUseProgram ( bprog_id[15] );  /* bc3.comp */
  for ( pass = 0; pass < 2; pass++ ) {
    CTLUniformi ( belem, CTL_STAGE, pass );
    glDispatchCompute ( belem->irrtxt_width, belem->irrtxt_height, 1 );
    glMemoryBarrier ( GL_SHADER_IMAGE_ACCESS_BARRIER_BIT );
  }
  ExitIfGLError ( "SetupEnvIrradianceTexture" );
} /*SetupEnvIrradianceTexture*/

/* Performs niter iterations of the light balance computation and stores    */
/* the result in the irradiance texture.                                    */
/* The element emission is computed into BUF_LE and copied to BUF_L0.  One  */
/* step multiplies BUF_L0 by belem->avgmat into BUF_L1 and then BUF_L1 by   */
/* belem->ffmat back into BUF_L0.  This step is executed niter+1 times      */
/* (once if niter <= 0); after each of the first niter steps                */
/* FinishBalanceIteration is called with BUF_LE and BUF_L0.  The outcome of */
/* the last step is passed to SetupEnvIrradianceTexture.                    */
/* Always returns true; errors terminate the program via ExitIfGLError.     */
char ComputeLightBalance ( BalanceElements *belem, TransBl *trans, int niter )
{
  int count, step;

  count = belem->nelem;
  ComputeElemEmission ( belem, trans );
        /* the iterations start from the emission: four floats per element */
  glCopyNamedBufferSubData ( BUF_LE, BUF_L0, 0, 0, 4*count*sizeof(GLfloat) );
  ExitIfGLError ( "ComputeLightBalance 0" );
  for ( step = 0; ; step++ ) {
    GPUSMultSparseMatrixVectorf ( BUF_L1, &belem->avgmat, 4, BUF_L0 );
    GPUSMultSparseMatrixVectorf ( BUF_L0, &belem->ffmat, 4, BUF_L1 );
        /* the final pair of multiplications is not followed by the */
        /* finishing pass                                           */
    if ( step >= niter )
      break;
    FinishBalanceIteration ( belem, BUF_LE, BUF_L0 );
  }
  SetupEnvIrradianceTexture ( belem, BUF_L0 );
  glUseProgram ( 0 );
  ExitIfGLError ( "ComputeLightBalance" );
  return true;
} /*ComputeLightBalance*/

/* Draws all objects of belem with program bprog_id[17].                    */
/* The texture belem->irrtxt is bound as GL_TEXTURE_RECTANGLE on texture    */
/* unit MAX_NLIGHTS+MAX_TEXTURES.  For each object its model matrix is      */
/* loaded through trans, the control variables CTL_FIRST and CTL_TXTS are   */
/* set to the object's ftrdesc and txts fields, and its 3*cntr indices are  */
/* drawn as triangles.  The program and the texture binding are left active */
/* on return; only the vertex array binding is reset.                       */
void DrawRadianceElements ( BalanceElements *belem, TransBl *trans )
{
  int n;

  glUseProgram ( bprog_id[17] );
  glActiveTexture ( GL_TEXTURE0+MAX_NLIGHTS+MAX_TEXTURES );
  glBindTexture ( GL_TEXTURE_RECTANGLE, belem->irrtxt );
  glBindVertexArray ( belem->tvao );
  n = 0;
  while ( n < belem->nobj ) {
    BalanceObject *o = &belem->objtab[n];

    LoadMMatrix ( trans, o->obj->mm );
    CTLUniformi ( belem, CTL_FIRST, o->ftrdesc );
    CTLUniformi ( belem, CTL_TXTS, o->txts );
        /* the object's triangles start at triangle number o->ftrdesc */
    glDrawElements ( GL_TRIANGLES, 3*o->cntr, GL_UNSIGNED_INT,
                     (GLvoid*)(o->ftrdesc*3*sizeof(GLuint)) );
    n++;
  }
  glBindVertexArray ( 0 );
  ExitIfGLError ( "DrawRadianceElements" );
} /*DrawRadianceElements*/

