stb/tests/caveview/cave_render.c

   1 // This file renders vertex buffers, converts raw meshes
   2 // to GL meshes, and manages threads that do the raw-mesh
   3 // building (found in cave_mesher.c)
   4
   5
   6 #include "stb_voxel_render.h"
   7
   8 #define STB_GLEXT_DECLARE "glext_list.h"
   9 #include "stb_gl.h"
  10 #include "stb_image.h"
  11 #include "stb_glprog.h"
  12
  13 #include "caveview.h"
  14 #include "cave_parse.h"
  15 #include "stb.h"
  16 #include "sdl.h"
  17 #include "sdl_thread.h"
  18 #include <math.h>
  19 #include <assert.h>
  20
  21 //#define STBVOX_CONFIG_TEX1_EDGE_CLAMP
  22
  23
  24 // currently no dynamic way to set mesh cache size or view distance
  25 //#define SHORTVIEW
  26
  27
// Mesh maker instance; initialized by render_init() via stbvox_init_mesh_maker().
stbvox_mesh_maker g_mesh_maker;

// Handle of the voxel shader program; created in render_init() and bound
// with stbglUseProgram() when uniforms are set and when chunks are drawn.
GLuint main_prog;
// NOTE(review): not referenced in the reviewed portion of this file
// (uniform_loc[] is what setup_uniforms() fills in) -- confirm whether unused.
GLint uniform_locations[64];
  32
  33 //#define MAX_QUADS_PER_DRAW        (65536 / 4) // assuming 16-bit indices, 4 verts per quad
  34 //#define FIXED_INDEX_BUFFER_SIZE   (MAX_QUADS_PER_DRAW * 6 * 2)  // 16*1024 * 12 == ~192KB
  35
// While uploading texture data, this holds the texels of the one layer
// currently being built: render_init() fills it (scale_texture() or
// build_base_texture()) and then copies it into a layer of voxel_tex[0].
#define TEX_SIZE  64
uint32 texture[TEX_SIZE][TEX_SIZE];

// GL 2D-array textures created in render_init():
//   [0] 256 layers of TEX_SIZE x TEX_SIZE block textures
//   [1]  64 layers of 1x1 texels taken from tex2_data
GLuint voxel_tex[2];
  41
  42 // chunk state
  43 enum
  44 {
  45    STATE_invalid,
  46    STATE_needed,
  47    STATE_requested,
  48    STATE_abandoned,
  49    STATE_valid,
  50 };
  51
  52 // mesh is 32x32x255 ... this is hardcoded in that
  53 // a mesh covers 2x2 minecraft chunks, no #defines for it
  54 typedef struct
  55 {
  56    int state;
  57    int chunk_x, chunk_y;
  58    int num_quads;
  59    float priority;
  60    int vbuf_size, fbuf_size;
  61
  62    float transform[3][3];
  63    float bounds[2][3];
  64
  65    GLuint vbuf;// vbuf_tex;
  66    GLuint fbuf, fbuf_tex;
  67
  68 } chunk_mesh;
  69
  70 void scale_texture(unsigned char *src, int x, int y, int w, int h)
  71 {
  72    int i,j,k;
  73    assert(w == 256 && h == 256);
  74    for (j=0; j < TEX_SIZE; ++j) {
  75       for (i=0; i < TEX_SIZE; ++i) {
  76          uint32 val=0;
  77          for (k=0; k < 4; ++k) {
  78             val >>= 8;
  79             val += src[ 4*(x+(i>>2)) + 4*w*(y+(j>>2)) + k]<<24;
  80          }
  81          texture[j][i] = val;
  82       }
  83    }
  84 }
  85
  86 void build_base_texture(int n)
  87 {
  88    int x,y;
  89    uint32 color = stb_rand() | 0x808080;
  90    for (y=0; y<TEX_SIZE; ++y)
  91       for (x=0; x<TEX_SIZE; ++x) {
  92          texture[y][x] = (color + (stb_rand()&0x1f1f1f))|0xff000000;
  93       }
  94 }
  95
  96 void build_overlay_texture(int n)
  97 {
  98    int x,y;
  99    uint32 color = stb_rand();
 100    if (color & 16)
 101       color = 0xff000000;
 102    else
 103       color = 0xffffffff;
 104    for (y=0; y<TEX_SIZE; ++y)
 105       for (x=0; x<TEX_SIZE; ++x) {
 106          texture[y][x] = 0;
 107       }
 108
 109    for (y=0; y < TEX_SIZE/8; ++y) {
 110       for (x=0; x < TEX_SIZE; ++x) {
 111          texture[y][x] = color;
 112          texture[TEX_SIZE-1-y][x] = color;
 113          texture[x][y] = color;
 114          texture[x][TEX_SIZE-1-y] = color;
 115       }
 116    }
 117 }
 118
// view radius of about 1024 = 2048 columns / 32 columns-per-mesh = 2^11 / 2^5 = 64x64
// so we need bigger than 64x64 so we can precache, which means we have to be
// non-power-of-two, or we have to be pretty huge
//
// Slots are selected with `(chunk >> 1) & (CACHED_MESH_NUM_* - 1)` (see
// upload_mesh_data() and render_caves()), so these sizes must stay powers of two.
#define CACHED_MESH_NUM_X   128
#define CACHED_MESH_NUM_Y   128


// The mesh cache, indexed [slot_y][slot_x]. Different chunks can map to the
// same slot; chunk_x/chunk_y in the entry say which chunk it currently holds.
chunk_mesh cached_chunk_mesh[CACHED_MESH_NUM_Y][CACHED_MESH_NUM_X];
 127
 128 void free_chunk(int slot_x, int slot_y)
 129 {
 130    chunk_mesh *cm = &cached_chunk_mesh[slot_y][slot_x];
 131    if (cm->state == STATE_valid) {
 132       glDeleteTextures(1, &cm->fbuf_tex);
 133       glDeleteBuffersARB(1, &cm->vbuf);
 134       glDeleteBuffersARB(1, &cm->fbuf);
 135       cached_chunk_mesh[slot_y][slot_x].state = STATE_invalid;
 136    }
 137 }
 138
 139 void upload_mesh(chunk_mesh *cm, uint8 *build_buffer, uint8 *face_buffer)
 140 {
 141    glGenBuffersARB(1, &cm->vbuf);
 142    glBindBufferARB(GL_ARRAY_BUFFER_ARB, cm->vbuf);
 143    glBufferDataARB(GL_ARRAY_BUFFER_ARB, cm->num_quads*4*sizeof(uint32), build_buffer, GL_STATIC_DRAW_ARB);
 144    glBindBufferARB(GL_ARRAY_BUFFER_ARB, 0);
 145
 146    glGenBuffersARB(1, &cm->fbuf);
 147    glBindBufferARB(GL_TEXTURE_BUFFER_ARB, cm->fbuf);
 148    glBufferDataARB(GL_TEXTURE_BUFFER_ARB, cm->num_quads*sizeof(uint32), face_buffer , GL_STATIC_DRAW_ARB);
 149    glBindBufferARB(GL_TEXTURE_BUFFER_ARB, 0);
 150
 151    glGenTextures(1, &cm->fbuf_tex);
 152    glBindTexture(GL_TEXTURE_BUFFER_ARB, cm->fbuf_tex);
 153    glTexBufferARB(GL_TEXTURE_BUFFER_ARB, GL_RGBA8UI, cm->fbuf);
 154    glBindTexture(GL_TEXTURE_BUFFER_ARB, 0);
 155 }
 156
 157 static void upload_mesh_data(raw_mesh *rm)
 158 {
 159    int cx = rm->cx;
 160    int cy = rm->cy;
 161    int slot_x = (cx >> 1) & (CACHED_MESH_NUM_X-1);
 162    int slot_y = (cy >> 1) & (CACHED_MESH_NUM_Y-1);
 163    chunk_mesh *cm;
 164
 165    free_chunk(slot_x, slot_y);
 166
 167    cm = &cached_chunk_mesh[slot_y][slot_x];
 168    cm->num_quads = rm->num_quads;
 169
 170    upload_mesh(cm, rm->build_buffer, rm->face_buffer);
 171    cm->vbuf_size = rm->num_quads*4*sizeof(uint32);
 172    cm->fbuf_size = rm->num_quads*sizeof(uint32);
 173    cm->priority = 100000;
 174    cm->chunk_x = cx;
 175    cm->chunk_y = cy;
 176
 177    memcpy(cm->bounds, rm->bounds, sizeof(cm->bounds));
 178    memcpy(cm->transform, rm->transform, sizeof(cm->transform));
 179
 180    // write barrier here
 181    cm->state = STATE_valid;
 182 }
 183
// Shader uniform locations indexed by STBVOX_UNIFORM_*; refreshed on every
// setup_uniforms() call (-1 for the uniforms that function skips).
GLint uniform_loc[16];
// Scratch arrays for staging uniform values before they are uploaded; see
// setup_uniforms() for which ones are in use.
float table3[128][3];
float table4[64][4];
GLint tablei[2];

// Advanced by 1/60 on every setup_uniforms() call.
float step=0;

// View distance in chunks: bounds the traversal loops in render_caves(),
// scales compute_priority(), and sets the fog term in setup_uniforms().
#ifdef SHORTVIEW
int view_dist_in_chunks = 50;
#else
int view_dist_in_chunks = 80;
#endif
 196
 197 void setup_uniforms(float pos[3])
 198 {
 199    int i,j;
 200    step += 1.0f/60.0f;
 201    for (i=0; i < STBVOX_UNIFORM_count; ++i) {
 202       stbvox_uniform_info raw, *ui=&raw;
 203       stbvox_get_uniform_info(&raw, i);
 204       uniform_loc[i] = -1;
 205
 206       if (i == STBVOX_UNIFORM_texscale || i == STBVOX_UNIFORM_texgen || i == STBVOX_UNIFORM_color_table)
 207          continue;
 208
 209       if (ui) {
 210          void *data = ui->default_value;
 211          uniform_loc[i] = stbgl_find_uniform(main_prog, ui->name);
 212         switch (i) {
 213             case STBVOX_UNIFORM_face_data:
 214                tablei[0] = 2;
 215                data = tablei;
 216                break;
 217
 218             case STBVOX_UNIFORM_tex_array:
 219                glActiveTextureARB(GL_TEXTURE0_ARB);
 220                glBindTexture(GL_TEXTURE_2D_ARRAY_EXT, voxel_tex[0]);
 221                glActiveTextureARB(GL_TEXTURE1_ARB);
 222                glBindTexture(GL_TEXTURE_2D_ARRAY_EXT, voxel_tex[1]);
 223                glActiveTextureARB(GL_TEXTURE0_ARB);
 224                tablei[0] = 0;
 225                tablei[1] = 1;
 226                data = tablei;
 227                break;
 228
 229             case STBVOX_UNIFORM_color_table:
 230                data = ui->default_value;
 231                ((float *)data)[63*4+3] = 2.0f; // emissive
 232                break;
 233
 234             case STBVOX_UNIFORM_camera_pos:
 235                data = table3[0];
 236                table3[0][0] = pos[0];
 237                table3[0][1] = pos[1];
 238                table3[0][2] = pos[2];
 239                table3[0][3] = stb_max(0,(float)sin(step*2)*0.125f);
 240                break;
 241
 242             case STBVOX_UNIFORM_ambient: {
 243                float bright = 1.0;
 244                //float bright = 0.75;
 245                float amb[3][3];
 246
 247                // ambient direction is sky-colored upwards
 248                // "ambient" lighting is from above
 249                table4[0][0] =  0.3f;
 250                table4[0][1] = -0.5f;
 251                table4[0][2] =  0.9f;
 252
 253                amb[1][0] = 0.3f; amb[1][1] = 0.3f; amb[1][2] = 0.3f; // dark-grey
 254                amb[2][0] = 1.0; amb[2][1] = 1.0; amb[2][2] = 1.0; // white
 255
 256                // convert so (table[1]*dot+table[2]) gives
 257                // above interpolation
 258                //     lerp((dot+1)/2, amb[1], amb[2])
 259                //     amb[1] + (amb[2] - amb[1]) * (dot+1)/2
 260                //     amb[1] + (amb[2] - amb[1]) * dot/2 + (amb[2]-amb[1])/2
 261
 262                for (j=0; j < 3; ++j) {
 263                   table4[1][j] = (amb[2][j] - amb[1][j])/2 * bright;
 264                   table4[2][j] = (amb[1][j] + amb[2][j])/2 * bright;
 265                }
 266
 267                // fog color
 268                table4[3][0] = 0.6f, table4[3][1] = 0.7f, table4[3][2] = 0.9f;
 269                table4[3][3] = 1.0f / (view_dist_in_chunks * 16);
 270                table4[3][3] *= table4[3][3];
 271
 272                data = table4;
 273                break;
 274             }
 275          }
 276
 277          switch (ui->type) {
 278             case STBVOX_UNIFORM_TYPE_sampler: stbglUniform1iv(uniform_loc[i], ui->array_length, data); break;
 279             case STBVOX_UNIFORM_TYPE_vec2:    stbglUniform2fv(uniform_loc[i], ui->array_length, data); break;
 280             case STBVOX_UNIFORM_TYPE_vec3:    stbglUniform3fv(uniform_loc[i], ui->array_length, data); break;
 281             case STBVOX_UNIFORM_TYPE_vec4:    stbglUniform4fv(uniform_loc[i], ui->array_length, data); break;
 282          }
 283       }
 284    }
 285 }
 286
 287 GLuint unitex[64], unibuf[64];
 288 void make_texture_buffer_for_uniform(int uniform, int slot)
 289 {
 290    GLenum type;
 291    stbvox_uniform_info raw, *ui=&raw;
 292    GLint uloc;
 293
 294    stbvox_get_uniform_info(ui, uniform);
 295    uloc = stbgl_find_uniform(main_prog, ui->name);
 296
 297    if (uniform == STBVOX_UNIFORM_color_table)
 298       ((float *)ui->default_value)[63*4+3] = 2.0f; // emissive
 299
 300    glGenBuffersARB(1, &unibuf[uniform]);
 301    glBindBufferARB(GL_ARRAY_BUFFER_ARB, unibuf[uniform]);
 302    glBufferDataARB(GL_ARRAY_BUFFER_ARB, ui->array_length * ui->bytes_per_element, ui->default_value, GL_STATIC_DRAW_ARB);
 303    glBindBufferARB(GL_ARRAY_BUFFER_ARB, 0);
 304
 305    glGenTextures(1, &unitex[uniform]);
 306    glBindTexture(GL_TEXTURE_BUFFER_ARB, unitex[uniform]);
 307    switch (ui->type) {
 308       case STBVOX_UNIFORM_TYPE_vec2: type = GL_RG32F; break;
 309       case STBVOX_UNIFORM_TYPE_vec3: type = GL_RGB32F; break;
 310       case STBVOX_UNIFORM_TYPE_vec4: type = GL_RGBA32F; break;
 311       default: assert(0);
 312    }
 313    glTexBufferARB(GL_TEXTURE_BUFFER_ARB, type, unibuf[uniform]);
 314    glBindTexture(GL_TEXTURE_BUFFER_ARB, 0);
 315
 316    glActiveTextureARB(GL_TEXTURE0 + slot);
 317    glBindTexture(GL_TEXTURE_BUFFER_ARB, unitex[uniform]);
 318    glActiveTextureARB(GL_TEXTURE0);
 319
 320    stbglUseProgram(main_prog);
 321    stbglUniform1i(uloc, slot);
 322 }
 323
// Capacity of mesh_data[]; prepare_threads() clamps num_mesh_workers to this.
#define MAX_MESH_WORKERS  8
// NOTE(review): not referenced in the reviewed portion of this file.
#define MAX_CHUNK_LOAD_WORKERS 2

// Number of mesh worker threads in use; chosen by prepare_threads().
int num_mesh_workers;
// NOTE(review): not referenced in the reviewed portion of this file.
int num_chunk_load_workers;
 329
// State shared between the render thread and one mesh worker thread.
// The render thread hands over a request via request_chunk(); the worker
// (mesh_worker_handler) builds the mesh into `rm`; the render thread then
// uploads it in update_meshes_from_render_thread().
typedef struct
{
   int state;        // one of the WSTATE_* values
   int request_cx;   // chunk coordinates of the current request,
   int request_cy;   //   written by request_chunk()
   int padding[13];  // NOTE(review): presumably spacing to keep hot fields apart -- confirm

   SDL_sem * request_received;   // posted by request_chunk(), waited on by the worker

   // NOTE(review): the semaphore below is created in prepare_threads() but,
   // like the three chunk_* ints, is not otherwise used in the reviewed
   // portion of this file.
   SDL_sem * chunk_server_done_processing;
   int chunk_action;
   int chunk_request_x;
   int chunk_request_y;
   fast_chunk *chunks[4][4];     // the 4x4 converted chunks held while a mesh is being built

   int padding2[16];
   raw_mesh rm;                  // output of build_chunk()
   int padding3[16];

   // buffers allocated once by the worker thread and lent to `rm` for each build
   uint8 *build_buffer;
   uint8 *face_buffer ;
} mesh_worker;
 352
 353 enum
 354 {
 355    WSTATE_idle,
 356    WSTATE_requested,
 357    WSTATE_running,
 358    WSTATE_mesh_ready,
 359 };
 360
// Worker slots; only the first num_mesh_workers entries are used.
mesh_worker mesh_data[MAX_MESH_WORKERS];
// Incremented by request_chunk() for every accepted request; reset each
// frame in render_caves().
int num_meshes_started; // stats

// Forward declarations for functions defined further down in this file.
int request_chunk(int chunk_x, int chunk_y);
void update_meshes_from_render_thread(void);
 366
 367 unsigned char tex2_data[64][4];
 368
 369 void init_tex2_gradient(void)
 370 {
 371    int i;
 372    for (i=0; i < 16; ++i) {
 373       tex2_data[i+ 0][0] = 64 + 12*i;
 374       tex2_data[i+ 0][1] = 32;
 375       tex2_data[i+ 0][2] = 64;
 376
 377       tex2_data[i+16][0] = 255;
 378       tex2_data[i+16][1] = 32 + 8*i;
 379       tex2_data[i+16][2] = 64;
 380
 381       tex2_data[i+32][0] = 255;
 382       tex2_data[i+32][1] = 160;
 383       tex2_data[i+32][2] = 64 + 12*i;
 384
 385       tex2_data[i+48][0] = 255;
 386       tex2_data[i+48][1] = 160 + 6*i;
 387       tex2_data[i+48][2] = 255;
 388    }
 389 }
 390
 391 void set_tex2_alpha(float fa)
 392 {
 393    int i;
 394    int a = (int) stb_lerp(fa, 0, 255);
 395    if (a < 0) a = 0; else if (a > 255) a = 255;
 396    glBindTexture(GL_TEXTURE_2D_ARRAY_EXT, voxel_tex[1]);
 397    for (i=0; i < 64; ++i) {
 398       tex2_data[i][3] = a;
 399       glTexSubImage3DEXT(GL_TEXTURE_2D_ARRAY_EXT, 0, 0,0,i, 1,1,1, GL_RGBA, GL_UNSIGNED_BYTE, tex2_data[i]);
 400    }
 401 }
 402
 403 void render_init(void)
 404 {
 405    int i;
 406    char *binds[] = { "attr_vertex", "attr_face", NULL };
 407    char *vertex;
 408    char *fragment;
 409    int w=0,h=0;
 410
 411    unsigned char *texdata = stbi_load("terrain.png", &w, &h, NULL, 4);
 412
 413    stbvox_init_mesh_maker(&g_mesh_maker);
 414    for (i=0; i < num_mesh_workers; ++i) {
 415       stbvox_init_mesh_maker(&mesh_data[i].rm.mm);
 416    }
 417
 418    vertex = stbvox_get_vertex_shader();
 419    fragment = stbvox_get_fragment_shader();
 420
 421    {
 422       char error_buffer[1024];
 423       char *main_vertex[] = { vertex, NULL };
 424       char *main_fragment[] = { fragment, NULL };
 425       main_prog = stbgl_create_program(main_vertex, main_fragment, binds, error_buffer, sizeof(error_buffer));
 426       if (main_prog == 0) {
 427          ods("Compile error for main shader: %s\n", error_buffer);
 428          assert(0);
 429          exit(1);
 430       }
 431    }
 432    //init_index_buffer();
 433
 434    make_texture_buffer_for_uniform(STBVOX_UNIFORM_texscale     , 3);
 435    make_texture_buffer_for_uniform(STBVOX_UNIFORM_texgen       , 4);
 436    make_texture_buffer_for_uniform(STBVOX_UNIFORM_color_table  , 5);
 437
 438    glGenTextures(2, voxel_tex);
 439
 440    glBindTexture(GL_TEXTURE_2D_ARRAY_EXT, voxel_tex[0]);
 441    glTexImage3DEXT(GL_TEXTURE_2D_ARRAY_EXT, 0, GL_RGBA,
 442                       TEX_SIZE,TEX_SIZE,256,
 443                       0,GL_RGBA,GL_UNSIGNED_BYTE,NULL);
 444    for (i=0; i < 256; ++i) {
 445       if (texdata)
 446          scale_texture(texdata, (i&15)*w/16, (h/16)*(i>>4), w,h);
 447       else
 448          build_base_texture(i);
 449       glTexSubImage3DEXT(GL_TEXTURE_2D_ARRAY_EXT, 0, 0,0,i, TEX_SIZE,TEX_SIZE,1, GL_RGBA, GL_UNSIGNED_BYTE, texture[0]);
 450    }
 451    glTexParameteri(GL_TEXTURE_2D_ARRAY_EXT, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_LINEAR);
 452    glTexParameteri(GL_TEXTURE_2D_ARRAY_EXT, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
 453    glTexParameteri(GL_TEXTURE_2D_ARRAY_EXT, GL_TEXTURE_MAX_ANISOTROPY_EXT, 16);
 454    #ifdef STBVOX_CONFIG_TEX1_EDGE_CLAMP
 455    glTexParameteri(GL_TEXTURE_2D_ARRAY_EXT, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
 456    glTexParameteri(GL_TEXTURE_2D_ARRAY_EXT, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
 457    #endif
 458
 459    glGenerateMipmapEXT(GL_TEXTURE_2D_ARRAY_EXT);
 460
 461    glBindTexture(GL_TEXTURE_2D_ARRAY_EXT, voxel_tex[1]);
 462    glTexImage3DEXT(GL_TEXTURE_2D_ARRAY_EXT, 0, GL_RGBA,
 463                       1,1,64,
 464                       0,GL_RGBA,GL_UNSIGNED_BYTE,NULL);
 465    init_tex2_gradient();
 466    set_tex2_alpha(0.0);
 467    #if 0
 468    for (i=0; i < 128; ++i) {
 469       //build_overlay_texture(i);
 470       glTexSubImage3DEXT(GL_TEXTURE_2D_ARRAY_EXT, 0, 0,0,i, TEX_SIZE,TEX_SIZE,1, GL_RGBA, GL_UNSIGNED_BYTE, texture[0]);
 471    }
 472    #endif
 473    glTexParameteri(GL_TEXTURE_2D_ARRAY_EXT, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_LINEAR);
 474    glTexParameteri(GL_TEXTURE_2D_ARRAY_EXT, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
 475    glGenerateMipmapEXT(GL_TEXTURE_2D_ARRAY_EXT);
 476 }
 477
 478 void world_init(void)
 479 {
 480    int a,b,x,y;
 481
 482    Uint64 start_time, end_time;
 483    #ifdef NDEBUG
 484    int range = 32;
 485    #else
 486    int range = 12;
 487    #endif
 488
 489    start_time = SDL_GetPerformanceCounter();
 490
 491    // iterate in 8x8 clusters of qchunks at a time to get better converted-chunk-cache reuse
 492    // than a purely row-by-row ordering is (single-threaded this is a bigger win than
 493    // any of the above optimizations were, since it halves zlib/mc-conversion costs)
 494    for (x=-range; x <= range; x += 16)
 495       for (y=-range; y <= range; y += 16)
 496          for (b=y; b < y+16 && b <= range; b += 2)
 497             for (a=x; a < x+16 && a <= range; a += 2)
 498                while (!request_chunk(a, b)) { // if request fails, all threads are busy
 499                   update_meshes_from_render_thread();
 500                   SDL_Delay(1);
 501                }
 502
 503    // wait until all the workers are done,
 504    // (this is only needed if we want to time
 505    // when the build finishes, or when we want to reset the
 506    // cache size; otherwise we could just go ahead and
 507    // start rendering whatever we've got)
 508    for(;;) {
 509       int i;
 510       update_meshes_from_render_thread();
 511       for (i=0; i < num_mesh_workers; ++i)
 512          if (mesh_data[i].state != WSTATE_idle)
 513             break;
 514       if (i == num_mesh_workers)
 515          break;
 516       SDL_Delay(3);
 517    }
 518
 519    end_time = SDL_GetPerformanceCounter();
 520    ods("Build time: %7.2fs\n", (end_time - start_time) / (float) SDL_GetPerformanceFrequency());
 521
 522    // don't waste lots of storage on chunk caches once it's finished starting-up;
 523    // this was only needed to be this large because we worked in large blocks
 524    // to maximize sharing
 525    reset_cache_size(32);
 526 }
 527
 528 extern SDL_mutex * chunk_cache_mutex;
 529
 530 int mesh_worker_handler(void *data)
 531 {
 532    mesh_worker *mw = data;
 533    mw->face_buffer = malloc(FACE_BUFFER_SIZE);
 534    mw->build_buffer = malloc(BUILD_BUFFER_SIZE);
 535
 536    // this loop only works because the compiler can't
 537    // tell that the SDL_calls don't access mw->state;
 538    // really we should barrier that stuff
 539    for(;;) {
 540       int i,j;
 541       int cx,cy;
 542
 543       // wait for a chunk request
 544       SDL_SemWait(mw->request_received);
 545
 546       // analyze the chunk request
 547       assert(mw->state == WSTATE_requested);
 548       cx = mw->request_cx;
 549       cy = mw->request_cy;
 550
 551       // this is inaccurate as it can block while another thread has the cache locked
 552       mw->state = WSTATE_running;
 553
 554       // get the chunks we need (this takes a lock and caches them)
 555       for (j=0; j < 4; ++j)
 556          for (i=0; i < 4; ++i)
 557             mw->chunks[j][i] = get_converted_fastchunk(cx-1 + i, cy-1 + j);
 558
 559       // build the mesh based on the chunks
 560       mw->rm.build_buffer = mw->build_buffer;
 561       mw->rm.face_buffer = mw->face_buffer;
 562       build_chunk(cx, cy, mw->chunks, &mw->rm);
 563       mw->state = WSTATE_mesh_ready;
 564       // don't need to notify of this, because it gets polled
 565
 566       // when done, free the chunks
 567
 568       // for efficiency we just take the mutex once around the whole thing,
 569       // though this spreads the mutex logic over two files
 570       SDL_LockMutex(chunk_cache_mutex);
 571       for (j=0; j < 4; ++j)
 572          for (i=0; i < 4; ++i) {
 573             deref_fastchunk(mw->chunks[j][i]);
 574             mw->chunks[j][i] = NULL;
 575          }
 576       SDL_UnlockMutex(chunk_cache_mutex);
 577    }
 578    return 0;
 579 }
 580
 581 int request_chunk(int chunk_x, int chunk_y)
 582 {
 583    int i;
 584    for (i=0; i < num_mesh_workers; ++i) {
 585       mesh_worker *mw = &mesh_data[i];
 586       if (mw->state == WSTATE_idle) {
 587          mw->request_cx = chunk_x;
 588          mw->request_cy = chunk_y;
 589          mw->state = WSTATE_requested;
 590          SDL_SemPost(mw->request_received);
 591          ++num_meshes_started;
 592          return 1;
 593       }
 594    }
 595    return 0;
 596 }
 597
 598 void prepare_threads(void)
 599 {
 600    int i;
 601    int num_proc = SDL_GetCPUCount();
 602
 603    if (num_proc > 6)
 604       num_mesh_workers = num_proc/2;
 605    else if (num_proc > 4)
 606       num_mesh_workers = 4;
 607    else
 608       num_mesh_workers = num_proc-1;
 609
 610 // @TODO
 611 //   Thread usage is probably pretty terrible; need to make a
 612 //   separate queue of needed chunks, instead of just generating
 613 //   one request per thread per frame, and a separate queue of
 614 //   results. (E.g. If it takes 1.5 frames to build mesh, thread
 615 //   is idle for 0.5 frames.) To fake this for now, I've just
 616 //   doubled the number of threads to let those serve as a 'queue',
 617 //   but that's dumb.
 618
 619    num_mesh_workers *= 2; // try to get better thread usage
 620
 621    if (num_mesh_workers > MAX_MESH_WORKERS)
 622       num_mesh_workers = MAX_MESH_WORKERS;
 623
 624    for (i=0; i < num_mesh_workers; ++i) {
 625       mesh_worker *data = &mesh_data[i];
 626       data->request_received = SDL_CreateSemaphore(0);
 627       data->chunk_server_done_processing = SDL_CreateSemaphore(0);
 628       SDL_CreateThread(mesh_worker_handler, "mesh worker", data);
 629    }
 630 }
 631
 632
 633 // "better" buffer uploading
 634 #if 0
 635    if (glBufferStorage) {
 636       glDeleteBuffersARB(1, &vb->vbuf);
 637       glGenBuffersARB(1, &vb->vbuf);
 638
 639       glBindBufferARB(GL_ARRAY_BUFFER_ARB, vb->vbuf);
 640       glBufferStorage(GL_ARRAY_BUFFER_ARB, sizeof(build_buffer), build_buffer, 0);
 641       glBindBufferARB(GL_ARRAY_BUFFER_ARB, 0);
 642    } else {
 643       glBindBufferARB(GL_ARRAY_BUFFER_ARB, vb->vbuf);
 644       glBufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(build_buffer), build_buffer, GL_STATIC_DRAW_ARB);
 645       glBindBufferARB(GL_ARRAY_BUFFER_ARB, 0);
 646    }
 647 #endif
 648
 649
 650 typedef struct
 651 {
 652    float x,y,z,w;
 653 } plane;
 654
 655 static plane frustum[6];
 656
 657 static void matd_mul(double out[4][4], double src1[4][4], double src2[4][4])
 658 {
 659    int i,j,k;
 660    for (j=0; j < 4; ++j) {
 661       for (i=0; i < 4; ++i) {
 662          double t=0;
 663          for (k=0; k < 4; ++k)
 664             t += src1[k][i] * src2[j][k];
 665          out[i][j] = t;
 666       }
 667    }
 668 }
 669
 670 // https://fgiesen.wordpress.com/2012/08/31/frustum-planes-from-the-projection-matrix/
 671 static void compute_frustum(void)
 672 {
 673    int i;
 674    GLdouble mv[4][4],proj[4][4], mvproj[4][4];
 675    glGetDoublev(GL_MODELVIEW_MATRIX , mv[0]);
 676    glGetDoublev(GL_PROJECTION_MATRIX, proj[0]);
 677    matd_mul(mvproj, proj, mv);
 678    for (i=0; i < 4; ++i) {
 679       (&frustum[0].x)[i] = (float) (mvproj[3][i] + mvproj[0][i]);
 680       (&frustum[1].x)[i] = (float) (mvproj[3][i] - mvproj[0][i]);
 681       (&frustum[2].x)[i] = (float) (mvproj[3][i] + mvproj[1][i]);
 682       (&frustum[3].x)[i] = (float) (mvproj[3][i] - mvproj[1][i]);
 683       (&frustum[4].x)[i] = (float) (mvproj[3][i] + mvproj[2][i]);
 684       (&frustum[5].x)[i] = (float) (mvproj[3][i] - mvproj[2][i]);
 685    }
 686 }
 687
 688 static int test_plane(plane *p, float x0, float y0, float z0, float x1, float y1, float z1)
 689 {
 690    // return false if the box is entirely behind the plane
 691    float d=0;
 692    assert(x0 <= x1 && y0 <= y1 && z0 <= z1);
 693    if (p->x > 0) d += x1*p->x; else d += x0*p->x;
 694    if (p->y > 0) d += y1*p->y; else d += y0*p->y;
 695    if (p->z > 0) d += z1*p->z; else d += z0*p->z;
 696    return d + p->w >= 0;
 697 }
 698
 699 static int is_box_in_frustum(float *bmin, float *bmax)
 700 {
 701    int i;
 702    for (i=0; i < 6; ++i)
 703       if (!test_plane(&frustum[i], bmin[0], bmin[1], bmin[2], bmax[0], bmax[1], bmax[2]))
 704          return 0;
 705    return 1;
 706 }
 707
 708 float compute_priority(int cx, int cy, float x, float y)
 709 {
 710    float distx, disty, dist2;
 711    distx = (cx*16+8) - x;
 712    disty = (cy*16+8) - y;
 713    dist2 = distx*distx + disty*disty;
 714    return view_dist_in_chunks*view_dist_in_chunks * 16 * 16 - dist2;
 715 }
 716
// Per-frame statistics; all are reset at the top of render_caves().
int chunk_locations, chunks_considered, chunks_in_frustum;
int quads_considered, quads_rendered;
int chunk_storage_rendered, chunk_storage_considered, chunk_storage_total;
// NOTE(review): not read in the reviewed portion of this file.
int update_frustum = 1;

// Byte budgets for cached mesh buffers, compared against
// chunk_storage_total in render_caves(): new build requests are issued only
// while the total is below max_chunk_storage, and eviction is considered
// only once it has reached min_chunk_storage.
#ifdef SHORTVIEW
int max_chunk_storage = 450 << 20;
int min_chunk_storage = 350 << 20;
#else
int max_chunk_storage = 900 << 20;
int min_chunk_storage = 800 << 20;
#endif

// Chunks whose compute_priority() value is below this are not marked as
// needed, and needed chunks that fall below it are dropped again.
float min_priority = -500; // this really wants to be in unit space, not squared space

// Meshes uploaded by update_meshes_from_render_thread(); reset once per
// frame in render_caves().
int num_meshes_uploaded;
 733
 734 void update_meshes_from_render_thread(void)
 735 {
 736    int i;
 737    for (i=0; i < num_mesh_workers; ++i) {
 738       mesh_worker *mw = &mesh_data[i];
 739       if (mw->state == WSTATE_mesh_ready) {
 740          upload_mesh_data(&mw->rm);
 741          ++num_meshes_uploaded;
 742          mw->state = WSTATE_idle;
 743       }
 744    }
 745 }
 746
// Defined outside this file. When global_hack is nonzero, render_caves()
// passes tex2_alpha to set_tex2_alpha() every frame.
extern float tex2_alpha;
extern int global_hack;
// Number of workers in WSTATE_running; recomputed at the end of render_caves().
int num_threads_active;
// NOTE(review): not accessed in the reviewed portion of this file.
float chunk_server_activity;
 751
 752 void render_caves(float campos[3])
 753 {
 754    float x = campos[0], y = campos[1];
 755    int qchunk_x, qchunk_y;
 756    int cam_x, cam_y;
 757    int i,j, rad;
 758
 759    compute_frustum();
 760
 761    chunk_locations = chunks_considered = chunks_in_frustum = 0;
 762    quads_considered = quads_rendered = 0;
 763    chunk_storage_total = chunk_storage_considered = chunk_storage_rendered = 0;
 764
 765    cam_x = (int) floor(x+0.5);
 766    cam_y = (int) floor(y+0.5);
 767
 768    qchunk_x = (((int) floor(x)+16) >> 5) << 1;
 769    qchunk_y = (((int) floor(y)+16) >> 5) << 1;
 770
 771    glEnable(GL_ALPHA_TEST);
 772    glAlphaFunc(GL_GREATER, 0.5);
 773
 774    stbglUseProgram(main_prog);
 775    setup_uniforms(campos); // set uniforms to default values inefficiently
 776    glActiveTextureARB(GL_TEXTURE2_ARB);
 777    stbglEnableVertexAttribArray(0);
 778
 779    {
 780       float lighting[2][3] = { { campos[0],campos[1],campos[2] }, { 0.75,0.75,0.65f } };
 781       float bright = 8;
 782       lighting[1][0] *= bright;
 783       lighting[1][1] *= bright;
 784       lighting[1][2] *= bright;
 785       stbglUniform3fv(stbgl_find_uniform(main_prog, "light_source"), 2, lighting[0]);
 786    }
 787
 788    if (global_hack)
 789       set_tex2_alpha(tex2_alpha);
 790
 791    num_meshes_uploaded = 0;
 792    update_meshes_from_render_thread();
 793
 794    // traverse all in-range chunks and analyze them
 795    for (j=-view_dist_in_chunks; j <= view_dist_in_chunks; j += 2) {
 796       for (i=-view_dist_in_chunks; i <= view_dist_in_chunks; i += 2) {
 797          float priority;
 798          int cx = qchunk_x + i;
 799          int cy = qchunk_y + j;
 800
 801          priority = compute_priority(cx, cy, x, y);
 802          if (priority >= min_priority) {
 803             int slot_x = (cx>>1) & (CACHED_MESH_NUM_X-1);
 804             int slot_y = (cy>>1) & (CACHED_MESH_NUM_Y-1);
 805             chunk_mesh *cm = &cached_chunk_mesh[slot_y][slot_x];
 806             ++chunk_locations;
 807             if (cm->state == STATE_valid && priority >= 0) {
 808                // check if chunk pos actually matches
 809                if (cm->chunk_x != cx || cm->chunk_y != cy) {
 810                   // we have a stale chunk we need to recreate
 811                   free_chunk(slot_x, slot_y); // it probably will have already gotten freed, but just in case
 812                }
 813             }
 814             if (cm->state == STATE_invalid) {
 815                cm->chunk_x = cx;
 816                cm->chunk_y = cy;
 817                cm->state = STATE_needed;
 818             }
 819             cm->priority = priority;
 820          }
 821       }
 822    }
 823
 824    // draw front-to-back
 825    for (rad = 0; rad <= view_dist_in_chunks; rad += 2) {
 826       for (j=-rad; j <= rad; j += 2) {
 827          // if j is +- rad, then iterate i through all values
 828          // if j isn't +-rad, then i should be only -rad & rad
 829          int step = 2;
 830          if (abs(j) != rad)
 831             step = 2*rad;
 832          for (i=-rad; i <= rad; i += step) {
 833             int cx = qchunk_x + i;
 834             int cy = qchunk_y + j;
 835             int slot_x = (cx>>1) & (CACHED_MESH_NUM_X-1);
 836             int slot_y = (cy>>1) & (CACHED_MESH_NUM_Y-1);
 837             chunk_mesh *cm = &cached_chunk_mesh[slot_y][slot_x];
 838             if (cm->state == STATE_valid && cm->priority >= 0) {
 839                ++chunks_considered;
 840                quads_considered += cm->num_quads;
 841                if (is_box_in_frustum(cm->bounds[0], cm->bounds[1])) {
 842                   ++chunks_in_frustum;
 843
 844                   // @TODO if in range
 845                   stbglUniform3fv(uniform_loc[STBVOX_UNIFORM_transform], 3, cm->transform[0]);
 846                   glBindBufferARB(GL_ARRAY_BUFFER_ARB, cm->vbuf);
 847                   glVertexAttribIPointer(0, 1, GL_UNSIGNED_INT, 4, (void*) 0);
 848                   glBindTexture(GL_TEXTURE_BUFFER_ARB, cm->fbuf_tex);
 849                   glDrawArrays(GL_QUADS, 0, cm->num_quads*4);
 850                   quads_rendered += cm->num_quads;
 851
 852                   chunk_storage_rendered += cm->vbuf_size + cm->fbuf_size;
 853                }
 854                chunk_storage_considered += cm->vbuf_size + cm->fbuf_size;
 855             }
 856          }
 857       }
 858    }
 859
 860    stbglDisableVertexAttribArray(0);
 861    glBindBufferARB(GL_ARRAY_BUFFER_ARB, 0);
 862    glActiveTextureARB(GL_TEXTURE0_ARB);
 863
 864    stbglUseProgram(0);
 865    num_meshes_started = 0;
 866
 867    {
 868       #define MAX_QUEUE  8
 869       float highest_priority[MAX_QUEUE];
 870       int highest_i[MAX_QUEUE], highest_j[MAX_QUEUE];
 871       float lowest_priority = view_dist_in_chunks * view_dist_in_chunks * 16 * 16.0f;
 872       int lowest_i = -1, lowest_j = -1;
 873
 874       for (i=0; i < MAX_QUEUE; ++i) {
 875          highest_priority[i] = min_priority;
 876          highest_i[i] = -1;
 877          highest_j[i] = -1;
 878       }
 879
 880       for (j=0; j < CACHED_MESH_NUM_Y; ++j) {
 881          for (i=0; i < CACHED_MESH_NUM_X; ++i) {
 882             chunk_mesh *cm = &cached_chunk_mesh[j][i];
 883             if (cm->state == STATE_valid) {
 884                cm->priority = compute_priority(cm->chunk_x, cm->chunk_y, x, y);
 885                chunk_storage_total += cm->vbuf_size + cm->fbuf_size;
 886                if (cm->priority < lowest_priority) {
 887                   lowest_priority = cm->priority;
 888                   lowest_i = i;
 889                   lowest_j = j;
 890                }
 891             }
 892             if (cm->state == STATE_needed) {
 893                cm->priority = compute_priority(cm->chunk_x, cm->chunk_y, x, y);
 894                if (cm->priority < min_priority)
 895                   cm->state = STATE_invalid;
 896                else if (cm->priority > highest_priority[0]) {
 897                   int k;
 898                   highest_priority[0] = cm->priority;
 899                   highest_i[0] = i;
 900                   highest_j[0] = j;
 901                   // bubble this up to right place
 902                   for (k=0; k < MAX_QUEUE-1; ++k) {
 903                      if (highest_priority[k] > highest_priority[k+1]) {
 904                         highest_priority[k] = highest_priority[k+1];
 905                         highest_priority[k+1] = cm->priority;
 906                         highest_i[k] = highest_i[k+1];
 907                         highest_i[k+1] = i;
 908                         highest_j[k] = highest_j[k+1];
 909                         highest_j[k+1] = j;
 910                      } else {
 911                         break;
 912                      }
 913                   }
 914                }
 915             }
 916          }
 917       }
 918
 919
 920       // I couldn't find any straightforward logic that avoids
 921       // the hysteresis problem of continually creating & freeing
 922       // a block on the margin, so I just don't free a block until
 923       // it's out of range, but this doesn't actually correctly
 924       // handle when the cache is too small for the given range
 925       if (chunk_storage_total >= min_chunk_storage && lowest_i >= 0) {
 926          if (cached_chunk_mesh[lowest_j][lowest_i].priority < -1200) // -1000? 0?
 927             free_chunk(lowest_i, lowest_j);
 928       }
 929
 930       if (chunk_storage_total < max_chunk_storage && highest_i[0] >= 0) {
 931          for (j=MAX_QUEUE-1; j >= 0; --j) {
 932             if (highest_j[0] >= 0) {
 933                chunk_mesh *cm = &cached_chunk_mesh[highest_j[j]][highest_i[j]];
 934                if (request_chunk(cm->chunk_x, cm->chunk_y)) {
 935                   cm->state = STATE_requested;
 936                } else {
 937                   // if we couldn't queue this one, skip the remainder
 938                   break;
 939                }
 940             }
 941          }
 942       }
 943    }
 944
 945    update_meshes_from_render_thread();
 946
 947    num_threads_active = 0;
 948    for (i=0; i < num_mesh_workers; ++i) {
 949       num_threads_active += (mesh_data[i].state == WSTATE_running);
 950    }
 951 }