added stb, more binaryout changes"
[henge/apc.git] / stb / tests / caveview / cave_render.c
1 // This file renders vertex buffers, converts raw meshes
2 // to GL meshes, and manages threads that do the raw-mesh
3 // building (found in cave_mesher.c)
4
5
6 #include "stb_voxel_render.h"
7
8 #define STB_GLEXT_DECLARE "glext_list.h"
9 #include "stb_gl.h"
10 #include "stb_image.h"
11 #include "stb_glprog.h"
12
13 #include "caveview.h"
14 #include "cave_parse.h"
15 #include "stb.h"
16 #include "sdl.h"
17 #include "sdl_thread.h"
18 #include <math.h>
19 #include <assert.h>
20
21 //#define STBVOX_CONFIG_TEX1_EDGE_CLAMP
22
23
24 // currently no dynamic way to set mesh cache size or view distance
25 //#define SHORTVIEW
26
27
28 stbvox_mesh_maker g_mesh_maker;
29
30 GLuint main_prog;
31 GLint uniform_locations[64];
32
33 //#define MAX_QUADS_PER_DRAW (65536 / 4) // assuming 16-bit indices, 4 verts per quad
34 //#define FIXED_INDEX_BUFFER_SIZE (MAX_QUADS_PER_DRAW * 6 * 2) // 16*1024 * 12 == ~192KB
35
36 // while uploading texture data, this holds our each texture
37 #define TEX_SIZE 64
38 uint32 texture[TEX_SIZE][TEX_SIZE];
39
40 GLuint voxel_tex[2];
41
42 // chunk state
enum
{
   STATE_invalid   = 0,  // slot holds no mesh (set by free_chunk)
   STATE_needed    = 1,  // in range; render_caves wants a mesh built for it
   STATE_requested = 2,  // handed to a mesh worker via request_chunk
   STATE_abandoned = 3,
   STATE_valid     = 4,  // mesh has been uploaded (set by upload_mesh_data)
};
51
52 // mesh is 32x32x255 ... this is hardcoded in that
53 // a mesh covers 2x2 minecraft chunks, no #defines for it
54 typedef struct
55 {
56 int state;
57 int chunk_x, chunk_y;
58 int num_quads;
59 float priority;
60 int vbuf_size, fbuf_size;
61
62 float transform[3][3];
63 float bounds[2][3];
64
65 GLuint vbuf;// vbuf_tex;
66 GLuint fbuf, fbuf_tex;
67
68 } chunk_mesh;
69
70 void scale_texture(unsigned char *src, int x, int y, int w, int h)
71 {
72 int i,j,k;
73 assert(w == 256 && h == 256);
74 for (j=0; j < TEX_SIZE; ++j) {
75 for (i=0; i < TEX_SIZE; ++i) {
76 uint32 val=0;
77 for (k=0; k < 4; ++k) {
78 val >>= 8;
79 val += src[ 4*(x+(i>>2)) + 4*w*(y+(j>>2)) + k]<<24;
80 }
81 texture[j][i] = val;
82 }
83 }
84 }
85
86 void build_base_texture(int n)
87 {
88 int x,y;
89 uint32 color = stb_rand() | 0x808080;
90 for (y=0; y<TEX_SIZE; ++y)
91 for (x=0; x<TEX_SIZE; ++x) {
92 texture[y][x] = (color + (stb_rand()&0x1f1f1f))|0xff000000;
93 }
94 }
95
96 void build_overlay_texture(int n)
97 {
98 int x,y;
99 uint32 color = stb_rand();
100 if (color & 16)
101 color = 0xff000000;
102 else
103 color = 0xffffffff;
104 for (y=0; y<TEX_SIZE; ++y)
105 for (x=0; x<TEX_SIZE; ++x) {
106 texture[y][x] = 0;
107 }
108
109 for (y=0; y < TEX_SIZE/8; ++y) {
110 for (x=0; x < TEX_SIZE; ++x) {
111 texture[y][x] = color;
112 texture[TEX_SIZE-1-y][x] = color;
113 texture[x][y] = color;
114 texture[x][TEX_SIZE-1-y] = color;
115 }
116 }
117 }
118
119 // view radius of about 1024 = 2048 columns / 32 columns-per-mesh = 2^11 / 2^5 = 64x64
120 // so we need bigger than 64x64 so we can precache, which means we have to be
121 // non-power-of-two, or we have to be pretty huge
122 #define CACHED_MESH_NUM_X 128
123 #define CACHED_MESH_NUM_Y 128
124
125
126 chunk_mesh cached_chunk_mesh[CACHED_MESH_NUM_Y][CACHED_MESH_NUM_X];
127
128 void free_chunk(int slot_x, int slot_y)
129 {
130 chunk_mesh *cm = &cached_chunk_mesh[slot_y][slot_x];
131 if (cm->state == STATE_valid) {
132 glDeleteTextures(1, &cm->fbuf_tex);
133 glDeleteBuffersARB(1, &cm->vbuf);
134 glDeleteBuffersARB(1, &cm->fbuf);
135 cached_chunk_mesh[slot_y][slot_x].state = STATE_invalid;
136 }
137 }
138
139 void upload_mesh(chunk_mesh *cm, uint8 *build_buffer, uint8 *face_buffer)
140 {
141 glGenBuffersARB(1, &cm->vbuf);
142 glBindBufferARB(GL_ARRAY_BUFFER_ARB, cm->vbuf);
143 glBufferDataARB(GL_ARRAY_BUFFER_ARB, cm->num_quads*4*sizeof(uint32), build_buffer, GL_STATIC_DRAW_ARB);
144 glBindBufferARB(GL_ARRAY_BUFFER_ARB, 0);
145
146 glGenBuffersARB(1, &cm->fbuf);
147 glBindBufferARB(GL_TEXTURE_BUFFER_ARB, cm->fbuf);
148 glBufferDataARB(GL_TEXTURE_BUFFER_ARB, cm->num_quads*sizeof(uint32), face_buffer , GL_STATIC_DRAW_ARB);
149 glBindBufferARB(GL_TEXTURE_BUFFER_ARB, 0);
150
151 glGenTextures(1, &cm->fbuf_tex);
152 glBindTexture(GL_TEXTURE_BUFFER_ARB, cm->fbuf_tex);
153 glTexBufferARB(GL_TEXTURE_BUFFER_ARB, GL_RGBA8UI, cm->fbuf);
154 glBindTexture(GL_TEXTURE_BUFFER_ARB, 0);
155 }
156
157 static void upload_mesh_data(raw_mesh *rm)
158 {
159 int cx = rm->cx;
160 int cy = rm->cy;
161 int slot_x = (cx >> 1) & (CACHED_MESH_NUM_X-1);
162 int slot_y = (cy >> 1) & (CACHED_MESH_NUM_Y-1);
163 chunk_mesh *cm;
164
165 free_chunk(slot_x, slot_y);
166
167 cm = &cached_chunk_mesh[slot_y][slot_x];
168 cm->num_quads = rm->num_quads;
169
170 upload_mesh(cm, rm->build_buffer, rm->face_buffer);
171 cm->vbuf_size = rm->num_quads*4*sizeof(uint32);
172 cm->fbuf_size = rm->num_quads*sizeof(uint32);
173 cm->priority = 100000;
174 cm->chunk_x = cx;
175 cm->chunk_y = cy;
176
177 memcpy(cm->bounds, rm->bounds, sizeof(cm->bounds));
178 memcpy(cm->transform, rm->transform, sizeof(cm->transform));
179
180 // write barrier here
181 cm->state = STATE_valid;
182 }
183
184 GLint uniform_loc[16];
185 float table3[128][3];
186 float table4[64][4];
187 GLint tablei[2];
188
189 float step=0;
190
191 #ifdef SHORTVIEW
192 int view_dist_in_chunks = 50;
193 #else
194 int view_dist_in_chunks = 80;
195 #endif
196
197 void setup_uniforms(float pos[3])
198 {
199 int i,j;
200 step += 1.0f/60.0f;
201 for (i=0; i < STBVOX_UNIFORM_count; ++i) {
202 stbvox_uniform_info raw, *ui=&raw;
203 stbvox_get_uniform_info(&raw, i);
204 uniform_loc[i] = -1;
205
206 if (i == STBVOX_UNIFORM_texscale || i == STBVOX_UNIFORM_texgen || i == STBVOX_UNIFORM_color_table)
207 continue;
208
209 if (ui) {
210 void *data = ui->default_value;
211 uniform_loc[i] = stbgl_find_uniform(main_prog, ui->name);
212 switch (i) {
213 case STBVOX_UNIFORM_face_data:
214 tablei[0] = 2;
215 data = tablei;
216 break;
217
218 case STBVOX_UNIFORM_tex_array:
219 glActiveTextureARB(GL_TEXTURE0_ARB);
220 glBindTexture(GL_TEXTURE_2D_ARRAY_EXT, voxel_tex[0]);
221 glActiveTextureARB(GL_TEXTURE1_ARB);
222 glBindTexture(GL_TEXTURE_2D_ARRAY_EXT, voxel_tex[1]);
223 glActiveTextureARB(GL_TEXTURE0_ARB);
224 tablei[0] = 0;
225 tablei[1] = 1;
226 data = tablei;
227 break;
228
229 case STBVOX_UNIFORM_color_table:
230 data = ui->default_value;
231 ((float *)data)[63*4+3] = 2.0f; // emissive
232 break;
233
234 case STBVOX_UNIFORM_camera_pos:
235 data = table3[0];
236 table3[0][0] = pos[0];
237 table3[0][1] = pos[1];
238 table3[0][2] = pos[2];
239 table3[0][3] = stb_max(0,(float)sin(step*2)*0.125f);
240 break;
241
242 case STBVOX_UNIFORM_ambient: {
243 float bright = 1.0;
244 //float bright = 0.75;
245 float amb[3][3];
246
247 // ambient direction is sky-colored upwards
248 // "ambient" lighting is from above
249 table4[0][0] = 0.3f;
250 table4[0][1] = -0.5f;
251 table4[0][2] = 0.9f;
252
253 amb[1][0] = 0.3f; amb[1][1] = 0.3f; amb[1][2] = 0.3f; // dark-grey
254 amb[2][0] = 1.0; amb[2][1] = 1.0; amb[2][2] = 1.0; // white
255
256 // convert so (table[1]*dot+table[2]) gives
257 // above interpolation
258 // lerp((dot+1)/2, amb[1], amb[2])
259 // amb[1] + (amb[2] - amb[1]) * (dot+1)/2
260 // amb[1] + (amb[2] - amb[1]) * dot/2 + (amb[2]-amb[1])/2
261
262 for (j=0; j < 3; ++j) {
263 table4[1][j] = (amb[2][j] - amb[1][j])/2 * bright;
264 table4[2][j] = (amb[1][j] + amb[2][j])/2 * bright;
265 }
266
267 // fog color
268 table4[3][0] = 0.6f, table4[3][1] = 0.7f, table4[3][2] = 0.9f;
269 table4[3][3] = 1.0f / (view_dist_in_chunks * 16);
270 table4[3][3] *= table4[3][3];
271
272 data = table4;
273 break;
274 }
275 }
276
277 switch (ui->type) {
278 case STBVOX_UNIFORM_TYPE_sampler: stbglUniform1iv(uniform_loc[i], ui->array_length, data); break;
279 case STBVOX_UNIFORM_TYPE_vec2: stbglUniform2fv(uniform_loc[i], ui->array_length, data); break;
280 case STBVOX_UNIFORM_TYPE_vec3: stbglUniform3fv(uniform_loc[i], ui->array_length, data); break;
281 case STBVOX_UNIFORM_TYPE_vec4: stbglUniform4fv(uniform_loc[i], ui->array_length, data); break;
282 }
283 }
284 }
285 }
286
287 GLuint unitex[64], unibuf[64];
288 void make_texture_buffer_for_uniform(int uniform, int slot)
289 {
290 GLenum type;
291 stbvox_uniform_info raw, *ui=&raw;
292 GLint uloc;
293
294 stbvox_get_uniform_info(ui, uniform);
295 uloc = stbgl_find_uniform(main_prog, ui->name);
296
297 if (uniform == STBVOX_UNIFORM_color_table)
298 ((float *)ui->default_value)[63*4+3] = 2.0f; // emissive
299
300 glGenBuffersARB(1, &unibuf[uniform]);
301 glBindBufferARB(GL_ARRAY_BUFFER_ARB, unibuf[uniform]);
302 glBufferDataARB(GL_ARRAY_BUFFER_ARB, ui->array_length * ui->bytes_per_element, ui->default_value, GL_STATIC_DRAW_ARB);
303 glBindBufferARB(GL_ARRAY_BUFFER_ARB, 0);
304
305 glGenTextures(1, &unitex[uniform]);
306 glBindTexture(GL_TEXTURE_BUFFER_ARB, unitex[uniform]);
307 switch (ui->type) {
308 case STBVOX_UNIFORM_TYPE_vec2: type = GL_RG32F; break;
309 case STBVOX_UNIFORM_TYPE_vec3: type = GL_RGB32F; break;
310 case STBVOX_UNIFORM_TYPE_vec4: type = GL_RGBA32F; break;
311 default: assert(0);
312 }
313 glTexBufferARB(GL_TEXTURE_BUFFER_ARB, type, unibuf[uniform]);
314 glBindTexture(GL_TEXTURE_BUFFER_ARB, 0);
315
316 glActiveTextureARB(GL_TEXTURE0 + slot);
317 glBindTexture(GL_TEXTURE_BUFFER_ARB, unitex[uniform]);
318 glActiveTextureARB(GL_TEXTURE0);
319
320 stbglUseProgram(main_prog);
321 stbglUniform1i(uloc, slot);
322 }
323
// upper bounds on the worker counts
#define MAX_MESH_WORKERS          8
#define MAX_CHUNK_LOAD_WORKERS    2

// number of mesh workers actually in use; chosen by prepare_threads()
int num_mesh_workers;
int num_chunk_load_workers;
329
330 typedef struct
331 {
332 int state;
333 int request_cx;
334 int request_cy;
335 int padding[13];
336
337 SDL_sem * request_received;
338
339 SDL_sem * chunk_server_done_processing;
340 int chunk_action;
341 int chunk_request_x;
342 int chunk_request_y;
343 fast_chunk *chunks[4][4];
344
345 int padding2[16];
346 raw_mesh rm;
347 int padding3[16];
348
349 uint8 *build_buffer;
350 uint8 *face_buffer ;
351 } mesh_worker;
352
// worker states, stored in mesh_worker.state
enum
{
   WSTATE_idle       = 0,  // free to accept a request
   WSTATE_requested  = 1,  // request written by request_chunk(), not yet picked up
   WSTATE_running    = 2,  // worker is fetching chunks / building the mesh
   WSTATE_mesh_ready = 3,  // mesh built; waiting for the render thread to upload it
};
360
361 mesh_worker mesh_data[MAX_MESH_WORKERS];
362 int num_meshes_started; // stats
363
364 int request_chunk(int chunk_x, int chunk_y);
365 void update_meshes_from_render_thread(void);
366
// RGBA value for each of the 64 1x1 layers of voxel_tex[1]
unsigned char tex2_data[64][4];

// Fill the RGB part of tex2_data with four 16-entry ramps; each ramp
// varies one channel and holds the other two constant. Alpha is not
// touched here (see set_tex2_alpha).
void init_tex2_gradient(void)
{
   int n;
   for (n=0; n < 16; ++n) {
      unsigned char *ramp0 = tex2_data[n     ];
      unsigned char *ramp1 = tex2_data[n + 16];
      unsigned char *ramp2 = tex2_data[n + 32];
      unsigned char *ramp3 = tex2_data[n + 48];

      // red rises
      ramp0[0] = 64 + 12*n;
      ramp0[1] = 32;
      ramp0[2] = 64;

      // green rises
      ramp1[0] = 255;
      ramp1[1] = 32 + 8*n;
      ramp1[2] = 64;

      // blue rises
      ramp2[0] = 255;
      ramp2[1] = 160;
      ramp2[2] = 64 + 12*n;

      // green rises again
      ramp3[0] = 255;
      ramp3[1] = 160 + 6*n;
      ramp3[2] = 255;
   }
}
390
391 void set_tex2_alpha(float fa)
392 {
393 int i;
394 int a = (int) stb_lerp(fa, 0, 255);
395 if (a < 0) a = 0; else if (a > 255) a = 255;
396 glBindTexture(GL_TEXTURE_2D_ARRAY_EXT, voxel_tex[1]);
397 for (i=0; i < 64; ++i) {
398 tex2_data[i][3] = a;
399 glTexSubImage3DEXT(GL_TEXTURE_2D_ARRAY_EXT, 0, 0,0,i, 1,1,1, GL_RGBA, GL_UNSIGNED_BYTE, tex2_data[i]);
400 }
401 }
402
403 void render_init(void)
404 {
405 int i;
406 char *binds[] = { "attr_vertex", "attr_face", NULL };
407 char *vertex;
408 char *fragment;
409 int w=0,h=0;
410
411 unsigned char *texdata = stbi_load("terrain.png", &w, &h, NULL, 4);
412
413 stbvox_init_mesh_maker(&g_mesh_maker);
414 for (i=0; i < num_mesh_workers; ++i) {
415 stbvox_init_mesh_maker(&mesh_data[i].rm.mm);
416 }
417
418 vertex = stbvox_get_vertex_shader();
419 fragment = stbvox_get_fragment_shader();
420
421 {
422 char error_buffer[1024];
423 char *main_vertex[] = { vertex, NULL };
424 char *main_fragment[] = { fragment, NULL };
425 main_prog = stbgl_create_program(main_vertex, main_fragment, binds, error_buffer, sizeof(error_buffer));
426 if (main_prog == 0) {
427 ods("Compile error for main shader: %s\n", error_buffer);
428 assert(0);
429 exit(1);
430 }
431 }
432 //init_index_buffer();
433
434 make_texture_buffer_for_uniform(STBVOX_UNIFORM_texscale , 3);
435 make_texture_buffer_for_uniform(STBVOX_UNIFORM_texgen , 4);
436 make_texture_buffer_for_uniform(STBVOX_UNIFORM_color_table , 5);
437
438 glGenTextures(2, voxel_tex);
439
440 glBindTexture(GL_TEXTURE_2D_ARRAY_EXT, voxel_tex[0]);
441 glTexImage3DEXT(GL_TEXTURE_2D_ARRAY_EXT, 0, GL_RGBA,
442 TEX_SIZE,TEX_SIZE,256,
443 0,GL_RGBA,GL_UNSIGNED_BYTE,NULL);
444 for (i=0; i < 256; ++i) {
445 if (texdata)
446 scale_texture(texdata, (i&15)*w/16, (h/16)*(i>>4), w,h);
447 else
448 build_base_texture(i);
449 glTexSubImage3DEXT(GL_TEXTURE_2D_ARRAY_EXT, 0, 0,0,i, TEX_SIZE,TEX_SIZE,1, GL_RGBA, GL_UNSIGNED_BYTE, texture[0]);
450 }
451 glTexParameteri(GL_TEXTURE_2D_ARRAY_EXT, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_LINEAR);
452 glTexParameteri(GL_TEXTURE_2D_ARRAY_EXT, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
453 glTexParameteri(GL_TEXTURE_2D_ARRAY_EXT, GL_TEXTURE_MAX_ANISOTROPY_EXT, 16);
454 #ifdef STBVOX_CONFIG_TEX1_EDGE_CLAMP
455 glTexParameteri(GL_TEXTURE_2D_ARRAY_EXT, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
456 glTexParameteri(GL_TEXTURE_2D_ARRAY_EXT, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
457 #endif
458
459 glGenerateMipmapEXT(GL_TEXTURE_2D_ARRAY_EXT);
460
461 glBindTexture(GL_TEXTURE_2D_ARRAY_EXT, voxel_tex[1]);
462 glTexImage3DEXT(GL_TEXTURE_2D_ARRAY_EXT, 0, GL_RGBA,
463 1,1,64,
464 0,GL_RGBA,GL_UNSIGNED_BYTE,NULL);
465 init_tex2_gradient();
466 set_tex2_alpha(0.0);
467 #if 0
468 for (i=0; i < 128; ++i) {
469 //build_overlay_texture(i);
470 glTexSubImage3DEXT(GL_TEXTURE_2D_ARRAY_EXT, 0, 0,0,i, TEX_SIZE,TEX_SIZE,1, GL_RGBA, GL_UNSIGNED_BYTE, texture[0]);
471 }
472 #endif
473 glTexParameteri(GL_TEXTURE_2D_ARRAY_EXT, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_LINEAR);
474 glTexParameteri(GL_TEXTURE_2D_ARRAY_EXT, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
475 glGenerateMipmapEXT(GL_TEXTURE_2D_ARRAY_EXT);
476 }
477
478 void world_init(void)
479 {
480 int a,b,x,y;
481
482 Uint64 start_time, end_time;
483 #ifdef NDEBUG
484 int range = 32;
485 #else
486 int range = 12;
487 #endif
488
489 start_time = SDL_GetPerformanceCounter();
490
491 // iterate in 8x8 clusters of qchunks at a time to get better converted-chunk-cache reuse
492 // than a purely row-by-row ordering is (single-threaded this is a bigger win than
493 // any of the above optimizations were, since it halves zlib/mc-conversion costs)
494 for (x=-range; x <= range; x += 16)
495 for (y=-range; y <= range; y += 16)
496 for (b=y; b < y+16 && b <= range; b += 2)
497 for (a=x; a < x+16 && a <= range; a += 2)
498 while (!request_chunk(a, b)) { // if request fails, all threads are busy
499 update_meshes_from_render_thread();
500 SDL_Delay(1);
501 }
502
503 // wait until all the workers are done,
504 // (this is only needed if we want to time
505 // when the build finishes, or when we want to reset the
506 // cache size; otherwise we could just go ahead and
507 // start rendering whatever we've got)
508 for(;;) {
509 int i;
510 update_meshes_from_render_thread();
511 for (i=0; i < num_mesh_workers; ++i)
512 if (mesh_data[i].state != WSTATE_idle)
513 break;
514 if (i == num_mesh_workers)
515 break;
516 SDL_Delay(3);
517 }
518
519 end_time = SDL_GetPerformanceCounter();
520 ods("Build time: %7.2fs\n", (end_time - start_time) / (float) SDL_GetPerformanceFrequency());
521
522 // don't waste lots of storage on chunk caches once it's finished starting-up;
523 // this was only needed to be this large because we worked in large blocks
524 // to maximize sharing
525 reset_cache_size(32);
526 }
527
528 extern SDL_mutex * chunk_cache_mutex;
529
530 int mesh_worker_handler(void *data)
531 {
532 mesh_worker *mw = data;
533 mw->face_buffer = malloc(FACE_BUFFER_SIZE);
534 mw->build_buffer = malloc(BUILD_BUFFER_SIZE);
535
536 // this loop only works because the compiler can't
537 // tell that the SDL_calls don't access mw->state;
538 // really we should barrier that stuff
539 for(;;) {
540 int i,j;
541 int cx,cy;
542
543 // wait for a chunk request
544 SDL_SemWait(mw->request_received);
545
546 // analyze the chunk request
547 assert(mw->state == WSTATE_requested);
548 cx = mw->request_cx;
549 cy = mw->request_cy;
550
551 // this is inaccurate as it can block while another thread has the cache locked
552 mw->state = WSTATE_running;
553
554 // get the chunks we need (this takes a lock and caches them)
555 for (j=0; j < 4; ++j)
556 for (i=0; i < 4; ++i)
557 mw->chunks[j][i] = get_converted_fastchunk(cx-1 + i, cy-1 + j);
558
559 // build the mesh based on the chunks
560 mw->rm.build_buffer = mw->build_buffer;
561 mw->rm.face_buffer = mw->face_buffer;
562 build_chunk(cx, cy, mw->chunks, &mw->rm);
563 mw->state = WSTATE_mesh_ready;
564 // don't need to notify of this, because it gets polled
565
566 // when done, free the chunks
567
568 // for efficiency we just take the mutex once around the whole thing,
569 // though this spreads the mutex logic over two files
570 SDL_LockMutex(chunk_cache_mutex);
571 for (j=0; j < 4; ++j)
572 for (i=0; i < 4; ++i) {
573 deref_fastchunk(mw->chunks[j][i]);
574 mw->chunks[j][i] = NULL;
575 }
576 SDL_UnlockMutex(chunk_cache_mutex);
577 }
578 return 0;
579 }
580
581 int request_chunk(int chunk_x, int chunk_y)
582 {
583 int i;
584 for (i=0; i < num_mesh_workers; ++i) {
585 mesh_worker *mw = &mesh_data[i];
586 if (mw->state == WSTATE_idle) {
587 mw->request_cx = chunk_x;
588 mw->request_cy = chunk_y;
589 mw->state = WSTATE_requested;
590 SDL_SemPost(mw->request_received);
591 ++num_meshes_started;
592 return 1;
593 }
594 }
595 return 0;
596 }
597
598 void prepare_threads(void)
599 {
600 int i;
601 int num_proc = SDL_GetCPUCount();
602
603 if (num_proc > 6)
604 num_mesh_workers = num_proc/2;
605 else if (num_proc > 4)
606 num_mesh_workers = 4;
607 else
608 num_mesh_workers = num_proc-1;
609
610 // @TODO
611 // Thread usage is probably pretty terrible; need to make a
612 // separate queue of needed chunks, instead of just generating
613 // one request per thread per frame, and a separate queue of
614 // results. (E.g. If it takes 1.5 frames to build mesh, thread
615 // is idle for 0.5 frames.) To fake this for now, I've just
616 // doubled the number of threads to let those serve as a 'queue',
617 // but that's dumb.
618
619 num_mesh_workers *= 2; // try to get better thread usage
620
621 if (num_mesh_workers > MAX_MESH_WORKERS)
622 num_mesh_workers = MAX_MESH_WORKERS;
623
624 for (i=0; i < num_mesh_workers; ++i) {
625 mesh_worker *data = &mesh_data[i];
626 data->request_received = SDL_CreateSemaphore(0);
627 data->chunk_server_done_processing = SDL_CreateSemaphore(0);
628 SDL_CreateThread(mesh_worker_handler, "mesh worker", data);
629 }
630 }
631
632
633 // "better" buffer uploading
634 #if 0
635 if (glBufferStorage) {
636 glDeleteBuffersARB(1, &vb->vbuf);
637 glGenBuffersARB(1, &vb->vbuf);
638
639 glBindBufferARB(GL_ARRAY_BUFFER_ARB, vb->vbuf);
640 glBufferStorage(GL_ARRAY_BUFFER_ARB, sizeof(build_buffer), build_buffer, 0);
641 glBindBufferARB(GL_ARRAY_BUFFER_ARB, 0);
642 } else {
643 glBindBufferARB(GL_ARRAY_BUFFER_ARB, vb->vbuf);
644 glBufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(build_buffer), build_buffer, GL_STATIC_DRAW_ARB);
645 glBindBufferARB(GL_ARRAY_BUFFER_ARB, 0);
646 }
647 #endif
648
649
// plane coefficients; a point (px,py,pz) is tested as x*px + y*py + z*pz + w >= 0
typedef struct
{
   float x;
   float y;
   float z;
   float w;
} plane;

// the six planes filled in by compute_frustum()
static plane frustum[6];
656
// 4x4 double matrix product with the index convention
//    out[r][c] = sum over n of src1[n][r] * src2[c][n]
// Results are written as they are computed, so 'out' should not alias
// either input.
static void matd_mul(double out[4][4], double src1[4][4], double src2[4][4])
{
   int r, c, n;
   for (c=0; c < 4; ++c) {
      for (r=0; r < 4; ++r) {
         double sum = 0;
         for (n=0; n < 4; ++n)
            sum += src1[n][r] * src2[c][n];
         out[r][c] = sum;
      }
   }
}
669
670 // https://fgiesen.wordpress.com/2012/08/31/frustum-planes-from-the-projection-matrix/
671 static void compute_frustum(void)
672 {
673 int i;
674 GLdouble mv[4][4],proj[4][4], mvproj[4][4];
675 glGetDoublev(GL_MODELVIEW_MATRIX , mv[0]);
676 glGetDoublev(GL_PROJECTION_MATRIX, proj[0]);
677 matd_mul(mvproj, proj, mv);
678 for (i=0; i < 4; ++i) {
679 (&frustum[0].x)[i] = (float) (mvproj[3][i] + mvproj[0][i]);
680 (&frustum[1].x)[i] = (float) (mvproj[3][i] - mvproj[0][i]);
681 (&frustum[2].x)[i] = (float) (mvproj[3][i] + mvproj[1][i]);
682 (&frustum[3].x)[i] = (float) (mvproj[3][i] - mvproj[1][i]);
683 (&frustum[4].x)[i] = (float) (mvproj[3][i] + mvproj[2][i]);
684 (&frustum[5].x)[i] = (float) (mvproj[3][i] - mvproj[2][i]);
685 }
686 }
687
688 static int test_plane(plane *p, float x0, float y0, float z0, float x1, float y1, float z1)
689 {
690 // return false if the box is entirely behind the plane
691 float d=0;
692 assert(x0 <= x1 && y0 <= y1 && z0 <= z1);
693 if (p->x > 0) d += x1*p->x; else d += x0*p->x;
694 if (p->y > 0) d += y1*p->y; else d += y0*p->y;
695 if (p->z > 0) d += z1*p->z; else d += z0*p->z;
696 return d + p->w >= 0;
697 }
698
699 static int is_box_in_frustum(float *bmin, float *bmax)
700 {
701 int i;
702 for (i=0; i < 6; ++i)
703 if (!test_plane(&frustum[i], bmin[0], bmin[1], bmin[2], bmax[0], bmax[1], bmax[2]))
704 return 0;
705 return 1;
706 }
707
708 float compute_priority(int cx, int cy, float x, float y)
709 {
710 float distx, disty, dist2;
711 distx = (cx*16+8) - x;
712 disty = (cy*16+8) - y;
713 dist2 = distx*distx + disty*disty;
714 return view_dist_in_chunks*view_dist_in_chunks * 16 * 16 - dist2;
715 }
716
// per-frame statistics, reset at the top of render_caves()
int chunk_locations, chunks_considered, chunks_in_frustum;
int quads_considered, quads_rendered;
int chunk_storage_rendered, chunk_storage_considered, chunk_storage_total;
int update_frustum = 1;

// mesh storage budget in bytes: new meshes are only requested while the
// total is below max_chunk_storage; freeing is only considered once the
// total has reached min_chunk_storage
#ifdef SHORTVIEW
int max_chunk_storage = 450 << 20;
int min_chunk_storage = 350 << 20;
#else
int max_chunk_storage = 900 << 20;
int min_chunk_storage = 800 << 20;
#endif

float min_priority = -500; // this really wants to be in unit space, not squared space

// meshes uploaded during the current render_caves() call
int num_meshes_uploaded;
733
734 void update_meshes_from_render_thread(void)
735 {
736 int i;
737 for (i=0; i < num_mesh_workers; ++i) {
738 mesh_worker *mw = &mesh_data[i];
739 if (mw->state == WSTATE_mesh_ready) {
740 upload_mesh_data(&mw->rm);
741 ++num_meshes_uploaded;
742 mw->state = WSTATE_idle;
743 }
744 }
745 }
746
// defined in another file
extern float tex2_alpha;
extern int global_hack;

// number of workers in WSTATE_running at the end of the last render_caves() call
int num_threads_active;
float chunk_server_activity;
751
752 void render_caves(float campos[3])
753 {
754 float x = campos[0], y = campos[1];
755 int qchunk_x, qchunk_y;
756 int cam_x, cam_y;
757 int i,j, rad;
758
759 compute_frustum();
760
761 chunk_locations = chunks_considered = chunks_in_frustum = 0;
762 quads_considered = quads_rendered = 0;
763 chunk_storage_total = chunk_storage_considered = chunk_storage_rendered = 0;
764
765 cam_x = (int) floor(x+0.5);
766 cam_y = (int) floor(y+0.5);
767
768 qchunk_x = (((int) floor(x)+16) >> 5) << 1;
769 qchunk_y = (((int) floor(y)+16) >> 5) << 1;
770
771 glEnable(GL_ALPHA_TEST);
772 glAlphaFunc(GL_GREATER, 0.5);
773
774 stbglUseProgram(main_prog);
775 setup_uniforms(campos); // set uniforms to default values inefficiently
776 glActiveTextureARB(GL_TEXTURE2_ARB);
777 stbglEnableVertexAttribArray(0);
778
779 {
780 float lighting[2][3] = { { campos[0],campos[1],campos[2] }, { 0.75,0.75,0.65f } };
781 float bright = 8;
782 lighting[1][0] *= bright;
783 lighting[1][1] *= bright;
784 lighting[1][2] *= bright;
785 stbglUniform3fv(stbgl_find_uniform(main_prog, "light_source"), 2, lighting[0]);
786 }
787
788 if (global_hack)
789 set_tex2_alpha(tex2_alpha);
790
791 num_meshes_uploaded = 0;
792 update_meshes_from_render_thread();
793
794 // traverse all in-range chunks and analyze them
795 for (j=-view_dist_in_chunks; j <= view_dist_in_chunks; j += 2) {
796 for (i=-view_dist_in_chunks; i <= view_dist_in_chunks; i += 2) {
797 float priority;
798 int cx = qchunk_x + i;
799 int cy = qchunk_y + j;
800
801 priority = compute_priority(cx, cy, x, y);
802 if (priority >= min_priority) {
803 int slot_x = (cx>>1) & (CACHED_MESH_NUM_X-1);
804 int slot_y = (cy>>1) & (CACHED_MESH_NUM_Y-1);
805 chunk_mesh *cm = &cached_chunk_mesh[slot_y][slot_x];
806 ++chunk_locations;
807 if (cm->state == STATE_valid && priority >= 0) {
808 // check if chunk pos actually matches
809 if (cm->chunk_x != cx || cm->chunk_y != cy) {
810 // we have a stale chunk we need to recreate
811 free_chunk(slot_x, slot_y); // it probably will have already gotten freed, but just in case
812 }
813 }
814 if (cm->state == STATE_invalid) {
815 cm->chunk_x = cx;
816 cm->chunk_y = cy;
817 cm->state = STATE_needed;
818 }
819 cm->priority = priority;
820 }
821 }
822 }
823
824 // draw front-to-back
825 for (rad = 0; rad <= view_dist_in_chunks; rad += 2) {
826 for (j=-rad; j <= rad; j += 2) {
827 // if j is +- rad, then iterate i through all values
828 // if j isn't +-rad, then i should be only -rad & rad
829 int step = 2;
830 if (abs(j) != rad)
831 step = 2*rad;
832 for (i=-rad; i <= rad; i += step) {
833 int cx = qchunk_x + i;
834 int cy = qchunk_y + j;
835 int slot_x = (cx>>1) & (CACHED_MESH_NUM_X-1);
836 int slot_y = (cy>>1) & (CACHED_MESH_NUM_Y-1);
837 chunk_mesh *cm = &cached_chunk_mesh[slot_y][slot_x];
838 if (cm->state == STATE_valid && cm->priority >= 0) {
839 ++chunks_considered;
840 quads_considered += cm->num_quads;
841 if (is_box_in_frustum(cm->bounds[0], cm->bounds[1])) {
842 ++chunks_in_frustum;
843
844 // @TODO if in range
845 stbglUniform3fv(uniform_loc[STBVOX_UNIFORM_transform], 3, cm->transform[0]);
846 glBindBufferARB(GL_ARRAY_BUFFER_ARB, cm->vbuf);
847 glVertexAttribIPointer(0, 1, GL_UNSIGNED_INT, 4, (void*) 0);
848 glBindTexture(GL_TEXTURE_BUFFER_ARB, cm->fbuf_tex);
849 glDrawArrays(GL_QUADS, 0, cm->num_quads*4);
850 quads_rendered += cm->num_quads;
851
852 chunk_storage_rendered += cm->vbuf_size + cm->fbuf_size;
853 }
854 chunk_storage_considered += cm->vbuf_size + cm->fbuf_size;
855 }
856 }
857 }
858 }
859
860 stbglDisableVertexAttribArray(0);
861 glBindBufferARB(GL_ARRAY_BUFFER_ARB, 0);
862 glActiveTextureARB(GL_TEXTURE0_ARB);
863
864 stbglUseProgram(0);
865 num_meshes_started = 0;
866
867 {
868 #define MAX_QUEUE 8
869 float highest_priority[MAX_QUEUE];
870 int highest_i[MAX_QUEUE], highest_j[MAX_QUEUE];
871 float lowest_priority = view_dist_in_chunks * view_dist_in_chunks * 16 * 16.0f;
872 int lowest_i = -1, lowest_j = -1;
873
874 for (i=0; i < MAX_QUEUE; ++i) {
875 highest_priority[i] = min_priority;
876 highest_i[i] = -1;
877 highest_j[i] = -1;
878 }
879
880 for (j=0; j < CACHED_MESH_NUM_Y; ++j) {
881 for (i=0; i < CACHED_MESH_NUM_X; ++i) {
882 chunk_mesh *cm = &cached_chunk_mesh[j][i];
883 if (cm->state == STATE_valid) {
884 cm->priority = compute_priority(cm->chunk_x, cm->chunk_y, x, y);
885 chunk_storage_total += cm->vbuf_size + cm->fbuf_size;
886 if (cm->priority < lowest_priority) {
887 lowest_priority = cm->priority;
888 lowest_i = i;
889 lowest_j = j;
890 }
891 }
892 if (cm->state == STATE_needed) {
893 cm->priority = compute_priority(cm->chunk_x, cm->chunk_y, x, y);
894 if (cm->priority < min_priority)
895 cm->state = STATE_invalid;
896 else if (cm->priority > highest_priority[0]) {
897 int k;
898 highest_priority[0] = cm->priority;
899 highest_i[0] = i;
900 highest_j[0] = j;
901 // bubble this up to right place
902 for (k=0; k < MAX_QUEUE-1; ++k) {
903 if (highest_priority[k] > highest_priority[k+1]) {
904 highest_priority[k] = highest_priority[k+1];
905 highest_priority[k+1] = cm->priority;
906 highest_i[k] = highest_i[k+1];
907 highest_i[k+1] = i;
908 highest_j[k] = highest_j[k+1];
909 highest_j[k+1] = j;
910 } else {
911 break;
912 }
913 }
914 }
915 }
916 }
917 }
918
919
920 // I couldn't find any straightforward logic that avoids
921 // the hysteresis problem of continually creating & freeing
922 // a block on the margin, so I just don't free a block until
923 // it's out of range, but this doesn't actually correctly
924 // handle when the cache is too small for the given range
925 if (chunk_storage_total >= min_chunk_storage && lowest_i >= 0) {
926 if (cached_chunk_mesh[lowest_j][lowest_i].priority < -1200) // -1000? 0?
927 free_chunk(lowest_i, lowest_j);
928 }
929
930 if (chunk_storage_total < max_chunk_storage && highest_i[0] >= 0) {
931 for (j=MAX_QUEUE-1; j >= 0; --j) {
932 if (highest_j[0] >= 0) {
933 chunk_mesh *cm = &cached_chunk_mesh[highest_j[j]][highest_i[j]];
934 if (request_chunk(cm->chunk_x, cm->chunk_y)) {
935 cm->state = STATE_requested;
936 } else {
937 // if we couldn't queue this one, skip the remainder
938 break;
939 }
940 }
941 }
942 }
943 }
944
945 update_meshes_from_render_thread();
946
947 num_threads_active = 0;
948 for (i=0; i < num_mesh_workers; ++i) {
949 num_threads_active += (mesh_data[i].state == WSTATE_running);
950 }
951 }