comments updated
[henge/apc.git] / stb / stb_vorbis.c
1 // Ogg Vorbis audio decoder - v1.09 - public domain
2 // http://nothings.org/stb_vorbis/
3 //
4 // Original version written by Sean Barrett in 2007.
5 //
6 // Originally sponsored by RAD Game Tools. Seeking sponsored
7 // by Phillip Bennefall, Marc Andersen, Aaron Baker, Elias Software,
8 // Aras Pranckevicius, and Sean Barrett.
9 //
10 // LICENSE
11 //
12 // This software is dual-licensed to the public domain and under the following
13 // license: you are granted a perpetual, irrevocable license to copy, modify,
14 // publish, and distribute this file as you see fit.
15 //
16 // No warranty for any purpose is expressed or implied by the author (nor
17 // by RAD Game Tools). Report bugs and send enhancements to the author.
18 //
19 // Limitations:
20 //
21 // - floor 0 not supported (used in old ogg vorbis files pre-2004)
22 // - lossless sample-truncation at beginning ignored
23 // - cannot concatenate multiple vorbis streams
24 // - sample positions are 32-bit, limiting seekable 192Khz
25 // files to around 6 hours (Ogg supports 64-bit)
26 //
27 // Feature contributors:
28 // Dougall Johnson (sample-exact seeking)
29 //
30 // Bugfix/warning contributors:
31 // Terje Mathisen Niklas Frykholm Andy Hill
32 // Casey Muratori John Bolton Gargaj
33 // Laurent Gomila Marc LeBlanc Ronny Chevalier
34 // Bernhard Wodo Evan Balster alxprd@github
35 // Tom Beaumont Ingo Leitgeb Nicolas Guillemot
36 // Phillip Bennefall Rohit Thiago Goulart
37 // manxorist@github saga musix
38 //
39 // Partial history:
40 // 1.09 - 2016/04/04 - back out 'truncation of last frame' fix from previous version
41 // 1.08 - 2016/04/02 - warnings; setup memory leaks; truncation of last frame
42 // 1.07 - 2015/01/16 - fixes for crashes on invalid files; warning fixes; const
43 // 1.06 - 2015/08/31 - full, correct support for seeking API (Dougall Johnson)
44 // some crash fixes when out of memory or with corrupt files
45 // fix some inappropriately signed shifts
46 // 1.05 - 2015/04/19 - don't define __forceinline if it's redundant
47 // 1.04 - 2014/08/27 - fix missing const-correct case in API
48 // 1.03 - 2014/08/07 - warning fixes
49 // 1.02 - 2014/07/09 - declare qsort comparison as explicitly _cdecl in Windows
50 // 1.01 - 2014/06/18 - fix stb_vorbis_get_samples_float (interleaved was correct)
51 // 1.0 - 2014/05/26 - fix memory leaks; fix warnings; fix bugs in >2-channel;
52 // (API change) report sample rate for decode-full-file funcs
53 //
54 // See end of file for full version history.
55
56
57 //////////////////////////////////////////////////////////////////////////////
58 //
59 // HEADER BEGINS HERE
60 //
61
62 #ifndef STB_VORBIS_INCLUDE_STB_VORBIS_H
63 #define STB_VORBIS_INCLUDE_STB_VORBIS_H
64
65 #if defined(STB_VORBIS_NO_CRT) && !defined(STB_VORBIS_NO_STDIO)
66 #define STB_VORBIS_NO_STDIO 1
67 #endif
68
69 #ifndef STB_VORBIS_NO_STDIO
70 #include <stdio.h>
71 #endif
72
73 #ifdef __cplusplus
74 extern "C" {
75 #endif
76
77 /////////// THREAD SAFETY
78
79 // Individual stb_vorbis* handles are not thread-safe; you cannot decode from
80 // them from multiple threads at the same time. However, you can have multiple
81 // stb_vorbis* handles and decode from them independently in multiple threads.
82
83
84 /////////// MEMORY ALLOCATION
85
86 // normally stb_vorbis uses malloc() to allocate memory at startup,
87 // and alloca() to allocate temporary memory during a frame on the
88 // stack. (Memory consumption will depend on the amount of setup
89 // data in the file and how you set the compile flags for speed
90 // vs. size. In my test files the maximal-size usage is ~150KB.)
91 //
92 // You can modify the wrapper functions in the source (setup_malloc,
93 // setup_temp_malloc, temp_malloc) to change this behavior, or you
94 // can use a simpler allocation model: you pass in a buffer from
95 // which stb_vorbis will allocate _all_ its memory (including the
96 // temp memory). "open" may fail with a VORBIS_outofmem if you
97 // do not pass in enough data; there is no way to determine how
98 // much you do need except to succeed (at which point you can
99 // query get_info to find the exact amount required. yes I know
100 // this is lame).
101 //
102 // If you pass in a non-NULL buffer of the type below, allocation
103 // will occur from it as described above. Otherwise just pass NULL
104 // to use malloc()/alloca()
105
// Caller-supplied buffer for the "simpler allocation model": when a non-NULL
// pointer to one of these is handed to an open call, stb_vorbis allocates
// _all_ of its memory (including temp memory) out of this buffer. Passing
// NULL instead selects malloc()/alloca().
typedef struct
{
   char *alloc_buffer;                  // start of the buffer to allocate from
   int   alloc_buffer_length_in_bytes;  // size of that buffer, in bytes
} stb_vorbis_alloc;
111
112
113 /////////// FUNCTIONS USEABLE WITH ALL INPUT MODES
114
115 typedef struct stb_vorbis stb_vorbis;
116
// General stream information, returned by value from stb_vorbis_get_info().
typedef struct
{
   unsigned int sample_rate;
   int          channels;

   // memory figures; after a successful open these tell you how much a
   // caller-provided allocation buffer needs to hold
   unsigned int setup_memory_required;
   unsigned int setup_temp_memory_required;
   unsigned int temp_memory_required;

   // NOTE(review): presumably the largest per-channel sample count a single
   // decoded frame can produce -- confirm against stb_vorbis_get_info()
   int          max_frame_size;
} stb_vorbis_info;
128
129 // get general information about the file
130 extern stb_vorbis_info stb_vorbis_get_info(stb_vorbis *f);
131
132 // get the last error detected (clears it, too)
133 extern int stb_vorbis_get_error(stb_vorbis *f);
134
135 // close an ogg vorbis file and free all memory in use
136 extern void stb_vorbis_close(stb_vorbis *f);
137
138 // this function returns the offset (in samples) from the beginning of the
139 // file that will be returned by the next decode, if it is known, or -1
140 // otherwise. after a flush_pushdata() call, this may take a while before
141 // it becomes valid again.
142 // NOT WORKING YET after a seek with PULLDATA API
143 extern int stb_vorbis_get_sample_offset(stb_vorbis *f);
144
145 // returns the current seek point within the file, or offset from the beginning
146 // of the memory buffer. In pushdata mode it returns 0.
147 extern unsigned int stb_vorbis_get_file_offset(stb_vorbis *f);
148
149 /////////// PUSHDATA API
150
151 #ifndef STB_VORBIS_NO_PUSHDATA_API
152
153 // this API allows you to get blocks of data from any source and hand
154 // them to stb_vorbis. you have to buffer them; stb_vorbis will tell
155 // you how much it used, and you have to give it the rest next time;
156 // and stb_vorbis may not have enough data to work with and you will
157 // need to give it the same data again PLUS more. Note that the Vorbis
158 // specification does not bound the size of an individual frame.
159
160 extern stb_vorbis *stb_vorbis_open_pushdata(
161 const unsigned char * datablock, int datablock_length_in_bytes,
162 int *datablock_memory_consumed_in_bytes,
163 int *error,
164 const stb_vorbis_alloc *alloc_buffer);
165 // create a vorbis decoder by passing in the initial data block containing
166 // the ogg&vorbis headers (you don't need to parse them, just provide
167 // the first N bytes of the file--you're told if it's not enough, see below)
168 // on success, returns an stb_vorbis *, does not set error, returns the amount of
169 // data parsed/consumed on this call in *datablock_memory_consumed_in_bytes;
170 // on failure, returns NULL and sets *error, does not change *datablock_memory_consumed
171 // if returns NULL and *error is VORBIS_need_more_data, then the input block was
172 // incomplete and you need to pass in a larger block from the start of the file
173
174 extern int stb_vorbis_decode_frame_pushdata(
175 stb_vorbis *f,
176 const unsigned char *datablock, int datablock_length_in_bytes,
177 int *channels, // place to write number of float * buffers
178 float ***output, // place to write float ** array of float * buffers
179 int *samples // place to write number of output samples
180 );
181 // decode a frame of audio sample data if possible from the passed-in data block
182 //
183 // return value: number of bytes we used from datablock
184 //
185 // possible cases:
186 // 0 bytes used, 0 samples output (need more data)
187 // N bytes used, 0 samples output (resynching the stream, keep going)
188 // N bytes used, M samples output (one frame of data)
189 // note that after opening a file, you will ALWAYS get one N-bytes,0-sample
190 // frame, because Vorbis always "discards" the first frame.
191 //
192 // Note that on resynch, stb_vorbis will rarely consume all of the buffer,
193 // instead only datablock_length_in_bytes-3 or less. This is because it wants
194 // to avoid missing parts of a page header if they cross a datablock boundary,
195 // without writing state-machiney code to record a partial detection.
196 //
197 // The number of channels returned are stored in *channels (which can be
198 // NULL--it is always the same as the number of channels reported by
199 // get_info). *output will contain an array of float* buffers, one per
200 // channel. In other words, (*output)[0][0] contains the first sample from
201 // the first channel, and (*output)[1][0] contains the first sample from
202 // the second channel.
203
204 extern void stb_vorbis_flush_pushdata(stb_vorbis *f);
205 // inform stb_vorbis that your next datablock will not be contiguous with
206 // previous ones (e.g. you've seeked in the data); future attempts to decode
207 // frames will cause stb_vorbis to resynchronize (as noted above), and
208 // once it sees a valid Ogg page (typically 4-8KB, as large as 64KB), it
209 // will begin decoding the _next_ frame.
210 //
211 // if you want to seek using pushdata, you need to seek in your file, then
212 // call stb_vorbis_flush_pushdata(), then start calling decoding, then once
213 // decoding is returning you data, call stb_vorbis_get_sample_offset, and
214 // if you don't like the result, seek your file again and repeat.
215 #endif
216
217
218 ////////// PULLING INPUT API
219
220 #ifndef STB_VORBIS_NO_PULLDATA_API
221 // This API assumes stb_vorbis is allowed to pull data from a source--
222 // either a block of memory containing the _entire_ vorbis stream, or a
223 // FILE * that you or it create, or possibly some other reading mechanism
224 // if you go modify the source to replace the FILE * case with some kind
225 // of callback to your code. (But if you don't support seeking, you may
226 // just want to go ahead and use pushdata.)
227
228 #if !defined(STB_VORBIS_NO_STDIO) && !defined(STB_VORBIS_NO_INTEGER_CONVERSION)
229 extern int stb_vorbis_decode_filename(const char *filename, int *channels, int *sample_rate, short **output);
230 #endif
231 #if !defined(STB_VORBIS_NO_INTEGER_CONVERSION)
232 extern int stb_vorbis_decode_memory(const unsigned char *mem, int len, int *channels, int *sample_rate, short **output);
233 #endif
234 // decode an entire file and output the data interleaved into a malloc()ed
235 // buffer stored in *output. The return value is the number of samples
236 // decoded, or -1 if the file could not be opened or was not an ogg vorbis file.
237 // When you're done with it, just free() the pointer returned in *output.
238
239 extern stb_vorbis * stb_vorbis_open_memory(const unsigned char *data, int len,
240 int *error, const stb_vorbis_alloc *alloc_buffer);
241 // create an ogg vorbis decoder from an ogg vorbis stream in memory (note
242 // this must be the entire stream!). on failure, returns NULL and sets *error
243
244 #ifndef STB_VORBIS_NO_STDIO
245 extern stb_vorbis * stb_vorbis_open_filename(const char *filename,
246 int *error, const stb_vorbis_alloc *alloc_buffer);
247 // create an ogg vorbis decoder from a filename via fopen(). on failure,
248 // returns NULL and sets *error (possibly to VORBIS_file_open_failure).
249
250 extern stb_vorbis * stb_vorbis_open_file(FILE *f, int close_handle_on_close,
251 int *error, const stb_vorbis_alloc *alloc_buffer);
252 // create an ogg vorbis decoder from an open FILE *, looking for a stream at
253 // the _current_ seek point (ftell). on failure, returns NULL and sets *error.
254 // note that stb_vorbis must "own" this stream; if you seek it in between
255 // calls to stb_vorbis, it will become confused. Moreover, if you attempt to
256 // perform stb_vorbis_seek_*() operations on this file, it will assume it
257 // owns the _entire_ rest of the file after the start point. Use the next
258 // function, stb_vorbis_open_file_section(), to limit it.
259
260 extern stb_vorbis * stb_vorbis_open_file_section(FILE *f, int close_handle_on_close,
261 int *error, const stb_vorbis_alloc *alloc_buffer, unsigned int len);
262 // create an ogg vorbis decoder from an open FILE *, looking for a stream at
263 // the _current_ seek point (ftell); the stream will be of length 'len' bytes.
264 // on failure, returns NULL and sets *error. note that stb_vorbis must "own"
265 // this stream; if you seek it in between calls to stb_vorbis, it will become
266 // confused.
267 #endif
268
269 extern int stb_vorbis_seek_frame(stb_vorbis *f, unsigned int sample_number);
270 extern int stb_vorbis_seek(stb_vorbis *f, unsigned int sample_number);
271 // these functions seek in the Vorbis file to (approximately) 'sample_number'.
272 // after calling seek_frame(), the next call to get_frame_*() will include
273 // the specified sample. after calling stb_vorbis_seek(), the next call to
274 // stb_vorbis_get_samples_* will start with the specified sample. If you
275 // do not need to seek to EXACTLY the target sample when using get_samples_*,
276 // you can also use seek_frame().
277
278 extern void stb_vorbis_seek_start(stb_vorbis *f);
279 // this function is equivalent to stb_vorbis_seek(f,0)
280
281 extern unsigned int stb_vorbis_stream_length_in_samples(stb_vorbis *f);
282 extern float stb_vorbis_stream_length_in_seconds(stb_vorbis *f);
283 // these functions return the total length of the vorbis stream
284
285 extern int stb_vorbis_get_frame_float(stb_vorbis *f, int *channels, float ***output);
286 // decode the next frame and return the number of samples. the number of
287 // channels returned are stored in *channels (which can be NULL--it is always
288 // the same as the number of channels reported by get_info). *output will
289 // contain an array of float* buffers, one per channel. These outputs will
290 // be overwritten on the next call to stb_vorbis_get_frame_*.
291 //
292 // You generally should not intermix calls to stb_vorbis_get_frame_*()
293 // and stb_vorbis_get_samples_*(), since the latter calls the former.
294
295 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
296 extern int stb_vorbis_get_frame_short_interleaved(stb_vorbis *f, int num_c, short *buffer, int num_shorts);
297 extern int stb_vorbis_get_frame_short (stb_vorbis *f, int num_c, short **buffer, int num_samples);
298 #endif
299 // decode the next frame and return the number of *samples* per channel.
300 // Note that for interleaved data, you pass in the number of shorts (the
301 // size of your array), but the return value is the number of samples per
302 // channel, not the total number of samples.
303 //
304 // The data is coerced to the number of channels you request according to the
305 // channel coercion rules (see below). You must pass in the size of your
306 // buffer(s) so that stb_vorbis will not overwrite the end of the buffer.
307 // The maximum buffer size needed can be gotten from get_info(); however,
308 // the Vorbis I specification implies an absolute maximum of 4096 samples
309 // per channel.
310
311 // Channel coercion rules:
312 // Let M be the number of channels requested, and N the number of channels present,
313 // and Cn be the nth channel; let stereo L be the sum of all L and center channels,
314 // and stereo R be the sum of all R and center channels (channel assignment from the
315 // vorbis spec).
316 // M N output
317 // 1 k sum(Ck) for all k
318 // 2 * stereo L, stereo R
319 // k l k > l, the first l channels, then 0s
320 // k l k <= l, the first k channels
321 // Note that this is not _good_ surround etc. mixing at all! It's just so
322 // you get something useful.
323
324 extern int stb_vorbis_get_samples_float_interleaved(stb_vorbis *f, int channels, float *buffer, int num_floats);
325 extern int stb_vorbis_get_samples_float(stb_vorbis *f, int channels, float **buffer, int num_samples);
326 // gets num_samples samples, not necessarily on a frame boundary--this requires
327 // buffering so you have to supply the buffers. DOES NOT APPLY THE COERCION RULES.
328 // Returns the number of samples stored per channel; it may be less than requested
329 // at the end of the file. If there are no more samples in the file, returns 0.
330
331 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
332 extern int stb_vorbis_get_samples_short_interleaved(stb_vorbis *f, int channels, short *buffer, int num_shorts);
333 extern int stb_vorbis_get_samples_short(stb_vorbis *f, int channels, short **buffer, int num_samples);
334 #endif
335 // gets num_samples samples, not necessarily on a frame boundary--this requires
336 // buffering so you have to supply the buffers. Applies the coercion rules above
337 // to produce 'channels' channels. Returns the number of samples stored per channel;
338 // it may be less than requested at the end of the file. If there are no more
339 // samples in the file, returns 0.
340
341 #endif
342
343 //////// ERROR CODES
344
// Error codes reported through stb_vorbis_get_error() and the 'error'
// out-parameters of the open functions. Every enumerator is spelled out
// with its numeric value so the gaps between groups are visible.
enum STBVorbisError
{
   VORBIS__no_error                         = 0,

   VORBIS_need_more_data                    = 1,   // not a real error

   VORBIS_invalid_api_mixing                = 2,   // can't mix API modes
   VORBIS_outofmem                          = 3,   // not enough memory
   VORBIS_feature_not_supported             = 4,   // uses floor 0
   VORBIS_too_many_channels                 = 5,   // STB_VORBIS_MAX_CHANNELS is too small
   VORBIS_file_open_failure                 = 6,   // fopen() failed
   VORBIS_seek_without_length               = 7,   // can't seek in unknown-length file

   VORBIS_unexpected_eof                    = 10,  // file is truncated?
   VORBIS_seek_invalid                      = 11,  // seek past EOF

   // decoding errors (corrupt/invalid stream) -- you probably
   // don't care about the exact details of these

   // vorbis errors:
   VORBIS_invalid_setup                     = 20,
   VORBIS_invalid_stream                    = 21,

   // ogg errors:
   VORBIS_missing_capture_pattern           = 30,
   VORBIS_invalid_stream_structure_version  = 31,
   VORBIS_continued_packet_flag_invalid     = 32,
   VORBIS_incorrect_stream_serial_number    = 33,
   VORBIS_invalid_first_page                = 34,
   VORBIS_bad_packet_type                   = 35,
   VORBIS_cant_find_last_page               = 36,
   VORBIS_seek_failed                       = 37
};
378
379
380 #ifdef __cplusplus
381 }
382 #endif
383
384 #endif // STB_VORBIS_INCLUDE_STB_VORBIS_H
385 //
386 // HEADER ENDS HERE
387 //
388 //////////////////////////////////////////////////////////////////////////////
389
390 #ifndef STB_VORBIS_HEADER_ONLY
391
392 // global configuration settings (e.g. set these in the project/makefile),
393 // or just set them in this file at the top (although ideally the first few
394 // should be visible when the header file is compiled too, although it's not
395 // crucial)
396
397 // STB_VORBIS_NO_PUSHDATA_API
398 // does not compile the code for the various stb_vorbis_*_pushdata()
399 // functions
400 // #define STB_VORBIS_NO_PUSHDATA_API
401
402 // STB_VORBIS_NO_PULLDATA_API
403 // does not compile the code for the non-pushdata APIs
404 // #define STB_VORBIS_NO_PULLDATA_API
405
406 // STB_VORBIS_NO_STDIO
407 // does not compile the code for the APIs that use FILE *s internally
408 // or externally (implied by STB_VORBIS_NO_PULLDATA_API)
409 // #define STB_VORBIS_NO_STDIO
410
411 // STB_VORBIS_NO_INTEGER_CONVERSION
412 // does not compile the code for converting audio sample data from
413 // float to integer (implied by STB_VORBIS_NO_PULLDATA_API)
414 // #define STB_VORBIS_NO_INTEGER_CONVERSION
415
416 // STB_VORBIS_NO_FAST_SCALED_FLOAT
417 // does not use a fast float-to-int trick to accelerate float-to-int on
418 // most platforms which requires endianness be defined correctly.
419 //#define STB_VORBIS_NO_FAST_SCALED_FLOAT
420
421
422 // STB_VORBIS_MAX_CHANNELS [number]
423 // globally define this to the maximum number of channels you need.
424 // The spec does not put a restriction on channels except that
425 // the count is stored in a byte, so 255 is the hard limit.
426 // Reducing this saves about 16 bytes per value, so using 16 saves
427 // (255-16)*16 or around 4KB. Plus any other memory usage
428 // I forgot to account for. Can probably go as low as 8 (7.1 audio),
429 // 6 (5.1 audio), or 2 (stereo only).
430 #ifndef STB_VORBIS_MAX_CHANNELS
431 #define STB_VORBIS_MAX_CHANNELS 16 // enough for anyone?
432 #endif
433
434 // STB_VORBIS_PUSHDATA_CRC_COUNT [number]
435 // after a flush_pushdata(), stb_vorbis begins scanning for the
436 // next valid page, without backtracking. when it finds something
437 // that looks like a page, it streams through it and verifies its
438 // CRC32. Should that validation fail, it keeps scanning. But it's
439 // possible that _while_ streaming through to check the CRC32 of
440 // one candidate page, it sees another candidate page. This #define
441 // determines how many "overlapping" candidate pages it can search
442 // at once. Note that "real" pages are typically ~4KB to ~8KB, whereas
443 // garbage pages could be as big as 64KB, but probably average ~16KB.
444 // So don't hose ourselves by scanning an apparent 64KB page and
445 // missing a ton of real ones in the interim; so minimum of 2
446 #ifndef STB_VORBIS_PUSHDATA_CRC_COUNT
447 #define STB_VORBIS_PUSHDATA_CRC_COUNT 4
448 #endif
449
450 // STB_VORBIS_FAST_HUFFMAN_LENGTH [number]
451 // sets the log size of the huffman-acceleration table. Maximum
452 // supported value is 24. with larger numbers, more decodings are O(1),
453 // but the table size is larger so worse cache missing, so you'll have
454 // to probe (and try multiple ogg vorbis files) to find the sweet spot.
455 #ifndef STB_VORBIS_FAST_HUFFMAN_LENGTH
456 #define STB_VORBIS_FAST_HUFFMAN_LENGTH 10
457 #endif
458
459 // STB_VORBIS_FAST_BINARY_LENGTH [number]
460 // sets the log size of the binary-search acceleration table. this
461 // is used in similar fashion to the fast-huffman size to set initial
462 // parameters for the binary search
463
464 // STB_VORBIS_FAST_HUFFMAN_INT
465 // The fast huffman tables are much more efficient if they can be
466 // stored as 16-bit results instead of 32-bit results. This restricts
467 // the codebooks to having only 65535 possible outcomes, though.
468 // (At least, accelerated by the huffman table.)
469 #ifndef STB_VORBIS_FAST_HUFFMAN_INT
470 #define STB_VORBIS_FAST_HUFFMAN_SHORT
471 #endif
472
473 // STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
474 // If the 'fast huffman' search doesn't succeed, then stb_vorbis falls
475 // back on binary searching for the correct one. This requires storing
476 // extra tables with the huffman codes in sorted order. Defining this
477 // symbol trades off space for speed by forcing a linear search in the
478 // non-fast case, except for "sparse" codebooks.
479 // #define STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
480
481 // STB_VORBIS_DIVIDES_IN_RESIDUE
482 // stb_vorbis precomputes the result of the scalar residue decoding
483 // that would otherwise require a divide per chunk. you can trade off
484 // space for time by defining this symbol.
485 // #define STB_VORBIS_DIVIDES_IN_RESIDUE
486
487 // STB_VORBIS_DIVIDES_IN_CODEBOOK
488 // vorbis VQ codebooks can be encoded two ways: with every case explicitly
489 // stored, or with all elements being chosen from a small range of values,
490 // and all values possible in all elements. By default, stb_vorbis expands
491 // this latter kind out to look like the former kind for ease of decoding,
492 // because otherwise an integer divide-per-vector-element is required to
493 // unpack the index. If you define STB_VORBIS_DIVIDES_IN_CODEBOOK, you can
494 // trade off storage for speed.
495 //#define STB_VORBIS_DIVIDES_IN_CODEBOOK
496
497 #ifdef STB_VORBIS_CODEBOOK_SHORTS
498 #error "STB_VORBIS_CODEBOOK_SHORTS is no longer supported as it produced incorrect results for some input formats"
499 #endif
500
501 // STB_VORBIS_DIVIDE_TABLE
502 // this replaces small integer divides in the floor decode loop with
503 // table lookups. made less than 1% difference, so disabled by default.
504
505 // STB_VORBIS_NO_INLINE_DECODE
506 // disables the inlining of the scalar codebook fast-huffman decode.
507 // might save a little codespace; useful for debugging
508 // #define STB_VORBIS_NO_INLINE_DECODE
509
510 // STB_VORBIS_NO_DEFER_FLOOR
511 // Normally we only decode the floor without synthesizing the actual
512 // full curve. We can instead synthesize the curve immediately. This
513 // requires more memory and is very likely slower, so I don't think
514 // you'd ever want to do it except for debugging.
515 // #define STB_VORBIS_NO_DEFER_FLOOR
516
517
518
519
520 //////////////////////////////////////////////////////////////////////////////
521
522 #ifdef STB_VORBIS_NO_PULLDATA_API
523 #define STB_VORBIS_NO_INTEGER_CONVERSION
524 #define STB_VORBIS_NO_STDIO
525 #endif
526
527 #if defined(STB_VORBIS_NO_CRT) && !defined(STB_VORBIS_NO_STDIO)
528 #define STB_VORBIS_NO_STDIO 1
529 #endif
530
531 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
532 #ifndef STB_VORBIS_NO_FAST_SCALED_FLOAT
533
534 // only need endianness for fast-float-to-int, which we don't
535 // use for pushdata
536
537 #ifndef STB_VORBIS_BIG_ENDIAN
538 #define STB_VORBIS_ENDIAN 0
539 #else
540 #define STB_VORBIS_ENDIAN 1
541 #endif
542
543 #endif
544 #endif
545
546
547 #ifndef STB_VORBIS_NO_STDIO
548 #include <stdio.h>
549 #endif
550
551 #ifndef STB_VORBIS_NO_CRT
552 #include <stdlib.h>
553 #include <string.h>
554 #include <assert.h>
555 #include <math.h>
556
557 // find definition of alloca if it's not in stdlib.h:
558 #ifdef _MSC_VER
559 #include <malloc.h>
560 #endif
561 #if defined(__linux__) || defined(__linux) || defined(__EMSCRIPTEN__)
562 #include <alloca.h>
563 #endif
564 #else // STB_VORBIS_NO_CRT
// CRT-free build: no C runtime allocator is available, so the heap entry
// points are stubbed out. malloc() and realloc() always evaluate to 0 and
// free() is a no-op.
#ifndef NULL
#define NULL 0
#endif
#define malloc(s)      0
#define free(s)        ((void) 0)
// realloc takes (pointer, size); a one-parameter stub would make any
// two-argument call fail to preprocess
#define realloc(p,s)   0
569 #endif // STB_VORBIS_NO_CRT
570
571 #include <limits.h>
572
573 #ifdef __MINGW32__
574 // eff you mingw:
575 // "fixed":
576 // http://sourceforge.net/p/mingw-w64/mailman/message/32882927/
577 // "no that broke the build, reverted, who cares about C":
578 // http://sourceforge.net/p/mingw-w64/mailman/message/32890381/
579 #ifdef __forceinline
580 #undef __forceinline
581 #endif
582 #define __forceinline
583 #elif !defined(_MSC_VER)
584 #if __GNUC__
585 #define __forceinline inline
586 #else
587 #define __forceinline
588 #endif
589 #endif
590
591 #if STB_VORBIS_MAX_CHANNELS > 256
592 #error "Value of STB_VORBIS_MAX_CHANNELS outside of allowed range"
593 #endif
594
595 #if STB_VORBIS_FAST_HUFFMAN_LENGTH > 24
596 #error "Value of STB_VORBIS_FAST_HUFFMAN_LENGTH outside of allowed range"
597 #endif
598
599
600 #if 0
601 #include <crtdbg.h>
602 #define CHECK(f) _CrtIsValidHeapPointer(f->channel_buffers[1])
603 #else
604 #define CHECK(f) ((void) 0)
605 #endif
606
607 #define MAX_BLOCKSIZE_LOG 13 // from specification
608 #define MAX_BLOCKSIZE (1 << MAX_BLOCKSIZE_LOG)
609
610
// Short names for the sized integer types used throughout the decoder,
// grouped by width (signed first, then unsigned).
typedef signed   char   int8;
typedef unsigned char   uint8;
typedef signed   short  int16;
typedef unsigned short  uint16;
typedef signed   int    int32;
typedef unsigned int    uint32;
617
618 #ifndef TRUE
619 #define TRUE 1
620 #define FALSE 0
621 #endif
622
623 typedef float codetype;
624
625 // @NOTE
626 //
627 // Some arrays below are tagged "//varies", which means it's actually
628 // a variable-sized piece of data, but rather than malloc I assume it's
629 // small enough it's better to just allocate it all together with the
630 // main thing
631 //
632 // Most of the variables are specified with the smallest size I could pack
633 // them into. It might give better performance to make them all full-sized
634 // integers. It should be safe to freely rearrange the structures or change
635 // the sizes larger--nothing relies on silently truncating etc., nor the
636 // order of variables.
637
638 #define FAST_HUFFMAN_TABLE_SIZE (1 << STB_VORBIS_FAST_HUFFMAN_LENGTH)
639 #define FAST_HUFFMAN_TABLE_MASK (FAST_HUFFMAN_TABLE_SIZE - 1)
640
641 typedef struct
642 {
643 int dimensions, entries;
644 uint8 *codeword_lengths;
645 float minimum_value;
646 float delta_value;
647 uint8 value_bits;
648 uint8 lookup_type;
649 uint8 sequence_p;
650 uint8 sparse;
651 uint32 lookup_values;
652 codetype *multiplicands;
653 uint32 *codewords;
654 #ifdef STB_VORBIS_FAST_HUFFMAN_SHORT
655 int16 fast_huffman[FAST_HUFFMAN_TABLE_SIZE];
656 #else
657 int32 fast_huffman[FAST_HUFFMAN_TABLE_SIZE];
658 #endif
659 uint32 *sorted_codewords;
660 int *sorted_values;
661 int sorted_entries;
662 } Codebook;
663
664 typedef struct
665 {
666 uint8 order;
667 uint16 rate;
668 uint16 bark_map_size;
669 uint8 amplitude_bits;
670 uint8 amplitude_offset;
671 uint8 number_of_books;
672 uint8 book_list[16]; // varies
673 } Floor0;
674
675 typedef struct
676 {
677 uint8 partitions;
678 uint8 partition_class_list[32]; // varies
679 uint8 class_dimensions[16]; // varies
680 uint8 class_subclasses[16]; // varies
681 uint8 class_masterbooks[16]; // varies
682 int16 subclass_books[16][8]; // varies
683 uint16 Xlist[31*8+2]; // varies
684 uint8 sorted_order[31*8+2];
685 uint8 neighbors[31*8+2][2];
686 uint8 floor1_multiplier;
687 uint8 rangebits;
688 int values;
689 } Floor1;
690
691 typedef union
692 {
693 Floor0 floor0;
694 Floor1 floor1;
695 } Floor;
696
697 typedef struct
698 {
699 uint32 begin, end;
700 uint32 part_size;
701 uint8 classifications;
702 uint8 classbook;
703 uint8 **classdata;
704 int16 (*residue_books)[8];
705 } Residue;
706
707 typedef struct
708 {
709 uint8 magnitude;
710 uint8 angle;
711 uint8 mux;
712 } MappingChannel;
713
714 typedef struct
715 {
716 uint16 coupling_steps;
717 MappingChannel *chan;
718 uint8 submaps;
719 uint8 submap_floor[15]; // varies
720 uint8 submap_residue[15]; // varies
721 } Mapping;
722
723 typedef struct
724 {
725 uint8 blockflag;
726 uint8 mapping;
727 uint16 windowtype;
728 uint16 transformtype;
729 } Mode;
730
731 typedef struct
732 {
733 uint32 goal_crc; // expected crc if match
734 int bytes_left; // bytes left in packet
735 uint32 crc_so_far; // running crc
736 int bytes_done; // bytes processed in _current_ chunk
737 uint32 sample_loc; // granule pos encoded in page
738 } CRCscan;
739
740 typedef struct
741 {
742 uint32 page_start, page_end;
743 uint32 last_decoded_sample;
744 } ProbedPage;
745
// Complete decoder state for one stream: user-visible stream info, the
// input source, allocator bookkeeping, parsed header tables, decode
// buffers and the Ogg page/packet/segment reader state.
struct stb_vorbis
{
  // user-accessible info
   unsigned int sample_rate;
   int channels;

   unsigned int setup_memory_required;   // running total accumulated by setup_malloc()
   unsigned int temp_memory_required;
   unsigned int setup_temp_memory_required;

  // input config
#ifndef STB_VORBIS_NO_STDIO
   FILE *f;
   uint32 f_start;        // added to every offset passed to set_file_offset()
   int close_on_free;
#endif

   // in-memory input: current read pointer and the bounds it is checked against
   uint8 *stream;
   uint8 *stream_start;
   uint8 *stream_end;

   uint32 stream_len;

   uint8  push_mode;

   uint32 first_audio_page_offset;

   ProbedPage p_first, p_last;

  // memory management
   stb_vorbis_alloc alloc;
   int setup_offset;      // setup allocations grow upward from here (setup_malloc)
   int temp_offset;       // temp allocations grow downward from here (setup_temp_malloc)

  // run-time results
   int eof;
   enum STBVorbisError error;

  // user-useful data

  // header info
   int blocksize[2];
   int blocksize_0, blocksize_1;
   int codebook_count;
   Codebook *codebooks;
   int floor_count;
   uint16 floor_types[64]; // varies
   Floor *floor_config;
   int residue_count;
   uint16 residue_types[64]; // varies
   Residue *residue_config;
   int mapping_count;
   Mapping *mapping;
   int mode_count;
   Mode mode_config[64];  // varies

   uint32 total_samples;

  // decode buffer
   float *channel_buffers[STB_VORBIS_MAX_CHANNELS];
   float *outputs        [STB_VORBIS_MAX_CHANNELS];

   float *previous_window[STB_VORBIS_MAX_CHANNELS];
   int previous_length;

   #ifndef STB_VORBIS_NO_DEFER_FLOOR
   int16 *finalY[STB_VORBIS_MAX_CHANNELS];
   #else
   float *floor_buffers[STB_VORBIS_MAX_CHANNELS];
   #endif

   uint32 current_loc; // sample location of next frame to decode
   int    current_loc_valid;

  // per-blocksize precomputed data (index 0/1 selects the blocksize;
  // filled in by init_blocksize())

   // twiddle factors
   float *A[2],*B[2],*C[2];
   float *window[2];
   uint16 *bit_reverse[2];

  // current page/packet/segment streaming info
   uint32 serial; // stream serial number for verification
   int last_page;
   int segment_count;
   uint8 segments[255];   // segment (lacing) table of the current page
   uint8 page_flag;       // header flag byte of the current page (PAGEFLAG_*)
   uint8 bytes_in_seg;    // bytes still unread in the current segment
   uint8 first_decode;
   int next_seg;          // index of next segment to read; -1 means a new page must be started
   int last_seg;  // flag that we're on the last segment
   int last_seg_which; // what was the segment number of the last seg?
   uint32 acc;            // bit accumulator used by get_bits()/prep_huffman()
   int valid_bits;        // number of valid bits in acc; negative once the packet ran out
   int packet_bytes;
   int end_seg_with_known_loc;   // -2 when no segment of the page has a known position
   uint32 known_loc_for_packet;
   int discard_samples_deferred;
   uint32 samples_output;

  // push mode scanning
   int page_crc_tests; // only in push_mode: number of tests active; -1 if not searching
#ifndef STB_VORBIS_NO_PUSHDATA_API
   CRCscan scan[STB_VORBIS_PUSHDATA_CRC_COUNT];
#endif

  // sample-access
   int channel_buffer_start;
   int channel_buffer_end;
};
856
// IS_PUSH_MODE(f): whether decoder 'f' is operating in push mode.
// Collapses to a compile-time constant when only one of the push/pull
// APIs is compiled in; otherwise it reads the per-decoder push_mode flag.
#if defined(STB_VORBIS_NO_PUSHDATA_API)
   #define IS_PUSH_MODE(f)   FALSE
#elif defined(STB_VORBIS_NO_PULLDATA_API)
   #define IS_PUSH_MODE(f)   TRUE
#else
   #define IS_PUSH_MODE(f)   ((f)->push_mode)
#endif
864
// short internal alias for the decoder state structure
typedef struct stb_vorbis vorb;
866
867 static int error(vorb *f, enum STBVorbisError e)
868 {
869 f->error = e;
870 if (!f->eof && e != VORBIS_need_more_data) {
871 f->error=e; // breakpoint for debugging
872 }
873 return 0;
874 }
875
876
// These macros allocate temporary memory while decoding. If no user
// buffer was supplied (f->alloc.alloc_buffer is NULL) they use alloca(),
// so the caller must be able to afford the stack space; otherwise they
// carve the memory out of the user-provided buffer via setup_temp_malloc().

// bytes needed for 'count' sub-blocks of 'size' bytes plus the table of
// 'count' pointers that make_block_array() places in front of them
#define array_size_required(count,size)  ((count)*(sizeof(void *)+(size)))

#define temp_alloc(f,size)              ((f)->alloc.alloc_buffer ? setup_temp_malloc(f,size) : alloca(size))
#ifdef dealloca
// release pointer 'p' (the original expansion named a non-existent 'size')
#define temp_free(f,p)                  ((f)->alloc.alloc_buffer ? 0 : dealloca(p))
#else
#define temp_free(f,p)                  0
#endif
// save/restore the temp-allocation watermark of the user-provided buffer
#define temp_alloc_save(f)              ((f)->temp_offset)
#define temp_alloc_restore(f,p)         ((f)->temp_offset = (p))

#define temp_block_array(f,count,size)  make_block_array(temp_alloc(f,array_size_required(count,size)), count, size)
894
// Lay out 'mem' as a table of 'count' pointers immediately followed by
// 'count' sub-blocks of 'size' bytes each, point every table entry at its
// sub-block, and return the table. 'mem' must be large enough for both.
static void *make_block_array(void *mem, int count, int size)
{
   void **table = (void **) mem;
   char *block  = (char *) (table + count);   // first byte after the pointer table
   int idx = 0;
   while (idx < count) {
      table[idx++] = block;
      block += size;
   }
   return table;
}
907
908 static void *setup_malloc(vorb *f, int sz)
909 {
910 sz = (sz+3) & ~3;
911 f->setup_memory_required += sz;
912 if (f->alloc.alloc_buffer) {
913 void *p = (char *) f->alloc.alloc_buffer + f->setup_offset;
914 if (f->setup_offset + sz > f->temp_offset) return NULL;
915 f->setup_offset += sz;
916 return p;
917 }
918 return sz ? malloc(sz) : NULL;
919 }
920
921 static void setup_free(vorb *f, void *p)
922 {
923 if (f->alloc.alloc_buffer) return; // do nothing; setup mem is a stack
924 free(p);
925 }
926
927 static void *setup_temp_malloc(vorb *f, int sz)
928 {
929 sz = (sz+3) & ~3;
930 if (f->alloc.alloc_buffer) {
931 if (f->temp_offset - sz < f->setup_offset) return NULL;
932 f->temp_offset -= sz;
933 return (char *) f->alloc.alloc_buffer + f->temp_offset;
934 }
935 return malloc(sz);
936 }
937
938 static void setup_temp_free(vorb *f, void *p, int sz)
939 {
940 if (f->alloc.alloc_buffer) {
941 f->temp_offset += (sz+3)&~3;
942 return;
943 }
944 free(p);
945 }
946
#define CRC32_POLY    0x04c11db7   // from spec

// lookup table indexed by the top byte of the running CRC
static uint32 crc_table[256];

// Fill crc_table[] for the MSB-first CRC with polynomial CRC32_POLY:
// each entry is the byte value placed in the top 8 bits and run through
// eight shift/conditional-xor steps.
static void crc32_init(void)
{
   uint32 byte;
   for (byte = 0; byte < 256; ++byte) {
      uint32 reg = byte << 24;
      int bit;
      for (bit = 0; bit < 8; ++bit) {
         if (reg & 0x80000000U)
            reg = (reg << 1) ^ CRC32_POLY;
         else
            reg <<= 1;
      }
      crc_table[byte] = reg;
   }
}
960
961 static __forceinline uint32 crc32_update(uint32 crc, uint8 byte)
962 {
963 return (crc << 8) ^ crc_table[byte ^ (crc >> 24)];
964 }
965
966
// used in setup, and for huffman that doesn't go fast path
// Reverses the order of the 32 bits of n (bit 0 <-> bit 31, and so on) by
// swapping progressively smaller groups: halves, bytes, nibbles, pairs, bits.
static unsigned int bit_reverse(unsigned int n)
{
   n = (n >> 16) | (n << 16);
   n = ((n >> 8) & 0x00FF00FF) | ((n & 0x00FF00FF) << 8);
   n = ((n >> 4) & 0x0F0F0F0F) | ((n & 0x0F0F0F0F) << 4);
   n = ((n >> 2) & 0x33333333) | ((n & 0x33333333) << 2);
   n = ((n >> 1) & 0x55555555) | ((n & 0x55555555) << 1);
   return n;
}
976
// returns x multiplied by itself
static float square(float x)
{
   return x * x;
}
981
// this is a weird definition of log2() for which log2(1) = 1, log2(2) = 2, log2(4) = 3
// as required by the specification. fast(?) implementation from stb.h
// @OPTIMIZE: called multiple times per-packet with "constants"; move to setup
//
// Returns the number of bits needed to represent n, i.e. the position of
// the highest set bit plus one; 0 for n == 0 and for any negative n.
static int ilog(int32 n)
{
   static signed char log2_4[16] = { 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4 };

   // negative values must be rejected up front: they would otherwise pass
   // the "n < 16" test below and index log2_4[] with a negative subscript
   if (n < 0) return 0; // signed n returns 0

   // 2 compares if n < 16, 3 compares otherwise
   if (n < (1 << 14)) {
      if (n < (1 <<  4)) return  0 + log2_4[n      ];
      if (n < (1 <<  9)) return  5 + log2_4[n >>  5];
      return 10 + log2_4[n >> 10];
   }
   if (n < (1 << 24)) {
      if (n < (1 << 19)) return 15 + log2_4[n >> 15];
      return 20 + log2_4[n >> 20];
   }
   if (n < (1 << 29)) return 25 + log2_4[n >> 25];
   // n is non-negative here, so n >> 30 is 0 or 1 (no need for 1 << 31,
   // which would overflow a signed int)
   return 30 + log2_4[n >> 30];
}
1001
// fallback definition, used only when no earlier header supplied M_PI;
// note this fallback is a float-typed constant
#ifndef M_PI
  #define M_PI  3.14159265358979323846264f  // from CRC
#endif

// code length assigned to a value with no huffman encoding
#define NO_CODE   255
1008
1009 /////////////////////// LEAF SETUP FUNCTIONS //////////////////////////
1010 //
1011 // these functions are only called at setup, and only a few times
1012 // per file
1013
// Decode the packed 32-bit float format from the specification:
// bit 31 is the sign, bits 21..30 a biased exponent (bias 788) and
// bits 0..20 an integer mantissa; the value is +/- mantissa * 2^(exp-788).
static float float32_unpack(uint32 x)
{
   // from the specification
   uint32 exponent  = (x >> 21) & 0x3ff;
   double magnitude = (double) (x & 0x1fffff);
   double value     = (x & 0x80000000) ? -magnitude : magnitude;
   return (float) ldexp((float) value, exponent-788);
}
1023
1024
1025 // zlib & jpeg huffman tables assume that the output symbols
1026 // can either be arbitrarily arranged, or have monotonically
1027 // increasing frequencies--they rely on the lengths being sorted;
1028 // this makes for a very simple generation algorithm.
1029 // vorbis allows a huffman table with non-sorted lengths. This
1030 // requires a more sophisticated construction, since symbols in
1031 // order do not map to huffman codes "in order".
1032 static void add_entry(Codebook *c, uint32 huff_code, int symbol, int count, int len, uint32 *values)
1033 {
1034 if (!c->sparse) {
1035 c->codewords [symbol] = huff_code;
1036 } else {
1037 c->codewords [count] = huff_code;
1038 c->codeword_lengths[count] = len;
1039 values [count] = symbol;
1040 }
1041 }
1042
1043 static int compute_codewords(Codebook *c, uint8 *len, int n, uint32 *values)
1044 {
1045 int i,k,m=0;
1046 uint32 available[32];
1047
1048 memset(available, 0, sizeof(available));
1049 // find the first entry
1050 for (k=0; k < n; ++k) if (len[k] < NO_CODE) break;
1051 if (k == n) { assert(c->sorted_entries == 0); return TRUE; }
1052 // add to the list
1053 add_entry(c, 0, k, m++, len[k], values);
1054 // add all available leaves
1055 for (i=1; i <= len[k]; ++i)
1056 available[i] = 1U << (32-i);
1057 // note that the above code treats the first case specially,
1058 // but it's really the same as the following code, so they
1059 // could probably be combined (except the initial code is 0,
1060 // and I use 0 in available[] to mean 'empty')
1061 for (i=k+1; i < n; ++i) {
1062 uint32 res;
1063 int z = len[i], y;
1064 if (z == NO_CODE) continue;
1065 // find lowest available leaf (should always be earliest,
1066 // which is what the specification calls for)
1067 // note that this property, and the fact we can never have
1068 // more than one free leaf at a given level, isn't totally
1069 // trivial to prove, but it seems true and the assert never
1070 // fires, so!
1071 while (z > 0 && !available[z]) --z;
1072 if (z == 0) { return FALSE; }
1073 res = available[z];
1074 assert(z >= 0 && z < 32);
1075 available[z] = 0;
1076 add_entry(c, bit_reverse(res), i, m++, len[i], values);
1077 // propogate availability up the tree
1078 if (z != len[i]) {
1079 assert(len[i] >= 0 && len[i] < 32);
1080 for (y=len[i]; y > z; --y) {
1081 assert(available[y] == 0);
1082 available[y] = res + (1 << (32-y));
1083 }
1084 }
1085 }
1086 return TRUE;
1087 }
1088
1089 // accelerated huffman table allows fast O(1) match of all symbols
1090 // of length <= STB_VORBIS_FAST_HUFFMAN_LENGTH
1091 static void compute_accelerated_huffman(Codebook *c)
1092 {
1093 int i, len;
1094 for (i=0; i < FAST_HUFFMAN_TABLE_SIZE; ++i)
1095 c->fast_huffman[i] = -1;
1096
1097 len = c->sparse ? c->sorted_entries : c->entries;
1098 #ifdef STB_VORBIS_FAST_HUFFMAN_SHORT
1099 if (len > 32767) len = 32767; // largest possible value we can encode!
1100 #endif
1101 for (i=0; i < len; ++i) {
1102 if (c->codeword_lengths[i] <= STB_VORBIS_FAST_HUFFMAN_LENGTH) {
1103 uint32 z = c->sparse ? bit_reverse(c->sorted_codewords[i]) : c->codewords[i];
1104 // set table entries for all bit combinations in the higher bits
1105 while (z < FAST_HUFFMAN_TABLE_SIZE) {
1106 c->fast_huffman[z] = i;
1107 z += 1 << c->codeword_lengths[i];
1108 }
1109 }
1110 }
1111 }
1112
// calling-convention annotation applied to the qsort() comparison
// callbacks below; expands to __cdecl under MSVC and to nothing elsewhere
#ifdef _MSC_VER
#define STBV_CDECL __cdecl
#else
#define STBV_CDECL
#endif
1118
1119 static int STBV_CDECL uint32_compare(const void *p, const void *q)
1120 {
1121 uint32 x = * (uint32 *) p;
1122 uint32 y = * (uint32 *) q;
1123 return x < y ? -1 : x > y;
1124 }
1125
1126 static int include_in_sort(Codebook *c, uint8 len)
1127 {
1128 if (c->sparse) { assert(len != NO_CODE); return TRUE; }
1129 if (len == NO_CODE) return FALSE;
1130 if (len > STB_VORBIS_FAST_HUFFMAN_LENGTH) return TRUE;
1131 return FALSE;
1132 }
1133
// if the fast table above doesn't work, we want to binary
// search them... need to reverse the bits
//
// Builds c->sorted_codewords (bit-reversed codes in ascending order,
// terminated by a 0xffffffff sentinel) and fills c->sorted_values so each
// sorted slot maps back to its symbol. 'lengths' is indexed by symbol;
// 'values' is only read for sparse codebooks, where values[i] is the
// symbol of the i-th stored codeword.
static void compute_sorted_huffman(Codebook *c, uint8 *lengths, uint32 *values)
{
   int i, len;
   // build a list of all the entries
   // OPTIMIZATION: don't include the short ones, since they'll be caught by FAST_HUFFMAN.
   // this is kind of a frivolous optimization--I don't see any performance improvement,
   // but it's like 4 extra lines of code, so.
   if (!c->sparse) {
      int k = 0;
      for (i=0; i < c->entries; ++i)
         if (include_in_sort(c, lengths[i]))
            c->sorted_codewords[k++] = bit_reverse(c->codewords[i]);
      assert(k == c->sorted_entries);
   } else {
      for (i=0; i < c->sorted_entries; ++i)
         c->sorted_codewords[i] = bit_reverse(c->codewords[i]);
   }

   qsort(c->sorted_codewords, c->sorted_entries, sizeof(c->sorted_codewords[0]), uint32_compare);
   // sentinel one past the last real entry
   c->sorted_codewords[c->sorted_entries] = 0xffffffff;

   len = c->sparse ? c->sorted_entries : c->entries;
   // now we need to indicate how they correspond; we could either
   //   #1: sort a different data structure that says who they correspond to
   //   #2: for each sorted entry, search the original list to find who corresponds
   //   #3: for each original entry, find the sorted entry
   // #1 requires extra storage, #2 is slow, #3 can use binary search!
   for (i=0; i < len; ++i) {
      int huff_len = c->sparse ? lengths[values[i]] : lengths[i];
      if (include_in_sort(c,huff_len)) {
         uint32 code = bit_reverse(c->codewords[i]);
         int x=0, n=c->sorted_entries;
         while (n > 1) {
            // invariant: sc[x] <= code < sc[x+n]
            int m = x + (n >> 1);
            if (c->sorted_codewords[m] <= code) {
               x = m;
               n -= (n>>1);
            } else {
               n >>= 1;
            }
         }
         assert(c->sorted_codewords[x] == code);
         if (c->sparse) {
            // sparse: lengths are re-stored in sorted order alongside the symbol
            c->sorted_values[x] = values[i];
            c->codeword_lengths[x] = huff_len;
         } else {
            c->sorted_values[x] = i;
         }
      }
   }
}
1188
// only run while parsing the header (3 times)
// Returns nonzero iff the 6 bytes at 'data' spell "vorbis".
static int vorbis_validate(uint8 *data)
{
   static const char magic[] = "vorbis";
   if (memcmp(data, magic, 6) != 0)
      return 0;
   return 1;
}
1195
1196 // called from setup only, once per code book
1197 // (formula implied by specification)
1198 static int lookup1_values(int entries, int dim)
1199 {
1200 int r = (int) floor(exp((float) log((float) entries) / dim));
1201 if ((int) floor(pow((float) r+1, dim)) <= entries) // (int) cast for MinGW warning;
1202 ++r; // floor() to avoid _ftol() when non-CRT
1203 assert(pow((float) r+1, dim) > entries);
1204 assert((int) floor(pow((float) r, dim)) <= entries); // (int),floor() as above
1205 return r;
1206 }
1207
1208 // called twice per file
1209 static void compute_twiddle_factors(int n, float *A, float *B, float *C)
1210 {
1211 int n4 = n >> 2, n8 = n >> 3;
1212 int k,k2;
1213
1214 for (k=k2=0; k < n4; ++k,k2+=2) {
1215 A[k2 ] = (float) cos(4*k*M_PI/n);
1216 A[k2+1] = (float) -sin(4*k*M_PI/n);
1217 B[k2 ] = (float) cos((k2+1)*M_PI/n/2) * 0.5f;
1218 B[k2+1] = (float) sin((k2+1)*M_PI/n/2) * 0.5f;
1219 }
1220 for (k=k2=0; k < n8; ++k,k2+=2) {
1221 C[k2 ] = (float) cos(2*(k2+1)*M_PI/n);
1222 C[k2+1] = (float) -sin(2*(k2+1)*M_PI/n);
1223 }
1224 }
1225
1226 static void compute_window(int n, float *window)
1227 {
1228 int n2 = n >> 1, i;
1229 for (i=0; i < n2; ++i)
1230 window[i] = (float) sin(0.5 * M_PI * square((float) sin((i - 0 + 0.5) / n2 * 0.5 * M_PI)));
1231 }
1232
1233 static void compute_bitreverse(int n, uint16 *rev)
1234 {
1235 int ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
1236 int i, n8 = n >> 3;
1237 for (i=0; i < n8; ++i)
1238 rev[i] = (bit_reverse(i) >> (32-ld+3)) << 2;
1239 }
1240
1241 static int init_blocksize(vorb *f, int b, int n)
1242 {
1243 int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3;
1244 f->A[b] = (float *) setup_malloc(f, sizeof(float) * n2);
1245 f->B[b] = (float *) setup_malloc(f, sizeof(float) * n2);
1246 f->C[b] = (float *) setup_malloc(f, sizeof(float) * n4);
1247 if (!f->A[b] || !f->B[b] || !f->C[b]) return error(f, VORBIS_outofmem);
1248 compute_twiddle_factors(n, f->A[b], f->B[b], f->C[b]);
1249 f->window[b] = (float *) setup_malloc(f, sizeof(float) * n2);
1250 if (!f->window[b]) return error(f, VORBIS_outofmem);
1251 compute_window(n, f->window[b]);
1252 f->bit_reverse[b] = (uint16 *) setup_malloc(f, sizeof(uint16) * n8);
1253 if (!f->bit_reverse[b]) return error(f, VORBIS_outofmem);
1254 compute_bitreverse(n, f->bit_reverse[b]);
1255 return TRUE;
1256 }
1257
// Among x[0..n-1], find the index of the largest value strictly below
// x[n] (stored in *plow) and of the smallest value strictly above x[n]
// (stored in *phigh). An output is left untouched if no such element
// exists; on ties the earliest index wins.
static void neighbors(uint16 *x, int n, int *plow, int *phigh)
{
   int target = x[n];
   int best_below = -1;
   int best_above = 65536;
   int i;
   for (i=0; i < n; ++i) {
      int v = x[i];
      if (v < target && v > best_below) {
         best_below = v;
         *plow = i;
      }
      if (v > target && v < best_above) {
         best_above = v;
         *phigh = i;
      }
   }
}
1268
// sortable (x, index) pair; point_compare() orders these by x only.
// this has been repurposed so the field named y now holds the point's
// original index rather than a y coordinate
typedef struct
{
   uint16 x,y;
} Point;
1274
1275 static int STBV_CDECL point_compare(const void *p, const void *q)
1276 {
1277 Point *a = (Point *) p;
1278 Point *b = (Point *) q;
1279 return a->x < b->x ? -1 : a->x > b->x;
1280 }
1281
1282 //
1283 /////////////////////// END LEAF SETUP FUNCTIONS //////////////////////////
1284
1285
// USE_MEMORY(z): nonzero when decoder 'z' reads from the in-memory stream
// rather than from a FILE. Without stdio support memory is the only
// source; otherwise a non-NULL z->stream selects the memory path.
#if defined(STB_VORBIS_NO_STDIO)
   #define USE_MEMORY(z)    TRUE
#else
   #define USE_MEMORY(z)    ((z)->stream)
#endif
1291
1292 static uint8 get8(vorb *z)
1293 {
1294 if (USE_MEMORY(z)) {
1295 if (z->stream >= z->stream_end) { z->eof = TRUE; return 0; }
1296 return *z->stream++;
1297 }
1298
1299 #ifndef STB_VORBIS_NO_STDIO
1300 {
1301 int c = fgetc(z->f);
1302 if (c == EOF) { z->eof = TRUE; return 0; }
1303 return c;
1304 }
1305 #endif
1306 }
1307
1308 static uint32 get32(vorb *f)
1309 {
1310 uint32 x;
1311 x = get8(f);
1312 x += get8(f) << 8;
1313 x += get8(f) << 16;
1314 x += (uint32) get8(f) << 24;
1315 return x;
1316 }
1317
1318 static int getn(vorb *z, uint8 *data, int n)
1319 {
1320 if (USE_MEMORY(z)) {
1321 if (z->stream+n > z->stream_end) { z->eof = 1; return 0; }
1322 memcpy(data, z->stream, n);
1323 z->stream += n;
1324 return 1;
1325 }
1326
1327 #ifndef STB_VORBIS_NO_STDIO
1328 if (fread(data, n, 1, z->f) == 1)
1329 return 1;
1330 else {
1331 z->eof = 1;
1332 return 0;
1333 }
1334 #endif
1335 }
1336
1337 static void skip(vorb *z, int n)
1338 {
1339 if (USE_MEMORY(z)) {
1340 z->stream += n;
1341 if (z->stream >= z->stream_end) z->eof = 1;
1342 return;
1343 }
1344 #ifndef STB_VORBIS_NO_STDIO
1345 {
1346 long x = ftell(z->f);
1347 fseek(z->f, x+n, SEEK_SET);
1348 }
1349 #endif
1350 }
1351
// Reposition the input to byte offset 'loc' (relative to the start of the
// stream / f_start). Returns 1 on success. Returns 0 in push mode, and 0
// with f->eof set when the offset is out of range or the seek fails.
static int set_file_offset(stb_vorbis *f, unsigned int loc)
{
   #ifndef STB_VORBIS_NO_PUSHDATA_API
   if (f->push_mode) return 0;
   #endif
   f->eof = 0;
   if (USE_MEMORY(f)) {
      // second test catches pointer wrap-around for huge 'loc'
      if (f->stream_start + loc >= f->stream_end || f->stream_start + loc < f->stream_start) {
         f->stream = f->stream_end;
         f->eof = 1;
         return 0;
      } else {
         f->stream = f->stream_start + loc;
         return 1;
      }
   }
   #ifndef STB_VORBIS_NO_STDIO
   // reject offsets that overflow when f_start is added or that do not
   // fit in a positive 32-bit value; clamp them and flag eof
   if (loc + f->f_start < loc || loc >= 0x80000000) {
      loc = 0x7fffffff;
      f->eof = 1;
   } else {
      loc += f->f_start;
   }
   if (!fseek(f->f, loc, SEEK_SET))
      return 1;
   // seek failed: flag eof and reposition relative to the end of the file
   f->eof = 1;
   fseek(f->f, f->f_start, SEEK_END);
   return 0;
   #endif
}
1382
1383
1384 static uint8 ogg_page_header[4] = { 0x4f, 0x67, 0x67, 0x53 };
1385
1386 static int capture_pattern(vorb *f)
1387 {
1388 if (0x4f != get8(f)) return FALSE;
1389 if (0x67 != get8(f)) return FALSE;
1390 if (0x67 != get8(f)) return FALSE;
1391 if (0x53 != get8(f)) return FALSE;
1392 return TRUE;
1393 }
1394
// bit masks tested against the page header flag byte (f->page_flag)
#define PAGEFLAG_continued_packet   1
#define PAGEFLAG_first_page         2
#define PAGEFLAG_last_page          4
1398
// Parse an Ogg page header, assuming the 4-byte capture pattern has
// already been consumed: version, flags, granule position, serial number,
// page sequence number, CRC and the segment table. Leaves the decoder
// ready to read segment 0 of the page. Returns TRUE on success; on a bad
// version or truncated segment table sets an error and returns 0.
static int start_page_no_capturepattern(vorb *f)
{
   uint32 loc0,loc1,n;
   // stream structure version
   if (0 != get8(f)) return error(f, VORBIS_invalid_stream_structure_version);
   // header flag
   f->page_flag = get8(f);
   // absolute granule position (64 bits, read as low word then high word)
   loc0 = get32(f);
   loc1 = get32(f);
   // @TODO: validate loc0,loc1 as valid positions?
   // stream serial number -- vorbis doesn't interleave, so discard
   get32(f);
   //if (f->serial != get32(f)) return error(f, VORBIS_incorrect_stream_serial_number);
   // page sequence number
   n = get32(f);
   f->last_page = n;
   // CRC32 (read and ignored here)
   get32(f);
   // page_segments
   f->segment_count = get8(f);
   if (!getn(f, f->segments, f->segment_count))
      return error(f, VORBIS_unexpected_eof);
   // assume we _don't_ know the sample position of any segments
   f->end_seg_with_known_loc = -2;
   // an all-ones granule position means the page carries no position
   if (loc0 != ~0U || loc1 != ~0U) {
      int i;
      // determine which packet is the last one that will complete
      for (i=f->segment_count-1; i >= 0; --i)
         if (f->segments[i] < 255)
            break;
      // 'i' is now the index of the _last_ segment of a packet that ends
      if (i >= 0) {
         f->end_seg_with_known_loc = i;
         f->known_loc_for_packet   = loc0;
      }
   }
   if (f->first_decode) {
      // record the extent of this page as the first audio page
      int i,len;
      ProbedPage p;
      len = 0;
      for (i=0; i < f->segment_count; ++i)
         len += f->segments[i];
      // 27 = fixed-size part of the page header, plus the segment table
      len += 27 + f->segment_count;
      p.page_start = f->first_audio_page_offset;
      p.page_end = p.page_start + len;
      p.last_decoded_sample = loc0;
      f->p_first = p;
   }
   f->next_seg = 0;
   return TRUE;
}
1451
1452 static int start_page(vorb *f)
1453 {
1454 if (!capture_pattern(f)) return error(f, VORBIS_missing_capture_pattern);
1455 return start_page_no_capturepattern(f);
1456 }
1457
1458 static int start_packet(vorb *f)
1459 {
1460 while (f->next_seg == -1) {
1461 if (!start_page(f)) return FALSE;
1462 if (f->page_flag & PAGEFLAG_continued_packet)
1463 return error(f, VORBIS_continued_packet_flag_invalid);
1464 }
1465 f->last_seg = FALSE;
1466 f->valid_bits = 0;
1467 f->packet_bytes = 0;
1468 f->bytes_in_seg = 0;
1469 // f->next_seg is now valid
1470 return TRUE;
1471 }
1472
1473 static int maybe_start_packet(vorb *f)
1474 {
1475 if (f->next_seg == -1) {
1476 int x = get8(f);
1477 if (f->eof) return FALSE; // EOF at page boundary is not an error!
1478 if (0x4f != x ) return error(f, VORBIS_missing_capture_pattern);
1479 if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1480 if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1481 if (0x53 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1482 if (!start_page_no_capturepattern(f)) return FALSE;
1483 if (f->page_flag & PAGEFLAG_continued_packet) {
1484 // set up enough state that we can read this packet if we want,
1485 // e.g. during recovery
1486 f->last_seg = FALSE;
1487 f->bytes_in_seg = 0;
1488 return error(f, VORBIS_continued_packet_flag_invalid);
1489 }
1490 }
1491 return start_packet(f);
1492 }
1493
1494 static int next_segment(vorb *f)
1495 {
1496 int len;
1497 if (f->last_seg) return 0;
1498 if (f->next_seg == -1) {
1499 f->last_seg_which = f->segment_count-1; // in case start_page fails
1500 if (!start_page(f)) { f->last_seg = 1; return 0; }
1501 if (!(f->page_flag & PAGEFLAG_continued_packet)) return error(f, VORBIS_continued_packet_flag_invalid);
1502 }
1503 len = f->segments[f->next_seg++];
1504 if (len < 255) {
1505 f->last_seg = TRUE;
1506 f->last_seg_which = f->next_seg-1;
1507 }
1508 if (f->next_seg >= f->segment_count)
1509 f->next_seg = -1;
1510 assert(f->bytes_in_seg == 0);
1511 f->bytes_in_seg = len;
1512 return len;
1513 }
1514
// EOP: value returned by get8_packet_raw() when the packet has no more bytes
// INVALID_BITS: value stored in f->valid_bits once a bit read ran past the
// end of the packet
#define EOP    (-1)
#define INVALID_BITS  (-1)
1517
1518 static int get8_packet_raw(vorb *f)
1519 {
1520 if (!f->bytes_in_seg) { // CLANG!
1521 if (f->last_seg) return EOP;
1522 else if (!next_segment(f)) return EOP;
1523 }
1524 assert(f->bytes_in_seg > 0);
1525 --f->bytes_in_seg;
1526 ++f->packet_bytes;
1527 return get8(f);
1528 }
1529
1530 static int get8_packet(vorb *f)
1531 {
1532 int x = get8_packet_raw(f);
1533 f->valid_bits = 0;
1534 return x;
1535 }
1536
1537 static void flush_packet(vorb *f)
1538 {
1539 while (get8_packet_raw(f) != EOP);
1540 }
1541
// @OPTIMIZE: this is the secondary bit decoder, so it's probably not as important
// as the huffman decoder?
//
// Read the next n bits of the packet, least-significant bit first.
// Returns 0 if the packet ends before n bits are available; in that case
// f->valid_bits is left at INVALID_BITS so later reads also return 0.
static uint32 get_bits(vorb *f, int n)
{
   uint32 z;

   if (f->valid_bits < 0) return 0;
   if (f->valid_bits < n) {
      if (n > 24) {
         // the accumulator technique below would not work correctly in this case
         z = get_bits(f, 24);
         z += get_bits(f, n-24) << 24;
         return z;
      }
      if (f->valid_bits == 0) f->acc = 0;
      // refill the accumulator one byte at a time
      while (f->valid_bits < n) {
         int z = get8_packet_raw(f);
         if (z == EOP) {
            f->valid_bits = INVALID_BITS;
            return 0;
         }
         f->acc += z << f->valid_bits;
         f->valid_bits += 8;
      }
   }
   if (f->valid_bits < 0) return 0;
   // take the low n bits and drop them from the accumulator
   z = f->acc & ((1 << n)-1);
   f->acc >>= n;
   f->valid_bits -= n;
   return z;
}
1573
1574 // @OPTIMIZE: primary accumulator for huffman
1575 // expand the buffer to as many bits as possible without reading off end of packet
1576 // it might be nice to allow f->valid_bits and f->acc to be stored in registers,
1577 // e.g. cache them locally and decode locally
1578 static __forceinline void prep_huffman(vorb *f)
1579 {
1580 if (f->valid_bits <= 24) {
1581 if (f->valid_bits == 0) f->acc = 0;
1582 do {
1583 int z;
1584 if (f->last_seg && !f->bytes_in_seg) return;
1585 z = get8_packet_raw(f);
1586 if (z == EOP) return;
1587 f->acc += (unsigned) z << f->valid_bits;
1588 f->valid_bits += 8;
1589 } while (f->valid_bits <= 24);
1590 }
1591 }
1592
// packet type codes for the three Vorbis header packets
// (identification, comment, setup)
enum
{
   VORBIS_packet_id = 1,
   VORBIS_packet_comment = 3,
   VORBIS_packet_setup = 5
};
1599
// Slow-path huffman decode of one codeword from the bit accumulator,
// used when the fast table has no entry. Uses a binary search over the
// sorted codewords when available (always for books with more than 8
// entries), otherwise a linear scan of all codewords.
// Returns the decoded index (sorted index for sparse books, symbol
// otherwise) or -1 if nothing could be decoded; when too few bits remain
// for the matched code, valid_bits is reset to 0 and -1 is returned.
static int codebook_decode_scalar_raw(vorb *f, Codebook *c)
{
   int i;
   prep_huffman(f);

   if (c->codewords == NULL && c->sorted_codewords == NULL)
      return -1;

   // cases to use binary search: sorted_codewords && !c->codewords
   //                             sorted_codewords && c->entries > 8
   if (c->entries > 8 ? c->sorted_codewords!=NULL : !c->codewords) {
      // binary search
      uint32 code = bit_reverse(f->acc);
      int x=0, n=c->sorted_entries, len;

      while (n > 1) {
         // invariant: sc[x] <= code < sc[x+n]
         int m = x + (n >> 1);
         if (c->sorted_codewords[m] <= code) {
            x = m;
            n -= (n>>1);
         } else {
            n >>= 1;
         }
      }
      // x is now the sorted index
      if (!c->sparse) x = c->sorted_values[x];
      // x is now sorted index if sparse, or symbol otherwise
      len = c->codeword_lengths[x];
      if (f->valid_bits >= len) {
         f->acc >>= len;
         f->valid_bits -= len;
         return x;
      }

      // not enough bits left in the packet for this code
      f->valid_bits = 0;
      return -1;
   }

   // if small, linear search
   assert(!c->sparse);
   for (i=0; i < c->entries; ++i) {
      if (c->codeword_lengths[i] == NO_CODE) continue;
      // compare the low 'length' bits of the accumulator with the code
      if (c->codewords[i] == (f->acc & ((1 << c->codeword_lengths[i])-1))) {
         if (f->valid_bits >= c->codeword_lengths[i]) {
            f->acc >>= c->codeword_lengths[i];
            f->valid_bits -= c->codeword_lengths[i];
            return i;
         }
         f->valid_bits = 0;
         return -1;
      }
   }

   // no codeword matched at all
   error(f, VORBIS_invalid_stream);
   f->valid_bits = 0;
   return -1;
}
1658
// DECODE_RAW(var,f,c): decode one huffman codeword from decoder 'f' using
// codebook 'c' into 'var' (-1 on failure). The fast table is tried first;
// a miss falls back to codebook_decode_scalar_raw(). By default this is
// expanded inline as a macro; with STB_VORBIS_NO_INLINE_DECODE it calls
// the equivalent function codebook_decode_scalar() instead.
// NOTE: the inline form expands to several statements, so it must be used
// as a statement of its own, and it evaluates 'f', 'c' and 'var' more than once.
#ifndef STB_VORBIS_NO_INLINE_DECODE

#define DECODE_RAW(var, f,c)                                  \
   if (f->valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH)        \
      prep_huffman(f);                                        \
   var = f->acc & FAST_HUFFMAN_TABLE_MASK;                    \
   var = c->fast_huffman[var];                                \
   if (var >= 0) {                                            \
      int n = c->codeword_lengths[var];                       \
      f->acc >>= n;                                           \
      f->valid_bits -= n;                                     \
      if (f->valid_bits < 0) { f->valid_bits = 0; var = -1; } \
   } else {                                                   \
      var = codebook_decode_scalar_raw(f,c);                  \
   }

#else

static int codebook_decode_scalar(vorb *f, Codebook *c)
{
   int i;
   if (f->valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH)
      prep_huffman(f);
   // fast huffman table lookup
   i = f->acc & FAST_HUFFMAN_TABLE_MASK;
   i = c->fast_huffman[i];
   if (i >= 0) {
      f->acc >>= c->codeword_lengths[i];
      f->valid_bits -= c->codeword_lengths[i];
      // the code needed more bits than the packet had left
      if (f->valid_bits < 0) { f->valid_bits = 0; return -1; }
      return i;
   }
   return codebook_decode_scalar_raw(f,c);
}

#define DECODE_RAW(var,f,c)    var = codebook_decode_scalar(f,c);

#endif
1697
// DECODE: like DECODE_RAW, but for sparse codebooks additionally maps the
// decoded sorted index back to its symbol through sorted_values[].
// NOTE(review): the mapping is applied even when DECODE_RAW produced -1 --
// confirm callers never rely on 'var' after a failed sparse decode.
#define DECODE(var,f,c)                                       \
   DECODE_RAW(var,f,c)                                        \
   if (c->sparse) var = c->sorted_values[var];

// DECODE_VQ: decode used by the vector (VQ) codebook routines; it skips
// the sorted_values[] mapping unless STB_VORBIS_DIVIDES_IN_CODEBOOK is set
#ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
  #define DECODE_VQ(var,f,c)   DECODE_RAW(var,f,c)
#else
  #define DECODE_VQ(var,f,c)   DECODE(var,f,c)
#endif
1707
1708
1709
1710
1711
1712
// CODEBOOK_ELEMENT_FAST is an optimization for the CODEBOOK_FLOATS case
// where we avoid one addition
// As defined here, both element macros read c->multiplicands[off]
// directly and the base value is the constant 0.
#define CODEBOOK_ELEMENT(c,off)          (c->multiplicands[off])
#define CODEBOOK_ELEMENT_FAST(c,off)     (c->multiplicands[off])
#define CODEBOOK_ELEMENT_BASE(c)         (0)
1718
1719 static int codebook_decode_start(vorb *f, Codebook *c)
1720 {
1721 int z = -1;
1722
1723 // type 0 is only legal in a scalar context
1724 if (c->lookup_type == 0)
1725 error(f, VORBIS_invalid_stream);
1726 else {
1727 DECODE_VQ(z,f,c);
1728 if (c->sparse) assert(z < c->sorted_entries);
1729 if (z < 0) { // check for EOP
1730 if (!f->bytes_in_seg)
1731 if (f->last_seg)
1732 return z;
1733 error(f, VORBIS_invalid_stream);
1734 }
1735 }
1736 return z;
1737 }
1738
1739 static int codebook_decode(vorb *f, Codebook *c, float *output, int len)
1740 {
1741 int i,z = codebook_decode_start(f,c);
1742 if (z < 0) return FALSE;
1743 if (len > c->dimensions) len = c->dimensions;
1744
1745 #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
1746 if (c->lookup_type == 1) {
1747 float last = CODEBOOK_ELEMENT_BASE(c);
1748 int div = 1;
1749 for (i=0; i < len; ++i) {
1750 int off = (z / div) % c->lookup_values;
1751 float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
1752 output[i] += val;
1753 if (c->sequence_p) last = val + c->minimum_value;
1754 div *= c->lookup_values;
1755 }
1756 return TRUE;
1757 }
1758 #endif
1759
1760 z *= c->dimensions;
1761 if (c->sequence_p) {
1762 float last = CODEBOOK_ELEMENT_BASE(c);
1763 for (i=0; i < len; ++i) {
1764 float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1765 output[i] += val;
1766 last = val + c->minimum_value;
1767 }
1768 } else {
1769 float last = CODEBOOK_ELEMENT_BASE(c);
1770 for (i=0; i < len; ++i) {
1771 output[i] += CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1772 }
1773 }
1774
1775 return TRUE;
1776 }
1777
1778 static int codebook_decode_step(vorb *f, Codebook *c, float *output, int len, int step)
1779 {
1780 int i,z = codebook_decode_start(f,c);
1781 float last = CODEBOOK_ELEMENT_BASE(c);
1782 if (z < 0) return FALSE;
1783 if (len > c->dimensions) len = c->dimensions;
1784
1785 #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
1786 if (c->lookup_type == 1) {
1787 int div = 1;
1788 for (i=0; i < len; ++i) {
1789 int off = (z / div) % c->lookup_values;
1790 float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
1791 output[i*step] += val;
1792 if (c->sequence_p) last = val;
1793 div *= c->lookup_values;
1794 }
1795 return TRUE;
1796 }
1797 #endif
1798
1799 z *= c->dimensions;
1800 for (i=0; i < len; ++i) {
1801 float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1802 output[i*step] += val;
1803 if (c->sequence_p) last = val;
1804 }
1805
1806 return TRUE;
1807 }
1808
1809 static int codebook_decode_deinterleave_repeat(vorb *f, Codebook *c, float **outputs, int ch, int *c_inter_p, int *p_inter_p, int len, int total_decode)
1810 {
1811 int c_inter = *c_inter_p;
1812 int p_inter = *p_inter_p;
1813 int i,z, effective = c->dimensions;
1814
1815 // type 0 is only legal in a scalar context
1816 if (c->lookup_type == 0) return error(f, VORBIS_invalid_stream);
1817
1818 while (total_decode > 0) {
1819 float last = CODEBOOK_ELEMENT_BASE(c);
1820 DECODE_VQ(z,f,c);
1821 #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
1822 assert(!c->sparse || z < c->sorted_entries);
1823 #endif
1824 if (z < 0) {
1825 if (!f->bytes_in_seg)
1826 if (f->last_seg) return FALSE;
1827 return error(f, VORBIS_invalid_stream);
1828 }
1829
1830 // if this will take us off the end of the buffers, stop short!
1831 // we check by computing the length of the virtual interleaved
1832 // buffer (len*ch), our current offset within it (p_inter*ch)+(c_inter),
1833 // and the length we'll be using (effective)
1834 if (c_inter + p_inter*ch + effective > len * ch) {
1835 effective = len*ch - (p_inter*ch - c_inter);
1836 }
1837
1838 #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
1839 if (c->lookup_type == 1) {
1840 int div = 1;
1841 for (i=0; i < effective; ++i) {
1842 int off = (z / div) % c->lookup_values;
1843 float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
1844 if (outputs[c_inter])
1845 outputs[c_inter][p_inter] += val;
1846 if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1847 if (c->sequence_p) last = val;
1848 div *= c->lookup_values;
1849 }
1850 } else
1851 #endif
1852 {
1853 z *= c->dimensions;
1854 if (c->sequence_p) {
1855 for (i=0; i < effective; ++i) {
1856 float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1857 if (outputs[c_inter])
1858 outputs[c_inter][p_inter] += val;
1859 if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1860 last = val;
1861 }
1862 } else {
1863 for (i=0; i < effective; ++i) {
1864 float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1865 if (outputs[c_inter])
1866 outputs[c_inter][p_inter] += val;
1867 if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1868 }
1869 }
1870 }
1871
1872 total_decode -= effective;
1873 }
1874 *c_inter_p = c_inter;
1875 *p_inter_p = p_inter;
1876 return TRUE;
1877 }
1878
// Integer linear interpolation: the y value at 'x' on the line from
// (x0,y0) to (x1,y1), with the offset from y0 computed on |dy| and
// truncated toward zero before the sign is applied.
static int predict_point(int x, int x0, int x1, int y0, int y1)
{
   int rise = y1 - y0;
   int run  = x1 - x0;
   // @OPTIMIZE: force int division to round in the right direction... is this necessary on x86?
   int step = abs(rise) * (x - x0) / run;
   if (rise < 0)
      return y0 - step;
   return y0 + step;
}
1888
1889 // the following table is block-copied from the specification
// 256-entry lookup table, block-copied from the specification; do not
// regenerate or round these literals. Entries increase monotonically from
// about 1.06e-07 (index 0) to exactly 1.0 (index 255). The number in the
// leading comment of each row is the index of that row's first entry.
static float inverse_db_table[256] =
{
   /*   0 */ 1.0649863e-07f, 1.1341951e-07f, 1.2079015e-07f, 1.2863978e-07f,
   /*   4 */ 1.3699951e-07f, 1.4590251e-07f, 1.5538408e-07f, 1.6548181e-07f,
   /*   8 */ 1.7623575e-07f, 1.8768855e-07f, 1.9988561e-07f, 2.1287530e-07f,
   /*  12 */ 2.2670913e-07f, 2.4144197e-07f, 2.5713223e-07f, 2.7384213e-07f,
   /*  16 */ 2.9163793e-07f, 3.1059021e-07f, 3.3077411e-07f, 3.5226968e-07f,
   /*  20 */ 3.7516214e-07f, 3.9954229e-07f, 4.2550680e-07f, 4.5315863e-07f,
   /*  24 */ 4.8260743e-07f, 5.1396998e-07f, 5.4737065e-07f, 5.8294187e-07f,
   /*  28 */ 6.2082472e-07f, 6.6116941e-07f, 7.0413592e-07f, 7.4989464e-07f,
   /*  32 */ 7.9862701e-07f, 8.5052630e-07f, 9.0579828e-07f, 9.6466216e-07f,
   /*  36 */ 1.0273513e-06f, 1.0941144e-06f, 1.1652161e-06f, 1.2409384e-06f,
   /*  40 */ 1.3215816e-06f, 1.4074654e-06f, 1.4989305e-06f, 1.5963394e-06f,
   /*  44 */ 1.7000785e-06f, 1.8105592e-06f, 1.9282195e-06f, 2.0535261e-06f,
   /*  48 */ 2.1869758e-06f, 2.3290978e-06f, 2.4804557e-06f, 2.6416497e-06f,
   /*  52 */ 2.8133190e-06f, 2.9961443e-06f, 3.1908506e-06f, 3.3982101e-06f,
   /*  56 */ 3.6190449e-06f, 3.8542308e-06f, 4.1047004e-06f, 4.3714470e-06f,
   /*  60 */ 4.6555282e-06f, 4.9580707e-06f, 5.2802740e-06f, 5.6234160e-06f,
   /*  64 */ 5.9888572e-06f, 6.3780469e-06f, 6.7925283e-06f, 7.2339451e-06f,
   /*  68 */ 7.7040476e-06f, 8.2047000e-06f, 8.7378876e-06f, 9.3057248e-06f,
   /*  72 */ 9.9104632e-06f, 1.0554501e-05f, 1.1240392e-05f, 1.1970856e-05f,
   /*  76 */ 1.2748789e-05f, 1.3577278e-05f, 1.4459606e-05f, 1.5399272e-05f,
   /*  80 */ 1.6400004e-05f, 1.7465768e-05f, 1.8600792e-05f, 1.9809576e-05f,
   /*  84 */ 2.1096914e-05f, 2.2467911e-05f, 2.3928002e-05f, 2.5482978e-05f,
   /*  88 */ 2.7139006e-05f, 2.8902651e-05f, 3.0780908e-05f, 3.2781225e-05f,
   /*  92 */ 3.4911534e-05f, 3.7180282e-05f, 3.9596466e-05f, 4.2169667e-05f,
   /*  96 */ 4.4910090e-05f, 4.7828601e-05f, 5.0936773e-05f, 5.4246931e-05f,
   /* 100 */ 5.7772202e-05f, 6.1526565e-05f, 6.5524908e-05f, 6.9783085e-05f,
   /* 104 */ 7.4317983e-05f, 7.9147585e-05f, 8.4291040e-05f, 8.9768747e-05f,
   /* 108 */ 9.5602426e-05f, 0.00010181521f, 0.00010843174f, 0.00011547824f,
   /* 112 */ 0.00012298267f, 0.00013097477f, 0.00013948625f, 0.00014855085f,
   /* 116 */ 0.00015820453f, 0.00016848555f, 0.00017943469f, 0.00019109536f,
   /* 120 */ 0.00020351382f, 0.00021673929f, 0.00023082423f, 0.00024582449f,
   /* 124 */ 0.00026179955f, 0.00027881276f, 0.00029693158f, 0.00031622787f,
   /* 128 */ 0.00033677814f, 0.00035866388f, 0.00038197188f, 0.00040679456f,
   /* 132 */ 0.00043323036f, 0.00046138411f, 0.00049136745f, 0.00052329927f,
   /* 136 */ 0.00055730621f, 0.00059352311f, 0.00063209358f, 0.00067317058f,
   /* 140 */ 0.00071691700f, 0.00076350630f, 0.00081312324f, 0.00086596457f,
   /* 144 */ 0.00092223983f, 0.00098217216f, 0.0010459992f, 0.0011139742f,
   /* 148 */ 0.0011863665f, 0.0012634633f, 0.0013455702f, 0.0014330129f,
   /* 152 */ 0.0015261382f, 0.0016253153f, 0.0017309374f, 0.0018434235f,
   /* 156 */ 0.0019632195f, 0.0020908006f, 0.0022266726f, 0.0023713743f,
   /* 160 */ 0.0025254795f, 0.0026895994f, 0.0028643847f, 0.0030505286f,
   /* 164 */ 0.0032487691f, 0.0034598925f, 0.0036847358f, 0.0039241906f,
   /* 168 */ 0.0041792066f, 0.0044507950f, 0.0047400328f, 0.0050480668f,
   /* 172 */ 0.0053761186f, 0.0057254891f, 0.0060975636f, 0.0064938176f,
   /* 176 */ 0.0069158225f, 0.0073652516f, 0.0078438871f, 0.0083536271f,
   /* 180 */ 0.0088964928f, 0.009474637f, 0.010090352f, 0.010746080f,
   /* 184 */ 0.011444421f, 0.012188144f, 0.012980198f, 0.013823725f,
   /* 188 */ 0.014722068f, 0.015678791f, 0.016697687f, 0.017782797f,
   /* 192 */ 0.018938423f, 0.020169149f, 0.021479854f, 0.022875735f,
   /* 196 */ 0.024362330f, 0.025945531f, 0.027631618f, 0.029427276f,
   /* 200 */ 0.031339626f, 0.033376252f, 0.035545228f, 0.037855157f,
   /* 204 */ 0.040315199f, 0.042935108f, 0.045725273f, 0.048696758f,
   /* 208 */ 0.051861348f, 0.055231591f, 0.058820850f, 0.062643361f,
   /* 212 */ 0.066714279f, 0.071049749f, 0.075666962f, 0.080584227f,
   /* 216 */ 0.085821044f, 0.091398179f, 0.097337747f, 0.10366330f,
   /* 220 */ 0.11039993f, 0.11757434f, 0.12521498f, 0.13335215f,
   /* 224 */ 0.14201813f, 0.15124727f, 0.16107617f, 0.17154380f,
   /* 228 */ 0.18269168f, 0.19456402f, 0.20720788f, 0.22067342f,
   /* 232 */ 0.23501402f, 0.25028656f, 0.26655159f, 0.28387361f,
   /* 236 */ 0.30232132f, 0.32196786f, 0.34289114f, 0.36517414f,
   /* 240 */ 0.38890521f, 0.41417847f, 0.44109412f, 0.46975890f,
   /* 244 */ 0.50028648f, 0.53279791f, 0.56742212f, 0.60429640f,
   /* 248 */ 0.64356699f, 0.68538959f, 0.72993007f, 0.77736504f,
   /* 252 */ 0.82788260f, 0.88168307f, 0.9389798f, 1.0f
};
1957
1958
1959 // @OPTIMIZE: if you want to replace this bresenham line-drawing routine,
1960 // note that you must produce bit-identical output to decode correctly;
1961 // this specific sequence of operations is specified in the spec (it's
1962 // drawing integer-quantized frequency-space lines that the encoder
1963 // expects to be exactly the same)
1964 // ... also, isn't the whole point of Bresenham's algorithm to NOT
1965 // have to divide in the setup? sigh.
// LINE_OP(a,b) applies one floor-curve value b to the output slot a:
// it multiplies a by b by default, and overwrites a with b when
// STB_VORBIS_NO_DEFER_FLOOR is defined.
// Arguments and the whole expansion are parenthesized so the macro behaves
// as a single expression regardless of what is passed in.
#ifndef STB_VORBIS_NO_DEFER_FLOOR
#define LINE_OP(a,b)   ((a) *= (b))
#else
#define LINE_OP(a,b)   ((a) = (b))
#endif

// Optional table for the small divisions done when setting up a line:
// draw_line indexes it as integer_divide_table[ady][adx] when
// ady < DIVTAB_NUMER and adx < DIVTAB_DENOM.
#ifdef STB_VORBIS_DIVIDE_TABLE
#define DIVTAB_NUMER   32
#define DIVTAB_DENOM   64
int8 integer_divide_table[DIVTAB_NUMER][DIVTAB_DENOM]; // 2KB
#endif
1977
1978 static __forceinline void draw_line(float *output, int x0, int y0, int x1, int y1, int n)
1979 {
1980 int dy = y1 - y0;
1981 int adx = x1 - x0;
1982 int ady = abs(dy);
1983 int base;
1984 int x=x0,y=y0;
1985 int err = 0;
1986 int sy;
1987
1988 #ifdef STB_VORBIS_DIVIDE_TABLE
1989 if (adx < DIVTAB_DENOM && ady < DIVTAB_NUMER) {
1990 if (dy < 0) {
1991 base = -integer_divide_table[ady][adx];
1992 sy = base-1;
1993 } else {
1994 base = integer_divide_table[ady][adx];
1995 sy = base+1;
1996 }
1997 } else {
1998 base = dy / adx;
1999 if (dy < 0)
2000 sy = base - 1;
2001 else
2002 sy = base+1;
2003 }
2004 #else
2005 base = dy / adx;
2006 if (dy < 0)
2007 sy = base - 1;
2008 else
2009 sy = base+1;
2010 #endif
2011 ady -= abs(base) * adx;
2012 if (x1 > n) x1 = n;
2013 if (x < x1) {
2014 LINE_OP(output[x], inverse_db_table[y]);
2015 for (++x; x < x1; ++x) {
2016 err += ady;
2017 if (err >= adx) {
2018 err -= adx;
2019 y += sy;
2020 } else
2021 y += base;
2022 LINE_OP(output[x], inverse_db_table[y]);
2023 }
2024 }
2025 }
2026
2027 static int residue_decode(vorb *f, Codebook *book, float *target, int offset, int n, int rtype)
2028 {
2029 int k;
2030 if (rtype == 0) {
2031 int step = n / book->dimensions;
2032 for (k=0; k < step; ++k)
2033 if (!codebook_decode_step(f, book, target+offset+k, n-offset-k, step))
2034 return FALSE;
2035 } else {
2036 for (k=0; k < n; ) {
2037 if (!codebook_decode(f, book, target+offset, n-k))
2038 return FALSE;
2039 k += book->dimensions;
2040 offset += book->dimensions;
2041 }
2042 }
2043 return TRUE;
2044 }
2045
2046 static void decode_residue(vorb *f, float *residue_buffers[], int ch, int n, int rn, uint8 *do_not_decode)
2047 {
2048 int i,j,pass;
2049 Residue *r = f->residue_config + rn;
2050 int rtype = f->residue_types[rn];
2051 int c = r->classbook;
2052 int classwords = f->codebooks[c].dimensions;
2053 int n_read = r->end - r->begin;
2054 int part_read = n_read / r->part_size;
2055 int temp_alloc_point = temp_alloc_save(f);
2056 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2057 uint8 ***part_classdata = (uint8 ***) temp_block_array(f,f->channels, part_read * sizeof(**part_classdata));
2058 #else
2059 int **classifications = (int **) temp_block_array(f,f->channels, part_read * sizeof(**classifications));
2060 #endif
2061
2062 CHECK(f);
2063
2064 for (i=0; i < ch; ++i)
2065 if (!do_not_decode[i])
2066 memset(residue_buffers[i], 0, sizeof(float) * n);
2067
2068 if (rtype == 2 && ch != 1) {
2069 for (j=0; j < ch; ++j)
2070 if (!do_not_decode[j])
2071 break;
2072 if (j == ch)
2073 goto done;
2074
2075 for (pass=0; pass < 8; ++pass) {
2076 int pcount = 0, class_set = 0;
2077 if (ch == 2) {
2078 while (pcount < part_read) {
2079 int z = r->begin + pcount*r->part_size;
2080 int c_inter = (z & 1), p_inter = z>>1;
2081 if (pass == 0) {
2082 Codebook *c = f->codebooks+r->classbook;
2083 int q;
2084 DECODE(q,f,c);
2085 if (q == EOP) goto done;
2086 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2087 part_classdata[0][class_set] = r->classdata[q];
2088 #else
2089 for (i=classwords-1; i >= 0; --i) {
2090 classifications[0][i+pcount] = q % r->classifications;
2091 q /= r->classifications;
2092 }
2093 #endif
2094 }
2095 for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
2096 int z = r->begin + pcount*r->part_size;
2097 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2098 int c = part_classdata[0][class_set][i];
2099 #else
2100 int c = classifications[0][pcount];
2101 #endif
2102 int b = r->residue_books[c][pass];
2103 if (b >= 0) {
2104 Codebook *book = f->codebooks + b;
2105 #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
2106 if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
2107 goto done;
2108 #else
2109 // saves 1%
2110 if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
2111 goto done;
2112 #endif
2113 } else {
2114 z += r->part_size;
2115 c_inter = z & 1;
2116 p_inter = z >> 1;
2117 }
2118 }
2119 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2120 ++class_set;
2121 #endif
2122 }
2123 } else if (ch == 1) {
2124 while (pcount < part_read) {
2125 int z = r->begin + pcount*r->part_size;
2126 int c_inter = 0, p_inter = z;
2127 if (pass == 0) {
2128 Codebook *c = f->codebooks+r->classbook;
2129 int q;
2130 DECODE(q,f,c);
2131 if (q == EOP) goto done;
2132 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2133 part_classdata[0][class_set] = r->classdata[q];
2134 #else
2135 for (i=classwords-1; i >= 0; --i) {
2136 classifications[0][i+pcount] = q % r->classifications;
2137 q /= r->classifications;
2138 }
2139 #endif
2140 }
2141 for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
2142 int z = r->begin + pcount*r->part_size;
2143 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2144 int c = part_classdata[0][class_set][i];
2145 #else
2146 int c = classifications[0][pcount];
2147 #endif
2148 int b = r->residue_books[c][pass];
2149 if (b >= 0) {
2150 Codebook *book = f->codebooks + b;
2151 if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
2152 goto done;
2153 } else {
2154 z += r->part_size;
2155 c_inter = 0;
2156 p_inter = z;
2157 }
2158 }
2159 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2160 ++class_set;
2161 #endif
2162 }
2163 } else {
2164 while (pcount < part_read) {
2165 int z = r->begin + pcount*r->part_size;
2166 int c_inter = z % ch, p_inter = z/ch;
2167 if (pass == 0) {
2168 Codebook *c = f->codebooks+r->classbook;
2169 int q;
2170 DECODE(q,f,c);
2171 if (q == EOP) goto done;
2172 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2173 part_classdata[0][class_set] = r->classdata[q];
2174 #else
2175 for (i=classwords-1; i >= 0; --i) {
2176 classifications[0][i+pcount] = q % r->classifications;
2177 q /= r->classifications;
2178 }
2179 #endif
2180 }
2181 for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
2182 int z = r->begin + pcount*r->part_size;
2183 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2184 int c = part_classdata[0][class_set][i];
2185 #else
2186 int c = classifications[0][pcount];
2187 #endif
2188 int b = r->residue_books[c][pass];
2189 if (b >= 0) {
2190 Codebook *book = f->codebooks + b;
2191 if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
2192 goto done;
2193 } else {
2194 z += r->part_size;
2195 c_inter = z % ch;
2196 p_inter = z / ch;
2197 }
2198 }
2199 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2200 ++class_set;
2201 #endif
2202 }
2203 }
2204 }
2205 goto done;
2206 }
2207 CHECK(f);
2208
2209 for (pass=0; pass < 8; ++pass) {
2210 int pcount = 0, class_set=0;
2211 while (pcount < part_read) {
2212 if (pass == 0) {
2213 for (j=0; j < ch; ++j) {
2214 if (!do_not_decode[j]) {
2215 Codebook *c = f->codebooks+r->classbook;
2216 int temp;
2217 DECODE(temp,f,c);
2218 if (temp == EOP) goto done;
2219 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2220 part_classdata[j][class_set] = r->classdata[temp];
2221 #else
2222 for (i=classwords-1; i >= 0; --i) {
2223 classifications[j][i+pcount] = temp % r->classifications;
2224 temp /= r->classifications;
2225 }
2226 #endif
2227 }
2228 }
2229 }
2230 for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
2231 for (j=0; j < ch; ++j) {
2232 if (!do_not_decode[j]) {
2233 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2234 int c = part_classdata[j][class_set][i];
2235 #else
2236 int c = classifications[j][pcount];
2237 #endif
2238 int b = r->residue_books[c][pass];
2239 if (b >= 0) {
2240 float *target = residue_buffers[j];
2241 int offset = r->begin + pcount * r->part_size;
2242 int n = r->part_size;
2243 Codebook *book = f->codebooks + b;
2244 if (!residue_decode(f, book, target, offset, n, rtype))
2245 goto done;
2246 }
2247 }
2248 }
2249 }
2250 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2251 ++class_set;
2252 #endif
2253 }
2254 }
2255 done:
2256 CHECK(f);
2257 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2258 temp_free(f,part_classdata);
2259 #else
2260 temp_free(f,classifications);
2261 #endif
2262 temp_alloc_restore(f,temp_alloc_point);
2263 }
2264
2265
// Reference implementations of the inverse MDCT, kept for debugging only.
// All three variants below are compiled out (#if 0 / #elif 0 / #elif 0);
// to use one, change exactly one of the conditions to 1.
//   variant 1: direct O(n^2) evaluation, calling cos() for every term
//   variant 2: same sum, with cosines precomputed into mcos[16384]
//              (the fill loop runs to 4*n, so n must be <= 4096)
//   variant 3: expressed through a slow DCT-IV on n/2 points
//              (mcos[16384] filled to 8*n and x[2048], so the DCT-IV size
//              must be <= 2048)
2266 #if 0
2267 // slow way for debugging
2268 void inverse_mdct_slow(float *buffer, int n)
2269 {
2270 int i,j;
2271 int n2 = n >> 1;
2272 float *x = (float *) malloc(sizeof(*x) * n2);
2273 memcpy(x, buffer, sizeof(*x) * n2);
2274 for (i=0; i < n; ++i) {
2275 float acc = 0;
// note: the body of this brace-less for loop is the single uncommented
// "acc +=" statement below; the lines in between are alternatives that
// were tried and left commented out
2276 for (j=0; j < n2; ++j)
2277 // formula from paper:
2278 //acc += n/4.0f * x[j] * (float) cos(M_PI / 2 / n * (2 * i + 1 + n/2.0)*(2*j+1));
2279 // formula from wikipedia
2280 //acc += 2.0f / n2 * x[j] * (float) cos(M_PI/n2 * (i + 0.5 + n2/2)*(j + 0.5));
2281 // these are equivalent, except the formula from the paper inverts the multiplier!
2282 // however, what actually works is NO MULTIPLIER!?!
2283 //acc += 64 * 2.0f / n2 * x[j] * (float) cos(M_PI/n2 * (i + 0.5 + n2/2)*(j + 0.5));
2284 acc += x[j] * (float) cos(M_PI / 2 / n * (2 * i + 1 + n/2.0)*(2*j+1));
2285 buffer[i] = acc;
2286 }
2287 free(x);
2288 }
2289 #elif 0
2290 // same as above, but just barely able to run in real time on modern machines
2291 void inverse_mdct_slow(float *buffer, int n, vorb *f, int blocktype)
2292 {
2293 float mcos[16384];
2294 int i,j;
2295 int n2 = n >> 1, nmask = (n << 2) -1;
2296 float *x = (float *) malloc(sizeof(*x) * n2);
2297 memcpy(x, buffer, sizeof(*x) * n2);
// table of cos(pi/2 * i / n) for one full period (4*n entries); nmask wraps
// the product index back into that period
2298 for (i=0; i < 4*n; ++i)
2299 mcos[i] = (float) cos(M_PI / 2 * i / n);
2300
2301 for (i=0; i < n; ++i) {
2302 float acc = 0;
2303 for (j=0; j < n2; ++j)
2304 acc += x[j] * mcos[(2 * i + 1 + n2)*(2*j+1) & nmask];
2305 buffer[i] = acc;
2306 }
2307 free(x);
2308 }
2309 #elif 0
2310 // transform to use a slow dct-iv; this is STILL basically trivial,
2311 // but only requires half as many ops
2312 void dct_iv_slow(float *buffer, int n)
2313 {
2314 float mcos[16384];
2315 float x[2048];
2316 int i,j;
2317 int n2 = n >> 1, nmask = (n << 3) - 1;
2318 memcpy(x, buffer, sizeof(*x) * n);
2319 for (i=0; i < 8*n; ++i)
2320 mcos[i] = (float) cos(M_PI / 4 * i / n);
2321 for (i=0; i < n; ++i) {
2322 float acc = 0;
2323 for (j=0; j < n; ++j)
2324 acc += x[j] * mcos[((2 * i + 1)*(2*j+1)) & nmask];
2325 buffer[i] = acc;
2326 }
2327 }
2328
2329 void inverse_mdct_slow(float *buffer, int n, vorb *f, int blocktype)
2330 {
2331 int i, n4 = n >> 2, n2 = n >> 1, n3_4 = n - n4;
2332 float temp[4096];
2333
2334 memcpy(temp, buffer, n2 * sizeof(float));
2335 dct_iv_slow(temp, n2); // returns -c'-d, a-b'
2336
// unfold the n/2 DCT-IV outputs into the n time-domain samples
2337 for (i=0; i < n4 ; ++i) buffer[i] = temp[i+n4]; // a-b'
2338 for ( ; i < n3_4; ++i) buffer[i] = -temp[n3_4 - i - 1]; // b-a', c+d'
2339 for ( ; i < n ; ++i) buffer[i] = -temp[i - n3_4]; // c'+d
2340 }
2341 #endif
2342
// Optional shim that routes inverse_mdct to an external MDCT implementation.
// Disabled unless LIBVORBIS_MDCT is defined to a nonzero value before this
// point; the default set here is 0.
2343 #ifndef LIBVORBIS_MDCT
2344 #define LIBVORBIS_MDCT 0
2345 #endif
2346
2347 #if LIBVORBIS_MDCT
2348 // directly call the vorbis MDCT using an interface documented
2349 // by Jeff Roberts... useful for performance comparison
2350 typedef struct
2351 {
2352 int n;
2353 int log2n;
2354
2355 float *trig;
2356 int *bitrev;
2357
2358 float scale;
2359 } mdct_lookup;
2360
2361 extern void mdct_init(mdct_lookup *lookup, int n);
2362 extern void mdct_clear(mdct_lookup *l);
2363 extern void mdct_backward(mdct_lookup *init, float *in, float *out);
2364
// two cached lookups, one per block size seen so far; a slot with n == 0 is
// treated as unused
2365 mdct_lookup M1,M2;
2366
// In-place inverse MDCT of buffer via mdct_backward. f and blocktype are
// unused here; the lookup is chosen (or lazily initialised) by block size n.
2367 void inverse_mdct(float *buffer, int n, vorb *f, int blocktype)
2368 {
2369 mdct_lookup *M;
2370 if (M1.n == n) M = &M1;
2371 else if (M2.n == n) M = &M2;
2372 else if (M1.n == 0) { mdct_init(&M1, n); M = &M1; }
2373 else {
// both slots already hold other sizes: a third block size was requested.
// NOTE(review): "__asm int 3" is compiler-specific inline assembly (looks
// like an x86 breakpoint used as a debug trap) — confirm before enabling
// this path on other toolchains.
2374 if (M2.n) __asm int 3;
2375 mdct_init(&M2, n);
2376 M = &M2;
2377 }
2378
2379 mdct_backward(M, buffer, buffer);
2380 }
2381 #endif
2382
2383
2384 // the following were split out into separate functions while optimizing;
2385 // they could be pushed back up but eh. __forceinline showed no change;
2386 // they're probably already being inlined.
// Butterfly pass for iteration 0 of IMDCT step 3.
//
// Works downward through two runs of e: the upper run ends at e[i_off] and
// the lower run ends at e[i_off + k_off]. For each (even, odd) pair the
// upper run receives the sum and the lower run receives the difference
// rotated by the twiddle pair (A[0], A[1]); A advances by 8 floats after
// every pair. n must be a multiple of 4; n/4 groups of four pairs
// (8 floats per run) are processed.
static void imdct_step3_iter0_loop(int n, float *e, int i_off, int k_off, float *A)
{
   float *hi = e + i_off;
   float *lo = hi + k_off;
   int groups, j;

   assert((n & 3) == 0);
   for (groups = n >> 2; groups > 0; --groups) {
      // four butterflies per group, at offsets 0/-1, -2/-3, -4/-5, -6/-7
      for (j = 0; j < 8; j += 2) {
         float d_even = hi[-j]   - lo[-j];
         float d_odd  = hi[-j-1] - lo[-j-1];
         hi[-j]   += lo[-j];
         hi[-j-1] += lo[-j-1];
         lo[-j]   = d_even * A[0] - d_odd  * A[1];
         lo[-j-1] = d_odd  * A[0] + d_even * A[1];
         A += 8;
      }
      hi -= 8;
      lo -= 8;
   }
}
2431
// Butterfly pass for IMDCT step 3 with the twiddle factor changing on every
// pair.
//
// Walks downward through two runs of e, the upper ending at e[d0] and the
// lower at e[d0 + k_off]. Each (even, odd) pair gets sum -> upper run and
// twiddled difference -> lower run, using (A[0], A[1]); A then advances by
// k1 floats. lim/4 groups of four pairs are processed.
static void imdct_step3_inner_r_loop(int lim, float *e, int d0, int k_off, float *A, int k1)
{
   float *hi = e + d0;
   float *lo = hi + k_off;
   int groups, j;

   for (groups = lim >> 2; groups > 0; --groups) {
      // four butterflies per group, at offsets 0/-1, -2/-3, -4/-5, -6/-7
      for (j = 0; j < 8; j += 2) {
         float d_even = hi[-j]   - lo[-j];
         float d_odd  = hi[-j-1] - lo[-j-1];
         hi[-j]   += lo[-j];
         hi[-j-1] += lo[-j-1];
         lo[-j]   = d_even * A[0] - d_odd  * A[1];
         lo[-j-1] = d_odd  * A[0] + d_even * A[1];
         A += k1;
      }
      hi -= 8;
      lo -= 8;
   }
}
2481
// Butterfly pass for IMDCT step 3 with the same four twiddle pairs reused on
// every iteration.
//
// The four pairs are read once, up front, from A[0], A[a_off], A[2*a_off]
// and A[3*a_off] (each followed by its +1 neighbour). Then n times: four
// butterflies are applied between the run ending at the upper pointer and
// the run ending k_off away from it (sum -> upper, twiddled difference ->
// lower), and both pointers move down by k0 floats. The upper pointer
// starts at e[i_off].
static void imdct_step3_inner_s_loop(int n, float *e, int i_off, int k_off, float *A, int a_off, int k0)
{
   float tw[8];
   float *hi = e + i_off;
   float *lo = hi + k_off;
   int count, p;

   // twiddles are captured before any element of e is modified
   tw[0] = A[0];
   tw[1] = A[0+1];
   tw[2] = A[0+a_off];
   tw[3] = A[0+a_off+1];
   tw[4] = A[0+a_off*2+0];
   tw[5] = A[0+a_off*2+1];
   tw[6] = A[0+a_off*3+0];
   tw[7] = A[0+a_off*3+1];

   for (count = n; count > 0; --count) {
      for (p = 0; p < 4; ++p) {
         int   j      = 2*p;
         float d_even = hi[-j]   - lo[-j];
         float d_odd  = hi[-j-1] - lo[-j-1];
         hi[-j]   = hi[-j]   + lo[-j];
         hi[-j-1] = hi[-j-1] + lo[-j-1];
         lo[-j]   = d_even * tw[j] - d_odd  * tw[j+1];
         lo[-j-1] = d_odd  * tw[j] + d_even * tw[j+1];
      }
      hi -= k0;
      lo -= k0;
   }
}
2532
2533 static __forceinline void iter_54(float *z)
2534 {
2535 float k00,k11,k22,k33;
2536 float y0,y1,y2,y3;
2537
2538 k00 = z[ 0] - z[-4];
2539 y0 = z[ 0] + z[-4];
2540 y2 = z[-2] + z[-6];
2541 k22 = z[-2] - z[-6];
2542
2543 z[-0] = y0 + y2; // z0 + z4 + z2 + z6
2544 z[-2] = y0 - y2; // z0 + z4 - z2 - z6
2545
2546 // done with y0,y2
2547
2548 k33 = z[-3] - z[-7];
2549
2550 z[-4] = k00 + k33; // z0 - z4 + z3 - z7
2551 z[-6] = k00 - k33; // z0 - z4 - z3 + z7
2552
2553 // done with k33
2554
2555 k11 = z[-1] - z[-5];
2556 y1 = z[-1] + z[-5];
2557 y3 = z[-3] + z[-7];
2558
2559 z[-1] = y1 + y3; // z1 + z5 + z3 + z7
2560 z[-3] = y1 - y3; // z1 + z5 - z3 - z7
2561 z[-5] = k11 - k22; // z1 - z5 + z2 - z6
2562 z[-7] = k11 + k22; // z1 - z5 - z2 + z6
2563 }
2564
// Last three butterfly iterations of IMDCT step 3, fused.
//
// Starting with z at e[i_off], processes n chunks of 16 floats, moving down
// by 16 each time. Within a chunk, the first stage pairs z[-k] with z[-k-8];
// only one twiddle value, A[base_n >> 3], is needed, and the remaining
// rotations reduce to copies and swaps. The two 8-float halves are then
// finished by iter_54.
static void imdct_step3_inner_s_loop_ld654(int n, float *e, int i_off, float *A, int base_n)
{
   int a_off = base_n >> 3;
   float w = A[0+a_off];
   float *z = e + i_off;
   float *stop = z - 16 * n;

   for (; z > stop; z -= 16) {
      float p, q;

      // pair 0/1 with 8/9: difference stored unrotated
      p = z[-0] - z[-8];
      q = z[-1] - z[-9];
      z[-0] += z[-8];
      z[-1] += z[-9];
      z[-8] = p;
      z[-9] = q;

      // pair 2/3 with 10/11: difference scaled by w
      p = z[ -2] - z[-10];
      q = z[ -3] - z[-11];
      z[ -2] += z[-10];
      z[ -3] += z[-11];
      z[-10] = (p+q) * w;
      z[-11] = (q-p) * w;

      // pair 4/5 with 12/13: first difference taken reversed to avoid a
      // unary negation, then the two are swapped on store
      p = z[-12] - z[ -4];
      q = z[ -5] - z[-13];
      z[ -4] += z[-12];
      z[ -5] += z[-13];
      z[-12] = q;
      z[-13] = p;

      // pair 6/7 with 14/15: first difference reversed, scaled by w
      p = z[-14] - z[ -6];
      q = z[ -7] - z[-15];
      z[ -6] += z[-14];
      z[ -7] += z[-15];
      z[-14] = (p+q) * w;
      z[-15] = (p-q) * w;

      iter_54(z);
      iter_54(z-8);
   }
}
2608
// In-place inverse MDCT of one block.
//
//   buffer    - n floats. The spectral input is read from buffer[0..n/2-1]
//               (step 0 below); buffer is then reused as working storage,
//               and the final step writes all n output values into it.
//   n         - block length; n/2, n/4 and n/8 are derived by shifting
//   f         - decoder state: provides the per-blocktype tables f->A, f->B,
//               f->C and f->bit_reverse, and the temp allocator used for a
//               scratch buffer of n/2 floats
//   blocktype - index selecting which set of those tables to use
//
// NOTE(review): the tables are assumed to have been built for this n at
// setup time — that code is outside this function; confirm against caller.
2609 static void inverse_mdct(float *buffer, int n, vorb *f, int blocktype)
2610 {
2611 int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3, l;
2612 int ld;
2613 // @OPTIMIZE: reduce register pressure by using fewer variables?
2614 int save_point = temp_alloc_save(f);
2615 float *buf2 = (float *) temp_alloc(f, n2 * sizeof(*buf2));
2616 float *u=NULL,*v=NULL;
2617 // twiddle factors
2618 float *A = f->A[blocktype];
2619
2620 // IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio"
2621 // See notes about bugs in that paper in less-optimal implementation 'inverse_mdct_naive' after this function.
2622
2623 // kernel from paper
2624
2625
2626 // merged:
2627 // copy and reflect spectral data
2628 // step 0
2629
2630 // note that it turns out that the items added together during
2631 // this step are, in fact, being added to themselves (as reflected
2632 // by step 0). inexplicable inefficiency! this became obvious
2633 // once I combined the passes.
2634
2635 // so there's a missing 'times 2' here (for adding X to itself).
2636 // this propagates through linearly to the end, where the numbers
2637 // are 1/2 too small, and need to be compensated for.
2638
2639 {
2640 float *d,*e, *AA, *e_stop;
// d fills buf2 from the top down, two floats at a time
2641 d = &buf2[n2-2];
2642 AA = A;
2643 e = &buffer[0];
2644 e_stop = &buffer[n2];
// first half: read buffer forwards in steps of 4 (elements e[0], e[2])
2645 while (e != e_stop) {
2646 d[1] = (e[0] * AA[0] - e[2]*AA[1]);
2647 d[0] = (e[0] * AA[1] + e[2]*AA[0]);
2648 d -= 2;
2649 AA += 2;
2650 e += 4;
2651 }
2652
// second half: read buffer backwards from n2-3, negated (the "reflect" part)
2653 e = &buffer[n2-3];
2654 while (d >= buf2) {
2655 d[1] = (-e[2] * AA[0] - -e[0]*AA[1]);
2656 d[0] = (-e[2] * AA[1] + -e[0]*AA[0]);
2657 d -= 2;
2658 AA += 2;
2659 e -= 4;
2660 }
2661 }
2662
2663 // now we use symbolic names for these, so that we can
2664 // possibly swap their meaning as we change which operations
2665 // are in place
2666
2667 u = buffer;
2668 v = buf2;
2669
2670 // step 2 (paper output is w, now u)
2671 // this could be in place, but the data ends up in the wrong
2672 // place... _somebody_'s got to swap it, so this is nominated
2673 {
// AA walks the twiddle table downward, 8 floats per iteration, while the
// data pointers walk upward 4 floats per iteration
2674 float *AA = &A[n2-8];
2675 float *d0,*d1, *e0, *e1;
2676
2677 e0 = &v[n4];
2678 e1 = &v[0];
2679
2680 d0 = &u[n4];
2681 d1 = &u[0];
2682
2683 while (AA >= A) {
2684 float v40_20, v41_21;
2685
2686 v41_21 = e0[1] - e1[1];
2687 v40_20 = e0[0] - e1[0];
2688 d0[1] = e0[1] + e1[1];
2689 d0[0] = e0[0] + e1[0];
2690 d1[1] = v41_21*AA[4] - v40_20*AA[5];
2691 d1[0] = v40_20*AA[4] + v41_21*AA[5];
2692
2693 v41_21 = e0[3] - e1[3];
2694 v40_20 = e0[2] - e1[2];
2695 d0[3] = e0[3] + e1[3];
2696 d0[2] = e0[2] + e1[2];
2697 d1[3] = v41_21*AA[0] - v40_20*AA[1];
2698 d1[2] = v40_20*AA[0] + v41_21*AA[1];
2699
2700 AA -= 8;
2701
2702 d0 += 4;
2703 d1 += 4;
2704 e0 += 4;
2705 e1 += 4;
2706 }
2707 }
2708
2709 // step 3
2710 ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
2711
2712 // optimized step 3:
2713
2714 // the original step3 loop can be nested r inside s or s inside r;
2715 // it's written originally as s inside r, but this is dumb when r
2716 // iterates many times, and s few. So I have two copies of it and
2717 // switch between them halfway.
2718
2719 // this is iteration 0 of step 3
2720 imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*0, -(n >> 3), A);
2721 imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*1, -(n >> 3), A);
2722
2723 // this is iteration 1 of step 3
2724 imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*0, -(n >> 4), A, 16);
2725 imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*1, -(n >> 4), A, 16);
2726 imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*2, -(n >> 4), A, 16);
2727 imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*3, -(n >> 4), A, 16);
2728
// remaining iterations: the first loop uses the "r" form, the second
// switches to the "s" form, and the last three are fused in the ld654 call
2729 l=2;
2730 for (; l < (ld-3)>>1; ++l) {
2731 int k0 = n >> (l+2), k0_2 = k0>>1;
2732 int lim = 1 << (l+1);
2733 int i;
2734 for (i=0; i < lim; ++i)
2735 imdct_step3_inner_r_loop(n >> (l+4), u, n2-1 - k0*i, -k0_2, A, 1 << (l+3));
2736 }
2737
2738 for (; l < ld-6; ++l) {
2739 int k0 = n >> (l+2), k1 = 1 << (l+3), k0_2 = k0>>1;
2740 int rlim = n >> (l+6), r;
2741 int lim = 1 << (l+1);
2742 int i_off;
2743 float *A0 = A;
2744 i_off = n2-1;
2745 for (r=rlim; r > 0; --r) {
2746 imdct_step3_inner_s_loop(lim, u, i_off, -k0_2, A0, k1, k0);
2747 A0 += k1*4;
2748 i_off -= 8;
2749 }
2750 }
2751
2752 // iterations with count:
2753 // ld-6,-5,-4 all interleaved together
2754 // the big win comes from getting rid of needless flops
2755 // due to the constants on pass 5 & 4 being all 1 and 0;
2756 // combining them to be simultaneous to improve cache made little difference
2757 imdct_step3_inner_s_loop_ld654(n >> 5, u, n2-1, A, n);
2758
2759 // output is u
2760
2761 // step 4, 5, and 6
2762 // cannot be in-place because of step 5
2763 {
2764 uint16 *bitrev = f->bit_reverse[blocktype];
2765 // weirdly, I'd have thought reading sequentially and writing
2766 // erratically would have been better than vice-versa, but in
2767 // fact that's not what my testing showed. (That is, with
2768 // j = bitreverse(i), do you read i and write j, or read j and write i.)
2769
// writes v sequentially from the top of each half downward; reads u at the
// offsets stored in the bitrev table (two table entries per iteration)
2770 float *d0 = &v[n4-4];
2771 float *d1 = &v[n2-4];
2772 while (d0 >= v) {
2773 int k4;
2774
2775 k4 = bitrev[0];
2776 d1[3] = u[k4+0];
2777 d1[2] = u[k4+1];
2778 d0[3] = u[k4+2];
2779 d0[2] = u[k4+3];
2780
2781 k4 = bitrev[1];
2782 d1[1] = u[k4+0];
2783 d1[0] = u[k4+1];
2784 d0[1] = u[k4+2];
2785 d0[0] = u[k4+3];
2786
2787 d0 -= 4;
2788 d1 -= 4;
2789 bitrev += 2;
2790 }
2791 }
2792 // (paper output is u, now v)
2793
2794
2795 // data must be in buf2
2796 assert(v == buf2);
2797
2798 // step 7 (paper output is v, now v)
2799 // this is now in place
2800 {
2801 float *C = f->C[blocktype];
2802 float *d, *e;
2803
// d walks up from the start of v and e walks down from the end; they meet
// in the middle, each iteration combining 4 floats from either side
2804 d = v;
2805 e = v + n2 - 4;
2806
2807 while (d < e) {
2808 float a02,a11,b0,b1,b2,b3;
2809
2810 a02 = d[0] - e[2];
2811 a11 = d[1] + e[3];
2812
2813 b0 = C[1]*a02 + C[0]*a11;
2814 b1 = C[1]*a11 - C[0]*a02;
2815
2816 b2 = d[0] + e[ 2];
2817 b3 = d[1] - e[ 3];
2818
2819 d[0] = b2 + b0;
2820 d[1] = b3 + b1;
2821 e[2] = b2 - b0;
2822 e[3] = b1 - b3;
2823
2824 a02 = d[2] - e[0];
2825 a11 = d[3] + e[1];
2826
2827 b0 = C[3]*a02 + C[2]*a11;
2828 b1 = C[3]*a11 - C[2]*a02;
2829
2830 b2 = d[2] + e[ 0];
2831 b3 = d[3] - e[ 1];
2832
2833 d[2] = b2 + b0;
2834 d[3] = b3 + b1;
2835 e[0] = b2 - b0;
2836 e[1] = b1 - b3;
2837
2838 C += 4;
2839 d += 4;
2840 e -= 4;
2841 }
2842 }
2843
2844 // data must be in buf2
2845
2846
2847 // step 8+decode (paper output is X, now buffer)
2848 // this generates pairs of data a la 8 and pushes them directly through
2849 // the decode kernel (pushing rather than pulling) to avoid having
2850 // to make another pass later
2851
2852 // this cannot POSSIBLY be in place, so we refer to the buffers directly
2853
2854 {
2855 float *d0,*d1,*d2,*d3;
2856
2857 float *B = f->B[blocktype] + n2 - 8;
2858 float *e = buf2 + n2 - 8;
// four write cursors cover all n outputs: d0 and d2 move upward from
// buffer[0] and buffer[n2], d1 and d3 move downward from buffer[n2-4] and
// buffer[n-4]; d1 receives the negation of what d0 receives, d3 the same
// value as d2
2859 d0 = &buffer[0];
2860 d1 = &buffer[n2-4];
2861 d2 = &buffer[n2];
2862 d3 = &buffer[n-4];
2863 while (e >= v) {
2864 float p0,p1,p2,p3;
2865
2866 p3 = e[6]*B[7] - e[7]*B[6];
2867 p2 = -e[6]*B[6] - e[7]*B[7];
2868
2869 d0[0] = p3;
2870 d1[3] = - p3;
2871 d2[0] = p2;
2872 d3[3] = p2;
2873
2874 p1 = e[4]*B[5] - e[5]*B[4];
2875 p0 = -e[4]*B[4] - e[5]*B[5];
2876
2877 d0[1] = p1;
2878 d1[2] = - p1;
2879 d2[1] = p0;
2880 d3[2] = p0;
2881
2882 p3 = e[2]*B[3] - e[3]*B[2];
2883 p2 = -e[2]*B[2] - e[3]*B[3];
2884
2885 d0[2] = p3;
2886 d1[1] = - p3;
2887 d2[2] = p2;
2888 d3[1] = p2;
2889
2890 p1 = e[0]*B[1] - e[1]*B[0];
2891 p0 = -e[0]*B[0] - e[1]*B[1];
2892
2893 d0[3] = p1;
2894 d1[0] = - p1;
2895 d2[3] = p0;
2896 d3[0] = p0;
2897
2898 B -= 8;
2899 e -= 8;
2900 d0 += 4;
2901 d2 += 4;
2902 d1 -= 4;
2903 d3 -= 4;
2904 }
2905 }
2906
// release the scratch buffer and roll the temp allocator back to where it
// was on entry
2907 temp_free(f,buf2);
2908 temp_alloc_restore(f,save_point);
2909 }
2910
#if 0
// Unoptimized reference inverse MDCT. The optimized routine above was derived
// from this; it is kept (disabled) as a starting point for re-optimizing.
//   buffer : n floats; the first n/2 are read as spectral input, and all n
//            are overwritten with the output
//   n      : transform size
void inverse_mdct_naive(float *buffer, int n)
{
   float A[1 << 12], B[1 << 12], C[1 << 11];
   // the work arrays are declared full-size, but the steps below only
   // touch them sparsely
   float u[1 << 13], X[1 << 13], v[1 << 13], w[1 << 13];
   float scale;
   int half = n >> 1, quarter = n >> 2, eighth = n >> 3;
   int three_quarters = n - quarter;
   int i, k, ld, stage;

   // twiddle factors
   for (k=0; k < quarter; ++k) {
      int k2 = k*2;
      A[k2  ] = (float)  cos(4*k*M_PI/n);
      A[k2+1] = (float) -sin(4*k*M_PI/n);
      B[k2  ] = (float)  cos((k2+1)*M_PI/n/2);
      B[k2+1] = (float)  sin((k2+1)*M_PI/n/2);
   }
   for (k=0; k < eighth; ++k) {
      int k2 = k*2;
      C[k2  ] = (float)  cos(2*(k2+1)*M_PI/n);
      C[k2+1] = (float) -sin(2*(k2+1)*M_PI/n);
   }

   // The kernel follows the IMDCT in "The use of multirate filter banks for
   // coding of high quality digital audio". The pseudocode there has bugs
   // (apparently from renaming arrays instead of showing how the real
   // implementation ping-pongs buffers), so a literal transcription fails
   // even for the corrected edition; deviations are tagged "paper bug".

   // copy the spectral data, then append its negated mirror image
   for (k=0; k < half; ++k)
      u[k] = buffer[k];
   for (k=half; k < n; ++k)
      u[k] = -buffer[n - k - 1];

   // step 1
   for (k=0; k < quarter; ++k) {
      int k2 = k*2, k4 = k*4;
      float d_outer = u[k4] - u[n-k4-1];
      float d_inner = u[k4+2] - u[n-k4-3];
      v[n-k4-1] = d_outer * A[k2]   - d_inner*A[k2+1];
      v[n-k4-3] = d_outer * A[k2+1] + d_inner*A[k2];
   }

   // step 2
   for (k=0; k < eighth; ++k) {
      int k4 = k*4;
      float hi3 = v[half+3+k4], lo3 = v[k4+3];
      float hi1 = v[half+1+k4], lo1 = v[k4+1];
      w[half+3+k4] = hi3 + lo3;
      w[half+1+k4] = hi1 + lo1;
      w[k4+3]      = (hi3 - lo3)*A[half-4-k4] - (hi1-lo1)*A[half-3-k4];
      w[k4+1]      = (hi1 - lo1)*A[half-4-k4] + (hi3-lo3)*A[half-3-k4];
   }

   // step 3
   ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
   for (stage=0; stage < ld-3; ++stage) {
      int k0 = n >> (stage+2), k1 = 1 << (stage+3);
      int rlim = n >> (stage+4);
      int s2lim = 1 << (stage+2);
      int r, s2;
      for (r=0; r < rlim; ++r) {
         int r4 = r*4;
         for (s2=0; s2 < s2lim; s2+=2) {
            int a1 = n-1-k0*s2-r4,     a3 = n-3-k0*s2-r4;
            int b1 = n-1-k0*(s2+1)-r4, b3 = n-3-k0*(s2+1)-r4;
            u[a1] = w[a1] + w[b1];
            u[a3] = w[a3] + w[b3];
            u[b1] = (w[a1] - w[b1]) * A[r*k1]
                  - (w[a3] - w[b3]) * A[r*k1+1];
            u[b3] = (w[a3] - w[b3]) * A[r*k1]
                  + (w[a1] - w[b1]) * A[r*k1+1];
         }
      }
      if (stage+1 < ld-3) {
         // paper bug: the paper leaves out the u/w ping-pong between stages
         memcpy(w, u, sizeof(u));
      }
   }

   // step 4
   for (i=0; i < eighth; ++i) {
      int j = bit_reverse(i) >> (32-ld+3);
      int i8, j8, t;
      assert(j < eighth);
      if (j < i)
         continue; // this pair was already exchanged when we visited j
      // paper bug: the original presumably swapped in place; since we copy
      // from u to v, a self-paired entry (i == j) must still be copied,
      // which the exchange below does as a degenerate case
      i8 = i << 3;
      j8 = j << 3;
      for (t=1; t < 8; t += 2) {
         v[j8+t] = u[i8+t];
         v[i8+t] = u[j8+t];
      }
   }

   // step 5
   for (k=0; k < half; ++k)
      w[k] = v[k*2+1];

   // step 6
   for (k=0; k < eighth; ++k) {
      int k2 = k*2, k4 = k*4;
      u[n-1-k2]              = w[k4];
      u[n-2-k2]              = w[k4+1];
      u[three_quarters-1-k2] = w[k4+2];
      u[three_quarters-2-k2] = w[k4+3];
   }

   // step 7
   for (k=0; k < eighth; ++k) {
      int k2 = k*2;
      float a = u[half+k2],   b = u[n-2-k2];
      float c = u[half+k2+1], d = u[n-2-k2+1];
      v[half + k2  ] = ( a + b + C[k2+1]*(a-b) + C[k2]*(c+d))/2;
      v[n-2 - k2   ] = ( a + b - C[k2+1]*(a-b) - C[k2]*(c+d))/2;
      v[half+1 + k2] = ( c - d + C[k2+1]*(c+d) - C[k2]*(a-b))/2;
      v[n-1 - k2   ] = (-c + d + C[k2+1]*(c+d) - C[k2]*(a-b))/2;
   }

   // step 8
   for (k=0; k < quarter; ++k) {
      int k2 = k*2;
      float re = v[k2+half], im = v[k2+1+half];
      X[k]        = re*B[k2  ] + im*B[k2+1];
      X[half-1-k] = re*B[k2+1] - im*B[k2  ];
   }

   // decode kernel to output
   // The scale was found experimentally (first by working out what made
   // inverse_mdct_slow work, then matching it here); probably the vorbis
   // encoder premultiplies by n or n/2 to save the decoder the work.
   scale = 0.5; // theoretically would be n4

   // [[[ note! the scale of 0.5 is compensated for by the B[] in the current
   //     code, so this needs the "old" B values to behave correctly, or else
   //     scale must be set to 1.0 ]]]
   for (i=0; i < quarter; ++i)
      buffer[i] =  scale * X[i+quarter];
   for (i=quarter; i < three_quarters; ++i)
      buffer[i] = -scale * X[three_quarters - i - 1];
   for (i=three_quarters; i < n; ++i)
      buffer[i] = -scale * X[i - three_quarters];
}
#endif
3038
3039 static float *get_window(vorb *f, int len)
3040 {
3041 len <<= 1;
3042 if (len == f->blocksize_0) return f->window[0];
3043 if (len == f->blocksize_1) return f->window[1];
3044 assert(0);
3045 return NULL;
3046 }
3047
3048 #ifndef STB_VORBIS_NO_DEFER_FLOOR
3049 typedef int16 YTYPE;
3050 #else
3051 typedef int YTYPE;
3052 #endif
3053 static int do_floor(vorb *f, Mapping *map, int i, int n, float *target, YTYPE *finalY, uint8 *step2_flag)
3054 {
3055 int n2 = n >> 1;
3056 int s = map->chan[i].mux, floor;
3057 floor = map->submap_floor[s];
3058 if (f->floor_types[floor] == 0) {
3059 return error(f, VORBIS_invalid_stream);
3060 } else {
3061 Floor1 *g = &f->floor_config[floor].floor1;
3062 int j,q;
3063 int lx = 0, ly = finalY[0] * g->floor1_multiplier;
3064 for (q=1; q < g->values; ++q) {
3065 j = g->sorted_order[q];
3066 #ifndef STB_VORBIS_NO_DEFER_FLOOR
3067 if (finalY[j] >= 0)
3068 #else
3069 if (step2_flag[j])
3070 #endif
3071 {
3072 int hy = finalY[j] * g->floor1_multiplier;
3073 int hx = g->Xlist[j];
3074 if (lx != hx)
3075 draw_line(target, lx,ly, hx,hy, n2);
3076 CHECK(f);
3077 lx = hx, ly = hy;
3078 }
3079 }
3080 if (lx < n2) {
3081 // optimization of: draw_line(target, lx,ly, n,ly, n2);
3082 for (j=lx; j < n2; ++j)
3083 LINE_OP(target[j], inverse_db_table[ly]);
3084 CHECK(f);
3085 }
3086 }
3087 return TRUE;
3088 }
3089
3090 // The meaning of "left" and "right"
3091 //
3092 // For a given frame:
3093 // we compute samples from 0..n
3094 // window_center is n/2
3095 // we'll window and mix the samples from left_start to left_end with data from the previous frame
3096 // all of the samples from left_end to right_start can be output without mixing; however,
3097 // this interval is 0-length except when transitioning between short and long frames
3098 // all of the samples from right_start to right_end need to be mixed with the next frame,
3099 // which we don't have, so those get saved in a buffer
3100 // frame N's right_end-right_start, the number of samples to mix with the next frame,
3101 // has to be the same as frame N+1's left_end-left_start (which they are by
3102 // construction)
3103
3104 static int vorbis_decode_initial(vorb *f, int *p_left_start, int *p_left_end, int *p_right_start, int *p_right_end, int *mode)
3105 {
3106 Mode *m;
3107 int i, n, prev, next, window_center;
3108 f->channel_buffer_start = f->channel_buffer_end = 0;
3109
3110 retry:
3111 if (f->eof) return FALSE;
3112 if (!maybe_start_packet(f))
3113 return FALSE;
3114 // check packet type
3115 if (get_bits(f,1) != 0) {
3116 if (IS_PUSH_MODE(f))
3117 return error(f,VORBIS_bad_packet_type);
3118 while (EOP != get8_packet(f));
3119 goto retry;
3120 }
3121
3122 if (f->alloc.alloc_buffer)
3123 assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
3124
3125 i = get_bits(f, ilog(f->mode_count-1));
3126 if (i == EOP) return FALSE;
3127 if (i >= f->mode_count) return FALSE;
3128 *mode = i;
3129 m = f->mode_config + i;
3130 if (m->blockflag) {
3131 n = f->blocksize_1;
3132 prev = get_bits(f,1);
3133 next = get_bits(f,1);
3134 } else {
3135 prev = next = 0;
3136 n = f->blocksize_0;
3137 }
3138
3139 // WINDOWING
3140
3141 window_center = n >> 1;
3142 if (m->blockflag && !prev) {
3143 *p_left_start = (n - f->blocksize_0) >> 2;
3144 *p_left_end = (n + f->blocksize_0) >> 2;
3145 } else {
3146 *p_left_start = 0;
3147 *p_left_end = window_center;
3148 }
3149 if (m->blockflag && !next) {
3150 *p_right_start = (n*3 - f->blocksize_0) >> 2;
3151 *p_right_end = (n*3 + f->blocksize_0) >> 2;
3152 } else {
3153 *p_right_start = window_center;
3154 *p_right_end = n;
3155 }
3156
3157 return TRUE;
3158 }
3159
3160 static int vorbis_decode_packet_rest(vorb *f, int *len, Mode *m, int left_start, int left_end, int right_start, int right_end, int *p_left)
3161 {
3162 Mapping *map;
3163 int i,j,k,n,n2;
3164 int zero_channel[256];
3165 int really_zero_channel[256];
3166
3167 // WINDOWING
3168
3169 n = f->blocksize[m->blockflag];
3170 map = &f->mapping[m->mapping];
3171
3172 // FLOORS
3173 n2 = n >> 1;
3174
3175 CHECK(f);
3176
3177 for (i=0; i < f->channels; ++i) {
3178 int s = map->chan[i].mux, floor;
3179 zero_channel[i] = FALSE;
3180 floor = map->submap_floor[s];
3181 if (f->floor_types[floor] == 0) {
3182 return error(f, VORBIS_invalid_stream);
3183 } else {
3184 Floor1 *g = &f->floor_config[floor].floor1;
3185 if (get_bits(f, 1)) {
3186 short *finalY;
3187 uint8 step2_flag[256];
3188 static int range_list[4] = { 256, 128, 86, 64 };
3189 int range = range_list[g->floor1_multiplier-1];
3190 int offset = 2;
3191 finalY = f->finalY[i];
3192 finalY[0] = get_bits(f, ilog(range)-1);
3193 finalY[1] = get_bits(f, ilog(range)-1);
3194 for (j=0; j < g->partitions; ++j) {
3195 int pclass = g->partition_class_list[j];
3196 int cdim = g->class_dimensions[pclass];
3197 int cbits = g->class_subclasses[pclass];
3198 int csub = (1 << cbits)-1;
3199 int cval = 0;
3200 if (cbits) {
3201 Codebook *c = f->codebooks + g->class_masterbooks[pclass];
3202 DECODE(cval,f,c);
3203 }
3204 for (k=0; k < cdim; ++k) {
3205 int book = g->subclass_books[pclass][cval & csub];
3206 cval = cval >> cbits;
3207 if (book >= 0) {
3208 int temp;
3209 Codebook *c = f->codebooks + book;
3210 DECODE(temp,f,c);
3211 finalY[offset++] = temp;
3212 } else
3213 finalY[offset++] = 0;
3214 }
3215 }
3216 if (f->valid_bits == INVALID_BITS) goto error; // behavior according to spec
3217 step2_flag[0] = step2_flag[1] = 1;
3218 for (j=2; j < g->values; ++j) {
3219 int low, high, pred, highroom, lowroom, room, val;
3220 low = g->neighbors[j][0];
3221 high = g->neighbors[j][1];
3222 //neighbors(g->Xlist, j, &low, &high);
3223 pred = predict_point(g->Xlist[j], g->Xlist[low], g->Xlist[high], finalY[low], finalY[high]);
3224 val = finalY[j];
3225 highroom = range - pred;
3226 lowroom = pred;
3227 if (highroom < lowroom)
3228 room = highroom * 2;
3229 else
3230 room = lowroom * 2;
3231 if (val) {
3232 step2_flag[low] = step2_flag[high] = 1;
3233 step2_flag[j] = 1;
3234 if (val >= room)
3235 if (highroom > lowroom)
3236 finalY[j] = val - lowroom + pred;
3237 else
3238 finalY[j] = pred - val + highroom - 1;
3239 else
3240 if (val & 1)
3241 finalY[j] = pred - ((val+1)>>1);
3242 else
3243 finalY[j] = pred + (val>>1);
3244 } else {
3245 step2_flag[j] = 0;
3246 finalY[j] = pred;
3247 }
3248 }
3249
3250 #ifdef STB_VORBIS_NO_DEFER_FLOOR
3251 do_floor(f, map, i, n, f->floor_buffers[i], finalY, step2_flag);
3252 #else
3253 // defer final floor computation until _after_ residue
3254 for (j=0; j < g->values; ++j) {
3255 if (!step2_flag[j])
3256 finalY[j] = -1;
3257 }
3258 #endif
3259 } else {
3260 error:
3261 zero_channel[i] = TRUE;
3262 }
3263 // So we just defer everything else to later
3264
3265 // at this point we've decoded the floor into buffer
3266 }
3267 }
3268 CHECK(f);
3269 // at this point we've decoded all floors
3270
3271 if (f->alloc.alloc_buffer)
3272 assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
3273
3274 // re-enable coupled channels if necessary
3275 memcpy(really_zero_channel, zero_channel, sizeof(really_zero_channel[0]) * f->channels);
3276 for (i=0; i < map->coupling_steps; ++i)
3277 if (!zero_channel[map->chan[i].magnitude] || !zero_channel[map->chan[i].angle]) {
3278 zero_channel[map->chan[i].magnitude] = zero_channel[map->chan[i].angle] = FALSE;
3279 }
3280
3281 CHECK(f);
3282 // RESIDUE DECODE
3283 for (i=0; i < map->submaps; ++i) {
3284 float *residue_buffers[STB_VORBIS_MAX_CHANNELS];
3285 int r;
3286 uint8 do_not_decode[256];
3287 int ch = 0;
3288 for (j=0; j < f->channels; ++j) {
3289 if (map->chan[j].mux == i) {
3290 if (zero_channel[j]) {
3291 do_not_decode[ch] = TRUE;
3292 residue_buffers[ch] = NULL;
3293 } else {
3294 do_not_decode[ch] = FALSE;
3295 residue_buffers[ch] = f->channel_buffers[j];
3296 }
3297 ++ch;
3298 }
3299 }
3300 r = map->submap_residue[i];
3301 decode_residue(f, residue_buffers, ch, n2, r, do_not_decode);
3302 }
3303
3304 if (f->alloc.alloc_buffer)
3305 assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
3306 CHECK(f);
3307
3308 // INVERSE COUPLING
3309 for (i = map->coupling_steps-1; i >= 0; --i) {
3310 int n2 = n >> 1;
3311 float *m = f->channel_buffers[map->chan[i].magnitude];
3312 float *a = f->channel_buffers[map->chan[i].angle ];
3313 for (j=0; j < n2; ++j) {
3314 float a2,m2;
3315 if (m[j] > 0)
3316 if (a[j] > 0)
3317 m2 = m[j], a2 = m[j] - a[j];
3318 else
3319 a2 = m[j], m2 = m[j] + a[j];
3320 else
3321 if (a[j] > 0)
3322 m2 = m[j], a2 = m[j] + a[j];
3323 else
3324 a2 = m[j], m2 = m[j] - a[j];
3325 m[j] = m2;
3326 a[j] = a2;
3327 }
3328 }
3329 CHECK(f);
3330
3331 // finish decoding the floors
3332 #ifndef STB_VORBIS_NO_DEFER_FLOOR
3333 for (i=0; i < f->channels; ++i) {
3334 if (really_zero_channel[i]) {
3335 memset(f->channel_buffers[i], 0, sizeof(*f->channel_buffers[i]) * n2);
3336 } else {
3337 do_floor(f, map, i, n, f->channel_buffers[i], f->finalY[i], NULL);
3338 }
3339 }
3340 #else
3341 for (i=0; i < f->channels; ++i) {
3342 if (really_zero_channel[i]) {
3343 memset(f->channel_buffers[i], 0, sizeof(*f->channel_buffers[i]) * n2);
3344 } else {
3345 for (j=0; j < n2; ++j)
3346 f->channel_buffers[i][j] *= f->floor_buffers[i][j];
3347 }
3348 }
3349 #endif
3350
3351 // INVERSE MDCT
3352 CHECK(f);
3353 for (i=0; i < f->channels; ++i)
3354 inverse_mdct(f->channel_buffers[i], n, f, m->blockflag);
3355 CHECK(f);
3356
3357 // this shouldn't be necessary, unless we exited on an error
3358 // and want to flush to get to the next packet
3359 flush_packet(f);
3360
3361 if (f->first_decode) {
3362 // assume we start so first non-discarded sample is sample 0
3363 // this isn't to spec, but spec would require us to read ahead
3364 // and decode the size of all current frames--could be done,
3365 // but presumably it's not a commonly used feature
3366 f->current_loc = -n2; // start of first frame is positioned for discard
3367 // we might have to discard samples "from" the next frame too,
3368 // if we're lapping a large block then a small at the start?
3369 f->discard_samples_deferred = n - right_end;
3370 f->current_loc_valid = TRUE;
3371 f->first_decode = FALSE;
3372 } else if (f->discard_samples_deferred) {
3373 if (f->discard_samples_deferred >= right_start - left_start) {
3374 f->discard_samples_deferred -= (right_start - left_start);
3375 left_start = right_start;
3376 *p_left = left_start;
3377 } else {
3378 left_start += f->discard_samples_deferred;
3379 *p_left = left_start;
3380 f->discard_samples_deferred = 0;
3381 }
3382 } else if (f->previous_length == 0 && f->current_loc_valid) {
3383 // we're recovering from a seek... that means we're going to discard
3384 // the samples from this packet even though we know our position from
3385 // the last page header, so we need to update the position based on
3386 // the discarded samples here
3387 // but wait, the code below is going to add this in itself even
3388 // on a discard, so we don't need to do it here...
3389 }
3390
3391 // check if we have ogg information about the sample # for this packet
3392 if (f->last_seg_which == f->end_seg_with_known_loc) {
3393 // if we have a valid current loc, and this is final:
3394 if (f->current_loc_valid && (f->page_flag & PAGEFLAG_last_page)) {
3395 uint32 current_end = f->known_loc_for_packet - (n-right_end);
3396 // then let's infer the size of the (probably) short final frame
3397 if (current_end < f->current_loc + (right_end-left_start)) {
3398 if (current_end < f->current_loc) {
3399 // negative truncation, that's impossible!
3400 *len = 0;
3401 } else {
3402 *len = current_end - f->current_loc;
3403 }
3404 *len += left_start;
3405 if (*len > right_end) *len = right_end; // this should never happen
3406 f->current_loc += *len;
3407 return TRUE;
3408 }
3409 }
3410 // otherwise, just set our sample loc
3411 // guess that the ogg granule pos refers to the _middle_ of the
3412 // last frame?
3413 // set f->current_loc to the position of left_start
3414 f->current_loc = f->known_loc_for_packet - (n2-left_start);
3415 f->current_loc_valid = TRUE;
3416 }
3417 if (f->current_loc_valid)
3418 f->current_loc += (right_start - left_start);
3419
3420 if (f->alloc.alloc_buffer)
3421 assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
3422 *len = right_end; // ignore samples after the window goes to 0
3423 CHECK(f);
3424
3425 return TRUE;
3426 }
3427
3428 static int vorbis_decode_packet(vorb *f, int *len, int *p_left, int *p_right)
3429 {
3430 int mode, left_end, right_end;
3431 if (!vorbis_decode_initial(f, p_left, &left_end, p_right, &right_end, &mode)) return 0;
3432 return vorbis_decode_packet_rest(f, len, f->mode_config + mode, *p_left, left_end, *p_right, right_end, p_left);
3433 }
3434
3435 static int vorbis_finish_frame(stb_vorbis *f, int len, int left, int right)
3436 {
3437 int prev,i,j;
3438 // we use right&left (the start of the right- and left-window sin()-regions)
3439 // to determine how much to return, rather than inferring from the rules
3440 // (same result, clearer code); 'left' indicates where our sin() window
3441 // starts, therefore where the previous window's right edge starts, and
3442 // therefore where to start mixing from the previous buffer. 'right'
3443 // indicates where our sin() ending-window starts, therefore that's where
3444 // we start saving, and where our returned-data ends.
3445
3446 // mixin from previous window
3447 if (f->previous_length) {
3448 int i,j, n = f->previous_length;
3449 float *w = get_window(f, n);
3450 for (i=0; i < f->channels; ++i) {
3451 for (j=0; j < n; ++j)
3452 f->channel_buffers[i][left+j] =
3453 f->channel_buffers[i][left+j]*w[ j] +
3454 f->previous_window[i][ j]*w[n-1-j];
3455 }
3456 }
3457
3458 prev = f->previous_length;
3459
3460 // last half of this data becomes previous window
3461 f->previous_length = len - right;
3462
3463 // @OPTIMIZE: could avoid this copy by double-buffering the
3464 // output (flipping previous_window with channel_buffers), but
3465 // then previous_window would have to be 2x as large, and
3466 // channel_buffers couldn't be temp mem (although they're NOT
3467 // currently temp mem, they could be (unless we want to level
3468 // performance by spreading out the computation))
3469 for (i=0; i < f->channels; ++i)
3470 for (j=0; right+j < len; ++j)
3471 f->previous_window[i][j] = f->channel_buffers[i][right+j];
3472
3473 if (!prev)
3474 // there was no previous packet, so this data isn't valid...
3475 // this isn't entirely true, only the would-have-overlapped data
3476 // isn't valid, but this seems to be what the spec requires
3477 return 0;
3478
3479 // truncate a short frame
3480 if (len < right) right = len;
3481
3482 f->samples_output += right-left;
3483
3484 return right - left;
3485 }
3486
3487 static void vorbis_pump_first_frame(stb_vorbis *f)
3488 {
3489 int len, right, left;
3490 if (vorbis_decode_packet(f, &len, &left, &right))
3491 vorbis_finish_frame(f, len, left, right);
3492 }
3493
3494 #ifndef STB_VORBIS_NO_PUSHDATA_API
3495 static int is_whole_packet_present(stb_vorbis *f, int end_page)
3496 {
3497 // make sure that we have the packet available before continuing...
3498 // this requires a full ogg parse, but we know we can fetch from f->stream
3499
3500 // instead of coding this out explicitly, we could save the current read state,
3501 // read the next packet with get8() until end-of-packet, check f->eof, then
3502 // reset the state? but that would be slower, esp. since we'd have over 256 bytes
3503 // of state to restore (primarily the page segment table)
3504
3505 int s = f->next_seg, first = TRUE;
3506 uint8 *p = f->stream;
3507
3508 if (s != -1) { // if we're not starting the packet with a 'continue on next page' flag
3509 for (; s < f->segment_count; ++s) {
3510 p += f->segments[s];
3511 if (f->segments[s] < 255) // stop at first short segment
3512 break;
3513 }
3514 // either this continues, or it ends it...
3515 if (end_page)
3516 if (s < f->segment_count-1) return error(f, VORBIS_invalid_stream);
3517 if (s == f->segment_count)
3518 s = -1; // set 'crosses page' flag
3519 if (p > f->stream_end) return error(f, VORBIS_need_more_data);
3520 first = FALSE;
3521 }
3522 for (; s == -1;) {
3523 uint8 *q;
3524 int n;
3525
3526 // check that we have the page header ready
3527 if (p + 26 >= f->stream_end) return error(f, VORBIS_need_more_data);
3528 // validate the page
3529 if (memcmp(p, ogg_page_header, 4)) return error(f, VORBIS_invalid_stream);
3530 if (p[4] != 0) return error(f, VORBIS_invalid_stream);
3531 if (first) { // the first segment must NOT have 'continued_packet', later ones MUST
3532 if (f->previous_length)
3533 if ((p[5] & PAGEFLAG_continued_packet)) return error(f, VORBIS_invalid_stream);
3534 // if no previous length, we're resynching, so we can come in on a continued-packet,
3535 // which we'll just drop
3536 } else {
3537 if (!(p[5] & PAGEFLAG_continued_packet)) return error(f, VORBIS_invalid_stream);
3538 }
3539 n = p[26]; // segment counts
3540 q = p+27; // q points to segment table
3541 p = q + n; // advance past header
3542 // make sure we've read the segment table
3543 if (p > f->stream_end) return error(f, VORBIS_need_more_data);
3544 for (s=0; s < n; ++s) {
3545 p += q[s];
3546 if (q[s] < 255)
3547 break;
3548 }
3549 if (end_page)
3550 if (s < n-1) return error(f, VORBIS_invalid_stream);
3551 if (s == n)
3552 s = -1; // set 'crosses page' flag
3553 if (p > f->stream_end) return error(f, VORBIS_need_more_data);
3554 first = FALSE;
3555 }
3556 return TRUE;
3557 }
3558 #endif // !STB_VORBIS_NO_PUSHDATA_API
3559
3560 static int start_decoder(vorb *f)
3561 {
3562 uint8 header[6], x,y;
3563 int len,i,j,k, max_submaps = 0;
3564 int longest_floorlist=0;
3565
3566 // first page, first packet
3567
3568 if (!start_page(f)) return FALSE;
3569 // validate page flag
3570 if (!(f->page_flag & PAGEFLAG_first_page)) return error(f, VORBIS_invalid_first_page);
3571 if (f->page_flag & PAGEFLAG_last_page) return error(f, VORBIS_invalid_first_page);
3572 if (f->page_flag & PAGEFLAG_continued_packet) return error(f, VORBIS_invalid_first_page);
3573 // check for expected packet length
3574 if (f->segment_count != 1) return error(f, VORBIS_invalid_first_page);
3575 if (f->segments[0] != 30) return error(f, VORBIS_invalid_first_page);
3576 // read packet
3577 // check packet header
3578 if (get8(f) != VORBIS_packet_id) return error(f, VORBIS_invalid_first_page);
3579 if (!getn(f, header, 6)) return error(f, VORBIS_unexpected_eof);
3580 if (!vorbis_validate(header)) return error(f, VORBIS_invalid_first_page);
3581 // vorbis_version
3582 if (get32(f) != 0) return error(f, VORBIS_invalid_first_page);
3583 f->channels = get8(f); if (!f->channels) return error(f, VORBIS_invalid_first_page);
3584 if (f->channels > STB_VORBIS_MAX_CHANNELS) return error(f, VORBIS_too_many_channels);
3585 f->sample_rate = get32(f); if (!f->sample_rate) return error(f, VORBIS_invalid_first_page);
3586 get32(f); // bitrate_maximum
3587 get32(f); // bitrate_nominal
3588 get32(f); // bitrate_minimum
3589 x = get8(f);
3590 {
3591 int log0,log1;
3592 log0 = x & 15;
3593 log1 = x >> 4;
3594 f->blocksize_0 = 1 << log0;
3595 f->blocksize_1 = 1 << log1;
3596 if (log0 < 6 || log0 > 13) return error(f, VORBIS_invalid_setup);
3597 if (log1 < 6 || log1 > 13) return error(f, VORBIS_invalid_setup);
3598 if (log0 > log1) return error(f, VORBIS_invalid_setup);
3599 }
3600
3601 // framing_flag
3602 x = get8(f);
3603 if (!(x & 1)) return error(f, VORBIS_invalid_first_page);
3604
3605 // second packet!
3606 if (!start_page(f)) return FALSE;
3607
3608 if (!start_packet(f)) return FALSE;
3609 do {
3610 len = next_segment(f);
3611 skip(f, len);
3612 f->bytes_in_seg = 0;
3613 } while (len);
3614
3615 // third packet!
3616 if (!start_packet(f)) return FALSE;
3617
3618 #ifndef STB_VORBIS_NO_PUSHDATA_API
3619 if (IS_PUSH_MODE(f)) {
3620 if (!is_whole_packet_present(f, TRUE)) {
3621 // convert error in ogg header to write type
3622 if (f->error == VORBIS_invalid_stream)
3623 f->error = VORBIS_invalid_setup;
3624 return FALSE;
3625 }
3626 }
3627 #endif
3628
3629 crc32_init(); // always init it, to avoid multithread race conditions
3630
3631 if (get8_packet(f) != VORBIS_packet_setup) return error(f, VORBIS_invalid_setup);
3632 for (i=0; i < 6; ++i) header[i] = get8_packet(f);
3633 if (!vorbis_validate(header)) return error(f, VORBIS_invalid_setup);
3634
3635 // codebooks
3636
3637 f->codebook_count = get_bits(f,8) + 1;
3638 f->codebooks = (Codebook *) setup_malloc(f, sizeof(*f->codebooks) * f->codebook_count);
3639 if (f->codebooks == NULL) return error(f, VORBIS_outofmem);
3640 memset(f->codebooks, 0, sizeof(*f->codebooks) * f->codebook_count);
3641 for (i=0; i < f->codebook_count; ++i) {
3642 uint32 *values;
3643 int ordered, sorted_count;
3644 int total=0;
3645 uint8 *lengths;
3646 Codebook *c = f->codebooks+i;
3647 CHECK(f);
3648 x = get_bits(f, 8); if (x != 0x42) return error(f, VORBIS_invalid_setup);
3649 x = get_bits(f, 8); if (x != 0x43) return error(f, VORBIS_invalid_setup);
3650 x = get_bits(f, 8); if (x != 0x56) return error(f, VORBIS_invalid_setup);
3651 x = get_bits(f, 8);
3652 c->dimensions = (get_bits(f, 8)<<8) + x;
3653 x = get_bits(f, 8);
3654 y = get_bits(f, 8);
3655 c->entries = (get_bits(f, 8)<<16) + (y<<8) + x;
3656 ordered = get_bits(f,1);
3657 c->sparse = ordered ? 0 : get_bits(f,1);
3658
3659 if (c->dimensions == 0 && c->entries != 0) return error(f, VORBIS_invalid_setup);
3660
3661 if (c->sparse)
3662 lengths = (uint8 *) setup_temp_malloc(f, c->entries);
3663 else
3664 lengths = c->codeword_lengths = (uint8 *) setup_malloc(f, c->entries);
3665
3666 if (!lengths) return error(f, VORBIS_outofmem);
3667
3668 if (ordered) {
3669 int current_entry = 0;
3670 int current_length = get_bits(f,5) + 1;
3671 while (current_entry < c->entries) {
3672 int limit = c->entries - current_entry;
3673 int n = get_bits(f, ilog(limit));
3674 if (current_entry + n > (int) c->entries) { return error(f, VORBIS_invalid_setup); }
3675 memset(lengths + current_entry, current_length, n);
3676 current_entry += n;
3677 ++current_length;
3678 }
3679 } else {
3680 for (j=0; j < c->entries; ++j) {
3681 int present = c->sparse ? get_bits(f,1) : 1;
3682 if (present) {
3683 lengths[j] = get_bits(f, 5) + 1;
3684 ++total;
3685 if (lengths[j] == 32)
3686 return error(f, VORBIS_invalid_setup);
3687 } else {
3688 lengths[j] = NO_CODE;
3689 }
3690 }
3691 }
3692
3693 if (c->sparse && total >= c->entries >> 2) {
3694 // convert sparse items to non-sparse!
3695 if (c->entries > (int) f->setup_temp_memory_required)
3696 f->setup_temp_memory_required = c->entries;
3697
3698 c->codeword_lengths = (uint8 *) setup_malloc(f, c->entries);
3699 if (c->codeword_lengths == NULL) return error(f, VORBIS_outofmem);
3700 memcpy(c->codeword_lengths, lengths, c->entries);
3701 setup_temp_free(f, lengths, c->entries); // note this is only safe if there have been no intervening temp mallocs!
3702 lengths = c->codeword_lengths;
3703 c->sparse = 0;
3704 }
3705
3706 // compute the size of the sorted tables
3707 if (c->sparse) {
3708 sorted_count = total;
3709 } else {
3710 sorted_count = 0;
3711 #ifndef STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
3712 for (j=0; j < c->entries; ++j)
3713 if (lengths[j] > STB_VORBIS_FAST_HUFFMAN_LENGTH && lengths[j] != NO_CODE)
3714 ++sorted_count;
3715 #endif
3716 }
3717
3718 c->sorted_entries = sorted_count;
3719 values = NULL;
3720
3721 CHECK(f);
3722 if (!c->sparse) {
3723 c->codewords = (uint32 *) setup_malloc(f, sizeof(c->codewords[0]) * c->entries);
3724 if (!c->codewords) return error(f, VORBIS_outofmem);
3725 } else {
3726 unsigned int size;
3727 if (c->sorted_entries) {
3728 c->codeword_lengths = (uint8 *) setup_malloc(f, c->sorted_entries);
3729 if (!c->codeword_lengths) return error(f, VORBIS_outofmem);
3730 c->codewords = (uint32 *) setup_temp_malloc(f, sizeof(*c->codewords) * c->sorted_entries);
3731 if (!c->codewords) return error(f, VORBIS_outofmem);
3732 values = (uint32 *) setup_temp_malloc(f, sizeof(*values) * c->sorted_entries);
3733 if (!values) return error(f, VORBIS_outofmem);
3734 }
3735 size = c->entries + (sizeof(*c->codewords) + sizeof(*values)) * c->sorted_entries;
3736 if (size > f->setup_temp_memory_required)
3737 f->setup_temp_memory_required = size;
3738 }
3739
3740 if (!compute_codewords(c, lengths, c->entries, values)) {
3741 if (c->sparse) setup_temp_free(f, values, 0);
3742 return error(f, VORBIS_invalid_setup);
3743 }
3744
3745 if (c->sorted_entries) {
3746 // allocate an extra slot for sentinels
3747 c->sorted_codewords = (uint32 *) setup_malloc(f, sizeof(*c->sorted_codewords) * (c->sorted_entries+1));
3748 if (c->sorted_codewords == NULL) return error(f, VORBIS_outofmem);
3749 // allocate an extra slot at the front so that c->sorted_values[-1] is defined
3750 // so that we can catch that case without an extra if
3751 c->sorted_values = ( int *) setup_malloc(f, sizeof(*c->sorted_values ) * (c->sorted_entries+1));
3752 if (c->sorted_values == NULL) return error(f, VORBIS_outofmem);
3753 ++c->sorted_values;
3754 c->sorted_values[-1] = -1;
3755 compute_sorted_huffman(c, lengths, values);
3756 }
3757
3758 if (c->sparse) {
3759 setup_temp_free(f, values, sizeof(*values)*c->sorted_entries);
3760 setup_temp_free(f, c->codewords, sizeof(*c->codewords)*c->sorted_entries);
3761 setup_temp_free(f, lengths, c->entries);
3762 c->codewords = NULL;
3763 }
3764
3765 compute_accelerated_huffman(c);
3766
3767 CHECK(f);
3768 c->lookup_type = get_bits(f, 4);
3769 if (c->lookup_type > 2) return error(f, VORBIS_invalid_setup);
3770 if (c->lookup_type > 0) {
3771 uint16 *mults;
3772 c->minimum_value = float32_unpack(get_bits(f, 32));
3773 c->delta_value = float32_unpack(get_bits(f, 32));
3774 c->value_bits = get_bits(f, 4)+1;
3775 c->sequence_p = get_bits(f,1);
3776 if (c->lookup_type == 1) {
3777 c->lookup_values = lookup1_values(c->entries, c->dimensions);
3778 } else {
3779 c->lookup_values = c->entries * c->dimensions;
3780 }
3781 if (c->lookup_values == 0) return error(f, VORBIS_invalid_setup);
3782 mults = (uint16 *) setup_temp_malloc(f, sizeof(mults[0]) * c->lookup_values);
3783 if (mults == NULL) return error(f, VORBIS_outofmem);
3784 for (j=0; j < (int) c->lookup_values; ++j) {
3785 int q = get_bits(f, c->value_bits);
3786 if (q == EOP) { setup_temp_free(f,mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_invalid_setup); }
3787 mults[j] = q;
3788 }
3789
3790 #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
3791 if (c->lookup_type == 1) {
3792 int len, sparse = c->sparse;
3793 float last=0;
3794 // pre-expand the lookup1-style multiplicands, to avoid a divide in the inner loop
3795 if (sparse) {
3796 if (c->sorted_entries == 0) goto skip;
3797 c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->sorted_entries * c->dimensions);
3798 } else
3799 c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->entries * c->dimensions);
3800 if (c->multiplicands == NULL) { setup_temp_free(f,mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_outofmem); }
3801 len = sparse ? c->sorted_entries : c->entries;
3802 for (j=0; j < len; ++j) {
3803 unsigned int z = sparse ? c->sorted_values[j] : j;
3804 unsigned int div=1;
3805 for (k=0; k < c->dimensions; ++k) {
3806 int off = (z / div) % c->lookup_values;
3807 float val = mults[off];
3808 val = mults[off]*c->delta_value + c->minimum_value + last;
3809 c->multiplicands[j*c->dimensions + k] = val;
3810 if (c->sequence_p)
3811 last = val;
3812 if (k+1 < c->dimensions) {
3813 if (div > UINT_MAX / (unsigned int) c->lookup_values) {
3814 setup_temp_free(f, mults,sizeof(mults[0])*c->lookup_values);
3815 return error(f, VORBIS_invalid_setup);
3816 }
3817 div *= c->lookup_values;
3818 }
3819 }
3820 }
3821 c->lookup_type = 2;
3822 }
3823 else
3824 #endif
3825 {
3826 float last=0;
3827 CHECK(f);
3828 c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->lookup_values);
3829 if (c->multiplicands == NULL) { setup_temp_free(f, mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_outofmem); }
3830 for (j=0; j < (int) c->lookup_values; ++j) {
3831 float val = mults[j] * c->delta_value + c->minimum_value + last;
3832 c->multiplicands[j] = val;
3833 if (c->sequence_p)
3834 last = val;
3835 }
3836 }
3837 #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
3838 skip:;
3839 #endif
3840 setup_temp_free(f, mults, sizeof(mults[0])*c->lookup_values);
3841
3842 CHECK(f);
3843 }
3844 CHECK(f);
3845 }
3846
3847 // time domain transfers (notused)
3848
3849 x = get_bits(f, 6) + 1;
3850 for (i=0; i < x; ++i) {
3851 uint32 z = get_bits(f, 16);
3852 if (z != 0) return error(f, VORBIS_invalid_setup);
3853 }
3854
3855 // Floors
3856 f->floor_count = get_bits(f, 6)+1;
3857 f->floor_config = (Floor *) setup_malloc(f, f->floor_count * sizeof(*f->floor_config));
3858 if (f->floor_config == NULL) return error(f, VORBIS_outofmem);
3859 for (i=0; i < f->floor_count; ++i) {
3860 f->floor_types[i] = get_bits(f, 16);
3861 if (f->floor_types[i] > 1) return error(f, VORBIS_invalid_setup);
3862 if (f->floor_types[i] == 0) {
3863 Floor0 *g = &f->floor_config[i].floor0;
3864 g->order = get_bits(f,8);
3865 g->rate = get_bits(f,16);
3866 g->bark_map_size = get_bits(f,16);
3867 g->amplitude_bits = get_bits(f,6);
3868 g->amplitude_offset = get_bits(f,8);
3869 g->number_of_books = get_bits(f,4) + 1;
3870 for (j=0; j < g->number_of_books; ++j)
3871 g->book_list[j] = get_bits(f,8);
3872 return error(f, VORBIS_feature_not_supported);
3873 } else {
3874 Point p[31*8+2];
3875 Floor1 *g = &f->floor_config[i].floor1;
3876 int max_class = -1;
3877 g->partitions = get_bits(f, 5);
3878 for (j=0; j < g->partitions; ++j) {
3879 g->partition_class_list[j] = get_bits(f, 4);
3880 if (g->partition_class_list[j] > max_class)
3881 max_class = g->partition_class_list[j];
3882 }
3883 for (j=0; j <= max_class; ++j) {
3884 g->class_dimensions[j] = get_bits(f, 3)+1;
3885 g->class_subclasses[j] = get_bits(f, 2);
3886 if (g->class_subclasses[j]) {
3887 g->class_masterbooks[j] = get_bits(f, 8);
3888 if (g->class_masterbooks[j] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
3889 }
3890 for (k=0; k < 1 << g->class_subclasses[j]; ++k) {
3891 g->subclass_books[j][k] = get_bits(f,8)-1;
3892 if (g->subclass_books[j][k] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
3893 }
3894 }
3895 g->floor1_multiplier = get_bits(f,2)+1;
3896 g->rangebits = get_bits(f,4);
3897 g->Xlist[0] = 0;
3898 g->Xlist[1] = 1 << g->rangebits;
3899 g->values = 2;
3900 for (j=0; j < g->partitions; ++j) {
3901 int c = g->partition_class_list[j];
3902 for (k=0; k < g->class_dimensions[c]; ++k) {
3903 g->Xlist[g->values] = get_bits(f, g->rangebits);
3904 ++g->values;
3905 }
3906 }
3907 // precompute the sorting
3908 for (j=0; j < g->values; ++j) {
3909 p[j].x = g->Xlist[j];
3910 p[j].y = j;
3911 }
3912 qsort(p, g->values, sizeof(p[0]), point_compare);
3913 for (j=0; j < g->values; ++j)
3914 g->sorted_order[j] = (uint8) p[j].y;
3915 // precompute the neighbors
3916 for (j=2; j < g->values; ++j) {
3917 int low,hi;
3918 neighbors(g->Xlist, j, &low,&hi);
3919 g->neighbors[j][0] = low;
3920 g->neighbors[j][1] = hi;
3921 }
3922
3923 if (g->values > longest_floorlist)
3924 longest_floorlist = g->values;
3925 }
3926 }
3927
3928 // Residue
3929 f->residue_count = get_bits(f, 6)+1;
3930 f->residue_config = (Residue *) setup_malloc(f, f->residue_count * sizeof(f->residue_config[0]));
3931 if (f->residue_config == NULL) return error(f, VORBIS_outofmem);
3932 memset(f->residue_config, 0, f->residue_count * sizeof(f->residue_config[0]));
3933 for (i=0; i < f->residue_count; ++i) {
3934 uint8 residue_cascade[64];
3935 Residue *r = f->residue_config+i;
3936 f->residue_types[i] = get_bits(f, 16);
3937 if (f->residue_types[i] > 2) return error(f, VORBIS_invalid_setup);
3938 r->begin = get_bits(f, 24);
3939 r->end = get_bits(f, 24);
3940 if (r->end < r->begin) return error(f, VORBIS_invalid_setup);
3941 r->part_size = get_bits(f,24)+1;
3942 r->classifications = get_bits(f,6)+1;
3943 r->classbook = get_bits(f,8);
3944 if (r->classbook >= f->codebook_count) return error(f, VORBIS_invalid_setup);
3945 for (j=0; j < r->classifications; ++j) {
3946 uint8 high_bits=0;
3947 uint8 low_bits=get_bits(f,3);
3948 if (get_bits(f,1))
3949 high_bits = get_bits(f,5);
3950 residue_cascade[j] = high_bits*8 + low_bits;
3951 }
3952 r->residue_books = (short (*)[8]) setup_malloc(f, sizeof(r->residue_books[0]) * r->classifications);
3953 if (r->residue_books == NULL) return error(f, VORBIS_outofmem);
3954 for (j=0; j < r->classifications; ++j) {
3955 for (k=0; k < 8; ++k) {
3956 if (residue_cascade[j] & (1 << k)) {
3957 r->residue_books[j][k] = get_bits(f, 8);
3958 if (r->residue_books[j][k] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
3959 } else {
3960 r->residue_books[j][k] = -1;
3961 }
3962 }
3963 }
3964 // precompute the classifications[] array to avoid inner-loop mod/divide
3965 // call it 'classdata' since we already have r->classifications
3966 r->classdata = (uint8 **) setup_malloc(f, sizeof(*r->classdata) * f->codebooks[r->classbook].entries);
3967 if (!r->classdata) return error(f, VORBIS_outofmem);
3968 memset(r->classdata, 0, sizeof(*r->classdata) * f->codebooks[r->classbook].entries);
3969 for (j=0; j < f->codebooks[r->classbook].entries; ++j) {
3970 int classwords = f->codebooks[r->classbook].dimensions;
3971 int temp = j;
3972 r->classdata[j] = (uint8 *) setup_malloc(f, sizeof(r->classdata[j][0]) * classwords);
3973 if (r->classdata[j] == NULL) return error(f, VORBIS_outofmem);
3974 for (k=classwords-1; k >= 0; --k) {
3975 r->classdata[j][k] = temp % r->classifications;
3976 temp /= r->classifications;
3977 }
3978 }
3979 }
3980
3981 f->mapping_count = get_bits(f,6)+1;
3982 f->mapping = (Mapping *) setup_malloc(f, f->mapping_count * sizeof(*f->mapping));
3983 if (f->mapping == NULL) return error(f, VORBIS_outofmem);
3984 memset(f->mapping, 0, f->mapping_count * sizeof(*f->mapping));
3985 for (i=0; i < f->mapping_count; ++i) {
3986 Mapping *m = f->mapping + i;
3987 int mapping_type = get_bits(f,16);
3988 if (mapping_type != 0) return error(f, VORBIS_invalid_setup);
3989 m->chan = (MappingChannel *) setup_malloc(f, f->channels * sizeof(*m->chan));
3990 if (m->chan == NULL) return error(f, VORBIS_outofmem);
3991 if (get_bits(f,1))
3992 m->submaps = get_bits(f,4)+1;
3993 else
3994 m->submaps = 1;
3995 if (m->submaps > max_submaps)
3996 max_submaps = m->submaps;
3997 if (get_bits(f,1)) {
3998 m->coupling_steps = get_bits(f,8)+1;
3999 for (k=0; k < m->coupling_steps; ++k) {
4000 m->chan[k].magnitude = get_bits(f, ilog(f->channels-1));
4001 m->chan[k].angle = get_bits(f, ilog(f->channels-1));
4002 if (m->chan[k].magnitude >= f->channels) return error(f, VORBIS_invalid_setup);
4003 if (m->chan[k].angle >= f->channels) return error(f, VORBIS_invalid_setup);
4004 if (m->chan[k].magnitude == m->chan[k].angle) return error(f, VORBIS_invalid_setup);
4005 }
4006 } else
4007 m->coupling_steps = 0;
4008
4009 // reserved field
4010 if (get_bits(f,2)) return error(f, VORBIS_invalid_setup);
4011 if (m->submaps > 1) {
4012 for (j=0; j < f->channels; ++j) {
4013 m->chan[j].mux = get_bits(f, 4);
4014 if (m->chan[j].mux >= m->submaps) return error(f, VORBIS_invalid_setup);
4015 }
4016 } else
4017 // @SPECIFICATION: this case is missing from the spec
4018 for (j=0; j < f->channels; ++j)
4019 m->chan[j].mux = 0;
4020
4021 for (j=0; j < m->submaps; ++j) {
4022 get_bits(f,8); // discard
4023 m->submap_floor[j] = get_bits(f,8);
4024 m->submap_residue[j] = get_bits(f,8);
4025 if (m->submap_floor[j] >= f->floor_count) return error(f, VORBIS_invalid_setup);
4026 if (m->submap_residue[j] >= f->residue_count) return error(f, VORBIS_invalid_setup);
4027 }
4028 }
4029
4030 // Modes
4031 f->mode_count = get_bits(f, 6)+1;
4032 for (i=0; i < f->mode_count; ++i) {
4033 Mode *m = f->mode_config+i;
4034 m->blockflag = get_bits(f,1);
4035 m->windowtype = get_bits(f,16);
4036 m->transformtype = get_bits(f,16);
4037 m->mapping = get_bits(f,8);
4038 if (m->windowtype != 0) return error(f, VORBIS_invalid_setup);
4039 if (m->transformtype != 0) return error(f, VORBIS_invalid_setup);
4040 if (m->mapping >= f->mapping_count) return error(f, VORBIS_invalid_setup);
4041 }
4042
4043 flush_packet(f);
4044
4045 f->previous_length = 0;
4046
4047 for (i=0; i < f->channels; ++i) {
4048 f->channel_buffers[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1);
4049 f->previous_window[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1/2);
4050 f->finalY[i] = (int16 *) setup_malloc(f, sizeof(int16) * longest_floorlist);
4051 if (f->channel_buffers[i] == NULL || f->previous_window[i] == NULL || f->finalY[i] == NULL) return error(f, VORBIS_outofmem);
4052 #ifdef STB_VORBIS_NO_DEFER_FLOOR
4053 f->floor_buffers[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1/2);
4054 if (f->floor_buffers[i] == NULL) return error(f, VORBIS_outofmem);
4055 #endif
4056 }
4057
4058 if (!init_blocksize(f, 0, f->blocksize_0)) return FALSE;
4059 if (!init_blocksize(f, 1, f->blocksize_1)) return FALSE;
4060 f->blocksize[0] = f->blocksize_0;
4061 f->blocksize[1] = f->blocksize_1;
4062
4063 #ifdef STB_VORBIS_DIVIDE_TABLE
4064 if (integer_divide_table[1][1]==0)
4065 for (i=0; i < DIVTAB_NUMER; ++i)
4066 for (j=1; j < DIVTAB_DENOM; ++j)
4067 integer_divide_table[i][j] = i / j;
4068 #endif
4069
4070 // compute how much temporary memory is needed
4071
4072 // 1.
4073 {
4074 uint32 imdct_mem = (f->blocksize_1 * sizeof(float) >> 1);
4075 uint32 classify_mem;
4076 int i,max_part_read=0;
4077 for (i=0; i < f->residue_count; ++i) {
4078 Residue *r = f->residue_config + i;
4079 int n_read = r->end - r->begin;
4080 int part_read = n_read / r->part_size;
4081 if (part_read > max_part_read)
4082 max_part_read = part_read;
4083 }
4084 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
4085 classify_mem = f->channels * (sizeof(void*) + max_part_read * sizeof(uint8 *));
4086 #else
4087 classify_mem = f->channels * (sizeof(void*) + max_part_read * sizeof(int *));
4088 #endif
4089
4090 f->temp_memory_required = classify_mem;
4091 if (imdct_mem > f->temp_memory_required)
4092 f->temp_memory_required = imdct_mem;
4093 }
4094
4095 f->first_decode = TRUE;
4096
4097 if (f->alloc.alloc_buffer) {
4098 assert(f->temp_offset == f->alloc.alloc_buffer_length_in_bytes);
4099 // check if there's enough temp memory so we don't error later
4100 if (f->setup_offset + sizeof(*f) + f->temp_memory_required > (unsigned) f->temp_offset)
4101 return error(f, VORBIS_outofmem);
4102 }
4103
4104 f->first_audio_page_offset = stb_vorbis_get_file_offset(f);
4105
4106 return TRUE;
4107 }
4108
4109 static void vorbis_deinit(stb_vorbis *p)
4110 {
4111 int i,j;
4112 if (p->residue_config) {
4113 for (i=0; i < p->residue_count; ++i) {
4114 Residue *r = p->residue_config+i;
4115 if (r->classdata) {
4116 for (j=0; j < p->codebooks[r->classbook].entries; ++j)
4117 setup_free(p, r->classdata[j]);
4118 setup_free(p, r->classdata);
4119 }
4120 setup_free(p, r->residue_books);
4121 }
4122 }
4123
4124 if (p->codebooks) {
4125 CHECK(p);
4126 for (i=0; i < p->codebook_count; ++i) {
4127 Codebook *c = p->codebooks + i;
4128 setup_free(p, c->codeword_lengths);
4129 setup_free(p, c->multiplicands);
4130 setup_free(p, c->codewords);
4131 setup_free(p, c->sorted_codewords);
4132 // c->sorted_values[-1] is the first entry in the array
4133 setup_free(p, c->sorted_values ? c->sorted_values-1 : NULL);
4134 }
4135 setup_free(p, p->codebooks);
4136 }
4137 setup_free(p, p->floor_config);
4138 setup_free(p, p->residue_config);
4139 if (p->mapping) {
4140 for (i=0; i < p->mapping_count; ++i)
4141 setup_free(p, p->mapping[i].chan);
4142 setup_free(p, p->mapping);
4143 }
4144 CHECK(p);
4145 for (i=0; i < p->channels && i < STB_VORBIS_MAX_CHANNELS; ++i) {
4146 setup_free(p, p->channel_buffers[i]);
4147 setup_free(p, p->previous_window[i]);
4148 #ifdef STB_VORBIS_NO_DEFER_FLOOR
4149 setup_free(p, p->floor_buffers[i]);
4150 #endif
4151 setup_free(p, p->finalY[i]);
4152 }
4153 for (i=0; i < 2; ++i) {
4154 setup_free(p, p->A[i]);
4155 setup_free(p, p->B[i]);
4156 setup_free(p, p->C[i]);
4157 setup_free(p, p->window[i]);
4158 setup_free(p, p->bit_reverse[i]);
4159 }
4160 #ifndef STB_VORBIS_NO_STDIO
4161 if (p->close_on_free) fclose(p->f);
4162 #endif
4163 }
4164
4165 void stb_vorbis_close(stb_vorbis *p)
4166 {
4167 if (p == NULL) return;
4168 vorbis_deinit(p);
4169 setup_free(p,p);
4170 }
4171
4172 static void vorbis_init(stb_vorbis *p, const stb_vorbis_alloc *z)
4173 {
4174 memset(p, 0, sizeof(*p)); // NULL out all malloc'd pointers to start
4175 if (z) {
4176 p->alloc = *z;
4177 p->alloc.alloc_buffer_length_in_bytes = (p->alloc.alloc_buffer_length_in_bytes+3) & ~3;
4178 p->temp_offset = p->alloc.alloc_buffer_length_in_bytes;
4179 }
4180 p->eof = 0;
4181 p->error = VORBIS__no_error;
4182 p->stream = NULL;
4183 p->codebooks = NULL;
4184 p->page_crc_tests = -1;
4185 #ifndef STB_VORBIS_NO_STDIO
4186 p->close_on_free = FALSE;
4187 p->f = NULL;
4188 #endif
4189 }
4190
4191 int stb_vorbis_get_sample_offset(stb_vorbis *f)
4192 {
4193 if (f->current_loc_valid)
4194 return f->current_loc;
4195 else
4196 return -1;
4197 }
4198
4199 stb_vorbis_info stb_vorbis_get_info(stb_vorbis *f)
4200 {
4201 stb_vorbis_info d;
4202 d.channels = f->channels;
4203 d.sample_rate = f->sample_rate;
4204 d.setup_memory_required = f->setup_memory_required;
4205 d.setup_temp_memory_required = f->setup_temp_memory_required;
4206 d.temp_memory_required = f->temp_memory_required;
4207 d.max_frame_size = f->blocksize_1 >> 1;
4208 return d;
4209 }
4210
4211 int stb_vorbis_get_error(stb_vorbis *f)
4212 {
4213 int e = f->error;
4214 f->error = VORBIS__no_error;
4215 return e;
4216 }
4217
4218 static stb_vorbis * vorbis_alloc(stb_vorbis *f)
4219 {
4220 stb_vorbis *p = (stb_vorbis *) setup_malloc(f, sizeof(*p));
4221 return p;
4222 }
4223
4224 #ifndef STB_VORBIS_NO_PUSHDATA_API
4225
4226 void stb_vorbis_flush_pushdata(stb_vorbis *f)
4227 {
4228 f->previous_length = 0;
4229 f->page_crc_tests = 0;
4230 f->discard_samples_deferred = 0;
4231 f->current_loc_valid = FALSE;
4232 f->first_decode = FALSE;
4233 f->samples_output = 0;
4234 f->channel_buffer_start = 0;
4235 f->channel_buffer_end = 0;
4236 }
4237
4238 static int vorbis_search_for_page_pushdata(vorb *f, uint8 *data, int data_len)
4239 {
4240 int i,n;
4241 for (i=0; i < f->page_crc_tests; ++i)
4242 f->scan[i].bytes_done = 0;
4243
4244 // if we have room for more scans, search for them first, because
4245 // they may cause us to stop early if their header is incomplete
4246 if (f->page_crc_tests < STB_VORBIS_PUSHDATA_CRC_COUNT) {
4247 if (data_len < 4) return 0;
4248 data_len -= 3; // need to look for 4-byte sequence, so don't miss
4249 // one that straddles a boundary
4250 for (i=0; i < data_len; ++i) {
4251 if (data[i] == 0x4f) {
4252 if (0==memcmp(data+i, ogg_page_header, 4)) {
4253 int j,len;
4254 uint32 crc;
4255 // make sure we have the whole page header
4256 if (i+26 >= data_len || i+27+data[i+26] >= data_len) {
4257 // only read up to this page start, so hopefully we'll
4258 // have the whole page header start next time
4259 data_len = i;
4260 break;
4261 }
4262 // ok, we have it all; compute the length of the page
4263 len = 27 + data[i+26];
4264 for (j=0; j < data[i+26]; ++j)
4265 len += data[i+27+j];
4266 // scan everything up to the embedded crc (which we must 0)
4267 crc = 0;
4268 for (j=0; j < 22; ++j)
4269 crc = crc32_update(crc, data[i+j]);
4270 // now process 4 0-bytes
4271 for ( ; j < 26; ++j)
4272 crc = crc32_update(crc, 0);
4273 // len is the total number of bytes we need to scan
4274 n = f->page_crc_tests++;
4275 f->scan[n].bytes_left = len-j;
4276 f->scan[n].crc_so_far = crc;
4277 f->scan[n].goal_crc = data[i+22] + (data[i+23] << 8) + (data[i+24]<<16) + (data[i+25]<<24);
4278 // if the last frame on a page is continued to the next, then
4279 // we can't recover the sample_loc immediately
4280 if (data[i+27+data[i+26]-1] == 255)
4281 f->scan[n].sample_loc = ~0;
4282 else
4283 f->scan[n].sample_loc = data[i+6] + (data[i+7] << 8) + (data[i+ 8]<<16) + (data[i+ 9]<<24);
4284 f->scan[n].bytes_done = i+j;
4285 if (f->page_crc_tests == STB_VORBIS_PUSHDATA_CRC_COUNT)
4286 break;
4287 // keep going if we still have room for more
4288 }
4289 }
4290 }
4291 }
4292
4293 for (i=0; i < f->page_crc_tests;) {
4294 uint32 crc;
4295 int j;
4296 int n = f->scan[i].bytes_done;
4297 int m = f->scan[i].bytes_left;
4298 if (m > data_len - n) m = data_len - n;
4299 // m is the bytes to scan in the current chunk
4300 crc = f->scan[i].crc_so_far;
4301 for (j=0; j < m; ++j)
4302 crc = crc32_update(crc, data[n+j]);
4303 f->scan[i].bytes_left -= m;
4304 f->scan[i].crc_so_far = crc;
4305 if (f->scan[i].bytes_left == 0) {
4306 // does it match?
4307 if (f->scan[i].crc_so_far == f->scan[i].goal_crc) {
4308 // Houston, we have page
4309 data_len = n+m; // consumption amount is wherever that scan ended
4310 f->page_crc_tests = -1; // drop out of page scan mode
4311 f->previous_length = 0; // decode-but-don't-output one frame
4312 f->next_seg = -1; // start a new page
4313 f->current_loc = f->scan[i].sample_loc; // set the current sample location
4314 // to the amount we'd have decoded had we decoded this page
4315 f->current_loc_valid = f->current_loc != ~0U;
4316 return data_len;
4317 }
4318 // delete entry
4319 f->scan[i] = f->scan[--f->page_crc_tests];
4320 } else {
4321 ++i;
4322 }
4323 }
4324
4325 return data_len;
4326 }
4327
4328 // return value: number of bytes we used
4329 int stb_vorbis_decode_frame_pushdata(
4330 stb_vorbis *f, // the file we're decoding
4331 const uint8 *data, int data_len, // the memory available for decoding
4332 int *channels, // place to write number of float * buffers
4333 float ***output, // place to write float ** array of float * buffers
4334 int *samples // place to write number of output samples
4335 )
4336 {
4337 int i;
4338 int len,right,left;
4339
4340 if (!IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
4341
4342 if (f->page_crc_tests >= 0) {
4343 *samples = 0;
4344 return vorbis_search_for_page_pushdata(f, (uint8 *) data, data_len);
4345 }
4346
4347 f->stream = (uint8 *) data;
4348 f->stream_end = (uint8 *) data + data_len;
4349 f->error = VORBIS__no_error;
4350
4351 // check that we have the entire packet in memory
4352 if (!is_whole_packet_present(f, FALSE)) {
4353 *samples = 0;
4354 return 0;
4355 }
4356
4357 if (!vorbis_decode_packet(f, &len, &left, &right)) {
4358 // save the actual error we encountered
4359 enum STBVorbisError error = f->error;
4360 if (error == VORBIS_bad_packet_type) {
4361 // flush and resynch
4362 f->error = VORBIS__no_error;
4363 while (get8_packet(f) != EOP)
4364 if (f->eof) break;
4365 *samples = 0;
4366 return (int) (f->stream - data);
4367 }
4368 if (error == VORBIS_continued_packet_flag_invalid) {
4369 if (f->previous_length == 0) {
4370 // we may be resynching, in which case it's ok to hit one
4371 // of these; just discard the packet
4372 f->error = VORBIS__no_error;
4373 while (get8_packet(f) != EOP)
4374 if (f->eof) break;
4375 *samples = 0;
4376 return (int) (f->stream - data);
4377 }
4378 }
4379 // if we get an error while parsing, what to do?
4380 // well, it DEFINITELY won't work to continue from where we are!
4381 stb_vorbis_flush_pushdata(f);
4382 // restore the error that actually made us bail
4383 f->error = error;
4384 *samples = 0;
4385 return 1;
4386 }
4387
4388 // success!
4389 len = vorbis_finish_frame(f, len, left, right);
4390 for (i=0; i < f->channels; ++i)
4391 f->outputs[i] = f->channel_buffers[i] + left;
4392
4393 if (channels) *channels = f->channels;
4394 *samples = len;
4395 *output = f->outputs;
4396 return (int) (f->stream - data);
4397 }
4398
4399 stb_vorbis *stb_vorbis_open_pushdata(
4400 const unsigned char *data, int data_len, // the memory available for decoding
4401 int *data_used, // only defined if result is not NULL
4402 int *error, const stb_vorbis_alloc *alloc)
4403 {
4404 stb_vorbis *f, p;
4405 vorbis_init(&p, alloc);
4406 p.stream = (uint8 *) data;
4407 p.stream_end = (uint8 *) data + data_len;
4408 p.push_mode = TRUE;
4409 if (!start_decoder(&p)) {
4410 if (p.eof)
4411 *error = VORBIS_need_more_data;
4412 else
4413 *error = p.error;
4414 return NULL;
4415 }
4416 f = vorbis_alloc(&p);
4417 if (f) {
4418 *f = p;
4419 *data_used = (int) (f->stream - data);
4420 *error = 0;
4421 return f;
4422 } else {
4423 vorbis_deinit(&p);
4424 return NULL;
4425 }
4426 }
4427 #endif // STB_VORBIS_NO_PUSHDATA_API
4428
4429 unsigned int stb_vorbis_get_file_offset(stb_vorbis *f)
4430 {
4431 #ifndef STB_VORBIS_NO_PUSHDATA_API
4432 if (f->push_mode) return 0;
4433 #endif
4434 if (USE_MEMORY(f)) return (unsigned int) (f->stream - f->stream_start);
4435 #ifndef STB_VORBIS_NO_STDIO
4436 return (unsigned int) (ftell(f->f) - f->f_start);
4437 #endif
4438 }
4439
4440 #ifndef STB_VORBIS_NO_PULLDATA_API
4441 //
4442 // DATA-PULLING API
4443 //
4444
4445 static uint32 vorbis_find_page(stb_vorbis *f, uint32 *end, uint32 *last)
4446 {
4447 for(;;) {
4448 int n;
4449 if (f->eof) return 0;
4450 n = get8(f);
4451 if (n == 0x4f) { // page header candidate
4452 unsigned int retry_loc = stb_vorbis_get_file_offset(f);
4453 int i;
4454 // check if we're off the end of a file_section stream
4455 if (retry_loc - 25 > f->stream_len)
4456 return 0;
4457 // check the rest of the header
4458 for (i=1; i < 4; ++i)
4459 if (get8(f) != ogg_page_header[i])
4460 break;
4461 if (f->eof) return 0;
4462 if (i == 4) {
4463 uint8 header[27];
4464 uint32 i, crc, goal, len;
4465 for (i=0; i < 4; ++i)
4466 header[i] = ogg_page_header[i];
4467 for (; i < 27; ++i)
4468 header[i] = get8(f);
4469 if (f->eof) return 0;
4470 if (header[4] != 0) goto invalid;
4471 goal = header[22] + (header[23] << 8) + (header[24]<<16) + (header[25]<<24);
4472 for (i=22; i < 26; ++i)
4473 header[i] = 0;
4474 crc = 0;
4475 for (i=0; i < 27; ++i)
4476 crc = crc32_update(crc, header[i]);
4477 len = 0;
4478 for (i=0; i < header[26]; ++i) {
4479 int s = get8(f);
4480 crc = crc32_update(crc, s);
4481 len += s;
4482 }
4483 if (len && f->eof) return 0;
4484 for (i=0; i < len; ++i)
4485 crc = crc32_update(crc, get8(f));
4486 // finished parsing probable page
4487 if (crc == goal) {
4488 // we could now check that it's either got the last
4489 // page flag set, OR it's followed by the capture
4490 // pattern, but I guess TECHNICALLY you could have
4491 // a file with garbage between each ogg page and recover
4492 // from it automatically? So even though that paranoia
4493 // might decrease the chance of an invalid decode by
4494 // another 2^32, not worth it since it would hose those
4495 // invalid-but-useful files?
4496 if (end)
4497 *end = stb_vorbis_get_file_offset(f);
4498 if (last) {
4499 if (header[5] & 0x04)
4500 *last = 1;
4501 else
4502 *last = 0;
4503 }
4504 set_file_offset(f, retry_loc-1);
4505 return 1;
4506 }
4507 }
4508 invalid:
4509 // not a valid page, so rewind and look for next one
4510 set_file_offset(f, retry_loc);
4511 }
4512 }
4513 }
4514
4515
4516 #define SAMPLE_unknown 0xffffffff
4517
4518 // seeking is implemented with a binary search, which narrows down the range to
4519 // 64K, before using a linear search (because finding the synchronization
4520 // pattern can be expensive, and the chance we'd find the end page again is
4521 // relatively high for small ranges)
4522 //
4523 // two initial interpolation-style probes are used at the start of the search
4524 // to try to bound either side of the binary search sensibly, while still
4525 // working in O(log n) time if they fail.
4526
4527 static int get_seek_page_info(stb_vorbis *f, ProbedPage *z)
4528 {
4529 uint8 header[27], lacing[255];
4530 int i,len;
4531
4532 // record where the page starts
4533 z->page_start = stb_vorbis_get_file_offset(f);
4534
4535 // parse the header
4536 getn(f, header, 27);
4537 if (header[0] != 'O' || header[1] != 'g' || header[2] != 'g' || header[3] != 'S')
4538 return 0;
4539 getn(f, lacing, header[26]);
4540
4541 // determine the length of the payload
4542 len = 0;
4543 for (i=0; i < header[26]; ++i)
4544 len += lacing[i];
4545
4546 // this implies where the page ends
4547 z->page_end = z->page_start + 27 + header[26] + len;
4548
4549 // read the last-decoded sample out of the data
4550 z->last_decoded_sample = header[6] + (header[7] << 8) + (header[8] << 16) + (header[9] << 24);
4551
4552 // restore file state to where we were
4553 set_file_offset(f, z->page_start);
4554 return 1;
4555 }
4556
4557 // rarely used function to seek back to the preceeding page while finding the
4558 // start of a packet
4559 static int go_to_page_before(stb_vorbis *f, unsigned int limit_offset)
4560 {
4561 unsigned int previous_safe, end;
4562
4563 // now we want to seek back 64K from the limit
4564 if (limit_offset >= 65536 && limit_offset-65536 >= f->first_audio_page_offset)
4565 previous_safe = limit_offset - 65536;
4566 else
4567 previous_safe = f->first_audio_page_offset;
4568
4569 set_file_offset(f, previous_safe);
4570
4571 while (vorbis_find_page(f, &end, NULL)) {
4572 if (end >= limit_offset && stb_vorbis_get_file_offset(f) < limit_offset)
4573 return 1;
4574 set_file_offset(f, end);
4575 }
4576
4577 return 0;
4578 }
4579
// implements the search logic for finding a page and starting decoding. if
// the function succeeds, current_loc_valid will be true and current_loc will
// be less than or equal to the provided sample number (the closer the
// better).
//
// Outline: validate the target against the stream length, bracket it between
// a 'left' and a 'right' page, narrow that bracket (two interpolated probes,
// then bisection, then a linear walk once under 64K), and finally position
// the decoder on the packet boundary found on the left page.
// Returns 1 on success. On failure returns the result of error() after
// attempting to rewind to the start of the audio.
static int seek_to_sample_coarse(stb_vorbis *f, uint32 sample_number)
{
   ProbedPage left, right, mid;
   int i, start_seg_with_known_loc, end_pos, page_start;
   uint32 delta, stream_length, padding;
   double offset, bytes_per_sample;
   int probe = 0; // number of bracket-narrowing steps completed so far

   // find the last page and validate the target sample
   stream_length = stb_vorbis_stream_length_in_samples(f);
   if (stream_length == 0)            return error(f, VORBIS_seek_without_length);
   if (sample_number > stream_length) return error(f, VORBIS_seek_invalid);

   // this is the maximum difference between the window-center (which is the
   // actual granule position value), and the right-start (which the spec
   // indicates should be the granule position (give or take one)).
   padding = ((f->blocksize_1 - f->blocksize_0) >> 2);
   if (sample_number < padding)
      sample_number = 0;
   else
      sample_number -= padding;

   // left bound: first page that carries a sample position
   left = f->p_first;
   while (left.last_decoded_sample == ~0U) {
      // (untested) the first page does not have a 'last_decoded_sample'
      set_file_offset(f, left.page_end);
      if (!get_seek_page_info(f, &left)) goto error;
   }

   // right bound: the last page of the stream
   right = f->p_last;
   assert(right.last_decoded_sample != ~0U);

   // starting from the start is handled differently
   if (sample_number <= left.last_decoded_sample) {
      stb_vorbis_seek_start(f);
      return 1;
   }

   // narrow [left, right] until the two pages are adjacent
   while (left.page_end != right.page_start) {
      assert(left.page_end < right.page_start);
      // search range in bytes
      delta = right.page_start - left.page_end;
      if (delta <= 65536) {
         // there's only 64K left to search - handle it linearly
         set_file_offset(f, left.page_end);
      } else {
         if (probe < 2) {
            if (probe == 0) {
               // first probe (interpolate)
               double data_bytes = right.page_end - left.page_start;
               bytes_per_sample = data_bytes / right.last_decoded_sample;
               offset = left.page_start + bytes_per_sample * (sample_number - left.last_decoded_sample);
            } else {
               // second probe (try to bound the other side)
               // step past the target by twice the estimated miss, using a
               // magnitude of at least 8000 bytes
               double error = ((double) sample_number - mid.last_decoded_sample) * bytes_per_sample;
               if (error >= 0 && error <  8000) error =  8000;
               if (error <  0 && error > -8000) error = -8000;
               offset += error * 2;
            }

            // ensure the offset is valid
            if (offset < left.page_end)
               offset = left.page_end;
            if (offset > right.page_start - 65536)
               offset = right.page_start - 65536;

            set_file_offset(f, (unsigned int) offset);
         } else {
            // binary search for large ranges (offset by 32K to ensure
            // we don't hit the right page)
            set_file_offset(f, left.page_end + (delta / 2) - 32768);
         }

         // the probe offset is arbitrary; sync to the next real page
         if (!vorbis_find_page(f, NULL, NULL)) goto error;
      }

      // read page info, skipping pages that carry no sample position
      for (;;) {
         if (!get_seek_page_info(f, &mid)) goto error;
         if (mid.last_decoded_sample != ~0U) break;
         // (untested) no frames end on this page
         set_file_offset(f, mid.page_end);
         assert(mid.page_start < right.page_start);
      }

      // if we've just found the last page again then we're in a tricky file,
      // and we're close enough.
      if (mid.page_start == right.page_start)
         break;

      // keep the half of the bracket that contains the target
      if (sample_number < mid.last_decoded_sample)
         right = mid;
      else
         left = mid;

      ++probe;
   }

   // seek back to start of the last packet
   page_start = left.page_start;
   set_file_offset(f, page_start);
   if (!start_page(f)) return error(f, VORBIS_seek_failed);
   end_pos = f->end_seg_with_known_loc;
   assert(end_pos >= 0);

   for (;;) {
      // walk backwards over segments of length 255 to find where the packet
      // begins
      for (i = end_pos; i > 0; --i)
         if (f->segments[i-1] != 255)
            break;

      start_seg_with_known_loc = i;

      // done unless we hit segment 0 of a page flagged as continuing a packet
      if (start_seg_with_known_loc > 0 || !(f->page_flag & PAGEFLAG_continued_packet))
         break;

      // (untested) the final packet begins on an earlier page
      if (!go_to_page_before(f, page_start))
         goto error;

      page_start = stb_vorbis_get_file_offset(f);
      if (!start_page(f)) goto error;
      end_pos = f->segment_count - 1;
   }

   // prepare to start decoding
   f->current_loc_valid = FALSE;
   f->last_seg = FALSE;
   f->valid_bits = 0;
   f->packet_bytes = 0;
   f->bytes_in_seg = 0;
   f->previous_length = 0;
   f->next_seg = start_seg_with_known_loc;

   // skip the bytes of the segments that precede the chosen packet
   for (i = 0; i < start_seg_with_known_loc; i++)
      skip(f, f->segments[i]);

   // start decoding (optimizable - this frame is generally discarded)
   vorbis_pump_first_frame(f);
   return 1;

error:
   // try to restore the file to a valid state
   stb_vorbis_seek_start(f);
   return error(f, VORBIS_seek_failed);
}
4728
4729 // the same as vorbis_decode_initial, but without advancing
4730 static int peek_decode_initial(vorb *f, int *p_left_start, int *p_left_end, int *p_right_start, int *p_right_end, int *mode)
4731 {
4732 int bits_read, bytes_read;
4733
4734 if (!vorbis_decode_initial(f, p_left_start, p_left_end, p_right_start, p_right_end, mode))
4735 return 0;
4736
4737 // either 1 or 2 bytes were read, figure out which so we can rewind
4738 bits_read = 1 + ilog(f->mode_count-1);
4739 if (f->mode_config[*mode].blockflag)
4740 bits_read += 2;
4741 bytes_read = (bits_read + 7) / 8;
4742
4743 f->bytes_in_seg += bytes_read;
4744 f->packet_bytes -= bytes_read;
4745 skip(f, -bytes_read);
4746 if (f->next_seg == -1)
4747 f->next_seg = f->segment_count - 1;
4748 else
4749 f->next_seg--;
4750 f->valid_bits = 0;
4751
4752 return 1;
4753 }
4754
4755 int stb_vorbis_seek_frame(stb_vorbis *f, unsigned int sample_number)
4756 {
4757 uint32 max_frame_samples;
4758
4759 if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
4760
4761 // fast page-level search
4762 if (!seek_to_sample_coarse(f, sample_number))
4763 return 0;
4764
4765 assert(f->current_loc_valid);
4766 assert(f->current_loc <= sample_number);
4767
4768 // linear search for the relevant packet
4769 max_frame_samples = (f->blocksize_1*3 - f->blocksize_0) >> 2;
4770 while (f->current_loc < sample_number) {
4771 int left_start, left_end, right_start, right_end, mode, frame_samples;
4772 if (!peek_decode_initial(f, &left_start, &left_end, &right_start, &right_end, &mode))
4773 return error(f, VORBIS_seek_failed);
4774 // calculate the number of samples returned by the next frame
4775 frame_samples = right_start - left_start;
4776 if (f->current_loc + frame_samples > sample_number) {
4777 return 1; // the next frame will contain the sample
4778 } else if (f->current_loc + frame_samples + max_frame_samples > sample_number) {
4779 // there's a chance the frame after this could contain the sample
4780 vorbis_pump_first_frame(f);
4781 } else {
4782 // this frame is too early to be relevant
4783 f->current_loc += frame_samples;
4784 f->previous_length = 0;
4785 maybe_start_packet(f);
4786 flush_packet(f);
4787 }
4788 }
4789 // the next frame will start with the sample
4790 assert(f->current_loc == sample_number);
4791 return 1;
4792 }
4793
4794 int stb_vorbis_seek(stb_vorbis *f, unsigned int sample_number)
4795 {
4796 if (!stb_vorbis_seek_frame(f, sample_number))
4797 return 0;
4798
4799 if (sample_number != f->current_loc) {
4800 int n;
4801 uint32 frame_start = f->current_loc;
4802 stb_vorbis_get_frame_float(f, &n, NULL);
4803 assert(sample_number > frame_start);
4804 assert(f->channel_buffer_start + (int) (sample_number-frame_start) <= f->channel_buffer_end);
4805 f->channel_buffer_start += (sample_number - frame_start);
4806 }
4807
4808 return 1;
4809 }
4810
4811 void stb_vorbis_seek_start(stb_vorbis *f)
4812 {
4813 if (IS_PUSH_MODE(f)) { error(f, VORBIS_invalid_api_mixing); return; }
4814 set_file_offset(f, f->first_audio_page_offset);
4815 f->previous_length = 0;
4816 f->first_decode = TRUE;
4817 f->next_seg = -1;
4818 vorbis_pump_first_frame(f);
4819 }
4820
4821 unsigned int stb_vorbis_stream_length_in_samples(stb_vorbis *f)
4822 {
4823 unsigned int restore_offset, previous_safe;
4824 unsigned int end, last_page_loc;
4825
4826 if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
4827 if (!f->total_samples) {
4828 unsigned int last;
4829 uint32 lo,hi;
4830 char header[6];
4831
4832 // first, store the current decode position so we can restore it
4833 restore_offset = stb_vorbis_get_file_offset(f);
4834
4835 // now we want to seek back 64K from the end (the last page must
4836 // be at most a little less than 64K, but let's allow a little slop)
4837 if (f->stream_len >= 65536 && f->stream_len-65536 >= f->first_audio_page_offset)
4838 previous_safe = f->stream_len - 65536;
4839 else
4840 previous_safe = f->first_audio_page_offset;
4841
4842 set_file_offset(f, previous_safe);
4843 // previous_safe is now our candidate 'earliest known place that seeking
4844 // to will lead to the final page'
4845
4846 if (!vorbis_find_page(f, &end, &last)) {
4847 // if we can't find a page, we're hosed!
4848 f->error = VORBIS_cant_find_last_page;
4849 f->total_samples = 0xffffffff;
4850 goto done;
4851 }
4852
4853 // check if there are more pages
4854 last_page_loc = stb_vorbis_get_file_offset(f);
4855
4856 // stop when the last_page flag is set, not when we reach eof;
4857 // this allows us to stop short of a 'file_section' end without
4858 // explicitly checking the length of the section
4859 while (!last) {
4860 set_file_offset(f, end);
4861 if (!vorbis_find_page(f, &end, &last)) {
4862 // the last page we found didn't have the 'last page' flag
4863 // set. whoops!
4864 break;
4865 }
4866 previous_safe = last_page_loc+1;
4867 last_page_loc = stb_vorbis_get_file_offset(f);
4868 }
4869
4870 set_file_offset(f, last_page_loc);
4871
4872 // parse the header
4873 getn(f, (unsigned char *)header, 6);
4874 // extract the absolute granule position
4875 lo = get32(f);
4876 hi = get32(f);
4877 if (lo == 0xffffffff && hi == 0xffffffff) {
4878 f->error = VORBIS_cant_find_last_page;
4879 f->total_samples = SAMPLE_unknown;
4880 goto done;
4881 }
4882 if (hi)
4883 lo = 0xfffffffe; // saturate
4884 f->total_samples = lo;
4885
4886 f->p_last.page_start = last_page_loc;
4887 f->p_last.page_end = end;
4888 f->p_last.last_decoded_sample = lo;
4889
4890 done:
4891 set_file_offset(f, restore_offset);
4892 }
4893 return f->total_samples == SAMPLE_unknown ? 0 : f->total_samples;
4894 }
4895
4896 float stb_vorbis_stream_length_in_seconds(stb_vorbis *f)
4897 {
4898 return stb_vorbis_stream_length_in_samples(f) / (float) f->sample_rate;
4899 }
4900
4901
4902
4903 int stb_vorbis_get_frame_float(stb_vorbis *f, int *channels, float ***output)
4904 {
4905 int len, right,left,i;
4906 if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
4907
4908 if (!vorbis_decode_packet(f, &len, &left, &right)) {
4909 f->channel_buffer_start = f->channel_buffer_end = 0;
4910 return 0;
4911 }
4912
4913 len = vorbis_finish_frame(f, len, left, right);
4914 for (i=0; i < f->channels; ++i)
4915 f->outputs[i] = f->channel_buffers[i] + left;
4916
4917 f->channel_buffer_start = left;
4918 f->channel_buffer_end = left+len;
4919
4920 if (channels) *channels = f->channels;
4921 if (output) *output = f->outputs;
4922 return len;
4923 }
4924
4925 #ifndef STB_VORBIS_NO_STDIO
4926
4927 stb_vorbis * stb_vorbis_open_file_section(FILE *file, int close_on_free, int *error, const stb_vorbis_alloc *alloc, unsigned int length)
4928 {
4929 stb_vorbis *f, p;
4930 vorbis_init(&p, alloc);
4931 p.f = file;
4932 p.f_start = (uint32) ftell(file);
4933 p.stream_len = length;
4934 p.close_on_free = close_on_free;
4935 if (start_decoder(&p)) {
4936 f = vorbis_alloc(&p);
4937 if (f) {
4938 *f = p;
4939 vorbis_pump_first_frame(f);
4940 return f;
4941 }
4942 }
4943 if (error) *error = p.error;
4944 vorbis_deinit(&p);
4945 return NULL;
4946 }
4947
4948 stb_vorbis * stb_vorbis_open_file(FILE *file, int close_on_free, int *error, const stb_vorbis_alloc *alloc)
4949 {
4950 unsigned int len, start;
4951 start = (unsigned int) ftell(file);
4952 fseek(file, 0, SEEK_END);
4953 len = (unsigned int) (ftell(file) - start);
4954 fseek(file, start, SEEK_SET);
4955 return stb_vorbis_open_file_section(file, close_on_free, error, alloc, len);
4956 }
4957
4958 stb_vorbis * stb_vorbis_open_filename(const char *filename, int *error, const stb_vorbis_alloc *alloc)
4959 {
4960 FILE *f = fopen(filename, "rb");
4961 if (f)
4962 return stb_vorbis_open_file(f, TRUE, error, alloc);
4963 if (error) *error = VORBIS_file_open_failure;
4964 return NULL;
4965 }
4966 #endif // STB_VORBIS_NO_STDIO
4967
4968 stb_vorbis * stb_vorbis_open_memory(const unsigned char *data, int len, int *error, const stb_vorbis_alloc *alloc)
4969 {
4970 stb_vorbis *f, p;
4971 if (data == NULL) return NULL;
4972 vorbis_init(&p, alloc);
4973 p.stream = (uint8 *) data;
4974 p.stream_end = (uint8 *) data + len;
4975 p.stream_start = (uint8 *) p.stream;
4976 p.stream_len = len;
4977 p.push_mode = FALSE;
4978 if (start_decoder(&p)) {
4979 f = vorbis_alloc(&p);
4980 if (f) {
4981 *f = p;
4982 vorbis_pump_first_frame(f);
4983 return f;
4984 }
4985 }
4986 if (error) *error = p.error;
4987 vorbis_deinit(&p);
4988 return NULL;
4989 }
4990
4991 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
4992 #define PLAYBACK_MONO 1
4993 #define PLAYBACK_LEFT 2
4994 #define PLAYBACK_RIGHT 4
4995
4996 #define L (PLAYBACK_LEFT | PLAYBACK_MONO)
4997 #define C (PLAYBACK_LEFT | PLAYBACK_RIGHT | PLAYBACK_MONO)
4998 #define R (PLAYBACK_RIGHT | PLAYBACK_MONO)
4999
5000 static int8 channel_position[7][6] =
5001 {
5002 { 0 },
5003 { C },
5004 { L, R },
5005 { L, C, R },
5006 { L, R, L, R },
5007 { L, C, R, L, R },
5008 { L, C, R, L, R, C },
5009 };
5010
5011
#ifndef STB_VORBIS_NO_FAST_SCALED_FLOAT
   // Fast path: convert a float to a scaled integer by adding a large
   // constant and reinterpreting the float's bits through a union.
   typedef union {
      float f;
      int i;
   } float_conv;
   // Compile-time check that float and int are both 4 bytes. A failed check
   // yields a negative array size, which every compiler rejects (a size of 0
   // is accepted by some compilers as an extension, so it would not fail).
   typedef char stb_vorbis_float_size_test[(sizeof(float)==4 && sizeof(int) == 4) ? 1 : -1];
   #define FASTDEF(x) float_conv x
   // add (1<<23) to convert to int, then divide by 2^SHIFT, then add 0.5/2^SHIFT to round
   #define MAGIC(SHIFT) (1.5f * (1 << (23-(SHIFT))) + 0.5f/(1 << (SHIFT)))
   #define ADDEND(SHIFT) (((150-(SHIFT)) << 23) + (1 << 22))
   #define FAST_SCALED_FLOAT_TO_INT(temp,x,s) (temp.f = (x) + MAGIC(s), temp.i - ADDEND(s))
   #define check_endianness()
#else
   // Portable path: plain multiply-and-truncate; FASTDEF declares nothing.
   #define FAST_SCALED_FLOAT_TO_INT(temp,x,s) ((int) ((x) * (1 << (s))))
   #define check_endianness()
   #define FASTDEF(x)
#endif
5029
5030 static void copy_samples(short *dest, float *src, int len)
5031 {
5032 int i;
5033 check_endianness();
5034 for (i=0; i < len; ++i) {
5035 FASTDEF(temp);
5036 int v = FAST_SCALED_FLOAT_TO_INT(temp, src[i],15);
5037 if ((unsigned int) (v + 32768) > 65535)
5038 v = v < 0 ? -32768 : 32767;
5039 dest[i] = v;
5040 }
5041 }
5042
5043 static void compute_samples(int mask, short *output, int num_c, float **data, int d_offset, int len)
5044 {
5045 #define BUFFER_SIZE 32
5046 float buffer[BUFFER_SIZE];
5047 int i,j,o,n = BUFFER_SIZE;
5048 check_endianness();
5049 for (o = 0; o < len; o += BUFFER_SIZE) {
5050 memset(buffer, 0, sizeof(buffer));
5051 if (o + n > len) n = len - o;
5052 for (j=0; j < num_c; ++j) {
5053 if (channel_position[num_c][j] & mask) {
5054 for (i=0; i < n; ++i)
5055 buffer[i] += data[j][d_offset+o+i];
5056 }
5057 }
5058 for (i=0; i < n; ++i) {
5059 FASTDEF(temp);
5060 int v = FAST_SCALED_FLOAT_TO_INT(temp,buffer[i],15);
5061 if ((unsigned int) (v + 32768) > 65535)
5062 v = v < 0 ? -32768 : 32767;
5063 output[o+i] = v;
5064 }
5065 }
5066 }
5067
5068 static void compute_stereo_samples(short *output, int num_c, float **data, int d_offset, int len)
5069 {
5070 #define BUFFER_SIZE 32
5071 float buffer[BUFFER_SIZE];
5072 int i,j,o,n = BUFFER_SIZE >> 1;
5073 // o is the offset in the source data
5074 check_endianness();
5075 for (o = 0; o < len; o += BUFFER_SIZE >> 1) {
5076 // o2 is the offset in the output data
5077 int o2 = o << 1;
5078 memset(buffer, 0, sizeof(buffer));
5079 if (o + n > len) n = len - o;
5080 for (j=0; j < num_c; ++j) {
5081 int m = channel_position[num_c][j] & (PLAYBACK_LEFT | PLAYBACK_RIGHT);
5082 if (m == (PLAYBACK_LEFT | PLAYBACK_RIGHT)) {
5083 for (i=0; i < n; ++i) {
5084 buffer[i*2+0] += data[j][d_offset+o+i];
5085 buffer[i*2+1] += data[j][d_offset+o+i];
5086 }
5087 } else if (m == PLAYBACK_LEFT) {
5088 for (i=0; i < n; ++i) {
5089 buffer[i*2+0] += data[j][d_offset+o+i];
5090 }
5091 } else if (m == PLAYBACK_RIGHT) {
5092 for (i=0; i < n; ++i) {
5093 buffer[i*2+1] += data[j][d_offset+o+i];
5094 }
5095 }
5096 }
5097 for (i=0; i < (n<<1); ++i) {
5098 FASTDEF(temp);
5099 int v = FAST_SCALED_FLOAT_TO_INT(temp,buffer[i],15);
5100 if ((unsigned int) (v + 32768) > 65535)
5101 v = v < 0 ? -32768 : 32767;
5102 output[o2+i] = v;
5103 }
5104 }
5105 }
5106
5107 static void convert_samples_short(int buf_c, short **buffer, int b_offset, int data_c, float **data, int d_offset, int samples)
5108 {
5109 int i;
5110 if (buf_c != data_c && buf_c <= 2 && data_c <= 6) {
5111 static int channel_selector[3][2] = { {0}, {PLAYBACK_MONO}, {PLAYBACK_LEFT, PLAYBACK_RIGHT} };
5112 for (i=0; i < buf_c; ++i)
5113 compute_samples(channel_selector[buf_c][i], buffer[i]+b_offset, data_c, data, d_offset, samples);
5114 } else {
5115 int limit = buf_c < data_c ? buf_c : data_c;
5116 for (i=0; i < limit; ++i)
5117 copy_samples(buffer[i]+b_offset, data[i]+d_offset, samples);
5118 for ( ; i < buf_c; ++i)
5119 memset(buffer[i]+b_offset, 0, sizeof(short) * samples);
5120 }
5121 }
5122
5123 int stb_vorbis_get_frame_short(stb_vorbis *f, int num_c, short **buffer, int num_samples)
5124 {
5125 float **output;
5126 int len = stb_vorbis_get_frame_float(f, NULL, &output);
5127 if (len > num_samples) len = num_samples;
5128 if (len)
5129 convert_samples_short(num_c, buffer, 0, f->channels, output, 0, len);
5130 return len;
5131 }
5132
5133 static void convert_channels_short_interleaved(int buf_c, short *buffer, int data_c, float **data, int d_offset, int len)
5134 {
5135 int i;
5136 check_endianness();
5137 if (buf_c != data_c && buf_c <= 2 && data_c <= 6) {
5138 assert(buf_c == 2);
5139 for (i=0; i < buf_c; ++i)
5140 compute_stereo_samples(buffer, data_c, data, d_offset, len);
5141 } else {
5142 int limit = buf_c < data_c ? buf_c : data_c;
5143 int j;
5144 for (j=0; j < len; ++j) {
5145 for (i=0; i < limit; ++i) {
5146 FASTDEF(temp);
5147 float f = data[i][d_offset+j];
5148 int v = FAST_SCALED_FLOAT_TO_INT(temp, f,15);//data[i][d_offset+j],15);
5149 if ((unsigned int) (v + 32768) > 65535)
5150 v = v < 0 ? -32768 : 32767;
5151 *buffer++ = v;
5152 }
5153 for ( ; i < buf_c; ++i)
5154 *buffer++ = 0;
5155 }
5156 }
5157 }
5158
5159 int stb_vorbis_get_frame_short_interleaved(stb_vorbis *f, int num_c, short *buffer, int num_shorts)
5160 {
5161 float **output;
5162 int len;
5163 if (num_c == 1) return stb_vorbis_get_frame_short(f,num_c,&buffer, num_shorts);
5164 len = stb_vorbis_get_frame_float(f, NULL, &output);
5165 if (len) {
5166 if (len*num_c > num_shorts) len = num_shorts / num_c;
5167 convert_channels_short_interleaved(num_c, buffer, f->channels, output, 0, len);
5168 }
5169 return len;
5170 }
5171
5172 int stb_vorbis_get_samples_short_interleaved(stb_vorbis *f, int channels, short *buffer, int num_shorts)
5173 {
5174 float **outputs;
5175 int len = num_shorts / channels;
5176 int n=0;
5177 int z = f->channels;
5178 if (z > channels) z = channels;
5179 while (n < len) {
5180 int k = f->channel_buffer_end - f->channel_buffer_start;
5181 if (n+k >= len) k = len - n;
5182 if (k)
5183 convert_channels_short_interleaved(channels, buffer, f->channels, f->channel_buffers, f->channel_buffer_start, k);
5184 buffer += k*channels;
5185 n += k;
5186 f->channel_buffer_start += k;
5187 if (n == len) break;
5188 if (!stb_vorbis_get_frame_float(f, NULL, &outputs)) break;
5189 }
5190 return n;
5191 }
5192
5193 int stb_vorbis_get_samples_short(stb_vorbis *f, int channels, short **buffer, int len)
5194 {
5195 float **outputs;
5196 int n=0;
5197 int z = f->channels;
5198 if (z > channels) z = channels;
5199 while (n < len) {
5200 int k = f->channel_buffer_end - f->channel_buffer_start;
5201 if (n+k >= len) k = len - n;
5202 if (k)
5203 convert_samples_short(channels, buffer, n, f->channels, f->channel_buffers, f->channel_buffer_start, k);
5204 n += k;
5205 f->channel_buffer_start += k;
5206 if (n == len) break;
5207 if (!stb_vorbis_get_frame_float(f, NULL, &outputs)) break;
5208 }
5209 return n;
5210 }
5211
5212 #ifndef STB_VORBIS_NO_STDIO
5213 int stb_vorbis_decode_filename(const char *filename, int *channels, int *sample_rate, short **output)
5214 {
5215 int data_len, offset, total, limit, error;
5216 short *data;
5217 stb_vorbis *v = stb_vorbis_open_filename(filename, &error, NULL);
5218 if (v == NULL) return -1;
5219 limit = v->channels * 4096;
5220 *channels = v->channels;
5221 if (sample_rate)
5222 *sample_rate = v->sample_rate;
5223 offset = data_len = 0;
5224 total = limit;
5225 data = (short *) malloc(total * sizeof(*data));
5226 if (data == NULL) {
5227 stb_vorbis_close(v);
5228 return -2;
5229 }
5230 for (;;) {
5231 int n = stb_vorbis_get_frame_short_interleaved(v, v->channels, data+offset, total-offset);
5232 if (n == 0) break;
5233 data_len += n;
5234 offset += n * v->channels;
5235 if (offset + limit > total) {
5236 short *data2;
5237 total *= 2;
5238 data2 = (short *) realloc(data, total * sizeof(*data));
5239 if (data2 == NULL) {
5240 free(data);
5241 stb_vorbis_close(v);
5242 return -2;
5243 }
5244 data = data2;
5245 }
5246 }
5247 *output = data;
5248 stb_vorbis_close(v);
5249 return data_len;
5250 }
5251 #endif // NO_STDIO
5252
5253 int stb_vorbis_decode_memory(const uint8 *mem, int len, int *channels, int *sample_rate, short **output)
5254 {
5255 int data_len, offset, total, limit, error;
5256 short *data;
5257 stb_vorbis *v = stb_vorbis_open_memory(mem, len, &error, NULL);
5258 if (v == NULL) return -1;
5259 limit = v->channels * 4096;
5260 *channels = v->channels;
5261 if (sample_rate)
5262 *sample_rate = v->sample_rate;
5263 offset = data_len = 0;
5264 total = limit;
5265 data = (short *) malloc(total * sizeof(*data));
5266 if (data == NULL) {
5267 stb_vorbis_close(v);
5268 return -2;
5269 }
5270 for (;;) {
5271 int n = stb_vorbis_get_frame_short_interleaved(v, v->channels, data+offset, total-offset);
5272 if (n == 0) break;
5273 data_len += n;
5274 offset += n * v->channels;
5275 if (offset + limit > total) {
5276 short *data2;
5277 total *= 2;
5278 data2 = (short *) realloc(data, total * sizeof(*data));
5279 if (data2 == NULL) {
5280 free(data);
5281 stb_vorbis_close(v);
5282 return -2;
5283 }
5284 data = data2;
5285 }
5286 }
5287 *output = data;
5288 stb_vorbis_close(v);
5289 return data_len;
5290 }
5291 #endif // STB_VORBIS_NO_INTEGER_CONVERSION
5292
5293 int stb_vorbis_get_samples_float_interleaved(stb_vorbis *f, int channels, float *buffer, int num_floats)
5294 {
5295 float **outputs;
5296 int len = num_floats / channels;
5297 int n=0;
5298 int z = f->channels;
5299 if (z > channels) z = channels;
5300 while (n < len) {
5301 int i,j;
5302 int k = f->channel_buffer_end - f->channel_buffer_start;
5303 if (n+k >= len) k = len - n;
5304 for (j=0; j < k; ++j) {
5305 for (i=0; i < z; ++i)
5306 *buffer++ = f->channel_buffers[i][f->channel_buffer_start+j];
5307 for ( ; i < channels; ++i)
5308 *buffer++ = 0;
5309 }
5310 n += k;
5311 f->channel_buffer_start += k;
5312 if (n == len)
5313 break;
5314 if (!stb_vorbis_get_frame_float(f, NULL, &outputs))
5315 break;
5316 }
5317 return n;
5318 }
5319
5320 int stb_vorbis_get_samples_float(stb_vorbis *f, int channels, float **buffer, int num_samples)
5321 {
5322 float **outputs;
5323 int n=0;
5324 int z = f->channels;
5325 if (z > channels) z = channels;
5326 while (n < num_samples) {
5327 int i;
5328 int k = f->channel_buffer_end - f->channel_buffer_start;
5329 if (n+k >= num_samples) k = num_samples - n;
5330 if (k) {
5331 for (i=0; i < z; ++i)
5332 memcpy(buffer[i]+n, f->channel_buffers[i]+f->channel_buffer_start, sizeof(float)*k);
5333 for ( ; i < channels; ++i)
5334 memset(buffer[i]+n, 0, sizeof(float) * k);
5335 }
5336 n += k;
5337 f->channel_buffer_start += k;
5338 if (n == num_samples)
5339 break;
5340 if (!stb_vorbis_get_frame_float(f, NULL, &outputs))
5341 break;
5342 }
5343 return n;
5344 }
5345 #endif // STB_VORBIS_NO_PULLDATA_API
5346
5347 /* Version history
5348 1.09 - 2016/04/04 - back out 'avoid discarding last frame' fix from previous version
5349 1.08 - 2016/04/02 - fixed multiple warnings; fix setup memory leaks;
5350 avoid discarding last frame of audio data
5351 1.07 - 2015/01/16 - fixed some warnings, fix mingw, const-correct API
5352 some more crash fixes when out of memory or with corrupt files
5353 1.06 - 2015/08/31 - full, correct support for seeking API (Dougall Johnson)
5354 some crash fixes when out of memory or with corrupt files
5355 1.05 - 2015/04/19 - don't define __forceinline if it's redundant
5356 1.04 - 2014/08/27 - fix missing const-correct case in API
5357 1.03 - 2014/08/07 - Warning fixes
5358 1.02 - 2014/07/09 - Declare qsort compare function _cdecl on windows
5359 1.01 - 2014/06/18 - fix stb_vorbis_get_samples_float
5360 1.0 - 2014/05/26 - fix memory leaks; fix warnings; fix bugs in multichannel
5361 (API change) report sample rate for decode-full-file funcs
5362 0.99996 - bracket #include <malloc.h> for macintosh compilation by Laurent Gomila
5363 0.99995 - use union instead of pointer-cast for fast-float-to-int to avoid alias-optimization problem
5364 0.99994 - change fast-float-to-int to work in single-precision FPU mode, remove endian-dependence
5365 0.99993 - remove assert that fired on legal files with empty tables
5366 0.99992 - rewind-to-start
5367 0.99991 - bugfix to stb_vorbis_get_samples_short by Bernhard Wodo
5368 0.9999 - (should have been 0.99990) fix no-CRT support, compiling as C++
5369 0.9998 - add a full-decode function with a memory source
5370 0.9997 - fix a bug in the read-from-FILE case in 0.9996 addition
5371 0.9996 - query length of vorbis stream in samples/seconds
5372 0.9995 - bugfix to another optimization that only happened in certain files
5373 0.9994 - bugfix to one of the optimizations that caused significant (but inaudible?) errors
5374 0.9993 - performance improvements; runs in 99% to 104% of time of reference implementation
5375 0.9992 - performance improvement of IMDCT; now performs close to reference implementation
5376 0.9991 - performance improvement of IMDCT
5377 0.999 - (should have been 0.9990) performance improvement of IMDCT
5378 0.998 - no-CRT support from Casey Muratori
5379 0.997 - bugfixes for bugs found by Terje Mathisen
5380 0.996 - bugfix: fast-huffman decode initialized incorrectly for sparse codebooks; fixing gives 10% speedup - found by Terje Mathisen
5381 0.995 - bugfix: fix to 'effective' overrun detection - found by Terje Mathisen
5382 0.994 - bugfix: garbage decode on final VQ symbol of a non-multiple - found by Terje Mathisen
5383 0.993 - bugfix: pushdata API required 1 extra byte for empty page (failed to consume final page if empty) - found by Terje Mathisen
5384 0.992 - fixes for MinGW warning
5385 0.991 - turn fast-float-conversion on by default
5386 0.990 - fix push-mode seek recovery if you seek into the headers
5387 0.98b - fix to bad release of 0.98
5388 0.98 - fix push-mode seek recovery; robustify float-to-int and support non-fast mode
5389 0.97 - builds under c++ (typecasting, don't use 'class' keyword)
5390 0.96 - somehow MY 0.95 was right, but the web one was wrong, so here's my 0.95 rereleased as 0.96, fixes a typo in the clamping code
5391 0.95 - clamping code for 16-bit functions
    0.94 - not publicly released
5393 0.93 - fixed all-zero-floor case (was decoding garbage)
5394 0.92 - fixed a memory leak
5395 0.91 - conditional compiles to omit parts of the API and the infrastructure to support them: STB_VORBIS_NO_PULLDATA_API, STB_VORBIS_NO_PUSHDATA_API, STB_VORBIS_NO_STDIO, STB_VORBIS_NO_INTEGER_CONVERSION
5396 0.90 - first public release
5397 */
5398
5399 #endif // STB_VORBIS_HEADER_ONLY