RetroArch
stb_image.h
Go to the documentation of this file.
1 /* stb_image - v2.06 - public domain image loader - http://nothings.org/stb_image.h
2  no warranty implied; use at your own risk
3 
4  Do this:
5  #define STB_IMAGE_IMPLEMENTATION
6  before you include this file in *one* C or C++ file to create the implementation.
7 
8  // i.e. it should look like this:
9  #include ...
10  #include ...
11  #include ...
12  #define STB_IMAGE_IMPLEMENTATION
13  #include "stb_image.h"
14 
15  You can #define STBI_ASSERT(x) before the #include to avoid using assert.h.
16  And #define STBI_MALLOC, STBI_REALLOC, and STBI_FREE to avoid using malloc,realloc,free
17 
18 
19  QUICK NOTES:
20  Primarily of interest to game developers and other people who can
21  avoid problematic images and only need the trivial interface
22 
23  JPEG baseline & progressive (12 bpc/arithmetic not supported, same as stock IJG lib)
24  PNG 1/2/4/8-bit-per-channel (16 bpc not supported)
25 
26  TGA (not sure what subset, if a subset)
27  BMP non-1bpp, non-RLE
28  PSD (composited view only, no extra channels)
29 
30  GIF (*comp always reports as 4-channel)
31  HDR (radiance rgbE format)
32  PIC (Softimage PIC)
33  PNM (PPM and PGM binary only)
34 
35  - decode from memory or through FILE (define STBI_NO_STDIO to remove code)
36  - decode from arbitrary I/O callbacks
37  - SIMD acceleration on x86/x64 (SSE2) and ARM (NEON)
38 
39  Full documentation under "DOCUMENTATION" below.
40 
41 
42  Revision 2.00 release notes:
43 
44  - Progressive JPEG is now supported.
45 
46  - PPM and PGM binary formats are now supported, thanks to Ken Miller.
47 
48  - x86 platforms now make use of SSE2 SIMD instructions for
49  JPEG decoding, and ARM platforms can use NEON SIMD if requested.
50  This work was done by Fabian "ryg" Giesen. SSE2 is used by
51  default, but NEON must be enabled explicitly; see docs.
52 
53  With other JPEG optimizations included in this version, we see
54  2x speedup on a JPEG on an x86 machine, and a 1.5x speedup
55  on a JPEG on an ARM machine, relative to previous versions of this
56  library. The same results will not obtain for all JPGs and for all
57  x86/ARM machines. (Note that progressive JPEGs are significantly
58  slower to decode than regular JPEGs.) This doesn't mean that this
59  is the fastest JPEG decoder in the land; rather, it brings it
60  closer to parity with standard libraries. If you want the fastest
61  decode, look elsewhere. (See "Philosophy" section of docs below.)
62 
63  See final bullet items below for more info on SIMD.
64 
65  - Added STBI_MALLOC, STBI_REALLOC, and STBI_FREE macros for replacing
66  the memory allocator. Unlike other STBI libraries, these macros don't
67  support a context parameter, so if you need to pass a context in to
68  the allocator, you'll have to store it in a global or a thread-local
69  variable.
70 
71  - Split existing STBI_NO_HDR flag into two flags, STBI_NO_HDR and
72  STBI_NO_LINEAR.
73  STBI_NO_HDR: suppress implementation of .hdr reader format
74  STBI_NO_LINEAR: suppress high-dynamic-range light-linear float API
75 
76  - You can suppress implementation of any of the decoders to reduce
77  your code footprint by #defining one or more of the following
78  symbols before creating the implementation.
79 
80  STBI_NO_JPEG
81  STBI_NO_PNG
82  STBI_NO_BMP
83  STBI_NO_PSD
84  STBI_NO_TGA
85  STBI_NO_GIF
86  STBI_NO_HDR
87  STBI_NO_PIC
88  STBI_NO_PNM (.ppm and .pgm)
89 
90  - You can request *only* certain decoders and suppress all other ones
91  (this will be more forward-compatible, as addition of new decoders
92  doesn't require you to disable them explicitly):
93 
94  STBI_ONLY_JPEG
95  STBI_ONLY_PNG
96  STBI_ONLY_BMP
97  STBI_ONLY_PSD
98  STBI_ONLY_TGA
99  STBI_ONLY_GIF
100  STBI_ONLY_HDR
101  STBI_ONLY_PIC
102  STBI_ONLY_PNM (.ppm and .pgm)
103 
104  Note that you can define multiples of these, and you will get all
105  of them ("only x" and "only y" is interpreted to mean "only x&y").
106 
107  - If you use STBI_NO_PNG (or _ONLY_ without PNG), and you still
108  want the zlib decoder to be available, #define STBI_SUPPORT_ZLIB
109 
110  - Compilation of all SIMD code can be suppressed with
111  #define STBI_NO_SIMD
112  It should not be necessary to disable SIMD unless you have issues
113  compiling (e.g. using an x86 compiler which doesn't support SSE
114  intrinsics or that doesn't support the method used to detect
115  SSE2 support at run-time), and even those can be reported as
116  bugs so I can refine the built-in compile-time checking to be
117  smarter.
118 
119  - The old STBI_SIMD system which allowed installing a user-defined
120  IDCT etc. has been removed. If you need this, don't upgrade. My
121  assumption is that almost nobody was doing this, and those who
122  were will find the built-in SIMD more satisfactory anyway.
123 
124  - RGB values computed for JPEG images are slightly different from
125  previous versions of stb_image. (This is due to using less
126  integer precision in SIMD.) The C code has been adjusted so
127  that the same RGB values will be computed regardless of whether
128  SIMD support is available, so your app should always produce
129  consistent results. But these results are slightly different from
130  previous versions. (Specifically, about 3% of available YCbCr values
131  will compute different RGB results from pre-1.49 versions by +-1;
132  most of the deviating values are one smaller in the G channel.)
133 
134  - If you must produce consistent results with previous versions of
135  stb_image, #define STBI_JPEG_OLD and you will get the same results
136  you used to; however, you will not get the SIMD speedups for
137  the YCbCr-to-RGB conversion step (although you should still see
138  significant JPEG speedup from the other changes).
139 
140  Please note that STBI_JPEG_OLD is a temporary feature; it will be
141  removed in future versions of the library. It is only intended for
142  near-term back-compatibility use.
143 
144 
145  Latest revision history:
146  2.06 (2015-04-19) fix bug where PSD returns wrong '*comp' value
147  2.05 (2015-04-19) fix bug in progressive JPEG handling, fix warning
148  2.04 (2015-04-15) try to re-enable SIMD on MinGW 64-bit
149  2.03 (2015-04-12) additional corruption checking
150  stbi_set_flip_vertically_on_load
151  fix NEON support; fix mingw support
152  2.02 (2015-01-19) fix incorrect assert, fix warning
153  2.01 (2015-01-17) fix various warnings
154  2.00b (2014-12-25) fix STBI_MALLOC in progressive JPEG
155  2.00 (2014-12-25) optimize JPEG, including x86 SSE2 & ARM NEON SIMD
156  progressive JPEG
157  PGM/PPM support
158  STBI_MALLOC,STBI_REALLOC,STBI_FREE
159  STBI_NO_*, STBI_ONLY_*
160  GIF bugfix
161  1.48 (2014-12-14) fix incorrectly-named assert()
162  1.47 (2014-12-14) 1/2/4-bit PNG support (both grayscale and paletted)
163  optimize PNG
164  fix bug in interlaced PNG with user-specified channel count
165 
166  See end of file for full revision history.
167 
168 
169  ============================ Contributors =========================
170 
171  Image formats Bug fixes & warning fixes
172  Sean Barrett (jpeg, png, bmp) Marc LeBlanc
173  Nicolas Schulz (hdr, psd) Christpher Lloyd
174  Jonathan Dummer (tga) Dave Moore
175  Jean-Marc Lienher (gif) Won Chun
176  Tom Seddon (pic) the Horde3D community
177  Thatcher Ulrich (psd) Janez Zemva
178  Ken Miller (pgm, ppm) Jonathan Blow
179  Laurent Gomila
180  Aruelien Pocheville
181  Extensions, features Ryamond Barbiero
182  Jetro Lauha (stbi_info) David Woo
183  Martin "SpartanJ" Golini (stbi_info) Martin Golini
184  James "moose2000" Brown (iPhone PNG) Roy Eltham
185  Ben "Disch" Wenger (io callbacks) Luke Graham
186  Omar Cornut (1/2/4-bit PNG) Thomas Ruf
187  Nicolas Guillemot (vertical flip) John Bartholomew
188  Ken Hamada
189  Optimizations & bugfixes Cort Stratton
190  Fabian "ryg" Giesen Blazej Dariusz Roszkowski
191  Arseny Kapoulkine Thibault Reuille
192  Paul Du Bois
193  Guillaume George
194  If your name should be here but Jerry Jansson
195  isn't, let Sean know. Hayaki Saito
196  Johan Duparc
197  Ronny Chevalier
198  Michal Cichon
199  Tero Hanninen
200  Sergio Gonzalez
201  Cass Everitt
202  Engin Manap
203  Martins Mozeiko
204  Joseph Thomson
205  Phil Jordan
206 
207 License:
208  This software is in the public domain. Where that dedication is not
209  recognized, you are granted a perpetual, irrevocable license to copy
210  and modify this file however you want.
211 
212 */
213 
214 #ifndef STBI_INCLUDE_STB_IMAGE_H
215 #define STBI_INCLUDE_STB_IMAGE_H
216 
217 // DOCUMENTATION
218 //
219 // Limitations:
220 // - no 16-bit-per-channel PNG
221 // - no 12-bit-per-channel JPEG
222 // - no JPEGs with arithmetic coding
223 // - no 1-bit BMP
224 // - GIF always returns *comp=4
225 //
226 // Basic usage (see HDR discussion below for HDR usage):
227 // int x,y,n;
228 // unsigned char *data = stbi_load(filename, &x, &y, &n, 0);
229 // // ... process data if not NULL ...
230 // // ... x = width, y = height, n = # 8-bit components per pixel ...
231 // // ... replace '0' with '1'..'4' to force that many components per pixel
232 // // ... but 'n' will always be the number that it would have been if you said 0
233 // stbi_image_free(data)
234 //
235 // Standard parameters:
236 // int *x -- outputs image width in pixels
237 // int *y -- outputs image height in pixels
238 // int *comp -- outputs # of image components in image file
239 // int req_comp -- if non-zero, # of image components requested in result
240 //
241 // The return value from an image loader is an 'unsigned char *' which points
242 // to the pixel data, or NULL on an allocation failure or if the image is
243 // corrupt or invalid. The pixel data consists of *y scanlines of *x pixels,
244 // with each pixel consisting of N interleaved 8-bit components; the first
245 // pixel pointed to is top-left-most in the image. There is no padding between
246 // image scanlines or between pixels, regardless of format. The number of
247 // components N is 'req_comp' if req_comp is non-zero, or *comp otherwise.
248 // If req_comp is non-zero, *comp has the number of components that _would_
249 // have been output otherwise. E.g. if you set req_comp to 4, you will always
250 // get RGBA output, but you can check *comp to see if it's trivially opaque
251 // because e.g. there were only 3 channels in the source image.
252 //
253 // An output image with N components has the following components interleaved
254 // in this order in each pixel:
255 //
256 // N=#comp components
257 // 1 grey
258 // 2 grey, alpha
259 // 3 red, green, blue
260 // 4 red, green, blue, alpha
261 //
262 // If image loading fails for any reason, the return value will be NULL,
263 // and *x, *y, *comp will be unchanged. The function stbi_failure_reason()
264 // can be queried for an extremely brief, end-user unfriendly explanation
265 // of why the load failed. Define STBI_NO_FAILURE_STRINGS to avoid
266 // compiling these strings at all, and STBI_FAILURE_USERMSG to get slightly
267 // more user-friendly ones.
268 //
269 // Paletted PNG, BMP, GIF, and PIC images are automatically depalettized.
270 //
271 // ===========================================================================
272 //
273 // Philosophy
274 //
275 // stb libraries are designed with the following priorities:
276 //
277 // 1. easy to use
278 // 2. easy to maintain
279 // 3. good performance
280 //
281 // Sometimes I let "good performance" creep up in priority over "easy to maintain",
282 // and for best performance I may provide less-easy-to-use APIs that give higher
283 // performance, in addition to the easy to use ones. Nevertheless, it's important
284 // to keep in mind that from the standpoint of you, a client of this library,
285 // all you care about is #1 and #3, and stb libraries do not emphasize #3 above all.
286 //
287 // Some secondary priorities arise directly from the first two, some of which
288 // make more explicit reasons why performance can't be emphasized.
289 //
290 // - Portable ("ease of use")
291 // - Small footprint ("easy to maintain")
292 // - No dependencies ("ease of use")
293 //
294 // ===========================================================================
295 //
296 // I/O callbacks
297 //
298 // I/O callbacks allow you to read from arbitrary sources, like packaged
299 // files or some other source. Data read from callbacks are processed
300 // through a small internal buffer (currently 128 bytes) to try to reduce
301 // overhead.
302 //
303 // The three functions you must define are "read" (reads some bytes of data),
304 // "skip" (skips some bytes of data), "eof" (reports if the stream is at the end).
305 //
306 // ===========================================================================
307 //
308 // SIMD support
309 //
310 // The JPEG decoder will try to automatically use SIMD kernels on x86 when
311 // supported by the compiler. For ARM Neon support, you must explicitly
312 // request it.
313 //
314 // (The old do-it-yourself SIMD API is no longer supported in the current
315 // code.)
316 //
317 // On x86, SSE2 will automatically be used when available based on a run-time
318 // test; if not, the generic C versions are used as a fall-back. On ARM targets,
319 // the typical path is to have separate builds for NEON and non-NEON devices
320 // (at least this is true for iOS and Android). Therefore, the NEON support is
321 // toggled by a build flag: define STBI_NEON to get NEON loops.
322 //
323 // The output of the JPEG decoder is slightly different from versions before
324 // SIMD support was introduced (that is, from versions before 1.49). The
325 // difference is only +-1 in the 8-bit RGB channels, and only on a small
326 // fraction of pixels. You can force the pre-1.49 behavior by defining
327 // STBI_JPEG_OLD, but this will disable some of the SIMD decoding path
328 // and hence cost some performance.
329 //
330 // If for some reason you do not want to use any of the SIMD code, or if
331 // you have issues compiling it, you can disable it entirely by
332 // defining STBI_NO_SIMD.
333 //
334 // ===========================================================================
335 //
336 // HDR image support (disable by defining STBI_NO_HDR)
337 //
338 // stb_image now supports loading HDR images in general, and currently
339 // the Radiance .HDR file format, although the support is provided
340 // generically. You can still load any file through the existing interface;
341 // if you attempt to load an HDR file, it will be automatically remapped to
342 // LDR, assuming gamma 2.2 and an arbitrary scale factor defaulting to 1;
343 // both of these constants can be reconfigured through this interface:
344 //
345 // stbi_hdr_to_ldr_gamma(2.2f);
346 // stbi_hdr_to_ldr_scale(1.0f);
347 //
348 // (note, do not use _inverse_ constants; stb_image will invert them
349 // appropriately).
350 //
351 // Additionally, there is a new, parallel interface for loading files as
352 // (linear) floats to preserve the full dynamic range:
353 //
354 // float *data = stbi_loadf(filename, &x, &y, &n, 0);
355 //
356 // If you load LDR images through this interface, those images will
357 // be promoted to floating point values, run through the inverse of
358 // constants corresponding to the above:
359 //
360 // stbi_ldr_to_hdr_scale(1.0f);
361 // stbi_ldr_to_hdr_gamma(2.2f);
362 //
363 // Finally, given a filename (or an open file or memory block--see header
364 // file for details) containing image data, you can query for the "most
365 // appropriate" interface to use (that is, whether the image is HDR or
366 // not), using:
367 //
368 // stbi_is_hdr(char *filename);
369 //
370 // ===========================================================================
371 //
372 // iPhone PNG support:
373 //
374 // By default we convert iphone-formatted PNGs back to RGB, even though
375 // they are internally encoded differently. You can disable this conversion
376 // by calling stbi_convert_iphone_png_to_rgb(0), in which case
377 // you will always just get the native iphone "format" through (which
378 // is BGR stored in RGB).
379 //
380 // Call stbi_set_unpremultiply_on_load(1) as well to force a divide per
381 // pixel to remove any premultiplied alpha *only* if the image file explicitly
382 // says there's premultiplied data (currently only happens in iPhone images,
383 // and only if iPhone convert-to-rgb processing is on).
384 //
385 
386 
387 #ifndef STBI_NO_STDIO
388 #include <stdio.h>
389 #endif /* STBI_NO_STDIO */
390 
391 #define STBI_VERSION 1
392 
/*
 * Channel-count constants for the 'req_comp' argument of the loaders.
 * The values match the component layouts documented above:
 * 1 = grey, 2 = grey+alpha, 3 = red/green/blue, 4 = red/green/blue/alpha.
 */
enum
{
   STBI_default    = 0, /* only used for req_comp */

   STBI_grey       = 1,
   STBI_grey_alpha = 2,
   STBI_rgb        = 3,
   STBI_rgb_alpha  = 4
};
401 
402 typedef unsigned char stbi_uc;
403 
404 #ifdef __cplusplus
405 extern "C" {
406 #endif
407 
/* Linkage of every public function: file-local when the client defines
 * STB_IMAGE_STATIC, external otherwise. */
#ifndef STB_IMAGE_STATIC
#define STBIDEF extern
#else
#define STBIDEF static
#endif
413 
415 //
416 // PRIMARY API - works on images of any type
417 //
418 
419 /* load image by filename, open file, or memory buffer */
420 
/*
 * User-supplied I/O callbacks for the *_from_callbacks entry points.
 * Every callback receives the opaque 'user' pointer that was handed to the
 * entry point together with this table.
 */
typedef struct
{
   int  (*read)(void *user, char *data, int size); /* fill 'data' with 'size' bytes. return number of bytes actually read */
   void (*skip)(void *user, int n);                /* skip the next 'n' bytes, or 'unget' the last -n bytes if negative */
   int  (*eof)(void *user);                        /* returns nonzero if we are at end of file/data */
} stbi_io_callbacks;
428 STBIDEF stbi_uc *stbi_load (char const *filename, int *x, int *y, int *comp, int req_comp);
429 STBIDEF stbi_uc *stbi_load_from_memory (stbi_uc const *buffer, int len , int *x, int *y, int *comp, int req_comp);
430 STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk , void *user, int *x, int *y, int *comp, int req_comp);
431 
432 #ifndef STBI_NO_STDIO
433 STBIDEF stbi_uc *stbi_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp);
434 /* for stbi_load_from_file, file pointer is left pointing immediately after image */
435 #endif
436 
437 #ifndef STBI_NO_LINEAR
438  STBIDEF float *stbi_loadf (char const *filename, int *x, int *y, int *comp, int req_comp);
439  STBIDEF float *stbi_loadf_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp);
440  STBIDEF float *stbi_loadf_from_callbacks (stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp);
441 
442  #ifndef STBI_NO_STDIO
443  STBIDEF float *stbi_loadf_from_file (FILE *f, int *x, int *y, int *comp, int req_comp);
444  #endif
445 #endif
446 
447 #ifndef STBI_NO_HDR
448  STBIDEF void stbi_hdr_to_ldr_gamma(float gamma);
449  STBIDEF void stbi_hdr_to_ldr_scale(float scale);
450 #endif
451 
452 #ifndef STBI_NO_LINEAR
453  STBIDEF void stbi_ldr_to_hdr_gamma(float gamma);
454  STBIDEF void stbi_ldr_to_hdr_scale(float scale);
455 #endif /* STBI_NO_LINEAR */
456 
457 /* stbi_is_hdr is always defined, but always returns false if STBI_NO_HDR */
458 STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user);
460 #ifndef STBI_NO_STDIO
461 STBIDEF int stbi_is_hdr (char const *filename);
463 #endif /* STBI_NO_STDIO */
464 
465 
466 /* get a VERY brief reason for failure
467  * NOT THREADSAFE */
468 STBIDEF const char *stbi_failure_reason (void);
469 
470 /* free the loaded image -- this is just free() */
471 STBIDEF void stbi_image_free (void *retval_from_stbi_load);
472 
473 /* get image dimensions & components without fully decoding */
474 STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp);
475 STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp);
476 
477 #ifndef STBI_NO_STDIO
478 STBIDEF int stbi_info (char const *filename, int *x, int *y, int *comp);
479 STBIDEF int stbi_info_from_file (FILE *f, int *x, int *y, int *comp);
480 
481 #endif
482 
483 // for image formats that explicitly notate that they have premultiplied alpha,
484 // we just return the colors as stored in the file. set this flag to force
485 // unpremultiplication. results are undefined if the unpremultiply overflows.
486 STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply);
487 
488 // indicate whether we should process iphone images back to canonical format,
489 // or just pass them through "as-is"
490 STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert);
491 
492 /* flip the image vertically, so the first pixel in the output array is the bottom left */
493 STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip);
494 
495 /* ZLIB client - used by PNG, available for other purposes */
496 
497 STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen);
498 STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header);
499 STBIDEF char *stbi_zlib_decode_malloc(const char *buffer, int len, int *outlen);
500 STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, const char *ibuffer, int ilen);
501 
502 STBIDEF char *stbi_zlib_decode_noheader_malloc(const char *buffer, int len, int *outlen);
503 STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen);
504 
505 
506 #ifdef __cplusplus
507 }
508 #endif
509 
511 #endif /* STBI_INCLUDE_STB_IMAGE_H */
512 
513 #ifdef STB_IMAGE_IMPLEMENTATION
514 
/*
 * Decoder selection.  As soon as any STBI_ONLY_<fmt> symbol is defined the
 * library switches to opt-in mode: every format that was not explicitly
 * requested gets its STBI_NO_<fmt> symbol defined here.
 */
#if defined(STBI_ONLY_JPEG) || defined(STBI_ONLY_PNG) || defined(STBI_ONLY_BMP) || \
    defined(STBI_ONLY_TGA)  || defined(STBI_ONLY_GIF) || defined(STBI_ONLY_PSD) || \
    defined(STBI_ONLY_HDR)  || defined(STBI_ONLY_PIC) || defined(STBI_ONLY_PNM) || \
    defined(STBI_ONLY_ZLIB)
#  if !defined(STBI_ONLY_JPEG)
#    define STBI_NO_JPEG
#  endif
#  if !defined(STBI_ONLY_PNG)
#    define STBI_NO_PNG
#  endif
#  if !defined(STBI_ONLY_BMP)
#    define STBI_NO_BMP
#  endif
#  if !defined(STBI_ONLY_PSD)
#    define STBI_NO_PSD
#  endif
#  if !defined(STBI_ONLY_TGA)
#    define STBI_NO_TGA
#  endif
#  if !defined(STBI_ONLY_GIF)
#    define STBI_NO_GIF
#  endif
#  if !defined(STBI_ONLY_HDR)
#    define STBI_NO_HDR
#  endif
#  if !defined(STBI_ONLY_PIC)
#    define STBI_NO_PIC
#  endif
#  if !defined(STBI_ONLY_PNM)
#    define STBI_NO_PNM
#  endif
#endif

/* With PNG disabled, the zlib decoder is dropped as well unless the client
 * asked to keep it via STBI_SUPPORT_ZLIB. */
#if defined(STBI_NO_PNG) && !defined(STBI_SUPPORT_ZLIB) && !defined(STBI_NO_ZLIB)
#  define STBI_NO_ZLIB
#endif
551 
552 
553 #include <stdarg.h>
554 #include <stddef.h> /* ptrdiff_t on osx */
555 #include <stdlib.h>
556 #include <string.h>
557 
558 #if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)
559 #include <math.h> /* ldexp */
560 #endif
561 
562 #ifndef STBI_NO_STDIO
563 #include <stdio.h>
564 #endif
565 
566 #ifndef STBI_ASSERT
567 #include <assert.h>
568 #define STBI_ASSERT(x) assert(x)
569 #endif
570 
571 
/* stbi_inline: __forceinline on MSVC, 'inline' for other C++ compilers,
 * and nothing at all for other C compilers. */
#if defined(_MSC_VER)
   #define stbi_inline __forceinline
#elif defined(__cplusplus)
   #define stbi_inline inline
#else
   #define stbi_inline
#endif
581 
582 
/* Fixed-width integer aliases: taken from <stdint.h>, except on MSVC where
 * they are spelled with the built-in types. */
#ifndef _MSC_VER
#include <stdint.h>
typedef uint16_t stbi__uint16;
typedef int16_t  stbi__int16;
typedef uint32_t stbi__uint32;
typedef int32_t  stbi__int32;
#else
typedef unsigned short stbi__uint16;
typedef signed short   stbi__int16;
typedef unsigned int   stbi__uint32;
typedef signed int     stbi__int32;
#endif

/* Compile-time size check: the array size becomes -1 (a compile error)
 * if stbi__uint32 is not exactly 4 bytes. */
typedef unsigned char validate_uint32[sizeof(stbi__uint32) == 4 ? 1 : -1];
598 
/* Marks a variable as intentionally unused.  Outside MSVC the argument is
 * only an operand of sizeof, so it is never evaluated. */
#ifndef _MSC_VER
#define STBI_NOTUSED(v)  (void)sizeof(v)
#else
#define STBI_NOTUSED(v)  (void)(v)
#endif
604 
/* MSVC provides the _lrotl rotate intrinsic. */
#ifdef _MSC_VER
#define STBI_HAS_LROTL
#endif

/* stbi_lrot(x,y): rotate the 32-bit value x left by y bits.
 * NOTE(review): the shift-based form assumes x is a 32-bit unsigned value
 * and 0 < y < 32 -- confirm against callers. */
#ifndef STBI_HAS_LROTL
   #define stbi_lrot(x,y)  (((x) << (y)) | ((x) >> (32 - (y))))
#else
   #define stbi_lrot(x,y)  _lrotl(x,y)
#endif
614 
/* The allocator hooks must be overridden together: reject a partial set. */
#if (defined(STBI_MALLOC) || defined(STBI_FREE) || defined(STBI_REALLOC)) && \
    !(defined(STBI_MALLOC) && defined(STBI_FREE) && defined(STBI_REALLOC))
#error "Must define all or none of STBI_MALLOC, STBI_FREE, and STBI_REALLOC."
#endif

/* Default allocator: the C runtime heap. */
#ifndef STBI_MALLOC
#define STBI_MALLOC(sz)     malloc(sz)
#define STBI_REALLOC(p,sz)  realloc(p,sz)
#define STBI_FREE(p)        free(p)
#endif
628 
629 // x86/x64 detection
630 #if defined(__x86_64__) || defined(_M_X64)
631 #define STBI__X64_TARGET
632 #elif defined(__i386) || defined(_M_IX86)
633 #define STBI__X86_TARGET
634 #endif
635 
636 #if defined(__GNUC__) && (defined(STBI__X86_TARGET) || defined(STBI__X64_TARGET)) && !defined(__SSE2__) && !defined(STBI_NO_SIMD)
637 /* NOTE: it is not clear whether we actually need this for the 64-bit path.
638  * gcc doesn't support sse2 intrinsics unless you compile with -msse2,
639  * (but compiling with -msse2 allows the compiler to use SSE2 everywhere;
640  * this is just broken and gcc are jerks for not fixing it properly
641  * http://www.virtualdub.org/blog/pivot/entry.php?id=363 )
642  */
643 #define STBI_NO_SIMD
644 #endif
645 
646 #if defined(__MINGW32__) && defined(STBI__X86_TARGET) && !defined(STBI_MINGW_ENABLE_SSE2) && !defined(STBI_NO_SIMD)
647 /* Note that __MINGW32__ doesn't actually mean 32-bit, so we have to avoid STBI__X64_TARGET
648  *
649  * 32-bit MinGW wants ESP to be 16-byte aligned, but this is not in the
650  * Windows ABI and VC++ as well as Windows DLLs don't maintain that invariant.
651  * As a result, enabling SSE2 on 32-bit MinGW is dangerous when not
652  * simultaneously enabling "-mstackrealign".
653  *
654  * See https://github.com/nothings/stb/issues/81 for more information.
655  *
656  * So default to no SSE2 on 32-bit MinGW. If you've read this far and added
657  * -mstackrealign to your build settings, feel free to #define STBI_MINGW_ENABLE_SSE2.
658  */
659 #define STBI_NO_SIMD
660 #endif
661 
662 #if !defined(STBI_NO_SIMD) && defined(STBI__X86_TARGET)
663 #define STBI_SSE2
664 #include <emmintrin.h>
665 
666 #ifdef _MSC_VER
667 
668 #if _MSC_VER >= 1400 /* not VC6 */
669 #include <intrin.h> /* __cpuid */
/* Runs CPUID function 1 through the __cpuid intrinsic and returns the fourth
 * output word (EDX). */
static int stbi__cpuid3(void)
{
   int regs[4];

   __cpuid(regs, 1);
   return regs[3];
}
676 #else
677 static int stbi__cpuid3(void)
678 {
679  int res;
680  __asm {
681  mov eax,1
682  cpuid
683  mov res,edx
684  }
685  return res;
686 }
687 #endif
688 
689 #define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name
690 
/*
 * Run-time SSE2 check for the MSVC build.
 * Returns 1 when bit 26 of the word reported by stbi__cpuid3() is set,
 * 0 otherwise.
 *
 * Declared with an explicit (void) parameter list so the definition is a
 * real prototype, matching stbi__cpuid3(void).
 */
static int stbi__sse2_available(void)
{
   int feature_word = stbi__cpuid3();

   return ((feature_word >> 26) & 1) != 0;
}
696 #else /* assume GCC-style if not VC++ */
697 #define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
698 
/*
 * Run-time SSE2 check for GCC-style compilers.
 * Returns nonzero when SSE2 is reported available, 0 otherwise.  Compilers
 * that do not identify as GCC 4.8 or later always get 0, i.e. SIMD paths
 * stay disabled there.
 *
 * Declared with an explicit (void) parameter list so the definition is a
 * real prototype, like the other no-argument functions in this file.
 */
static int stbi__sse2_available(void)
{
#if defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__) >= 408 /* GCC 4.8 or later */
   /* GCC 4.8+ has a built-in run-time CPU feature query */
   return __builtin_cpu_supports("sse2");
#else
   /* no portable way to do this without GCC inline ASM; just bail for now */
   return 0;
#endif
}
710 #endif
711 #endif
712 
713 /* ARM NEON */
714 #if defined(STBI_NO_SIMD) && defined(STBI_NEON)
715 #undef STBI_NEON
716 #endif
717 
718 #ifdef STBI_NEON
719 #include <arm_neon.h>
720 /* assume GCC or Clang on ARM targets */
721 #define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
722 #endif
723 
724 #ifndef STBI_SIMD_ALIGN
725 #define STBI_SIMD_ALIGN(type, name) type name
726 #endif
727 
729 //
730 // stbi__context struct and start_xxx functions
731 
732 // stbi__context structure is our basic context used by all images, so it
733 // contains all the IO context, plus some basic image information
734 typedef struct
735 {
736  stbi__uint32 img_x, img_y;
737  int img_n, img_out_n;
738 
740  void *io_user_data;
741 
742  int read_from_callbacks;
743  int buflen;
744  stbi_uc buffer_start[128];
745 
746  stbi_uc *img_buffer, *img_buffer_end;
747  stbi_uc *img_buffer_original;
748 } stbi__context;
749 
750 
751 static void stbi__refill_buffer(stbi__context *s);
752 
753 // initialize a memory-decode context
754 static void stbi__start_mem(stbi__context *s, stbi_uc const *buffer, int len)
755 {
756  s->io.read = NULL;
757  s->read_from_callbacks = 0;
758  s->img_buffer = s->img_buffer_original = (stbi_uc *) buffer;
759  s->img_buffer_end = (stbi_uc *) buffer+len;
760 }
761 
762 // initialize a callback-based context
763 static void stbi__start_callbacks(stbi__context *s, stbi_io_callbacks *c, void *user)
764 {
765  s->io = *c;
766  s->io_user_data = user;
767  s->buflen = sizeof(s->buffer_start);
768  s->read_from_callbacks = 1;
769  s->img_buffer_original = s->buffer_start;
770  stbi__refill_buffer(s);
771 }
772 
773 #ifndef STBI_NO_STDIO
774 
/* 'read' callback for stdio: 'user' is the FILE* to read from.
 * Returns the number of bytes fread() actually delivered into 'data'. */
static int stbi__stdio_read(void *user, char *data, int size)
{
   FILE *fp = (FILE *) user;
   size_t got = fread(data, 1, size, fp);

   return (int) got;
}
779 
/* 'skip' callback for stdio: move the FILE* in 'user' by n bytes relative to
 * the current position (n may be negative).  The fseek() result is ignored. */
static void stbi__stdio_skip(void *user, int n)
{
   FILE *fp = (FILE *) user;

   fseek(fp, n, SEEK_CUR);
}
784 
/* 'eof' callback for stdio: nonzero once the end-of-file indicator of the
 * FILE* in 'user' is set. */
static int stbi__stdio_eof(void *user)
{
   FILE *fp = (FILE *) user;

   return feof(fp);
}
789 
790 static stbi_io_callbacks stbi__stdio_callbacks =
791 {
792  stbi__stdio_read,
793  stbi__stdio_skip,
794  stbi__stdio_eof,
795 };
796 
797 static void stbi__start_file(stbi__context *s, FILE *f)
798 {
799  stbi__start_callbacks(s, &stbi__stdio_callbacks, (void *) f);
800 }
801 
802 #endif /* !STBI_NO_STDIO */
803 
804 static void stbi__rewind(stbi__context *s)
805 {
806  /* conceptually rewind SHOULD rewind to the beginning of the stream,
807  * but we just rewind to the beginning of the initial buffer, because
808  * we only use it after doing 'test', which only ever looks at at most 92 bytes
809  */
810  s->img_buffer = s->img_buffer_original;
811 }
812 
813 #ifndef STBI_NO_JPEG
814 static int stbi__jpeg_test(stbi__context *s);
815 static stbi_uc *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
816 static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp);
817 #endif
818 
// Forward declarations of the per-format entry points. Each format exposes
// the same trio -- <fmt>_test, <fmt>_load, <fmt>_info -- and each trio is
// compiled out when the matching STBI_NO_<FMT> macro is defined.
// Note that the HDR loader is the only one returning float* instead of
// stbi_uc*.
819 #ifndef STBI_NO_PNG
820 static int stbi__png_test(stbi__context *s);
821 static stbi_uc *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
822 static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp);
823 #endif
824 
825 #ifndef STBI_NO_BMP
826 static int stbi__bmp_test(stbi__context *s);
827 static stbi_uc *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
828 static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp);
829 #endif
830 
831 #ifndef STBI_NO_TGA
832 static int stbi__tga_test(stbi__context *s);
833 static stbi_uc *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
834 static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp);
835 #endif
836 
837 #ifndef STBI_NO_PSD
838 static int stbi__psd_test(stbi__context *s);
839 static stbi_uc *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
840 static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp);
841 #endif
842 
843 #ifndef STBI_NO_HDR
844 static int stbi__hdr_test(stbi__context *s);
845 static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
846 static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp);
847 #endif
848 
849 #ifndef STBI_NO_PIC
850 static int stbi__pic_test(stbi__context *s);
851 static stbi_uc *stbi__pic_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
852 static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp);
853 #endif
854 
855 #ifndef STBI_NO_GIF
856 static int stbi__gif_test(stbi__context *s);
857 static stbi_uc *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
858 static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp);
859 #endif
860 
861 #ifndef STBI_NO_PNM
862 static int stbi__pnm_test(stbi__context *s);
863 static stbi_uc *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
864 static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp);
865 #endif
866 
867 // this is not threadsafe
868 static const char *stbi__g_failure_reason;
869 
870 STBIDEF const char *stbi_failure_reason(void)
871 {
872  return stbi__g_failure_reason;
873 }
874 
875 static int stbi__err(const char *str)
876 {
877  stbi__g_failure_reason = str;
878  return 0;
879 }
880 
// Allocation entry point: forwards `size` to the STBI_MALLOC macro and
// hands back whatever it yields.
static void *stbi__malloc(size_t size)
{
   void *block = STBI_MALLOC(size);
   return block;
}
885 
// Error-reporting macros. Call sites always pass two strings: x (first
// argument) and y (second argument); which one is recorded is chosen at
// compile time below.
886 // stbi__err - error
887 // stbi__errpf - error returning pointer to float
888 // stbi__errpuc - error returning pointer to unsigned char
889 
// STBI_NO_FAILURE_STRINGS : record nothing, the macro is the constant 0
// STBI_FAILURE_USERMSG    : record the second string (y)
// otherwise               : record the first string (x)
// In the last two cases the macro forwards to the one-argument function
// stbi__err defined earlier in this file.
890 #ifdef STBI_NO_FAILURE_STRINGS
891  #define stbi__err(x,y) 0
892 #elif defined(STBI_FAILURE_USERMSG)
893  #define stbi__err(x,y) stbi__err(y)
894 #else
895  #define stbi__err(x,y) stbi__err(x)
896 #endif
897 
// Pointer-returning variants: stbi__err is evaluated for its side effect and
// the conditional yields NULL on either arm, cast to the needed pointer type.
898 #define stbi__errpf(x,y) ((float *) (stbi__err(x,y)?NULL:NULL))
899 #define stbi__errpuc(x,y) ((unsigned char *) (stbi__err(x,y)?NULL:NULL))
900 
901 STBIDEF void stbi_image_free(void *retval_from_stbi_load)
902 {
903  STBI_FREE(retval_from_stbi_load);
904 }
905 
// Forward declarations of the two pixel-format converters used by the load
// front-ends below. Each is only available when its feature is compiled in:
// 8-bit -> float needs !STBI_NO_LINEAR, float -> 8-bit needs !STBI_NO_HDR.
906 #ifndef STBI_NO_LINEAR
907 static float *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp);
908 #endif
909 
910 #ifndef STBI_NO_HDR
911 static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp);
912 #endif
913 
914 static int stbi__vertically_flip_on_load = 0;
915 
916 STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip)
917 {
918  stbi__vertically_flip_on_load = flag_true_if_should_flip;
919 }
920 
921 static unsigned char *stbi__load_main(stbi__context *s, int *x, int *y, int *comp, int req_comp)
922 {
923  #ifndef STBI_NO_JPEG
924  if (stbi__jpeg_test(s)) return stbi__jpeg_load(s,x,y,comp,req_comp);
925  #endif
926  #ifndef STBI_NO_PNG
927  if (stbi__png_test(s)) return stbi__png_load(s,x,y,comp,req_comp);
928  #endif
929  #ifndef STBI_NO_BMP
930  if (stbi__bmp_test(s)) return stbi__bmp_load(s,x,y,comp,req_comp);
931  #endif
932  #ifndef STBI_NO_GIF
933  if (stbi__gif_test(s)) return stbi__gif_load(s,x,y,comp,req_comp);
934  #endif
935  #ifndef STBI_NO_PSD
936  if (stbi__psd_test(s)) return stbi__psd_load(s,x,y,comp,req_comp);
937  #endif
938  #ifndef STBI_NO_PIC
939  if (stbi__pic_test(s)) return stbi__pic_load(s,x,y,comp,req_comp);
940  #endif
941  #ifndef STBI_NO_PNM
942  if (stbi__pnm_test(s)) return stbi__pnm_load(s,x,y,comp,req_comp);
943  #endif
944 
945  #ifndef STBI_NO_HDR
946  if (stbi__hdr_test(s)) {
947  float *hdr = stbi__hdr_load(s, x,y,comp,req_comp);
948  return stbi__hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp);
949  }
950  #endif
951 
952  #ifndef STBI_NO_TGA
953  // test tga last because it's a crappy test!
954  if (stbi__tga_test(s))
955  return stbi__tga_load(s,x,y,comp,req_comp);
956  #endif
957 
958  return stbi__errpuc("unknown image type", "Image not of any known type, or corrupt");
959 }
960 
961 static unsigned char *stbi__load_flip(stbi__context *s, int *x, int *y, int *comp, int req_comp)
962 {
963  unsigned char *result = stbi__load_main(s, x, y, comp, req_comp);
964 
965  if (stbi__vertically_flip_on_load && result != NULL) {
966  int w = *x, h = *y;
967  int depth = req_comp ? req_comp : *comp;
968  int row,col,z;
969  stbi_uc temp;
970 
971  // @OPTIMIZE: use a bigger temp buffer and memcpy multiple pixels at once
972  for (row = 0; row < (h>>1); row++) {
973  for (col = 0; col < w; col++) {
974  for (z = 0; z < depth; z++) {
975  temp = result[(row * w + col) * depth + z];
976  result[(row * w + col) * depth + z] = result[((h - row - 1) * w + col) * depth + z];
977  result[((h - row - 1) * w + col) * depth + z] = temp;
978  }
979  }
980  }
981  }
982 
983  return result;
984 }
985 
986 #ifndef STBI_NO_HDR
987 static void stbi__float_postprocess(float *result, int *x, int *y, int *comp, int req_comp)
988 {
989  if (stbi__vertically_flip_on_load && result != NULL) {
990  int w = *x, h = *y;
991  int depth = req_comp ? req_comp : *comp;
992  int row,col,z;
993  float temp;
994 
995  // @OPTIMIZE: use a bigger temp buffer and memcpy multiple pixels at once
996  for (row = 0; row < (h>>1); row++) {
997  for (col = 0; col < w; col++) {
998  for (z = 0; z < depth; z++) {
999  temp = result[(row * w + col) * depth + z];
1000  result[(row * w + col) * depth + z] = result[((h - row - 1) * w + col) * depth + z];
1001  result[((h - row - 1) * w + col) * depth + z] = temp;
1002  }
1003  }
1004  }
1005  }
1006 }
1007 #endif
1008 
1009 
1010 #ifndef STBI_NO_STDIO
1011 
// Open `filename` with the given stdio `mode`.
// Uses fopen_s on MSVC 2005 and later (_MSC_VER >= 1400), plain fopen
// elsewhere. Returns the stream, or a null pointer if it could not be opened.
static FILE *stbi__fopen(char const *filename, char const *mode)
{
#if defined(_MSC_VER) && _MSC_VER >= 1400
   FILE *handle = 0;
   if (fopen_s(&handle, filename, mode) != 0)
      return 0;
   return handle;
#else
   return fopen(filename, mode);
#endif
}
1023 
1024 
1025 STBIDEF stbi_uc *stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp)
1026 {
1027  FILE *f = stbi__fopen(filename, "rb");
1028  unsigned char *result;
1029  if (!f) return stbi__errpuc("can't fopen", "Unable to open file");
1030  result = stbi_load_from_file(f,x,y,comp,req_comp);
1031  fclose(f);
1032  return result;
1033 }
1034 
1035 STBIDEF stbi_uc *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
1036 {
1037  unsigned char *result;
1038  stbi__context s;
1039  stbi__start_file(&s,f);
1040  result = stbi__load_flip(&s,x,y,comp,req_comp);
1041  if (result) {
1042  /* need to 'unget' all the characters in the IO buffer */
1043  fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR);
1044  }
1045  return result;
1046 }
1047 #endif /* !STBI_NO_STDIO */
1048 
1049 STBIDEF stbi_uc *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
1050 {
1051  stbi__context s;
1052  stbi__start_mem(&s,buffer,len);
1053  return stbi__load_flip(&s,x,y,comp,req_comp);
1054 }
1055 
1056 STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
1057 {
1058  stbi__context s;
1059  stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
1060  return stbi__load_flip(&s,x,y,comp,req_comp);
1061 }
1062 
1063 #ifndef STBI_NO_LINEAR
1064 static float *stbi__loadf_main(stbi__context *s, int *x, int *y, int *comp, int req_comp)
1065 {
1066  unsigned char *data;
1067  #ifndef STBI_NO_HDR
1068  if (stbi__hdr_test(s)) {
1069  float *hdr_data = stbi__hdr_load(s,x,y,comp,req_comp);
1070  if (hdr_data)
1071  stbi__float_postprocess(hdr_data,x,y,comp,req_comp);
1072  return hdr_data;
1073  }
1074  #endif
1075  data = stbi__load_flip(s, x, y, comp, req_comp);
1076  if (data)
1077  return stbi__ldr_to_hdr(data, *x, *y, req_comp ? req_comp : *comp);
1078  return stbi__errpf("unknown image type", "Image not of any known type, or corrupt");
1079 }
1080 
1081 STBIDEF float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
1082 {
1083  stbi__context s;
1084  stbi__start_mem(&s,buffer,len);
1085  return stbi__loadf_main(&s,x,y,comp,req_comp);
1086 }
1087 
1088 STBIDEF float *stbi_loadf_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
1089 {
1090  stbi__context s;
1091  stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
1092  return stbi__loadf_main(&s,x,y,comp,req_comp);
1093 }
1094 
1095 #ifndef STBI_NO_STDIO
1096 STBIDEF float *stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp)
1097 {
1098  float *result;
1099  FILE *f = stbi__fopen(filename, "rb");
1100  if (!f) return stbi__errpf("can't fopen", "Unable to open file");
1101  result = stbi_loadf_from_file(f,x,y,comp,req_comp);
1102  fclose(f);
1103  return result;
1104 }
1105 
1106 STBIDEF float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
1107 {
1108  stbi__context s;
1109  stbi__start_file(&s,f);
1110  return stbi__loadf_main(&s,x,y,comp,req_comp);
1111 }
1112 #endif // !STBI_NO_STDIO
1113 
1114 #endif // !STBI_NO_LINEAR
1115 
1116 // these is-hdr-or-not is defined independent of whether STBI_NO_LINEAR is
1117 // defined, for API simplicity; if STBI_NO_LINEAR is defined, it always
1118 // reports false!
1119 
1121 {
1122  #ifndef STBI_NO_HDR
1123  stbi__context s;
1124  stbi__start_mem(&s,buffer,len);
1125  return stbi__hdr_test(&s);
1126  #else
1127  STBI_NOTUSED(buffer);
1128  STBI_NOTUSED(len);
1129  return 0;
1130  #endif
1131 }
1132 
1133 #ifndef STBI_NO_STDIO
1134 STBIDEF int stbi_is_hdr (char const *filename)
1135 {
1136  FILE *f = stbi__fopen(filename, "rb");
1137  int result=0;
1138  if (f) {
1140  fclose(f);
1141  }
1142  return result;
1143 }
1144 
1146 {
1147  #ifndef STBI_NO_HDR
1148  stbi__context s;
1149  stbi__start_file(&s,f);
1150  return stbi__hdr_test(&s);
1151  #else
1152  return 0;
1153  #endif
1154 }
1155 #endif // !STBI_NO_STDIO
1156 
1157 STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user)
1158 {
1159  #ifndef STBI_NO_HDR
1160  stbi__context s;
1161  stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
1162  return stbi__hdr_test(&s);
1163  #else
1164  return 0;
1165  #endif
1166 }
1167 
1168 static float stbi__h2l_gamma_i=1.0f/2.2f, stbi__h2l_scale_i=1.0f;
1169 static float stbi__l2h_gamma=2.2f, stbi__l2h_scale=1.0f;
1170 
1171 #ifndef STBI_NO_LINEAR
1172 STBIDEF void stbi_ldr_to_hdr_gamma(float gamma) { stbi__l2h_gamma = gamma; }
1173 STBIDEF void stbi_ldr_to_hdr_scale(float scale) { stbi__l2h_scale = scale; }
1174 #endif
1175 
1176 /* forward declarations */
1177 STBIDEF void stbi_hdr_to_ldr_gamma(float gamma);
1178 STBIDEF void stbi_hdr_to_ldr_scale(float scale);
1179 
1180 STBIDEF void stbi_hdr_to_ldr_gamma(float gamma) { stbi__h2l_gamma_i = 1/gamma; }
1181 STBIDEF void stbi_hdr_to_ldr_scale(float scale) { stbi__h2l_scale_i = 1/scale; }
1182 
1183 
1185 //
1186 // Common code used by all image loaders
1187 //
1188 
// Scan modes shared by the image loaders.
// NOTE(review): presumably selects how much of a file a decoder processes
// (full load / type probe / header only) -- confirm against the decoders.
enum
{
   STBI__SCAN_load   = 0,
   STBI__SCAN_type   = 1,
   STBI__SCAN_header = 2
};
1195 
1196 static void stbi__refill_buffer(stbi__context *s)
1197 {
1198  int n = (s->io.read)(s->io_user_data,(char*)s->buffer_start,s->buflen);
1199  if (n == 0) {
1200  // at end of file, treat same as if from memory, but need to handle case
1201  // where s->img_buffer isn't pointing to safe memory, e.g. 0-byte file
1202  s->read_from_callbacks = 0;
1203  s->img_buffer = s->buffer_start;
1204  s->img_buffer_end = s->buffer_start+1;
1205  *s->img_buffer = 0;
1206  } else {
1207  s->img_buffer = s->buffer_start;
1208  s->img_buffer_end = s->buffer_start + n;
1209  }
1210 }
1211 
1212 stbi_inline static stbi_uc stbi__get8(stbi__context *s)
1213 {
1214  if (s->img_buffer < s->img_buffer_end)
1215  return *s->img_buffer++;
1216  if (s->read_from_callbacks) {
1217  stbi__refill_buffer(s);
1218  return *s->img_buffer++;
1219  }
1220  return 0;
1221 }
1222 
1223 stbi_inline static int stbi__at_eof(stbi__context *s)
1224 {
1225  if (s->io.read) {
1226  if (!(s->io.eof)(s->io_user_data)) return 0;
1227  // if feof() is true, check if buffer = end
1228  // special case: we've only got the special 0 character at the end
1229  if (s->read_from_callbacks == 0) return 1;
1230  }
1231 
1232  return s->img_buffer >= s->img_buffer_end;
1233 }
1234 
1235 static void stbi__skip(stbi__context *s, int n)
1236 {
1237  if (n < 0) {
1238  s->img_buffer = s->img_buffer_end;
1239  return;
1240  }
1241  if (s->io.read) {
1242  int blen = (int) (s->img_buffer_end - s->img_buffer);
1243  if (blen < n) {
1244  s->img_buffer = s->img_buffer_end;
1245  (s->io.skip)(s->io_user_data, n - blen);
1246  return;
1247  }
1248  }
1249  s->img_buffer += n;
1250 }
1251 
1252 static int stbi__getn(stbi__context *s, stbi_uc *buffer, int n)
1253 {
1254  if (s->io.read) {
1255  int blen = (int) (s->img_buffer_end - s->img_buffer);
1256  if (blen < n) {
1257  int res, count;
1258 
1259  memcpy(buffer, s->img_buffer, blen);
1260 
1261  count = (s->io.read)(s->io_user_data, (char*) buffer + blen, n - blen);
1262  res = (count == (n-blen));
1263  s->img_buffer = s->img_buffer_end;
1264  return res;
1265  }
1266  }
1267 
1268  if (s->img_buffer+n <= s->img_buffer_end) {
1269  memcpy(buffer, s->img_buffer, n);
1270  s->img_buffer += n;
1271  return 1;
1272  } else
1273  return 0;
1274 }
1275 
1276 static int stbi__get16be(stbi__context *s)
1277 {
1278  int z = stbi__get8(s);
1279  return (z << 8) + stbi__get8(s);
1280 }
1281 
1282 static stbi__uint32 stbi__get32be(stbi__context *s)
1283 {
1284  stbi__uint32 z = stbi__get16be(s);
1285  return (z << 16) + stbi__get16be(s);
1286 }
1287 
1288 static int stbi__get16le(stbi__context *s)
1289 {
1290  int z = stbi__get8(s);
1291  return z + (stbi__get8(s) << 8);
1292 }
1293 
1294 static stbi__uint32 stbi__get32le(stbi__context *s)
1295 {
1296  stbi__uint32 z = stbi__get16le(s);
1297  return z + (stbi__get16le(s) << 16);
1298 }
1299 
// Keep only the low 8 bits of x and cast the result to stbi_uc.
1300 #define STBI__BYTECAST(x) ((stbi_uc) ((x) & 255)) // truncate int to byte without warnings
1301 
1302 
1304 //
1305 // generic converter from built-in img_n to req_comp
1306 // individual types do this automatically as much as possible (e.g. jpeg
1307 // does all cases internally since it needs to colorspace convert anyway,
1308 // and it never has alpha, so very few cases ). png can automatically
1309 // interleave an alpha=255 channel, but falls back to this for other cases
1310 //
1311 // assume data buffer is malloced, so malloc a new one and free that one
1312 // only failure mode is malloc failing
1313 
1314 static stbi_uc stbi__compute_y(int r, int g, int b)
1315 {
1316  return (stbi_uc) (((r*77) + (g*150) + (29*b)) >> 8);
1317 }
1318 
// Convert an x-by-y image from img_n to req_comp components per pixel.
//
//   data     : source pixels; ownership passes to this function
//   img_n    : components per pixel in `data`
//   req_comp : components per pixel wanted (1..4)
//
// Returns `data` itself when no conversion is needed. Otherwise returns a
// newly allocated buffer and frees `data`. On failure frees `data`, records
// an out-of-memory failure reason and returns NULL.
//
// All byte counts and row offsets are computed in size_t and checked for
// wrap-around: the product req_comp*x*y evaluated in 32-bit unsigned
// arithmetic can wrap and produce an undersized allocation.
static unsigned char *stbi__convert_format(unsigned char *data, int img_n, int req_comp, unsigned int x, unsigned int y)
{
   int i,j;
   unsigned char *good;
   size_t pixels, total;

   if (req_comp == img_n) return data;
   STBI_ASSERT(req_comp >= 1 && req_comp <= 4);

   pixels = (size_t) x * (size_t) y;
   total  = pixels * (size_t) req_comp;
   if ((x != 0 && pixels / (size_t) x != (size_t) y) ||
       (pixels != 0 && total / pixels != (size_t) req_comp)) {
      // size does not fit in size_t: treat like a failed allocation
      STBI_FREE(data);
      return stbi__errpuc("outofmem", "Out of memory");
   }

   good = (unsigned char *) stbi__malloc(total);
   if (good == NULL) {
      STBI_FREE(data);
      return stbi__errpuc("outofmem", "Out of memory");
   }

   for (j=0; j < (int) y; ++j) {
      unsigned char *src  = data + (size_t) j * x * (size_t) img_n;
      unsigned char *dest = good + (size_t) j * x * (size_t) req_comp;

      #define COMBO(a,b)  ((a)*8+(b))
      #define CASE(a,b)   case COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b)
      // convert source image with img_n components to one with req_comp components;
      // avoid switch per pixel, so use switch per scanline and massive macros
      switch (COMBO(img_n, req_comp)) {
         CASE(1,2) dest[0]=src[0], dest[1]=255; break;
         CASE(1,3) dest[0]=dest[1]=dest[2]=src[0]; break;
         CASE(1,4) dest[0]=dest[1]=dest[2]=src[0], dest[3]=255; break;
         CASE(2,1) dest[0]=src[0]; break;
         CASE(2,3) dest[0]=dest[1]=dest[2]=src[0]; break;
         CASE(2,4) dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1]; break;
         CASE(3,4) dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=255; break;
         CASE(3,1) dest[0]=stbi__compute_y(src[0],src[1],src[2]); break;
         CASE(3,2) dest[0]=stbi__compute_y(src[0],src[1],src[2]), dest[1] = 255; break;
         CASE(4,1) dest[0]=stbi__compute_y(src[0],src[1],src[2]); break;
         CASE(4,2) dest[0]=stbi__compute_y(src[0],src[1],src[2]), dest[1] = src[3]; break;
         CASE(4,3) dest[0]=src[0],dest[1]=src[1],dest[2]=src[2]; break;
         default: STBI_ASSERT(0);
      }
      #undef CASE
      #undef COMBO
   }

   STBI_FREE(data);
   return good;
}
1362 
1363 #ifndef STBI_NO_LINEAR
1364 static float *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp)
1365 {
1366  int i,k,n;
1367  float *output = (float *) stbi__malloc(x * y * comp * sizeof(float));
1368  if (output == NULL) { STBI_FREE(data); return stbi__errpf("outofmem", "Out of memory"); }
1369  // compute number of non-alpha components
1370  if (comp & 1) n = comp; else n = comp-1;
1371  for (i=0; i < x*y; ++i) {
1372  for (k=0; k < n; ++k) {
1373  output[i*comp + k] = (float) (pow(data[i*comp+k]/255.0f, stbi__l2h_gamma) * stbi__l2h_scale);
1374  }
1375  if (k < comp) output[i*comp + k] = data[i*comp+k]/255.0f;
1376  }
1377  STBI_FREE(data);
1378  return output;
1379 }
1380 #endif
1381 
1382 #ifndef STBI_NO_HDR
1383 #define stbi__float2int(x) ((int) (x))
1384 static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp)
1385 {
1386  int i,k,n;
1387  stbi_uc *output = (stbi_uc *) stbi__malloc(x * y * comp);
1388  if (output == NULL) { STBI_FREE(data); return stbi__errpuc("outofmem", "Out of memory"); }
1389  // compute number of non-alpha components
1390  if (comp & 1) n = comp; else n = comp-1;
1391  for (i=0; i < x*y; ++i) {
1392  for (k=0; k < n; ++k) {
1393  float z = (float) pow(data[i*comp+k]*stbi__h2l_scale_i, stbi__h2l_gamma_i) * 255 + 0.5f;
1394  if (z < 0) z = 0;
1395  if (z > 255) z = 255;
1396  output[i*comp + k] = (stbi_uc) stbi__float2int(z);
1397  }
1398  if (k < comp) {
1399  float z = data[i*comp+k] * 255 + 0.5f;
1400  if (z < 0) z = 0;
1401  if (z > 255) z = 255;
1402  output[i*comp + k] = (stbi_uc) stbi__float2int(z);
1403  }
1404  }
1405  STBI_FREE(data);
1406  return output;
1407 }
1408 #endif
1409 
1411 //
1412 // "baseline" JPEG/JFIF decoder
1413 //
1414 // simple implementation
1415 // - doesn't support delayed output of y-dimension
1416 // - simple interface (only one output format: 8-bit interleaved RGB)
1417 // - doesn't try to recover corrupt jpegs
1418 // - doesn't allow partial loading, loading multiple at once
1419 // - still fast on x86 (copying globals into locals doesn't help x86)
1420 // - allocates lots of intermediate memory (full size of all components)
1421 // - non-interleaved case requires this anyway
1422 // - allows good upsampling (see next)
1423 // high-quality
1424 // - upsampled channels are bilinearly interpolated, even across blocks
1425 // - quality integer IDCT derived from IJG's 'slow'
1426 // performance
1427 // - fast huffman; reasonable integer IDCT
1428 // - some SIMD kernels for common paths on targets with SSE2/NEON
1429 // - uses a lot of intermediate memory, could cache poorly
1430 
1431 #ifndef STBI_NO_JPEG
1432 
1433 // huffman decoding acceleration
1434 #define FAST_BITS 9 // larger handles more cases; smaller stomps less cache
1435 
// One Huffman table in decode-ready form (filled by stbi__build_huffman,
// except `values`, which is populated elsewhere).
//   fast    : indexed by the next FAST_BITS bits of input; holds the symbol
//             index for codes of at most FAST_BITS bits, 255 = no fast entry
//   code    : code word of each symbol index
//   values  : value returned by the decoder for each symbol index
//   size    : code length in bits of each symbol index, 0-terminated
//   maxcode : per code length, largest code + 1, pre-shifted to 16 bits;
//             the final entry is an all-ones sentinel
//   delta   : per code length, offset added to a code to get its symbol index
1436 typedef struct
1437 {
1438  stbi_uc fast[1 << FAST_BITS];
1439  // weirdly, repacking this into AoS is a 10% speed loss, instead of a win
1440  stbi__uint16 code[256];
1441  stbi_uc values[256];
1442  stbi_uc size[257];
1443  unsigned int maxcode[18];
1444  int delta[17]; // old 'firstsymbol' - old 'firstcode'
1445 } stbi__huffman;
1446 
// Complete state of one JPEG decode.
//   s                 : input context the entropy decoder reads bytes from
//   huff_dc / huff_ac : Huffman tables handed to the block decoders
//   dequant           : per-table multipliers applied to decoded coefficients
//   fast_ac           : combined run/size/value lookup built by
//                       stbi__build_fast_ac
//   img_comp[].dc_pred: running DC predictor updated by the block decoders
//   code_buffer / code_bits / marker / nomore : bit-reader state maintained
//                       by stbi__grow_buffer_unsafe
//   spec_start / spec_end / succ_high / succ_low / eob_run : parameters and
//                       state consulted by the progressive block decoders
// NOTE(review): the remaining fields are not exercised in the code visible
// here; their meaning is taken from their names and existing comments only.
1447 typedef struct
1448 {
1449  stbi__context *s;
1450  stbi__huffman huff_dc[4];
1451  stbi__huffman huff_ac[4];
1452  stbi_uc dequant[4][64];
1453  stbi__int16 fast_ac[4][1 << FAST_BITS];
1454 
1455 // sizes for components, interleaved MCUs
1456  int img_h_max, img_v_max;
1457  int img_mcu_x, img_mcu_y;
1458  int img_mcu_w, img_mcu_h;
1459 
1460 // definition of jpeg image component
1461  struct
1462  {
1463  int id;
1464  int h,v;
1465  int tq;
1466  int hd,ha;
1467  int dc_pred;
1468 
1469  int x,y,w2,h2;
1470  stbi_uc *data;
1471  void *raw_data, *raw_coeff;
1472  stbi_uc *linebuf;
1473  short *coeff; // progressive only
1474  int coeff_w, coeff_h; // number of 8x8 coefficient blocks
1475  } img_comp[4];
1476 
1477  stbi__uint32 code_buffer; // jpeg entropy-coded buffer
1478  int code_bits; // number of valid bits
1479  unsigned char marker; // marker seen while filling entropy buffer
1480  int nomore; // flag if we saw a marker so must stop
1481 
1482  int progressive;
1483  int spec_start;
1484  int spec_end;
1485  int succ_high;
1486  int succ_low;
1487  int eob_run;
1488 
1489  int scan_n, order[4];
1490  int restart_interval, todo;
1491 
1492 // kernels
1493  void (*idct_block_kernel)(stbi_uc *out, int out_stride, short data[64]);
1494  void (*YCbCr_to_RGB_kernel)(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step);
1495  stbi_uc *(*resample_row_hv_2_kernel)(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs);
1496 } stbi__jpeg;
1497 
// Build the decode tables of `h` from a list of code-length counts.
//   count : count[i] is the number of codes of length i+1, for i in 0..15
// Fills h->size, h->code, h->delta, h->maxcode and the h->fast lookup;
// h->values is not touched here.
// Returns 1 on success, 0 (via stbi__err) if the counts describe more codes
// of some length than that length can hold.
// NOTE(review): nothing here bounds the sum of count[]; h->size has 257
// entries, so the caller presumably guarantees a total of at most 256 --
// confirm at the call site.
1498 static int stbi__build_huffman(stbi__huffman *h, int *count)
1499 {
1500  int i,j,k=0,code;
1501  // build size list for each symbol (from JPEG spec)
1502  for (i=0; i < 16; ++i)
1503  for (j=0; j < count[i]; ++j)
1504  h->size[k++] = (stbi_uc) (i+1);
// zero terminator: stops the per-length scan below
1505  h->size[k] = 0;
1506 
1507  // compute actual symbols (from jpeg spec)
1508  code = 0;
1509  k = 0;
1510  for(j=1; j <= 16; ++j) {
1511  // compute delta to add to code to compute symbol id
1512  h->delta[j] = k - code;
1513  if (h->size[k] == j) {
1514  while (h->size[k] == j)
1515  h->code[k++] = (stbi__uint16) (code++);
// the last code assigned must still fit in j bits
1516  if (code-1 >= (1 << j)) return stbi__err("bad code lengths","Corrupt JPEG");
1517  }
1518  // compute largest code + 1 for this size, preshifted as needed later
1519  h->maxcode[j] = code << (16-j);
1520  code <<= 1;
1521  }
// j is 17 here: sentinel that terminates the length search in the decoder
1522  h->maxcode[j] = 0xffffffff;
1523 
1524  // build non-spec acceleration table; 255 is flag for not-accelerated
1525  memset(h->fast, 255, 1 << FAST_BITS);
1526  for (i=0; i < k; ++i) {
1527  int s = h->size[i];
1528  if (s <= FAST_BITS) {
// every FAST_BITS-bit pattern that starts with this code maps to symbol i
1529  int c = h->code[i] << (FAST_BITS-s);
1530  int m = 1 << (FAST_BITS-s);
1531  for (j=0; j < m; ++j) {
1532  h->fast[c+j] = (stbi_uc) i;
1533  }
1534  }
1535  }
1536  return 1;
1537 }
1538 
1539 // build a table that decodes both magnitude and value of small ACs in
1540 // one go.
1541 static void stbi__build_fast_ac(stbi__int16 *fast_ac, stbi__huffman *h)
1542 {
1543  int i;
1544  for (i=0; i < (1 << FAST_BITS); ++i) {
1545  stbi_uc fast = h->fast[i];
1546  fast_ac[i] = 0;
1547  if (fast < 255) {
1548  int rs = h->values[fast];
1549  int run = (rs >> 4) & 15;
1550  int magbits = rs & 15;
1551  int len = h->size[fast];
1552 
1553  if (magbits && len + magbits <= FAST_BITS) {
1554  // magnitude code followed by receive_extend code
1555  int k = ((i << len) & ((1 << FAST_BITS) - 1)) >> (FAST_BITS - magbits);
1556  int m = 1 << (magbits - 1);
1557  if (k < m) k += (-1 << magbits) + 1;
1558  // if the result is small enough, we can fit it in fast_ac table
1559  if (k >= -128 && k <= 127)
1560  fast_ac[i] = (stbi__int16) ((k << 8) + (run << 4) + (len + magbits));
1561  }
1562  }
1563  }
1564 }
1565 
1566 static void stbi__grow_buffer_unsafe(stbi__jpeg *j)
1567 {
1568  do {
1569  int b = j->nomore ? 0 : stbi__get8(j->s);
1570  if (b == 0xff) {
1571  int c = stbi__get8(j->s);
1572  if (c != 0) {
1573  j->marker = (unsigned char) c;
1574  j->nomore = 1;
1575  return;
1576  }
1577  }
1578  j->code_buffer |= b << (24 - j->code_bits);
1579  j->code_bits += 8;
1580  } while (j->code_bits <= 24);
1581 }
1582 
1583 // (1 << n) - 1
1584 static stbi__uint32 stbi__bmask[17]={0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535};
1585 
1586 // decode a jpeg huffman value from the bitstream
// Returns the decoded value (an entry of h->values), or -1 when no code
// matches or the matching code needs more bits than the buffer holds.
// On success the consumed bits are shifted out of j->code_buffer and
// subtracted from j->code_bits.
1587 stbi_inline static int stbi__jpeg_huff_decode(stbi__jpeg *j, stbi__huffman *h)
1588 {
1589  unsigned int temp;
1590  int c,k;
1591 
1592  if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
1593 
1594  // look at the top FAST_BITS and determine what symbol ID it is,
1595  // if the code is <= FAST_BITS
1596  c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
1597  k = h->fast[c];
1598  if (k < 255) {
1599  int s = h->size[k];
// the table hit may rely on bits that are not actually in the buffer
1600  if (s > j->code_bits)
1601  return -1;
1602  j->code_buffer <<= s;
1603  j->code_bits -= s;
1604  return h->values[k];
1605  }
1606 
1607  // naive test is to shift the code_buffer down so k bits are
1608  // valid, then test against maxcode. To speed this up, we've
1609  // preshifted maxcode left so that it has (16-k) 0s at the
1610  // end; in other words, regardless of the number of bits, it
1611  // wants to be compared against something shifted to have 16;
1612  // that way we don't need to shift inside the loop.
1613  temp = j->code_buffer >> 16;
// terminates at k == 17 at the latest: maxcode[17] is an all-ones sentinel
1614  for (k=FAST_BITS+1 ; ; ++k)
1615  if (temp < h->maxcode[k])
1616  break;
1617  if (k == 17) {
1618  // error! code not found
1619  j->code_bits -= 16;
1620  return -1;
1621  }
1622 
1623  if (k > j->code_bits)
1624  return -1;
1625 
1626  // convert the huffman code to the symbol id
1627  c = ((j->code_buffer >> (32 - k)) & stbi__bmask[k]) + h->delta[k];
1628  STBI_ASSERT((((j->code_buffer) >> (32 - h->size[c])) & stbi__bmask[h->size[c]]) == h->code[c]);
1629 
1630  // convert the id to a symbol
1631  j->code_bits -= k;
1632  j->code_buffer <<= k;
1633  return h->values[c];
1634 }
1635 
1636 // bias[n] = (-1<<n) + 1
// i.e. -(2^n) + 1 for n in 0..15; added by stbi__extend_receive to an n-bit
// field whose leading bit is 0, turning it into the negative value it encodes.
1637 static int const stbi__jbias[16] = {0,-1,-3,-7,-15,-31,-63,-127,-255,-511,-1023,-2047,-4095,-8191,-16383,-32767};
1638 
1639 // combined JPEG 'receive' and JPEG 'extend', since baseline
1640 // always extends everything it receives.
// Reads the next n bits and returns them as a signed value: if the leading
// bit is 1 the field is returned as-is, otherwise stbi__jbias[n] is added.
// NOTE(review): stbi__jbias has 16 entries while the assert admits n == 16;
// callers presumably never pass n > 15 -- confirm.
1641 stbi_inline static int stbi__extend_receive(stbi__jpeg *j, int n)
1642 {
1643  unsigned int k;
1644  int sgn;
1645  if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
1646 
// all ones when the leading bit is set, zero otherwise
1647  sgn = (stbi__int32)j->code_buffer >> 31; // sign bit is always in MSB
// rotate so the n wanted bits land in the low n bits of k
1648  k = stbi_lrot(j->code_buffer, n);
1649  STBI_ASSERT(n >= 0 && n < (int) (sizeof(stbi__bmask)/sizeof(*stbi__bmask)));
// everything except the low n bits is the remaining buffer
1650  j->code_buffer = k & ~stbi__bmask[n];
1651  k &= stbi__bmask[n];
1652  j->code_bits -= n;
// the bias is applied only when sgn is zero
1653  return k + (stbi__jbias[n] & ~sgn);
1654 }
1655 
1656 // get some unsigned bits
// Reads the next n bits from the bit buffer and returns them as an unsigned
// value in the low n bits of the result; n indexes stbi__bmask.
1657 stbi_inline static int stbi__jpeg_get_bits(stbi__jpeg *j, int n)
1658 {
1659  unsigned int k;
1660  if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
// rotate so the n wanted bits land in the low n bits of k
1661  k = stbi_lrot(j->code_buffer, n);
1662  j->code_buffer = k & ~stbi__bmask[n];
1663  k &= stbi__bmask[n];
1664  j->code_bits -= n;
1665  return k;
1666 }
1667 
// Consume one bit from the bit buffer.
// The result is the buffer's top bit left in place (0 or 0x80000000
// converted to int), so callers should only test it for zero / non-zero.
1668 stbi_inline static int stbi__jpeg_get_bit(stbi__jpeg *j)
1669 {
1670  unsigned int k;
1671  if (j->code_bits < 1) stbi__grow_buffer_unsafe(j);
1672  k = j->code_buffer;
1673  j->code_buffer <<= 1;
1674  --j->code_bits;
1675  return k & 0x80000000;
1676 }
1677 
1678 // given a value that's at position X in the zigzag stream,
1679 // where does it appear in the 8x8 matrix coded as row-major?
1680 static stbi_uc stbi__jpeg_dezigzag[64+15] =
1681 {
1682  0, 1, 8, 16, 9, 2, 3, 10,
1683  17, 24, 32, 25, 18, 11, 4, 5,
1684  12, 19, 26, 33, 40, 48, 41, 34,
1685  27, 20, 13, 6, 7, 14, 21, 28,
1686  35, 42, 49, 56, 57, 50, 43, 36,
1687  29, 22, 15, 23, 30, 37, 44, 51,
1688  58, 59, 52, 45, 38, 31, 39, 46,
1689  53, 60, 61, 54, 47, 55, 62, 63,
1690  // let corrupt input sample past end
1691  63, 63, 63, 63, 63, 63, 63, 63,
1692  63, 63, 63, 63, 63, 63, 63
1693 };
1694 
1695 // decode one 64-entry block--
//   data    : receives the 64 dequantized coefficients in row-major order
//   hdc/hac : Huffman tables for the DC and AC symbols
//   fac     : fast AC lookup built by stbi__build_fast_ac for `hac`
//   b       : index into j->img_comp whose DC predictor is used and updated
//   dequant : 64 multipliers, indexed by row-major coefficient position
// Returns 1 on success, 0 (via stbi__err) on an undecodable Huffman code.
1696 static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], stbi__huffman *hdc, stbi__huffman *hac, stbi__int16 *fac, int b, stbi_uc *dequant)
1697 {
1698  int diff,dc,k;
1699  int t;
1700 
1701  if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
1702  t = stbi__jpeg_huff_decode(j, hdc);
1703  if (t < 0) return stbi__err("bad huffman code","Corrupt JPEG");
1704 
1705  // 0 all the ac values now so we can do it 32-bits at a time
1706  memset(data,0,64*sizeof(data[0]));
1707 
// the DC symbol is the bit length of the difference from the previous DC
1708  diff = t ? stbi__extend_receive(j, t) : 0;
1709  dc = j->img_comp[b].dc_pred + diff;
1710  j->img_comp[b].dc_pred = dc;
1711  data[0] = (short) (dc * dequant[0]);
1712 
1713  // decode AC components, see JPEG spec
1714  k = 1;
1715  do {
1716  unsigned int zig;
1717  int c,r,s;
1718  if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
1719  c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
1720  r = fac[c];
1721  if (r) { // fast-AC path
1722  k += (r >> 4) & 15; // run
1723  s = r & 15; // combined length
1724  j->code_buffer <<= s;
1725  j->code_bits -= s;
1726  // decode into unzigzag'd location
1727  zig = stbi__jpeg_dezigzag[k++];
1728  data[zig] = (short) ((r >> 8) * dequant[zig]);
1729  } else {
1730  int rs = stbi__jpeg_huff_decode(j, hac);
1731  if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
// low nibble: bit length of the value; high nibble: run of zeros before it
1732  s = rs & 15;
1733  r = rs >> 4;
1734  if (s == 0) {
1735  if (rs != 0xf0) break; // end block
// 0xf0: sixteen zero coefficients
1736  k += 16;
1737  } else {
1738  k += r;
1739  // decode into unzigzag'd location
1740  zig = stbi__jpeg_dezigzag[k++];
1741  data[zig] = (short) (stbi__extend_receive(j,s) * dequant[zig]);
1742  }
1743  }
1744  } while (k < 64);
1745  return 1;
1746 }
1747 
1748 static int stbi__jpeg_decode_block_prog_dc(stbi__jpeg *j, short data[64], stbi__huffman *hdc, int b)
1749 {
1750  if (j->spec_end != 0)
1751  return stbi__err("can't merge dc and ac", "Corrupt JPEG");
1752 
1753  if (j->code_bits < 16)
1754  stbi__grow_buffer_unsafe(j);
1755 
1756  if (j->succ_high == 0)
1757  {
1758  int diff,dc;
1759  int t;
1760 
1761  /* first scan for DC coefficient, must be first */
1762  memset(data,0,64*sizeof(data[0])); // 0 all the ac values now
1763  t = stbi__jpeg_huff_decode(j, hdc);
1764  diff = t ? stbi__extend_receive(j, t) : 0;
1765 
1766  dc = j->img_comp[b].dc_pred + diff;
1767  j->img_comp[b].dc_pred = dc;
1768  data[0] = (short) (dc << j->succ_low);
1769  }
1770  else
1771  {
1772  /* refinement scan for DC coefficient */
1773  if (stbi__jpeg_get_bit(j))
1774  data[0] += (short) (1 << j->succ_low);
1775  }
1776  return 1;
1777 }
1778 
1779 // @OPTIMIZE: store non-zigzagged during the decode passes,
1780 // and only de-zigzag when dequantizing
// Decode the AC coefficients spec_start..spec_end of one block in a
// progressive scan.
//   data : the block's 64 coefficients in row-major order
//   hac  : Huffman table for the AC symbols
//   fac  : fast AC lookup for `hac` (used by the first-scan path only)
// succ_high == 0 selects a first scan, which writes new coefficients scaled
// by 2^succ_low; otherwise this is a refinement scan, which adds one more
// bit of precision to existing coefficients and may insert new +/-bit ones.
// j->eob_run counts upcoming blocks with nothing further to decode.
// Returns 1 on success, 0 (via stbi__err) on corrupt input.
// NOTE(review): the first-scan path left-shifts values that can be negative
// ((r >> 8) << shift and stbi__extend_receive(...) << shift); that is
// undefined behaviour in C and may deserve a multiplication instead.
1781 static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64], stbi__huffman *hac, stbi__int16 *fac)
1782 {
1783  int k;
1784  if (j->spec_start == 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
1785 
1786  if (j->succ_high == 0) {
1787  int shift = j->succ_low;
1788 
// inside an end-of-band run: this block contributes nothing in this scan
1789  if (j->eob_run) {
1790  --j->eob_run;
1791  return 1;
1792  }
1793 
1794  k = j->spec_start;
1795  do {
1796  unsigned int zig;
1797  int c,r,s;
1798  if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
1799  c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
1800  r = fac[c];
1801  if (r) { // fast-AC path
1802  k += (r >> 4) & 15; // run
1803  s = r & 15; // combined length
1804  j->code_buffer <<= s;
1805  j->code_bits -= s;
1806  zig = stbi__jpeg_dezigzag[k++];
1807  data[zig] = (short) ((r >> 8) << shift);
1808  } else {
1809  int rs = stbi__jpeg_huff_decode(j, hac);
1810  if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
1811  s = rs & 15;
1812  r = rs >> 4;
1813  if (s == 0) {
1814  if (r < 15) {
// end-of-band run of 2^r plus r extra bits; this block counts as the first
1815  j->eob_run = (1 << r);
1816  if (r)
1817  j->eob_run += stbi__jpeg_get_bits(j, r);
1818  --j->eob_run;
1819  break;
1820  }
1821  k += 16;
1822  } else {
1823  k += r;
1824  zig = stbi__jpeg_dezigzag[k++];
1825  data[zig] = (short) (stbi__extend_receive(j,s) << shift);
1826  }
1827  }
1828  } while (k <= j->spec_end);
1829  } else {
1830  // refinement scan for these AC coefficients
1831 
1832  short bit = (short) (1 << j->succ_low);
1833 
1834  if (j->eob_run) {
1835  --j->eob_run;
// within a run only already-nonzero coefficients receive a correction bit
1836  for (k = j->spec_start; k <= j->spec_end; ++k) {
1837  short *p = &data[stbi__jpeg_dezigzag[k]];
1838  if (*p != 0)
1839  if (stbi__jpeg_get_bit(j))
1840  if ((*p & bit)==0) {
1841  if (*p > 0)
1842  *p += bit;
1843  else
1844  *p -= bit;
1845  }
1846  }
1847  } else {
1848  k = j->spec_start;
1849  do {
1850  int r,s;
1851  int rs = stbi__jpeg_huff_decode(j, hac); // @OPTIMIZE see if we can use the fast path here, advance-by-r is so slow, eh
1852  if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
1853  s = rs & 15;
1854  r = rs >> 4;
1855  if (s == 0) {
1856  if (r < 15) {
1857  j->eob_run = (1 << r) - 1;
1858  if (r)
1859  j->eob_run += stbi__jpeg_get_bits(j, r);
1860  r = 64; // force end of block
1861  } else {
1862  // r=15 s=0 should write 16 0s, so we just do
1863  // a run of 15 0s and then write s (which is 0),
1864  // so we don't have to do anything special here
1865  }
1866  } else {
1867  if (s != 1) return stbi__err("bad huffman code", "Corrupt JPEG");
1868  // sign bit
1869  if (stbi__jpeg_get_bit(j))
1870  s = bit;
1871  else
1872  s = -bit;
1873  }
1874 
1875  // advance by r
// nonzero coefficients passed on the way are refined and do not count
// toward the run; the new value s lands on the zero after r skipped zeros
1876  while (k <= j->spec_end) {
1877  short *p = &data[stbi__jpeg_dezigzag[k++]];
1878  if (*p != 0) {
1879  if (stbi__jpeg_get_bit(j))
1880  if ((*p & bit)==0) {
1881  if (*p > 0)
1882  *p += bit;
1883  else
1884  *p -= bit;
1885  }
1886  } else {
1887  if (r == 0) {
1888  *p = (short) s;
1889  break;
1890  }
1891  --r;
1892  }
1893  }
1894  } while (k <= j->spec_end);
1895  }
1896  }
1897  return 1;
1898 }
1899 
1900 // take a -128..127 value and stbi__clamp it and convert to 0..255
1901 stbi_inline static stbi_uc stbi__clamp(int x)
1902 {
1903  // trick to use a single test to catch both cases
1904  if ((unsigned int) x > 255) {
1905  if (x < 0) return 0;
1906  if (x > 255) return 255;
1907  }
1908  return (stbi_uc) x;
1909 }
1910 
1911 #define stbi__f2f(x) ((int) (((x) * 4096 + 0.5)))
1912 #define stbi__fsh(x) ((x) << 12)
1913 
1914 // derived from jidctint -- DCT_ISLOW
1915 #define STBI__IDCT_1D(s0,s1,s2,s3,s4,s5,s6,s7) \
1916  int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; \
1917  p2 = s2; \
1918  p3 = s6; \
1919  p1 = (p2+p3) * stbi__f2f(0.5411961f); \
1920  t2 = p1 + p3*stbi__f2f(-1.847759065f); \
1921  t3 = p1 + p2*stbi__f2f( 0.765366865f); \
1922  p2 = s0; \
1923  p3 = s4; \
1924  t0 = stbi__fsh(p2+p3); \
1925  t1 = stbi__fsh(p2-p3); \
1926  x0 = t0+t3; \
1927  x3 = t0-t3; \
1928  x1 = t1+t2; \
1929  x2 = t1-t2; \
1930  t0 = s7; \
1931  t1 = s5; \
1932  t2 = s3; \
1933  t3 = s1; \
1934  p3 = t0+t2; \
1935  p4 = t1+t3; \
1936  p1 = t0+t3; \
1937  p2 = t1+t2; \
1938  p5 = (p3+p4)*stbi__f2f( 1.175875602f); \
1939  t0 = t0*stbi__f2f( 0.298631336f); \
1940  t1 = t1*stbi__f2f( 2.053119869f); \
1941  t2 = t2*stbi__f2f( 3.072711026f); \
1942  t3 = t3*stbi__f2f( 1.501321110f); \
1943  p1 = p5 + p1*stbi__f2f(-0.899976223f); \
1944  p2 = p5 + p2*stbi__f2f(-2.562915447f); \
1945  p3 = p3*stbi__f2f(-1.961570560f); \
1946  p4 = p4*stbi__f2f(-0.390180644f); \
1947  t3 += p1+p4; \
1948  t2 += p2+p3; \
1949  t1 += p2+p4; \
1950  t0 += p1+p3;
1951 
1952 static void stbi__idct_block(stbi_uc *out, int out_stride, short data[64])
1953 {
1954  int i,val[64],*v=val;
1955  stbi_uc *o;
1956  short *d = data;
1957 
1958  // columns
1959  for (i=0; i < 8; ++i,++d, ++v) {
1960  // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing
1961  if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0
1962  && d[40]==0 && d[48]==0 && d[56]==0) {
1963  // no shortcut 0 seconds
1964  // (1|2|3|4|5|6|7)==0 0 seconds
1965  // all separate -0.047 seconds
1966  // 1 && 2|3 && 4|5 && 6|7: -0.047 seconds
1967  int dcterm = d[0] << 2;
1968  v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm;
1969  } else {
1970  STBI__IDCT_1D(d[ 0],d[ 8],d[16],d[24],d[32],d[40],d[48],d[56])
1971  // constants scaled things up by 1<<12; let's bring them back
1972  // down, but keep 2 extra bits of precision
1973  x0 += 512; x1 += 512; x2 += 512; x3 += 512;
1974  v[ 0] = (x0+t3) >> 10;
1975  v[56] = (x0-t3) >> 10;
1976  v[ 8] = (x1+t2) >> 10;
1977  v[48] = (x1-t2) >> 10;
1978  v[16] = (x2+t1) >> 10;
1979  v[40] = (x2-t1) >> 10;
1980  v[24] = (x3+t0) >> 10;
1981  v[32] = (x3-t0) >> 10;
1982  }
1983  }
1984 
1985  for (i=0, v=val, o=out; i < 8; ++i,v+=8,o+=out_stride) {
1986  // no fast case since the first 1D IDCT spread components out
1987  STBI__IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7])
1988  // constants scaled things up by 1<<12, plus we had 1<<2 from first
1989  // loop, plus horizontal and vertical each scale by sqrt(8) so together
1990  // we've got an extra 1<<3, so 1<<17 total we need to remove.
1991  // so we want to round that, which means adding 0.5 * 1<<17,
1992  // aka 65536. Also, we'll end up with -128 to 127 that we want
1993  // to encode as 0..255 by adding 128, so we'll add that before the shift
1994  x0 += 65536 + (128<<17);
1995  x1 += 65536 + (128<<17);
1996  x2 += 65536 + (128<<17);
1997  x3 += 65536 + (128<<17);
1998  // tried computing the shifts into temps, or'ing the temps to see
1999  // if any were out of range, but that was slower
2000  o[0] = stbi__clamp((x0+t3) >> 17);
2001  o[7] = stbi__clamp((x0-t3) >> 17);
2002  o[1] = stbi__clamp((x1+t2) >> 17);
2003  o[6] = stbi__clamp((x1-t2) >> 17);
2004  o[2] = stbi__clamp((x2+t1) >> 17);
2005  o[5] = stbi__clamp((x2-t1) >> 17);
2006  o[3] = stbi__clamp((x3+t0) >> 17);
2007  o[4] = stbi__clamp((x3-t0) >> 17);
2008  }
2009 }
2010 
2011 #ifdef STBI_SSE2
2012 /* sse2 integer IDCT. not the fastest possible implementation but it
2013  * produces bit-identical results to the generic C version so it's
2014  * fully "transparent".
2015  */
/* Parameters:
 *   out        - destination of the 8x8 block; 8 bytes are stored per row
 *   out_stride - byte distance between consecutive output rows
 *   data       - the 64 16-bit input coefficients; fetched with
 *                _mm_load_si128 (the aligned load), so the buffer must be
 *                16-byte aligned
 * Structure: column pass, 16-bit 8x8 transpose, row pass, saturating pack
 * to bytes, 8-bit transpose, store.
 */
2016 static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
2017 {
2018  /* This is constructed to match our regular (generic) integer IDCT exactly. */
2019  __m128i row0, row1, row2, row3, row4, row5, row6, row7;
2020  __m128i tmp;
2021 
2022  /* dot product constant: even elems=x, odd elems=y */
2023  #define dct_const(x,y) _mm_setr_epi16((x),(y),(x),(y),(x),(y),(x),(y))
2024 
2025  /* out(0) = c0[even]*x + c0[odd]*y (c0, x, y 16-bit, out 32-bit)
2026  * out(1) = c1[even]*x + c1[odd]*y
2027  */
2028  #define dct_rot(out0,out1, x,y,c0,c1) \
2029  __m128i c0##lo = _mm_unpacklo_epi16((x),(y)); \
2030  __m128i c0##hi = _mm_unpackhi_epi16((x),(y)); \
2031  __m128i out0##_l = _mm_madd_epi16(c0##lo, c0); \
2032  __m128i out0##_h = _mm_madd_epi16(c0##hi, c0); \
2033  __m128i out1##_l = _mm_madd_epi16(c0##lo, c1); \
2034  __m128i out1##_h = _mm_madd_epi16(c0##hi, c1)
2035 
2036  /* out = in << 12 (in 16-bit, out 32-bit) */
 /* (interleaving with zero puts "in" in the upper 16 bits of each 32-bit
  * lane, i.e. in << 16; the arithmetic shift right by 4 then leaves
  * in << 12 with the sign preserved) */
2037  #define dct_widen(out, in) \
2038  __m128i out##_l = _mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), (in)), 4); \
2039  __m128i out##_h = _mm_srai_epi32(_mm_unpackhi_epi16(_mm_setzero_si128(), (in)), 4)
2040 
2041  /* wide add */
2042  #define dct_wadd(out, a, b) \
2043  __m128i out##_l = _mm_add_epi32(a##_l, b##_l); \
2044  __m128i out##_h = _mm_add_epi32(a##_h, b##_h)
2045 
2046  /* wide sub */
2047  #define dct_wsub(out, a, b) \
2048  __m128i out##_l = _mm_sub_epi32(a##_l, b##_l); \
2049  __m128i out##_h = _mm_sub_epi32(a##_h, b##_h)
2050 
2051  /* butterfly a/b, add bias, then shift by "s" and pack */
 /* (_mm_packs_epi32 narrows the 32-bit results to 16 bits with signed
  * saturation) */
2052  #define dct_bfly32o(out0, out1, a,b,bias,s) \
2053  { \
2054  __m128i abiased_l = _mm_add_epi32(a##_l, bias); \
2055  __m128i abiased_h = _mm_add_epi32(a##_h, bias); \
2056  dct_wadd(sum, abiased, b); \
2057  dct_wsub(dif, abiased, b); \
2058  out0 = _mm_packs_epi32(_mm_srai_epi32(sum_l, s), _mm_srai_epi32(sum_h, s)); \
2059  out1 = _mm_packs_epi32(_mm_srai_epi32(dif_l, s), _mm_srai_epi32(dif_h, s)); \
2060  }
2061 
2062  /* 8-bit interleave step (for transposes) */
2063  #define dct_interleave8(a, b) \
2064  tmp = a; \
2065  a = _mm_unpacklo_epi8(a, b); \
2066  b = _mm_unpackhi_epi8(tmp, b)
2067 
2068  /* 16-bit interleave step (for transposes) */
2069  #define dct_interleave16(a, b) \
2070  tmp = a; \
2071  a = _mm_unpacklo_epi16(a, b); \
2072  b = _mm_unpackhi_epi16(tmp, b)
2073 
 /* one full 1D IDCT pass over row0..row7, operating on all 8 lanes at
  * once; the results overwrite row0..row7. The rotation constants
  * rot0_0..rot3_1 are picked up by name from the enclosing scope. */
2074  #define dct_pass(bias,shift) \
2075  { \
2076  /* even part */ \
2077  dct_rot(t2e,t3e, row2,row6, rot0_0,rot0_1); \
2078  __m128i sum04 = _mm_add_epi16(row0, row4); \
2079  __m128i dif04 = _mm_sub_epi16(row0, row4); \
2080  dct_widen(t0e, sum04); \
2081  dct_widen(t1e, dif04); \
2082  dct_wadd(x0, t0e, t3e); \
2083  dct_wsub(x3, t0e, t3e); \
2084  dct_wadd(x1, t1e, t2e); \
2085  dct_wsub(x2, t1e, t2e); \
2086  /* odd part */ \
2087  dct_rot(y0o,y2o, row7,row3, rot2_0,rot2_1); \
2088  dct_rot(y1o,y3o, row5,row1, rot3_0,rot3_1); \
2089  __m128i sum17 = _mm_add_epi16(row1, row7); \
2090  __m128i sum35 = _mm_add_epi16(row3, row5); \
2091  dct_rot(y4o,y5o, sum17,sum35, rot1_0,rot1_1); \
2092  dct_wadd(x4, y0o, y4o); \
2093  dct_wadd(x5, y1o, y5o); \
2094  dct_wadd(x6, y2o, y5o); \
2095  dct_wadd(x7, y3o, y4o); \
2096  dct_bfly32o(row0,row7, x0,x7,bias,shift); \
2097  dct_bfly32o(row1,row6, x1,x6,bias,shift); \
2098  dct_bfly32o(row2,row5, x2,x5,bias,shift); \
2099  dct_bfly32o(row3,row4, x3,x4,bias,shift); \
2100  }
2101 
 /* rotation constants: the same 12-bit fixed-point values the generic
  * IDCT uses, pre-summed in pairs so each rotation is two multiply-adds */
2102  __m128i rot0_0 = dct_const(stbi__f2f(0.5411961f), stbi__f2f(0.5411961f) + stbi__f2f(-1.847759065f));
2103  __m128i rot0_1 = dct_const(stbi__f2f(0.5411961f) + stbi__f2f( 0.765366865f), stbi__f2f(0.5411961f));
2104  __m128i rot1_0 = dct_const(stbi__f2f(1.175875602f) + stbi__f2f(-0.899976223f), stbi__f2f(1.175875602f));
2105  __m128i rot1_1 = dct_const(stbi__f2f(1.175875602f), stbi__f2f(1.175875602f) + stbi__f2f(-2.562915447f));
2106  __m128i rot2_0 = dct_const(stbi__f2f(-1.961570560f) + stbi__f2f( 0.298631336f), stbi__f2f(-1.961570560f));
2107  __m128i rot2_1 = dct_const(stbi__f2f(-1.961570560f), stbi__f2f(-1.961570560f) + stbi__f2f( 3.072711026f));
2108  __m128i rot3_0 = dct_const(stbi__f2f(-0.390180644f) + stbi__f2f( 2.053119869f), stbi__f2f(-0.390180644f));
2109  __m128i rot3_1 = dct_const(stbi__f2f(-0.390180644f), stbi__f2f(-0.390180644f) + stbi__f2f( 1.501321110f));
2110 
2111  /* rounding biases in column/row passes, see stbi__idct_block for explanation. */
2112  __m128i bias_0 = _mm_set1_epi32(512);
2113  __m128i bias_1 = _mm_set1_epi32(65536 + (128<<17));
2114 
2115  /* load */
2116  row0 = _mm_load_si128((const __m128i *) (data + 0*8));
2117  row1 = _mm_load_si128((const __m128i *) (data + 1*8));
2118  row2 = _mm_load_si128((const __m128i *) (data + 2*8));
2119  row3 = _mm_load_si128((const __m128i *) (data + 3*8));
2120  row4 = _mm_load_si128((const __m128i *) (data + 4*8));
2121  row5 = _mm_load_si128((const __m128i *) (data + 5*8));
2122  row6 = _mm_load_si128((const __m128i *) (data + 6*8));
2123  row7 = _mm_load_si128((const __m128i *) (data + 7*8));
2124 
2125  /* column pass */
2126  dct_pass(bias_0, 10);
2127 
2128  {
2129  /* 16bit 8x8 transpose pass 1 */
2130  dct_interleave16(row0, row4);
2131  dct_interleave16(row1, row5);
2132  dct_interleave16(row2, row6);
2133  dct_interleave16(row3, row7);
2134 
2135  /* transpose pass 2 */
2136  dct_interleave16(row0, row2);
2137  dct_interleave16(row1, row3);
2138  dct_interleave16(row4, row6);
2139  dct_interleave16(row5, row7);
2140 
2141  /* transpose pass 3 */
2142  dct_interleave16(row0, row1);
2143  dct_interleave16(row2, row3);
2144  dct_interleave16(row4, row5);
2145  dct_interleave16(row6, row7);
2146  }
2147 
2148  /* row pass */
2149  dct_pass(bias_1, 17);
2150 
2151  {
2152  /* pack */
 /* _mm_packus_epi16 narrows to bytes with unsigned saturation, which
  * plays the role of stbi__clamp in the generic version */
2153  __m128i p0 = _mm_packus_epi16(row0, row1); // a0a1a2a3...a7b0b1b2b3...b7
2154  __m128i p1 = _mm_packus_epi16(row2, row3);
2155  __m128i p2 = _mm_packus_epi16(row4, row5);
2156  __m128i p3 = _mm_packus_epi16(row6, row7);
2157 
2158  // 8bit 8x8 transpose pass 1
2159  dct_interleave8(p0, p2); // a0e0a1e1...
2160  dct_interleave8(p1, p3); // c0g0c1g1...
2161 
2162  // transpose pass 2
2163  dct_interleave8(p0, p1); // a0c0e0g0...
2164  dct_interleave8(p2, p3); // b0d0f0h0...
2165 
2166  // transpose pass 3
2167  dct_interleave8(p0, p2); // a0b0c0d0...
2168  dct_interleave8(p1, p3); // a4b4c4d4...
2169 
2170  // store
 // each register now holds two output rows: the low 64 bits are stored
 // directly, and the 0x4e shuffle swaps the two 64-bit halves so the
 // other row can be stored the same way
2171  _mm_storel_epi64((__m128i *) out, p0); out += out_stride;
2172  _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p0, 0x4e)); out += out_stride;
2173  _mm_storel_epi64((__m128i *) out, p2); out += out_stride;
2174  _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p2, 0x4e)); out += out_stride;
2175  _mm_storel_epi64((__m128i *) out, p1); out += out_stride;
2176  _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p1, 0x4e)); out += out_stride;
2177  _mm_storel_epi64((__m128i *) out, p3); out += out_stride;
2178  _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p3, 0x4e));
2179  }
2180 
2181 #undef dct_const
2182 #undef dct_rot
2183 #undef dct_widen
2184 #undef dct_wadd
2185 #undef dct_wsub
2186 #undef dct_bfly32o
2187 #undef dct_interleave8
2188 #undef dct_interleave16
2189 #undef dct_pass
2190 }
2191 
2192 #endif /* STBI_SSE2 */
2193 
2194 #ifdef STBI_NEON
2195 
2196 /* NEON integer IDCT. should produce bit-identical
2197  * results to the generic C version. */
/* Parameters:
 *   out        - destination of the 8x8 block; 8 bytes are stored per row
 *   out_stride - byte distance between consecutive output rows
 *   data       - the 64 16-bit input coefficients, loaded as 8 rows of 8
 * Structure: column pass, 16-bit 8x8 transpose, row pass, saturating
 * narrow to bytes, 8-bit transpose, store.
 */
2198 static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
2199 {
2200  int16x8_t row0, row1, row2, row3, row4, row5, row6, row7;
2201 
 /* the same 12-bit fixed-point constants the generic IDCT uses, each
  * replicated across a 4-lane vector */
2202  int16x4_t rot0_0 = vdup_n_s16(stbi__f2f(0.5411961f));
2203  int16x4_t rot0_1 = vdup_n_s16(stbi__f2f(-1.847759065f));
2204  int16x4_t rot0_2 = vdup_n_s16(stbi__f2f( 0.765366865f));
2205  int16x4_t rot1_0 = vdup_n_s16(stbi__f2f( 1.175875602f));
2206  int16x4_t rot1_1 = vdup_n_s16(stbi__f2f(-0.899976223f));
2207  int16x4_t rot1_2 = vdup_n_s16(stbi__f2f(-2.562915447f));
2208  int16x4_t rot2_0 = vdup_n_s16(stbi__f2f(-1.961570560f));
2209  int16x4_t rot2_1 = vdup_n_s16(stbi__f2f(-0.390180644f));
2210  int16x4_t rot3_0 = vdup_n_s16(stbi__f2f( 0.298631336f));
2211  int16x4_t rot3_1 = vdup_n_s16(stbi__f2f( 2.053119869f));
2212  int16x4_t rot3_2 = vdup_n_s16(stbi__f2f( 3.072711026f));
2213  int16x4_t rot3_3 = vdup_n_s16(stbi__f2f( 1.501321110f));
2214 
/* out = inq * coeff (16-bit inputs, 32-bit results in out_l / out_h) */
2215 #define dct_long_mul(out, inq, coeff) \
2216  int32x4_t out##_l = vmull_s16(vget_low_s16(inq), coeff); \
2217  int32x4_t out##_h = vmull_s16(vget_high_s16(inq), coeff)
2218 
/* out = acc + inq * coeff (32-bit accumulate) */
2219 #define dct_long_mac(out, acc, inq, coeff) \
2220  int32x4_t out##_l = vmlal_s16(acc##_l, vget_low_s16(inq), coeff); \
2221  int32x4_t out##_h = vmlal_s16(acc##_h, vget_high_s16(inq), coeff)
2222 
/* out = inq << 12 (16-bit input widened to 32 bits) */
2223 #define dct_widen(out, inq) \
2224  int32x4_t out##_l = vshll_n_s16(vget_low_s16(inq), 12); \
2225  int32x4_t out##_h = vshll_n_s16(vget_high_s16(inq), 12)
2226 
2227 /* wide add */
2228 #define dct_wadd(out, a, b) \
2229  int32x4_t out##_l = vaddq_s32(a##_l, b##_l); \
2230  int32x4_t out##_h = vaddq_s32(a##_h, b##_h)
2231 
2232 /* wide sub */
2233 #define dct_wsub(out, a, b) \
2234  int32x4_t out##_l = vsubq_s32(a##_l, b##_l); \
2235  int32x4_t out##_h = vsubq_s32(a##_h, b##_h)
2236 
2237 // butterfly a/b, then shift using "shiftop" by "s" and pack
2238 #define dct_bfly32o(out0,out1, a,b,shiftop,s) \
2239  { \
2240  dct_wadd(sum, a, b); \
2241  dct_wsub(dif, a, b); \
2242  out0 = vcombine_s16(shiftop(sum_l, s), shiftop(sum_h, s)); \
2243  out1 = vcombine_s16(shiftop(dif_l, s), shiftop(dif_h, s)); \
2244  }
2245 
/* one full 1D IDCT pass over row0..row7, operating on all 8 lanes at once;
 * the results overwrite row0..row7. The rot* constants are picked up by
 * name from the enclosing scope. */
2246 #define dct_pass(shiftop, shift) \
2247  { \
2248  /* even part */ \
2249  int16x8_t sum26 = vaddq_s16(row2, row6); \
2250  dct_long_mul(p1e, sum26, rot0_0); \
2251  dct_long_mac(t2e, p1e, row6, rot0_1); \
2252  dct_long_mac(t3e, p1e, row2, rot0_2); \
2253  int16x8_t sum04 = vaddq_s16(row0, row4); \
2254  int16x8_t dif04 = vsubq_s16(row0, row4); \
2255  dct_widen(t0e, sum04); \
2256  dct_widen(t1e, dif04); \
2257  dct_wadd(x0, t0e, t3e); \
2258  dct_wsub(x3, t0e, t3e); \
2259  dct_wadd(x1, t1e, t2e); \
2260  dct_wsub(x2, t1e, t2e); \
2261  /* odd part */ \
2262  int16x8_t sum15 = vaddq_s16(row1, row5); \
2263  int16x8_t sum17 = vaddq_s16(row1, row7); \
2264  int16x8_t sum35 = vaddq_s16(row3, row5); \
2265  int16x8_t sum37 = vaddq_s16(row3, row7); \
2266  int16x8_t sumodd = vaddq_s16(sum17, sum35); \
2267  dct_long_mul(p5o, sumodd, rot1_0); \
2268  dct_long_mac(p1o, p5o, sum17, rot1_1); \
2269  dct_long_mac(p2o, p5o, sum35, rot1_2); \
2270  dct_long_mul(p3o, sum37, rot2_0); \
2271  dct_long_mul(p4o, sum15, rot2_1); \
2272  dct_wadd(sump13o, p1o, p3o); \
2273  dct_wadd(sump24o, p2o, p4o); \
2274  dct_wadd(sump23o, p2o, p3o); \
2275  dct_wadd(sump14o, p1o, p4o); \
2276  dct_long_mac(x4, sump13o, row7, rot3_0); \
2277  dct_long_mac(x5, sump24o, row5, rot3_1); \
2278  dct_long_mac(x6, sump23o, row3, rot3_2); \
2279  dct_long_mac(x7, sump14o, row1, rot3_3); \
2280  dct_bfly32o(row0,row7, x0,x7,shiftop,shift); \
2281  dct_bfly32o(row1,row6, x1,x6,shiftop,shift); \
2282  dct_bfly32o(row2,row5, x2,x5,shiftop,shift); \
2283  dct_bfly32o(row3,row4, x3,x4,shiftop,shift); \
2284  }
2285 
2286  // load
2287  row0 = vld1q_s16(data + 0*8);
2288  row1 = vld1q_s16(data + 1*8);
2289  row2 = vld1q_s16(data + 2*8);
2290  row3 = vld1q_s16(data + 3*8);
2291  row4 = vld1q_s16(data + 4*8);
2292  row5 = vld1q_s16(data + 5*8);
2293  row6 = vld1q_s16(data + 6*8);
2294  row7 = vld1q_s16(data + 7*8);
2295 
2296  // add DC bias
 // (only lane 0 of row0, i.e. data[0], receives the 1024)
 // NOTE(review): presumably 1024 = 128*8, so that after the IDCT's overall
 // scaling every output sample ends up offset by +128 -- confirm against
 // the bias handling in stbi__idct_block
2297  row0 = vaddq_s16(row0, vsetq_lane_s16(1024, vdupq_n_s16(0), 0));
2298 
2299  // column pass
2300  dct_pass(vrshrn_n_s32, 10);
2301 
2302  // 16bit 8x8 transpose
2303  {
2304 // these three map to a single VTRN.16, VTRN.32, and VSWP, respectively.
2305 // whether compilers actually get this is another story, sadly.
2306 #define dct_trn16(x, y) { int16x8x2_t t = vtrnq_s16(x, y); x = t.val[0]; y = t.val[1]; }
2307 #define dct_trn32(x, y) { int32x4x2_t t = vtrnq_s32(vreinterpretq_s32_s16(x), vreinterpretq_s32_s16(y)); x = vreinterpretq_s16_s32(t.val[0]); y = vreinterpretq_s16_s32(t.val[1]); }
2308 #define dct_trn64(x, y) { int16x8_t x0 = x; int16x8_t y0 = y; x = vcombine_s16(vget_low_s16(x0), vget_low_s16(y0)); y = vcombine_s16(vget_high_s16(x0), vget_high_s16(y0)); }
2309 
2310  // pass 1
2311  dct_trn16(row0, row1); // a0b0a2b2a4b4a6b6
2312  dct_trn16(row2, row3);
2313  dct_trn16(row4, row5);
2314  dct_trn16(row6, row7);
2315 
2316  // pass 2
2317  dct_trn32(row0, row2); // a0b0c0d0a4b4c4d4
2318  dct_trn32(row1, row3);
2319  dct_trn32(row4, row6);
2320  dct_trn32(row5, row7);
2321 
2322  // pass 3
2323  dct_trn64(row0, row4); // a0b0c0d0e0f0g0h0
2324  dct_trn64(row1, row5);
2325  dct_trn64(row2, row6);
2326  dct_trn64(row3, row7);
2327 
2328 #undef dct_trn16
2329 #undef dct_trn32
2330 #undef dct_trn64
2331  }
2332 
2333  // row pass
2334  // vrshrn_n_s32 only supports shifts up to 16, we need
2335  // 17. so do a non-rounding shift of 16 first then follow
2336  // up with a rounding shift by 1.
2337  dct_pass(vshrn_n_s32, 16);
2338 
2339  {
2340  /* pack and round */
 /* vqrshrun_n_s16 is the saturating, rounding, unsigned narrowing shift:
  * it supplies the final shift by 1 and clamps each sample to 0..255 */
2341  uint8x8_t p0 = vqrshrun_n_s16(row0, 1);
2342  uint8x8_t p1 = vqrshrun_n_s16(row1, 1);
2343  uint8x8_t p2 = vqrshrun_n_s16(row2, 1);
2344  uint8x8_t p3 = vqrshrun_n_s16(row3, 1);
2345  uint8x8_t p4 = vqrshrun_n_s16(row4, 1);
2346  uint8x8_t p5 = vqrshrun_n_s16(row5, 1);
2347  uint8x8_t p6 = vqrshrun_n_s16(row6, 1);
2348  uint8x8_t p7 = vqrshrun_n_s16(row7, 1);
2349 
2350  /* again, these can translate into one instruction, but often don't. */
2351 #define dct_trn8_8(x, y) { uint8x8x2_t t = vtrn_u8(x, y); x = t.val[0]; y = t.val[1]; }
2352 #define dct_trn8_16(x, y) { uint16x4x2_t t = vtrn_u16(vreinterpret_u16_u8(x), vreinterpret_u16_u8(y)); x = vreinterpret_u8_u16(t.val[0]); y = vreinterpret_u8_u16(t.val[1]); }
2353 #define dct_trn8_32(x, y) { uint32x2x2_t t = vtrn_u32(vreinterpret_u32_u8(x), vreinterpret_u32_u8(y)); x = vreinterpret_u8_u32(t.val[0]); y = vreinterpret_u8_u32(t.val[1]); }
2354 
2355  /* sadly can't use interleaved stores here since we only write
2356  * 8 bytes to each scan line! */
2357 
2358  /* 8x8 8-bit transpose pass 1 */
2359  dct_trn8_8(p0, p1);
2360  dct_trn8_8(p2, p3);
2361  dct_trn8_8(p4, p5);
2362  dct_trn8_8(p6, p7);
2363 
2364  /* pass 2 */
2365  dct_trn8_16(p0, p2);
2366  dct_trn8_16(p1, p3);
2367  dct_trn8_16(p4, p6);
2368  dct_trn8_16(p5, p7);
2369 
2370  /* pass 3 */
2371  dct_trn8_32(p0, p4);
2372  dct_trn8_32(p1, p5);
2373  dct_trn8_32(p2, p6);
2374  dct_trn8_32(p3, p7);
2375 
2376  /* store */
2377  vst1_u8(out, p0); out += out_stride;
2378  vst1_u8(out, p1); out += out_stride;
2379  vst1_u8(out, p2); out += out_stride;
2380  vst1_u8(out, p3); out += out_stride;
2381  vst1_u8(out, p4); out += out_stride;
2382  vst1_u8(out, p5); out += out_stride;
2383  vst1_u8(out, p6); out += out_stride;
2384  vst1_u8(out, p7);
2385 
2386 #undef dct_trn8_8
2387 #undef dct_trn8_16
2388 #undef dct_trn8_32
2389  }
2390 
2391 #undef dct_long_mul
2392 #undef dct_long_mac
2393 #undef dct_widen
2394 #undef dct_wadd
2395 #undef dct_wsub
2396 #undef dct_bfly32o
2397 #undef dct_pass
2398 }
2399 
2400 #endif /* STBI_NEON */
2401 
2402 #define STBI__MARKER_none 0xff
2403 /* if there's a pending marker from the entropy stream, return that
2404  * otherwise, fetch from the stream and get a marker. if there's no
2405  * marker, return 0xff, which is never a valid marker value
2406  */
2407 static stbi_uc stbi__get_marker(stbi__jpeg *j)
2408 {
2409  stbi_uc x;
2410  if (j->marker != STBI__MARKER_none) { x = j->marker; j->marker = STBI__MARKER_none; return x; }
2411  x = stbi__get8(j->s);
2412  if (x != 0xff) return STBI__MARKER_none;
2413  while (x == 0xff)
2414  x = stbi__get8(j->s);
2415  return x;
2416 }
2417 
2418 /* in each scan, we'll have scan_n components, and the order
2419  * of the components is specified by order[]
2420  */
2421 #define STBI__RESTART(x) ((x) >= 0xd0 && (x) <= 0xd7)
2422 
2423 /* after a restart interval, stbi__jpeg_reset the entropy decoder and
2424  * the dc prediction
2425  */
2426 static void stbi__jpeg_reset(stbi__jpeg *j)
2427 {
2428  j->code_bits = 0;
2429  j->code_buffer = 0;
2430  j->nomore = 0;
2431  j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred = 0;
2432  j->marker = STBI__MARKER_none;
2433  j->todo = j->restart_interval ? j->restart_interval : 0x7fffffff;
2434  j->eob_run = 0;
2435  // no more than 1<<31 MCUs if no restart_interal? that's plenty safe,
2436  // since we don't even allow 1<<30 pixels
2437 }
2438 
/* Decode the entropy-coded data of one scan.
 * The decoder state is reset first, then one of four paths runs depending on
 * z->progressive and on whether the scan has a single component
 * (z->scan_n == 1, blocks visited in plain raster order) or several
 * (interleaved, blocks visited MCU by MCU in z->order[]).
 *  - non-progressive: each block is decoded into a local 64-entry buffer and
 *    immediately passed to z->idct_block_kernel, which writes into the
 *    component's data plane.
 *  - progressive: each block is decoded in place into the component's coeff
 *    array; no IDCT is run here.
 * After every MCU the z->todo countdown is decremented; when it reaches zero
 * the pending marker must be a restart marker, in which case the decoder
 * state is reset and decoding continues.
 * Returns 0 if a block decoder fails. Returns 1 otherwise, including the
 * early exit taken when the countdown expires without a restart marker.
 */
2439 static int stbi__parse_entropy_coded_data(stbi__jpeg *z)
2440 {
2441  stbi__jpeg_reset(z);
2442  if (!z->progressive) {
2443  if (z->scan_n == 1) {
2444  int i,j;
2445  STBI_SIMD_ALIGN(short, data[64]);
2446  int n = z->order[0];
2447  // non-interleaved data, we just need to process one block at a time,
2448  // in trivial scanline order
2449  // number of blocks to do just depends on how many actual "pixels" this
2450  // component has, independent of interleaved MCU blocking and such
2451  int w = (z->img_comp[n].x+7) >> 3;
2452  int h = (z->img_comp[n].y+7) >> 3;
2453  for (j=0; j < h; ++j) {
2454  for (i=0; i < w; ++i) {
2455  int ha = z->img_comp[n].ha;
2456  if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0;
 // block (i,j) lands at pixel offset (i*8, j*8) in the component plane, whose row pitch is w2
2457  z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data);
2458  // every data block is an MCU, so countdown the restart interval
2459  if (--z->todo <= 0) {
2460  if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
2461  // if it's NOT a restart, then just bail, so we get corrupt data
2462  // rather than no data
2463  if (!STBI__RESTART(z->marker)) return 1;
2464  stbi__jpeg_reset(z);
2465  }
2466  }
2467  }
2468  return 1;
2469  } else { // interleaved
2470  int i,j,k,x,y;
2471  STBI_SIMD_ALIGN(short, data[64]);
2472  for (j=0; j < z->img_mcu_y; ++j) {
2473  for (i=0; i < z->img_mcu_x; ++i) {
2474  // scan an interleaved mcu... process scan_n components in order
2475  for (k=0; k < z->scan_n; ++k) {
2476  int n = z->order[k];
2477  // scan out an mcu's worth of this component; that's just determined
2478  // by the basic H and V specified for the component
2479  for (y=0; y < z->img_comp[n].v; ++y) {
2480  for (x=0; x < z->img_comp[n].h; ++x) {
 // x2,y2: pixel position of this block inside the component plane
2481  int x2 = (i*z->img_comp[n].h + x)*8;
2482  int y2 = (j*z->img_comp[n].v + y)*8;
2483  int ha = z->img_comp[n].ha;
2484  if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0;
2485  z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*y2+x2, z->img_comp[n].w2, data);
2486  }
2487  }
2488  }
2489  // after all interleaved components, that's an interleaved MCU,
2490  // so now count down the restart interval
2491  if (--z->todo <= 0) {
2492  if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
2493  if (!STBI__RESTART(z->marker)) return 1;
2494  stbi__jpeg_reset(z);
2495  }
2496  }
2497  }
2498  return 1;
2499  }
2500  } else {
 // progressive: coefficients are written into img_comp[n].coeff, 64 shorts per block
2501  if (z->scan_n == 1) {
2502  int i,j;
2503  int n = z->order[0];
2504  // non-interleaved data, we just need to process one block at a time,
2505  // in trivial scanline order
2506  // number of blocks to do just depends on how many actual "pixels" this
2507  // component has, independent of interleaved MCU blocking and such
2508  int w = (z->img_comp[n].x+7) >> 3;
2509  int h = (z->img_comp[n].y+7) >> 3;
2510  for (j=0; j < h; ++j) {
2511  for (i=0; i < w; ++i) {
2512  short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w);
 // a scan whose spectral selection starts at 0 goes to the DC decoder, any other scan to the AC decoder
2513  if (z->spec_start == 0) {
2514  if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n))
2515  return 0;
2516  } else {
2517  int ha = z->img_comp[n].ha;
2518  if (!stbi__jpeg_decode_block_prog_ac(z, data, &z->huff_ac[ha], z->fast_ac[ha]))
2519  return 0;
2520  }
2521  // every data block is an MCU, so countdown the restart interval
2522  if (--z->todo <= 0) {
2523  if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
2524  if (!STBI__RESTART(z->marker)) return 1;
2525  stbi__jpeg_reset(z);
2526  }
2527  }
2528  }
2529  return 1;
2530  } else { // interleaved
2531  int i,j,k,x,y;
2532  for (j=0; j < z->img_mcu_y; ++j) {
2533  for (i=0; i < z->img_mcu_x; ++i) {
2534  // scan an interleaved mcu... process scan_n components in order
2535  for (k=0; k < z->scan_n; ++k) {
2536  int n = z->order[k];
2537  // scan out an mcu's worth of this component; that's just determined
2538  // by the basic H and V specified for the component
2539  for (y=0; y < z->img_comp[n].v; ++y) {
2540  for (x=0; x < z->img_comp[n].h; ++x) {
 // here x2,y2 are block indices (not pixels) into the coefficient array
2541  int x2 = (i*z->img_comp[n].h + x);
2542  int y2 = (j*z->img_comp[n].v + y);
2543  short *data = z->img_comp[n].coeff + 64 * (x2 + y2 * z->img_comp[n].coeff_w);
 // NOTE(review): only the DC decoder is called on this path and spec_start is not checked here -- confirm interleaved progressive scans are always DC scans
2544  if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n))
2545  return 0;
2546  }
2547  }
2548  }
2549  // after all interleaved components, that's an interleaved MCU,
2550  // so now count down the restart interval
2551  if (--z->todo <= 0) {
2552  if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
2553  if (!STBI__RESTART(z->marker)) return 1;
2554  stbi__jpeg_reset(z);
2555  }
2556  }
2557  }
2558  return 1;
2559  }
2560  }
2561 }
2562 
2563 static void stbi__jpeg_dequantize(short *data, stbi_uc *dequant)
2564 {
2565  int i;
2566  for (i=0; i < 64; ++i)
2567  data[i] *= dequant[i];
2568 }
2569 
2570 static void stbi__jpeg_finish(stbi__jpeg *z)
2571 {
2572  if (z->progressive) {
2573  // dequantize and idct the data
2574  int i,j,n;
2575  for (n=0; n < z->s->img_n; ++n) {
2576  int w = (z->img_comp[n].x+7) >> 3;
2577  int h = (z->img_comp[n].y+7) >> 3;
2578  for (j=0; j < h; ++j) {
2579  for (i=0; i < w; ++i) {
2580  short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w);
2581  stbi__jpeg_dequantize(data, z->dequant[z->img_comp[n].tq]);
2582  z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data);
2583  }
2584  }
2585  }
2586  }
2587 }
2588 
2589 static int stbi__process_marker(stbi__jpeg *z, int m)
2590 {
2591  int L;
2592  switch (m) {
2593  case STBI__MARKER_none: // no marker found
2594  return stbi__err("expected marker","Corrupt JPEG");
2595 
2596  case 0xDD: // DRI - specify restart interval
2597  if (stbi__get16be(z->s) != 4) return stbi__err("bad DRI len","Corrupt JPEG");
2598  z->restart_interval = stbi__get16be(z->s);
2599  return 1;
2600 
2601  case 0xDB: // DQT - define quantization table
2602  L = stbi__get16be(z->s)-2;
2603  while (L > 0) {
2604  int q = stbi__get8(z->s);
2605  int p = q >> 4;
2606  int t = q & 15,i;
2607  if (p != 0) return stbi__err("bad DQT type","Corrupt JPEG");
2608  if (t > 3) return stbi__err("bad DQT table","Corrupt JPEG");
2609  for (i=0; i < 64; ++i)
2610  z->dequant[t][stbi__jpeg_dezigzag[i]] = stbi__get8(z->s);
2611  L -= 65;
2612  }
2613  return L==0;
2614 
2615  case 0xC4: // DHT - define huffman table
2616  L = stbi__get16be(z->s)-2;
2617  while (L > 0) {
2618  stbi_uc *v;
2619  int sizes[16],i,n=0;
2620  int q = stbi__get8(z->s);
2621  int tc = q >> 4;
2622  int th = q & 15;
2623  if (tc > 1 || th > 3) return stbi__err("bad DHT header","Corrupt JPEG");
2624  for (i=0; i < 16; ++i) {
2625  sizes[i] = stbi__get8(z->s);
2626  n += sizes[i];
2627  }
2628  L -= 17;
2629  if (tc == 0) {
2630  if (!stbi__build_huffman(z->huff_dc+th, sizes)) return 0;
2631  v = z->huff_dc[th].values;
2632  } else {
2633  if (!stbi__build_huffman(z->huff_ac+th, sizes)) return 0;
2634  v = z->huff_ac[th].values;
2635  }
2636  for (i=0; i < n; ++i)
2637  v[i] = stbi__get8(z->s);
2638  if (tc != 0)
2639  stbi__build_fast_ac(z->fast_ac[th], z->huff_ac + th);
2640  L -= n;
2641  }
2642  return L==0;
2643  }
2644  // check for comment block or APP blocks
2645  if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) {
2646  stbi__skip(z->s, stbi__get16be(z->s)-2);
2647  return 1;
2648  }
2649  return 0;
2650 }
2651 
2652 // after we see SOS
2653 static int stbi__process_scan_header(stbi__jpeg *z)
2654 {
2655  int i;
2656  int Ls = stbi__get16be(z->s);
2657 
2658  z->scan_n = stbi__get8(z->s);
2659 
2660  if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int) z->s->img_n)
2661  return stbi__err("bad SOS component count","Corrupt JPEG");
2662  if (Ls != 6+2*z->scan_n)
2663  return stbi__err("bad SOS len","Corrupt JPEG");
2664 
2665  for (i=0; i < z->scan_n; ++i)
2666  {
2667  int id = stbi__get8(z->s), which;
2668  int q = stbi__get8(z->s);
2669 
2670  for (which = 0; which < z->s->img_n; ++which)
2671  if (z->img_comp[which].id == id)
2672  break;
2673  if (which == z->s->img_n)
2674  return 0; /* no match */
2675 
2676  z->img_comp[which].hd = q >> 4; if (z->img_comp[which].hd > 3)
2677  return stbi__err("bad DC huff","Corrupt JPEG");
2678  z->img_comp[which].ha = q & 15; if (z->img_comp[which].ha > 3)
2679  return stbi__err("bad AC huff","Corrupt JPEG");
2680  z->order[i] = which;
2681  }
2682 
2683  {
2684  int aa;
2685  z->spec_start = stbi__get8(z->s);
2686  z->spec_end = stbi__get8(z->s); /* should be 63, but might be 0 */
2687  aa = stbi__get8(z->s);
2688  z->succ_high = (aa >> 4);
2689  z->succ_low = (aa & 15);
2690  if (z->progressive) {
2691  if (z->spec_start > 63 || z->spec_end > 63 || z->spec_start > z->spec_end || z->succ_high > 13 || z->succ_low > 13)
2692  return stbi__err("bad SOS", "Corrupt JPEG");
2693  } else {
2694  if (z->spec_start != 0) return stbi__err("bad SOS","Corrupt JPEG");
2695  if (z->succ_high != 0 || z->succ_low != 0) return stbi__err("bad SOS","Corrupt JPEG");
2696  z->spec_end = 63;
2697  }
2698  }
2699 
2700  return 1;
2701 }
2702 
2703 static int stbi__process_frame_header(stbi__jpeg *z, int scan)
2704 {
2705  stbi__context *s = z->s;
2706  int Lf,p,i,q, h_max=1,v_max=1,c;
2707  Lf = stbi__get16be(s); if (Lf < 11) return stbi__err("bad SOF len","Corrupt JPEG"); // JPEG
2708  p = stbi__get8(s); if (p != 8) return stbi__err("only 8-bit","JPEG format not supported: 8-bit only"); // JPEG baseline
2709  s->img_y = stbi__get16be(s); if (s->img_y == 0) return stbi__err("no header height", "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG
2710  s->img_x = stbi__get16be(s); if (s->img_x == 0) return stbi__err("0 width","Corrupt JPEG"); // JPEG requires
2711  c = stbi__get8(s);
2712  if (c != 3 && c != 1) return stbi__err("bad component count","Corrupt JPEG"); // JFIF requires
2713  s->img_n = c;
2714  for (i=0; i < c; ++i) {
2715  z->img_comp[i].data = NULL;
2716  z->img_comp[i].linebuf = NULL;
2717  }
2718 
2719  if (Lf != 8+3*s->img_n) return stbi__err("bad SOF len","Corrupt JPEG");
2720 
2721  for (i=0; i < s->img_n; ++i) {
2722  z->img_comp[i].id = stbi__get8(s);
2723  if (z->img_comp[i].id != i+1) // JFIF requires
2724  if (z->img_comp[i].id != i) // some version of jpegtran outputs non-JFIF-compliant files!
2725  return stbi__err("bad component ID","Corrupt JPEG");
2726  q = stbi__get8(s);
2727  z->img_comp[i].h = (q >> 4); if (!z->img_comp[i].h || z->img_comp[i].h > 4) return stbi__err("bad H","Corrupt JPEG");
2728  z->img_comp[i].v = q & 15; if (!z->img_comp[i].v || z->img_comp[i].v > 4) return stbi__err("bad V","Corrupt JPEG");
2729  z->img_comp[i].tq = stbi__get8(s); if (z->img_comp[i].tq > 3) return stbi__err("bad TQ","Corrupt JPEG");
2730  }
2731 
2732  if (scan != STBI__SCAN_load) return 1;
2733 
2734  if ((1 << 30) / s->img_x / s->img_n < s->img_y) return stbi__err("too large", "Image too large to decode");
2735 
2736  for (i=0; i < s->img_n; ++i) {
2737  if (z->img_comp[i].h > h_max) h_max = z->img_comp[i].h;
2738  if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v;
2739  }
2740 
2741  // compute interleaved mcu info
2742  z->img_h_max = h_max;
2743  z->img_v_max = v_max;
2744  z->img_mcu_w = h_max * 8;
2745  z->img_mcu_h = v_max * 8;
2746  z->img_mcu_x = (s->img_x + z->img_mcu_w-1) / z->img_mcu_w;
2747  z->img_mcu_y = (s->img_y + z->img_mcu_h-1) / z->img_mcu_h;
2748 
2749  for (i=0; i < s->img_n; ++i) {
2750  // number of effective pixels (e.g. for non-interleaved MCU)
2751  z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max-1) / h_max;
2752  z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max-1) / v_max;
2753  // to simplify generation, we'll allocate enough memory to decode
2754  // the bogus oversized data from using interleaved MCUs and their
2755  // big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't
2756  // discard the extra data until colorspace conversion
2757  z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8;
2758  z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8;
2759  z->img_comp[i].raw_data = stbi__malloc(z->img_comp[i].w2 * z->img_comp[i].h2+15);
2760 
2761  if (z->img_comp[i].raw_data == NULL) {
2762  for(--i; i >= 0; --i) {
2763  STBI_FREE(z->img_comp[i].raw_data);
2764  z->img_comp[i].data = NULL;
2765  }
2766  return stbi__err("outofmem", "Out of memory");
2767  }
2768  // align blocks for idct using mmx/sse
2769  z->img_comp[i].data = (stbi_uc*) (((size_t) z->img_comp[i].raw_data + 15) & ~15);
2770  z->img_comp[i].linebuf = NULL;
2771  if (z->progressive) {
2772  z->img_comp[i].coeff_w = (z->img_comp[i].w2 + 7) >> 3;
2773  z->img_comp[i].coeff_h = (z->img_comp[i].h2 + 7) >> 3;
2774  z->img_comp[i].raw_coeff = STBI_MALLOC(z->img_comp[i].coeff_w * z->img_comp[i].coeff_h * 64 * sizeof(short) + 15);
2775  z->img_comp[i].coeff = (short*) (((size_t) z->img_comp[i].raw_coeff + 15) & ~15);
2776  } else {
2777  z->img_comp[i].coeff = 0;
2778  z->img_comp[i].raw_coeff = 0;
2779  }
2780  }
2781 
2782  return 1;
2783 }
2784 
/* Marker-code predicates. They are written as comparisons rather than
 * constants because a single predicate may accept several codes (SOF). */
#define stbi__SOF(x)         ((x) == 0xc0 || (x) == 0xc1 || (x) == 0xc2)
#define stbi__SOI(x)         ((x) == 0xd8)
#define stbi__EOI(x)         ((x) == 0xd9)
#define stbi__SOS(x)         ((x) == 0xda)
#define stbi__DNL(x)         ((x) == 0xdc)

/* the one SOF code that selects progressive decoding */
#define stbi__SOF_progressive(x)   ((x) == 0xc2)
2793 
2794 static int stbi__decode_jpeg_header(stbi__jpeg *z, int scan)
2795 {
2796  int m;
2797  z->marker = STBI__MARKER_none; // initialize cached marker to empty
2798  m = stbi__get_marker(z);
2799  if (!stbi__SOI(m)) return stbi__err("no SOI","Corrupt JPEG");
2800  if (scan == STBI__SCAN_type) return 1;
2801  m = stbi__get_marker(z);
2802  while (!stbi__SOF(m)) {
2803  if (!stbi__process_marker(z,m)) return 0;
2804  m = stbi__get_marker(z);
2805  while (m == STBI__MARKER_none) {
2806  // some files have extra padding after their blocks, so ok, we'll scan
2807  if (stbi__at_eof(z->s)) return stbi__err("no SOF", "Corrupt JPEG");
2808  m = stbi__get_marker(z);
2809  }
2810  }
2811  z->progressive = stbi__SOF_progressive(m);
2812  if (!stbi__process_frame_header(z, scan)) return 0;
2813  return 1;
2814 }
2815 
2816 // decode image to YCbCr format
2817 static int stbi__decode_jpeg_image(stbi__jpeg *j)
2818 {
2819  int m;
2820  for (m = 0; m < 4; m++) {
2821  j->img_comp[m].raw_data = NULL;
2822  j->img_comp[m].raw_coeff = NULL;
2823  }
2824  j->restart_interval = 0;
2825  if (!stbi__decode_jpeg_header(j, STBI__SCAN_load)) return 0;
2826  m = stbi__get_marker(j);
2827  while (!stbi__EOI(m)) {
2828  if (stbi__SOS(m)) {
2829  if (!stbi__process_scan_header(j)) return 0;
2830  if (!stbi__parse_entropy_coded_data(j)) return 0;
2831  if (j->marker == STBI__MARKER_none ) {
2832  // handle 0s at the end of image data from IP Kamera 9060
2833  while (!stbi__at_eof(j->s)) {
2834  int x = stbi__get8(j->s);
2835  if (x == 255) {
2836  j->marker = stbi__get8(j->s);
2837  break;
2838  } else if (x != 0) {
2839  return stbi__err("junk before marker", "Corrupt JPEG");
2840  }
2841  }
2842  // if we reach eof without hitting a marker, stbi__get_marker() below will fail and we'll eventually return 0
2843  }
2844  } else {
2845  if (!stbi__process_marker(j, m)) return 0;
2846  }
2847  m = stbi__get_marker(j);
2848  }
2849  if (j->progressive)
2850  stbi__jpeg_finish(j);
2851  return 1;
2852 }
2853 
2854 // static jfif-centered resampling (across block boundaries)
2855 
2856 typedef stbi_uc *(*resample_row_func)(stbi_uc *out, stbi_uc *in0, stbi_uc *in1,
2857  int w, int hs);
2858 
2859 #define stbi__div4(x) ((stbi_uc) ((x) >> 2))
2860 
2861 static stbi_uc *resample_row_1(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
2862 {
2863  STBI_NOTUSED(out);
2864  STBI_NOTUSED(in_far);
2865  STBI_NOTUSED(w);
2866  STBI_NOTUSED(hs);
2867  return in_near;
2868 }
2869 
2870 static stbi_uc* stbi__resample_row_v_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
2871 {
2872  // need to generate two samples vertically for every one in input
2873  int i;
2874  STBI_NOTUSED(hs);
2875  for (i=0; i < w; ++i)
2876  out[i] = stbi__div4(3*in_near[i] + in_far[i] + 2);
2877  return out;
2878 }
2879 
2880 static stbi_uc* stbi__resample_row_h_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
2881 {
2882  // need to generate two samples horizontally for every one in input
2883  int i;
2884  stbi_uc *input = in_near;
2885 
2886  if (w == 1) {
2887  // if only one sample, can't do any interpolation
2888  out[0] = out[1] = input[0];
2889  return out;
2890  }
2891 
2892  out[0] = input[0];
2893  out[1] = stbi__div4(input[0]*3 + input[1] + 2);
2894  for (i=1; i < w-1; ++i) {
2895  int n = 3*input[i]+2;
2896  out[i*2+0] = stbi__div4(n+input[i-1]);
2897  out[i*2+1] = stbi__div4(n+input[i+1]);
2898  }
2899  out[i*2+0] = stbi__div4(input[w-2]*3 + input[w-1] + 2);
2900  out[i*2+1] = input[w-1];
2901 
2902  STBI_NOTUSED(in_far);
2903  STBI_NOTUSED(hs);
2904 
2905  return out;
2906 }
2907 
2908 #define stbi__div16(x) ((stbi_uc) ((x) >> 4))
2909 
2910 static stbi_uc *stbi__resample_row_hv_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
2911 {
2912  // need to generate 2x2 samples for every one in input
2913  int i,t0,t1;
2914  if (w == 1) {
2915  out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2);
2916  return out;
2917  }
2918 
2919  t1 = 3*in_near[0] + in_far[0];
2920  out[0] = stbi__div4(t1+2);
2921  for (i=1; i < w; ++i) {
2922  t0 = t1;
2923  t1 = 3*in_near[i]+in_far[i];
2924  out[i*2-1] = stbi__div16(3*t0 + t1 + 8);
2925  out[i*2 ] = stbi__div16(3*t1 + t0 + 8);
2926  }
2927  out[w*2-1] = stbi__div4(t1+2);
2928 
2929  STBI_NOTUSED(hs);
2930 
2931  return out;
2932 }
2933 
#if defined(STBI_SSE2) || defined(STBI_NEON)
/* SIMD flavour of the 2x2 resampler. Columns are processed eight at a
 * time with SSE2 or NEON for as long as a ninth column is still
 * available for the right-hand neighbour; the remaining columns use the
 * same scalar arithmetic as stbi__resample_row_hv_2(). */
static stbi_uc *stbi__resample_row_hv_2_simd(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
{
   int col = 0;
   int left, here;   /* vertical blends of the previous / current column */

   STBI_NOTUSED(hs);

   if (w == 1) {
      out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2);
      return out;
   }

   here = 3*in_near[0] + in_far[0];

   /* process groups of 8 pixels for as long as we can.
    * note we can't handle the last pixel in a row in this loop
    * because we need to handle the filter boundary conditions.
    */
   for (; col < ((w-1) & ~7); col += 8)
   {
#if defined(STBI_SSE2)
      /* vertical pass, using 3*x + y = 4*x + (y - x) */
      __m128i zeroes   = _mm_setzero_si128();
      __m128i far8     = _mm_loadl_epi64((__m128i *) (in_far + col));
      __m128i near8    = _mm_loadl_epi64((__m128i *) (in_near + col));
      __m128i far16    = _mm_unpacklo_epi8(far8, zeroes);
      __m128i near16   = _mm_unpacklo_epi8(near8, zeroes);
      __m128i vdelta   = _mm_sub_epi16(far16, near16);
      __m128i near_x4  = _mm_slli_epi16(near16, 2);
      __m128i row      = _mm_add_epi16(near_x4, vdelta); /* current row */

      /* "row_l" is the row shifted right by one pixel with the previous
       * column's blend (here) inserted in lane 0; "row_r" is the row
       * shifted left by one pixel with the first pixel of the next
       * group of eight inserted in lane 7.
       */
      __m128i shl      = _mm_slli_si128(row, 2);
      __m128i shr      = _mm_srli_si128(row, 2);
      __m128i row_l    = _mm_insert_epi16(shl, here, 0);
      __m128i row_r    = _mm_insert_epi16(shr, 3*in_near[col+8] + in_far[col+8], 7);

      /* horizontal pass, polyphase:
       *   even pixels = 3*cur + prev = cur*4 + (prev - cur)
       *   odd  pixels = 3*cur + next = cur*4 + (next - cur)
       * the cur*4 + 8 term is shared. */
      __m128i round8   = _mm_set1_epi16(8);
      __m128i row_x4   = _mm_slli_epi16(row, 2);
      __m128i dl       = _mm_sub_epi16(row_l, row);
      __m128i dr       = _mm_sub_epi16(row_r, row);
      __m128i base     = _mm_add_epi16(row_x4, round8);
      __m128i even_px  = _mm_add_epi16(dl, base);
      __m128i odd_px   = _mm_add_epi16(dr, base);

      /* interleave even and odd pixels, then undo scaling. */
      __m128i mix_lo   = _mm_unpacklo_epi16(even_px, odd_px);
      __m128i mix_hi   = _mm_unpackhi_epi16(even_px, odd_px);
      __m128i res_lo   = _mm_srli_epi16(mix_lo, 4);
      __m128i res_hi   = _mm_srli_epi16(mix_hi, 4);

      /* pack and write output */
      __m128i packed   = _mm_packus_epi16(res_lo, res_hi);
      _mm_storeu_si128((__m128i *) (out + col*2), packed);
#elif defined(STBI_NEON)
      /* vertical pass, using 3*x + y = 4*x + (y - x) */
      uint8x8_t far8    = vld1_u8(in_far + col);
      uint8x8_t near8   = vld1_u8(in_near + col);
      int16x8_t vdelta  = vreinterpretq_s16_u16(vsubl_u8(far8, near8));
      int16x8_t near_x4 = vreinterpretq_s16_u16(vshll_n_u8(near8, 2));
      int16x8_t row     = vaddq_s16(near_x4, vdelta); /* current row */

      /* "row_l" is the row shifted right by one pixel with the previous
       * column's blend (here) inserted in lane 0; "row_r" is the row
       * shifted left by one pixel with the first pixel of the next
       * group of eight inserted in lane 7.
       */
      int16x8_t rot_r   = vextq_s16(row, row, 7);
      int16x8_t rot_l   = vextq_s16(row, row, 1);
      int16x8_t row_l   = vsetq_lane_s16(here, rot_r, 0);
      int16x8_t row_r   = vsetq_lane_s16(3*in_near[col+8] + in_far[col+8], rot_l, 7);

      /* horizontal pass, polyphase:
       *   even pixels = 3*cur + prev = cur*4 + (prev - cur)
       *   odd  pixels = 3*cur + next = cur*4 + (next - cur)
       * the cur*4 term is shared.
       */
      int16x8_t row_x4  = vshlq_n_s16(row, 2);
      int16x8_t dl      = vsubq_s16(row_l, row);
      int16x8_t dr      = vsubq_s16(row_r, row);
      int16x8_t even_px = vaddq_s16(row_x4, dl);
      int16x8_t odd_px  = vaddq_s16(row_x4, dr);

      /* undo scaling and round, then store with even/odd phases interleaved */
      uint8x8x2_t pair;
      pair.val[0] = vqrshrun_n_s16(even_px, 4);
      pair.val[1] = vqrshrun_n_s16(odd_px,  4);
      vst2_u8(out + col*2, pair);
#endif

      /* "previous" value for next iteration */
      here = 3*in_near[col+7] + in_far[col+7];
   }

   /* scalar continuation: first remaining column ... */
   left = here;
   here = 3*in_near[col] + in_far[col];
   out[col*2] = stbi__div16(3*here + left + 8);

   /* ... then the rest of the row */
   for (++col; col < w; ++col) {
      left = here;
      here = 3*in_near[col]+in_far[col];
      out[col*2-1] = stbi__div16(3*left + here + 8);
      out[col*2  ] = stbi__div16(3*here + left + 8);
   }
   out[w*2-1] = stbi__div4(here+2);

   return out;
}
#endif
3054 
3055 static stbi_uc *stbi__resample_row_generic(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
3056 {
3057  /* resample with nearest-neighbor */
3058  int i,j;
3059  STBI_NOTUSED(in_far);
3060  for (i=0; i < w; ++i)
3061  for (j=0; j < hs; ++j)
3062  out[i*hs+j] = in_near[i];
3063  return out;
3064 }
3065 
3066 #ifdef STBI_JPEG_OLD
3067 /* this is the same YCbCr-to-RGB calculation that stb_image has used
3068  * historically before the algorithm changes in 1.49 */
3069 #define float2fixed(x) ((int) ((x) * 65536 + 0.5))
3070 static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step)
3071 {
3072  int i;
3073  for (i=0; i < count; ++i) {
3074  int y_fixed = (y[i] << 16) + 32768; // rounding
3075  int r,g,b;
3076  int cr = pcr[i] - 128;
3077  int cb = pcb[i] - 128;
3078  r = y_fixed + cr*float2fixed(1.40200f);
3079  g = y_fixed - cr*float2fixed(0.71414f) - cb*float2fixed(0.34414f);
3080  b = y_fixed + cb*float2fixed(1.77200f);
3081  r >>= 16;
3082  g >>= 16;
3083  b >>= 16;
3084  if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; }
3085  if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; }
3086  if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; }
3087  out[0] = (stbi_uc)r;
3088  out[1] = (stbi_uc)g;
3089  out[2] = (stbi_uc)b;
3090  out[3] = 255;
3091  out += step;
3092  }
3093 }
3094 #else
3095 /* this is a reduced-precision calculation of YCbCr-to-RGB introduced
3096  * to make sure the code produces the same results in both SIMD and scalar */
3097 #define float2fixed(x) (((int) ((x) * 4096.0f + 0.5f)) << 8)
3098 static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step)
3099 {
3100  int i;
3101  for (i=0; i < count; ++i) {
3102  int y_fixed = (y[i] << 20) + (1<<19); /* rounding */
3103  int r,g,b;
3104  int cr = pcr[i] - 128;
3105  int cb = pcb[i] - 128;
3106  r = y_fixed + cr* float2fixed(1.40200f);
3107  g = y_fixed + (cr*-float2fixed(0.71414f)) + ((cb*-float2fixed(0.34414f)) & 0xffff0000);
3108  b = y_fixed + cb* float2fixed(1.77200f);
3109  r >>= 20;
3110  g >>= 20;
3111  b >>= 20;
3112  if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; }
3113  if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; }
3114  if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; }
3115  out[0] = (stbi_uc)r;
3116  out[1] = (stbi_uc)g;
3117  out[2] = (stbi_uc)b;
3118  out[3] = 255;
3119  out += step;
3120  }
3121 }
3122 #endif
3123 
#if defined(STBI_SSE2) || defined(STBI_NEON)
/* SIMD YCbCr-to-RGB row conversion. When step == 4, pixels are converted
 * eight at a time with SSE2 or NEON; whatever is left (and every pixel
 * when step != 4) goes through the scalar loop at the end, which uses
 * the reduced-precision float2fixed() arithmetic. */
static void stbi__YCbCr_to_RGB_simd(stbi_uc *out, stbi_uc const *y, stbi_uc const *pcb, stbi_uc const *pcr, int count, int step)
{
   int px = 0;

#ifdef STBI_SSE2
   /* step == 3 is pretty ugly on the final interleave, and i'm not convinced
    * it's useful in practice (you wouldn't use it for textures, for example).
    * so just accelerate step == 4 case.
    */
   if (step == 4)
   {
      /* this is a fairly straightforward implementation and not super-optimized. */
      __m128i flip_sign = _mm_set1_epi8(-0x80);
      __m128i k_cr_r    = _mm_set1_epi16(   (short) ( 1.40200f*4096.0f+0.5f));
      __m128i k_cr_g    = _mm_set1_epi16( - (short) ( 0.71414f*4096.0f+0.5f));
      __m128i k_cb_g    = _mm_set1_epi16( - (short) ( 0.34414f*4096.0f+0.5f));
      __m128i k_cb_b    = _mm_set1_epi16(   (short) ( 1.77200f*4096.0f+0.5f));
      __m128i luma_bias = _mm_set1_epi8((char) (unsigned char) 128);
      __m128i alpha16   = _mm_set1_epi16(255); /* alpha channel */

      for (; px+7 < count; px += 8)
      {
         /* load */
         __m128i y8     = _mm_loadl_epi64((__m128i *) (y+px));
         __m128i cr8    = _mm_loadl_epi64((__m128i *) (pcr+px));
         __m128i cb8    = _mm_loadl_epi64((__m128i *) (pcb+px));
         __m128i cr_s8  = _mm_xor_si128(cr8, flip_sign); /* -128 */
         __m128i cb_s8  = _mm_xor_si128(cb8, flip_sign); /* -128 */

         /* unpack to short (and left-shift cr, cb by 8) */
         __m128i y16    = _mm_unpacklo_epi8(luma_bias, y8);
         __m128i cr16   = _mm_unpacklo_epi8(_mm_setzero_si128(), cr_s8);
         __m128i cb16   = _mm_unpacklo_epi8(_mm_setzero_si128(), cb_s8);

         /* color transform */
         __m128i y_sc   = _mm_srli_epi16(y16, 4);
         __m128i r_cr   = _mm_mulhi_epi16(k_cr_r, cr16);
         __m128i g_cb   = _mm_mulhi_epi16(k_cb_g, cb16);
         __m128i b_cb   = _mm_mulhi_epi16(cb16, k_cb_b);
         __m128i g_cr   = _mm_mulhi_epi16(cr16, k_cr_g);
         __m128i r_sc   = _mm_add_epi16(r_cr, y_sc);
         __m128i g_tmp  = _mm_add_epi16(g_cb, y_sc);
         __m128i b_sc   = _mm_add_epi16(y_sc, b_cb);
         __m128i g_sc   = _mm_add_epi16(g_tmp, g_cr);

         /* descale */
         __m128i r16    = _mm_srai_epi16(r_sc, 4);
         __m128i b16    = _mm_srai_epi16(b_sc, 4);
         __m128i g16    = _mm_srai_epi16(g_sc, 4);

         /* back to byte, set up for transpose */
         __m128i rb8    = _mm_packus_epi16(r16, b16);
         __m128i ga8    = _mm_packus_epi16(g16, alpha16);

         /* transpose to interleave channels */
         __m128i lo     = _mm_unpacklo_epi8(rb8, ga8);
         __m128i hi     = _mm_unpackhi_epi8(rb8, ga8);
         __m128i pix_lo = _mm_unpacklo_epi16(lo, hi);
         __m128i pix_hi = _mm_unpackhi_epi16(lo, hi);

         /* store */
         _mm_storeu_si128((__m128i *) (out + 0), pix_lo);
         _mm_storeu_si128((__m128i *) (out + 16), pix_hi);
         out += 32;
      }
   }
#endif

#ifdef STBI_NEON
   /* in this version, step=3 support would be easy to add. but is there demand? */
   if (step == 4) {
      /* this is a fairly straightforward implementation and not super-optimized. */
      uint8x8_t flip_sign = vdup_n_u8(0x80);
      int16x8_t k_cr_r    = vdupq_n_s16(   (short) ( 1.40200f*4096.0f+0.5f));
      int16x8_t k_cr_g    = vdupq_n_s16( - (short) ( 0.71414f*4096.0f+0.5f));
      int16x8_t k_cb_g    = vdupq_n_s16( - (short) ( 0.34414f*4096.0f+0.5f));
      int16x8_t k_cb_b    = vdupq_n_s16(   (short) ( 1.77200f*4096.0f+0.5f));

      for (; px+7 < count; px += 8) {
         /* load */
         uint8x8_t y8    = vld1_u8(y + px);
         uint8x8_t cr8   = vld1_u8(pcr + px);
         uint8x8_t cb8   = vld1_u8(pcb + px);
         int8x8_t cr_s8  = vreinterpret_s8_u8(vsub_u8(cr8, flip_sign));
         int8x8_t cb_s8  = vreinterpret_s8_u8(vsub_u8(cb8, flip_sign));

         /* expand to s16 */
         int16x8_t y_sc  = vreinterpretq_s16_u16(vshll_n_u8(y8, 4));
         int16x8_t cr16  = vshll_n_s8(cr_s8, 7);
         int16x8_t cb16  = vshll_n_s8(cb_s8, 7);

         /* color transform */
         int16x8_t r_cr  = vqdmulhq_s16(cr16, k_cr_r);
         int16x8_t g_cb  = vqdmulhq_s16(cb16, k_cb_g);
         int16x8_t g_cr  = vqdmulhq_s16(cr16, k_cr_g);
         int16x8_t b_cb  = vqdmulhq_s16(cb16, k_cb_b);
         int16x8_t r_sc  = vaddq_s16(y_sc, r_cr);
         int16x8_t g_sc  = vaddq_s16(vaddq_s16(y_sc, g_cb), g_cr);
         int16x8_t b_sc  = vaddq_s16(y_sc, b_cb);

         /* undo scaling, round, convert to byte */
         uint8x8x4_t rgba;
         rgba.val[0] = vqrshrun_n_s16(r_sc, 4);
         rgba.val[1] = vqrshrun_n_s16(g_sc, 4);
         rgba.val[2] = vqrshrun_n_s16(b_sc, 4);
         rgba.val[3] = vdup_n_u8(255);

         /* store, interleaving r/g/b/a */
         vst4_u8(out, rgba);
         out += 8*4;
      }
   }
#endif

   /* scalar path for the pixels not handled above */
   for (; px < count; ++px) {
      int luma = (y[px] << 20) + (1<<19); /* rounding */
      int cr   = pcr[px] - 128;
      int cb   = pcb[px] - 128;
      int r,g,b;

      r = luma + cr* float2fixed(1.40200f);
      g = luma + cr*-float2fixed(0.71414f) + ((cb*-float2fixed(0.34414f)) & 0xffff0000);
      b = luma + cb* float2fixed(1.77200f);

      r >>= 20;
      g >>= 20;
      b >>= 20;

      /* clamp to 0..255 */
      if (r < 0) r = 0; else if (r > 255) r = 255;
      if (g < 0) g = 0; else if (g > 255) g = 255;
      if (b < 0) b = 0; else if (b > 255) b = 255;

      out[0] = (stbi_uc)r;
      out[1] = (stbi_uc)g;
      out[2] = (stbi_uc)b;
      out[3] = 255;
      out += step;
   }
}
#endif
3261 
3262 /* set up the kernels */
3263 static void stbi__setup_jpeg(stbi__jpeg *j)
3264 {
3265  j->idct_block_kernel = stbi__idct_block;
3266  j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_row;
3267  j->resample_row_hv_2_kernel = stbi__resample_row_hv_2;
3268 
3269 #ifdef STBI_SSE2
3270  if (stbi__sse2_available()) {
3271  j->idct_block_kernel = stbi__idct_simd;
3272  #ifndef STBI_JPEG_OLD
3273  j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
3274  #endif
3275  j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
3276  }
3277 #endif
3278 
3279 #ifdef STBI_NEON
3280  j->idct_block_kernel = stbi__idct_simd;
3281  #ifndef STBI_JPEG_OLD
3282  j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
3283  #endif
3284  j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
3285 #endif
3286 }
3287 
3288 /* clean up the temporary component buffers */
3289 static void stbi__cleanup_jpeg(stbi__jpeg *j)
3290 {
3291  int i;
3292  for (i=0; i < j->s->img_n; ++i) {
3293  if (j->img_comp[i].raw_data) {
3294  STBI_FREE(j->img_comp[i].raw_data);
3295  j->img_comp[i].raw_data = NULL;
3296  j->img_comp[i].data = NULL;
3297  }
3298  if (j->img_comp[i].raw_coeff) {
3299  STBI_FREE(j->img_comp[i].raw_coeff);
3300  j->img_comp[i].raw_coeff = 0;
3301  j->img_comp[i].coeff = 0;
3302  }
3303  if (j->img_comp[i].linebuf) {
3304  STBI_FREE(j->img_comp[i].linebuf);
3305  j->img_comp[i].linebuf = NULL;
3306  }
3307  }
3308 }
3309 
3310 typedef struct
3311 {
3312  resample_row_func resample;
3313  stbi_uc *line0,*line1;
3314  int hs,vs; // expansion factor in each axis
3315  int w_lores; // horizontal pixels pre-expansion
3316  int ystep; // how far through vertical expansion we are
3317  int ypos; // which pre-expansion row we're on
3318 } stbi__resample;
3319 
3320 static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp, int req_comp)
3321 {
3322  int n, decode_n;
3323  z->s->img_n = 0; // make stbi__cleanup_jpeg safe
3324 
3325  // validate req_comp
3326  if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error");
3327 
3328  // load a jpeg image from whichever source, but leave in YCbCr format
3329  if (!stbi__decode_jpeg_image(z)) { stbi__cleanup_jpeg(z); return NULL; }
3330 
3331  // determine actual number of components to generate
3332  n = req_comp ? req_comp : z->s->img_n;
3333 
3334  if (z->s->img_n == 3 && n < 3)
3335  decode_n = 1;
3336  else
3337  decode_n = z->s->img_n;
3338 
3339  // resample and color-convert
3340  {
3341  int k;
3342  unsigned int i,j;
3343  stbi_uc *output;
3344  stbi_uc *coutput[4];
3345 
3346  stbi__resample res_comp[4];
3347 
3348  for (k=0; k < decode_n; ++k) {
3349  stbi__resample *r = &res_comp[k];
3350 
3351  // allocate line buffer big enough for upsampling off the edges
3352  // with upsample factor of 4
3353  z->img_comp[k].linebuf = (stbi_uc *) stbi__malloc(z->s->img_x + 3);
3354  if (!z->img_comp[k].linebuf) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }
3355 
3356  r->hs = z->img_h_max / z->img_comp[k].h;
3357  r->vs = z->img_v_max / z->img_comp[k].v;
3358  r->ystep = r->vs >> 1;
3359  r->w_lores = (z->s->img_x + r->hs-1) / r->hs;
3360  r->ypos = 0;
3361  r->line0 = r->line1 = z->img_comp[k].data;
3362 
3363  if (r->hs == 1 && r->vs == 1) r->resample = resample_row_1;
3364  else if (r->hs == 1 && r->vs == 2) r->resample = stbi__resample_row_v_2;
3365  else if (r->hs == 2 && r->vs == 1) r->resample = stbi__resample_row_h_2;
3366  else if (r->hs == 2 && r->vs == 2) r->resample = z->resample_row_hv_2_kernel;
3367  else r->resample = stbi__resample_row_generic;
3368  }
3369 
3370  // can't error after this so, this is safe
3371  output = (stbi_uc *) stbi__malloc(n * z->s->img_x * z->s->img_y + 1);
3372  if (!output) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }
3373 
3374  // now go ahead and resample
3375  for (j=0; j < z->s->img_y; ++j) {
3376  stbi_uc *out = output + n * z->s->img_x * j;
3377  for (k=0; k < decode_n; ++k) {
3378  stbi__resample *r = &res_comp[k];
3379  int y_bot = r->ystep >= (r->vs >> 1);
3380  coutput[k] = r->resample(z->img_comp[k].linebuf,
3381  y_bot ? r->line1 : r->line0,
3382  y_bot ? r->line0 : r->line1,
3383  r->w_lores, r->hs);
3384  if (++r->ystep >= r->vs) {
3385  r->ystep = 0;
3386  r->line0 = r->line1;
3387  if (++r->ypos < z->img_comp[k].y)
3388  r->line1 += z->img_comp[k].w2;
3389  }
3390  }
3391  if (n >= 3) {
3392  stbi_uc *y = coutput[0];
3393  if (z->s->img_n == 3) {
3394  z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
3395  } else
3396  for (i=0; i < z->s->img_x; ++i) {
3397  out[0] = out[1] = out[2] = y[i];
3398  out[3] = 255; // not used if n==3
3399  out += n;
3400  }
3401  } else {
3402  stbi_uc *y = coutput[0];
3403  if (n == 1)
3404  for (i=0; i < z->s->img_x; ++i) out[i] = y[i];
3405  else
3406  for (i=0; i < z->s->img_x; ++i) *out++ = y[i], *out++ = 255;
3407  }
3408  }
3409  stbi__cleanup_jpeg(z);
3410  *out_x = z->s->img_x;
3411  *out_y = z->s->img_y;
3412  if (comp) *comp = z->s->img_n; // report original components, not output
3413  return output;
3414  }
3415 }
3416 
3417 static unsigned char *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
3418 {
3419  stbi__jpeg j;
3420  j.s = s;
3421  stbi__setup_jpeg(&j);
3422  return load_jpeg_image(&j, x,y,comp,req_comp);
3423 }
3424 
3425 static int stbi__jpeg_test(stbi__context *s)
3426 {
3427  int r;
3428  stbi__jpeg j;
3429  j.s = s;
3430  stbi__setup_jpeg(&j);
3431  r = stbi__decode_jpeg_header(&j, STBI__SCAN_type);
3432  stbi__rewind(s);
3433  return r;
3434 }
3435 
3436 static int stbi__jpeg_info_raw(stbi__jpeg *j, int *x, int *y, int *comp)
3437 {
3438  if (!stbi__decode_jpeg_header(j, STBI__SCAN_header)) {
3439  stbi__rewind( j->s );
3440  return 0;
3441  }
3442  if (x) *x = j->s->img_x;
3443  if (y) *y = j->s->img_y;
3444  if (comp) *comp = j->s->img_n;
3445  return 1;
3446 }
3447 
3448 static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp)
3449 {
3450  stbi__jpeg j;
3451  j.s = s;
3452  return stbi__jpeg_info_raw(&j, x, y, comp);
3453 }
3454 #endif
3455 
3456 // public domain zlib decode v0.2 Sean Barrett 2006-11-18
3457 // simple implementation
3458 // - all input must be provided in an upfront buffer
3459 // - all output is written to a single output buffer (can malloc/realloc)
3460 // performance
3461 // - fast huffman
3462 
3463 #ifndef STBI_NO_ZLIB
3464 
/* fast-way is faster to check than jpeg huffman, but slow way is slower */

/* width of the direct-lookup table index; accelerates all cases in the
 * default tables */
#define STBI__ZFAST_BITS  9
/* mask selecting the low STBI__ZFAST_BITS bits */
#define STBI__ZFAST_MASK  ((1 << STBI__ZFAST_BITS) - 1)
3468 
3469 // zlib-style huffman encoding
3470 // (jpegs packs from left, zlib from right, so can't share code)
3471 typedef struct
3472 {
3473  stbi__uint16 fast[1 << STBI__ZFAST_BITS];
3474  stbi__uint16 firstcode[16];
3475  int maxcode[17];
3476  stbi__uint16 firstsymbol[16];
3477  stbi_uc size[288];
3478  stbi__uint16 value[288];
3479 } stbi__zhuffman;
3480 
3481 stbi_inline static int stbi__bitreverse16(int n)
3482 {
3483  n = ((n & 0xAAAA) >> 1) | ((n & 0x5555) << 1);
3484  n = ((n & 0xCCCC) >> 2) | ((n & 0x3333) << 2);
3485  n = ((n & 0xF0F0) >> 4) | ((n & 0x0F0F) << 4);
3486  n = ((n & 0xFF00) >> 8) | ((n & 0x00FF) << 8);
3487  return n;
3488 }
3489 
3490 stbi_inline static int stbi__bit_reverse(int v, int bits)
3491 {
3492  STBI_ASSERT(bits <= 16);
3493  // to bit reverse n bits, reverse 16 and shift
3494  // e.g. 11 bits, bit reverse and shift away 5
3495  return stbi__bitreverse16(v) >> (16-bits);
3496 }
3497 
3498 static int stbi__zbuild_huffman(stbi__zhuffman *z, stbi_uc *sizelist, int num)
3499 {
3500  int i,k=0;
3501  int code, next_code[16], sizes[17];
3502 
3503  // DEFLATE spec for generating codes
3504  memset(sizes, 0, sizeof(sizes));
3505  memset(z->fast, 0, sizeof(z->fast));
3506  for (i=0; i < num; ++i)
3507  ++sizes[sizelist[i]];
3508  sizes[0] = 0;
3509  for (i=1; i < 16; ++i)
3510  if (sizes[i] > (1 << i))
3511  return stbi__err("bad sizes", "Corrupt PNG");
3512  code = 0;
3513  for (i=1; i < 16; ++i) {
3514  next_code[i] = code;
3515  z->firstcode[i] = (stbi__uint16) code;
3516  z->firstsymbol[i] = (stbi__uint16) k;
3517  code = (code + sizes[i]);
3518  if (sizes[i])
3519  if (code-1 >= (1 << i)) return stbi__err("bad codelengths","Corrupt PNG");
3520  z->maxcode[i] = code << (16-i); // preshift for inner loop
3521  code <<= 1;
3522  k += sizes[i];
3523  }
3524  z->maxcode[16] = 0x10000; // sentinel
3525  for (i=0; i < num; ++i) {
3526  int s = sizelist[i];
3527  if (s) {
3528  int c = next_code[s] - z->firstcode[s] + z->firstsymbol[s];
3529  stbi__uint16 fastv = (stbi__uint16) ((s << 9) | i);
3530  z->size [c] = (stbi_uc ) s;
3531  z->value[c] = (stbi__uint16) i;
3532  if (s <= STBI__ZFAST_BITS) {
3533  int k = stbi__bit_reverse(next_code[s],s);
3534  while (k < (1 << STBI__ZFAST_BITS)) {
3535  z->fast[k] = fastv;
3536  k += (1 << s);
3537  }
3538  }
3539  ++next_code[s];
3540  }
3541  }
3542  return 1;
3543 }
3544 
3545 // zlib-from-memory implementation for PNG reading
3546 // because PNG allows splitting the zlib stream arbitrarily,
3547 // and it's annoying structurally to have PNG call ZLIB call PNG,
3548 // we require PNG read all the IDATs and combine them into a single
3549 // memory buffer
3550 
3551 typedef struct
3552 {
3553  stbi_uc *zbuffer, *zbuffer_end;
3554  int num_bits;
3555  stbi__uint32 code_buffer;
3556 
3557  char *zout;
3558  char *zout_start;
3559  char *zout_end;
3560  int z_expandable;
3561 
3562  stbi__zhuffman z_length, z_distance;
3563 } stbi__zbuf;
3564 
3565 stbi_inline static stbi_uc stbi__zget8(stbi__zbuf *z)
3566 {
3567  if (z->zbuffer >= z->zbuffer_end) return 0;
3568  return *z->zbuffer++;
3569 }
3570 
3571 static void stbi__fill_bits(stbi__zbuf *z)
3572 {
3573  do {
3574  STBI_ASSERT(z->code_buffer < (1U << z->num_bits));
3575  z->code_buffer |= stbi__zget8(z) << z->num_bits;
3576  z->num_bits += 8;
3577  } while (z->num_bits <= 24);
3578 }
3579 
3580 stbi_inline static unsigned int stbi__zreceive(stbi__zbuf *z, int n)
3581 {
3582  unsigned int k;
3583  if (z->num_bits < n) stbi__fill_bits(z);
3584  k = z->code_buffer & ((1 << n) - 1);
3585  z->code_buffer >>= n;
3586  z->num_bits -= n;
3587  return k;
3588 }
3589 
3590 static int stbi__zhuffman_decode_slowpath(stbi__zbuf *a, stbi__zhuffman *z)
3591 {
3592  int b,s,k;
3593  // not resolved by fast table, so compute it the slow way
3594  // use jpeg approach, which requires MSbits at top
3595  k = stbi__bit_reverse(a->code_buffer, 16);
3596  for (s=STBI__ZFAST_BITS+1; ; ++s)
3597  if (k < z->maxcode[s])
3598  break;
3599  if (s == 16) return -1; // invalid code!
3600  // code size is s, so:
3601  b = (k >> (16-s)) - z->firstcode[s] + z->firstsymbol[s];
3602  STBI_ASSERT(z->size[b] == s);
3603  a->code_buffer >>= s;
3604  a->num_bits -= s;
3605  return z->value[b];
3606 }
3607 
3608 stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z)
3609 {
3610  int b,s;
3611  if (a->num_bits < 16) stbi__fill_bits(a);
3612  b = z->fast[a->code_buffer & STBI__ZFAST_MASK];
3613  if (b) {
3614  s = b >> 9;
3615  a->code_buffer >>= s;
3616  a->num_bits -= s;
3617  return b & 511;
3618  }
3619  return stbi__zhuffman_decode_slowpath(a, z);
3620 }
3621 
3622 static int stbi__zexpand(stbi__zbuf *z, char *zout, int n) // need to make room for n bytes
3623 {
3624  char *q;
3625  int cur, limit;
3626  z->zout = zout;
3627  if (!z->z_expandable) return stbi__err("output buffer limit","Corrupt PNG");
3628  cur = (int) (z->zout - z->zout_start);
3629  limit = (int) (z->zout_end - z->zout_start);
3630  while (cur + n > limit)
3631  limit *= 2;
3632  q = (char *) STBI_REALLOC(z->zout_start, limit);
3633  if (q == NULL) return stbi__err("outofmem", "Out of memory");
3634  z->zout_start = q;
3635  z->zout = q + cur;
3636  z->zout_end = q + limit;
3637  return 1;
3638 }
3639 
/* Base match length for length symbols 257.. (indexed by symbol - 257).
 * The trailing zero entries cover symbol values with no assigned length. */
static int stbi__zlength_base[31] = {
     3,   4,   5,   6,   7,   8,   9,  10,
    11,  13,  15,  17,  19,  23,  27,  31,
    35,  43,  51,  59,  67,  83,  99, 115,
   131, 163, 195, 227, 258,   0,   0
};

/* Number of extra bits added to the base length, same indexing. */
static int stbi__zlength_extra[31] = {
   0, 0, 0, 0, 0, 0, 0, 0,
   1, 1, 1, 1, 2, 2, 2, 2,
   3, 3, 3, 3, 4, 4, 4, 4,
   5, 5, 5, 5, 0, 0, 0
};

/* Base match distance, indexed by distance symbol. */
static int stbi__zdist_base[32] = {
       1,     2,     3,     4,     5,     7,     9,    13,
      17,    25,    33,    49,    65,    97,   129,   193,
     257,   385,   513,   769,  1025,  1537,  2049,  3073,
    4097,  6145,  8193, 12289, 16385, 24577,     0,     0
};

/* Number of extra bits added to the base distance; the last two entries
 * are left to zero-initialisation. */
static int stbi__zdist_extra[32] = {
    0,  0,  0,  0,  1,  1,  2,  2,
    3,  3,  4,  4,  5,  5,  6,  6,
    7,  7,  8,  8,  9,  9, 10, 10,
   11, 11, 12, 12, 13, 13
};
3653 
3654 static int stbi__parse_huffman_block(stbi__zbuf *a)
3655 {
3656  char *zout = a->zout;
3657  for(;;) {
3658  int z = stbi__zhuffman_decode(a, &a->z_length);
3659  if (z < 256) {
3660  if (z < 0) return stbi__err("bad huffman code","Corrupt PNG"); // error in huffman codes
3661  if (zout >= a->zout_end) {
3662  if (!stbi__zexpand(a, zout, 1)) return 0;
3663  zout = a->zout;
3664  }
3665  *zout++ = (char) z;
3666  } else {
3667  stbi_uc *p;
3668  int len,dist;
3669  if (z == 256) {
3670  a->zout = zout;
3671  return 1;
3672  }
3673  z -= 257;
3674  len = stbi__zlength_base[z];
3675  if (stbi__zlength_extra[z]) len += stbi__zreceive(a, stbi__zlength_extra[z]);
3676  z = stbi__zhuffman_decode(a, &a->z_distance);
3677  if (z < 0) return stbi__err("bad huffman code","Corrupt PNG");
3678  dist = stbi__zdist_base[z];
3679  if (stbi__zdist_extra[z]) dist += stbi__zreceive(a, stbi__zdist_extra[z]);
3680  if (zout - a->zout_start < dist) return stbi__err("bad dist","Corrupt PNG");
3681  if (zout + len > a->zout_end) {
3682  if (!stbi__zexpand(a, zout, len)) return 0;
3683  zout = a->zout;
3684  }
3685  p = (stbi_uc *) (zout - dist);
3686  if (dist == 1) { // run of one byte; common in images.
3687  stbi_uc v = *p;
3688  if (len) { do *zout++ = v; while (--len); }
3689  } else {
3690  if (len) { do *zout++ = *p++; while (--len); }
3691  }
3692  }
3693  }
3694 }
3695 
3696 static int stbi__compute_huffman_codes(stbi__zbuf *a)
3697 {
3698  static stbi_uc length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 };
3699  stbi__zhuffman z_codelength;
3700  stbi_uc lencodes[286+32+137];//padding for maximum single op
3701  stbi_uc codelength_sizes[19];
3702  int i,n;
3703 
3704  int hlit = stbi__zreceive(a,5) + 257;
3705  int hdist = stbi__zreceive(a,5) + 1;
3706  int hclen = stbi__zreceive(a,4) + 4;
3707 
3708  memset(codelength_sizes, 0, sizeof(codelength_sizes));
3709  for (i=0; i < hclen; ++i) {
3710  int s = stbi__zreceive(a,3);
3711  codelength_sizes[length_dezigzag[i]] = (stbi_uc) s;
3712  }
3713  if (!stbi__zbuild_huffman(&z_codelength, codelength_sizes, 19)) return 0;
3714 
3715  n = 0;
3716  while (n < hlit + hdist) {
3717  int c = stbi__zhuffman_decode(a, &z_codelength);
3718  if (c < 0 || c >= 19) return stbi__err("bad codelengths", "Corrupt PNG");
3719  if (c < 16)
3720  lencodes[n++] = (stbi_uc) c;
3721  else if (c == 16) {
3722  c = stbi__zreceive(a,2)+3;
3723  memset(lencodes+n, lencodes[n-1], c);
3724  n += c;
3725  } else if (c == 17) {
3726  c = stbi__zreceive(a,3)+3;
3727  memset(lencodes+n, 0, c);
3728  n += c;
3729  } else {
3730  STBI_ASSERT(c == 18);
3731  c = stbi__zreceive(a,7)+11;
3732  memset(lencodes+n, 0, c);
3733  n += c;
3734  }
3735  }
3736  if (n != hlit+hdist) return stbi__err("bad codelengths","Corrupt PNG");
3737  if (!stbi__zbuild_huffman(&a->z_length, lencodes, hlit)) return 0;
3738  if (!stbi__zbuild_huffman(&a->z_distance, lencodes+hlit, hdist)) return 0;
3739  return 1;
3740 }
3741 
3742 static int stbi__parse_uncomperssed_block(stbi__zbuf *a)
3743 {
3744  stbi_uc header[4];
3745  int len,nlen,k;
3746  if (a->num_bits & 7)
3747  stbi__zreceive(a, a->num_bits & 7); // discard
3748  // drain the bit-packed data into header
3749  k = 0;
3750  while (a->num_bits > 0) {
3751  header[k++] = (stbi_uc) (a->code_buffer & 255); // suppress MSVC run-time check
3752  a->code_buffer >>= 8;
3753  a->num_bits -= 8;
3754  }
3755  STBI_ASSERT(a->num_bits == 0);
3756  // now fill header the normal way
3757  while (k < 4)
3758  header[k++] = stbi__zget8(a);
3759  len = header[1] * 256 + header[0];
3760  nlen = header[3] * 256 + header[2];
3761  if (nlen != (len ^ 0xffff)) return stbi__err("zlib corrupt","Corrupt PNG");
3762  if (a->zbuffer + len > a->zbuffer_end) return stbi__err("read past buffer","Corrupt PNG");
3763  if (a->zout + len > a->zout_end)
3764  if (!stbi__zexpand(a, a->zout, len)) return 0;
3765  memcpy(a->zout, a->zbuffer, len);
3766  a->zbuffer += len;
3767  a->zout += len;
3768  return 1;
3769 }
3770 
3771 static int stbi__parse_zlib_header(stbi__zbuf *a)
3772 {
3773  int cmf = stbi__zget8(a);
3774  int cm = cmf & 15;
3775  /* int cinfo = cmf >> 4; */
3776  int flg = stbi__zget8(a);
3777  if ((cmf*256+flg) % 31 != 0) return stbi__err("bad zlib header","Corrupt PNG"); // zlib spec
3778  if (flg & 32) return stbi__err("no preset dict","Corrupt PNG"); // preset dictionary not allowed in png
3779  if (cm != 8) return stbi__err("bad compression","Corrupt PNG"); // DEFLATE required for png
3780  // window = 1 << (8 + cinfo)... but who cares, we fully buffer output
3781  return 1;
3782 }
3783 
3784 // @TODO: should statically initialize these for optimal thread safety
3785 static stbi_uc stbi__zdefault_length[288], stbi__zdefault_distance[32];
3786 static void stbi__init_zdefaults(void)
3787 {
3788  int i; // use <= to match clearly with spec
3789  for (i=0; i <= 143; ++i) stbi__zdefault_length[i] = 8;
3790  for ( ; i <= 255; ++i) stbi__zdefault_length[i] = 9;
3791  for ( ; i <= 279; ++i) stbi__zdefault_length[i] = 7;
3792  for ( ; i <= 287; ++i) stbi__zdefault_length[i] = 8;
3793 
3794  for (i=0; i <= 31; ++i) stbi__zdefault_distance[i] = 5;
3795 }
3796 
3797 static int stbi__parse_zlib(stbi__zbuf *a, int parse_header)
3798 {
3799  int final, type;
3800  if (parse_header)
3801  if (!stbi__parse_zlib_header(a)) return 0;
3802  a->num_bits = 0;
3803  a->code_buffer = 0;
3804  do {
3805  final = stbi__zreceive(a,1);
3806  type = stbi__zreceive(a,2);
3807  if (type == 0) {
3808  if (!stbi__parse_uncomperssed_block(a)) return 0;
3809  } else if (type == 3) {
3810  return 0;
3811  } else {
3812  if (type == 1) {
3813  // use fixed code lengths
3814  if (!stbi__zdefault_distance[31]) stbi__init_zdefaults();
3815  if (!stbi__zbuild_huffman(&a->z_length , stbi__zdefault_length , 288)) return 0;
3816  if (!stbi__zbuild_huffman(&a->z_distance, stbi__zdefault_distance, 32)) return 0;
3817  } else {
3818  if (!stbi__compute_huffman_codes(a)) return 0;
3819  }
3820  if (!stbi__parse_huffman_block(a)) return 0;
3821  }
3822  } while (!final);
3823  return 1;
3824 }
3825 
3826 static int stbi__do_zlib(stbi__zbuf *a, char *obuf, int olen, int exp, int parse_header)
3827 {
3828  a->zout_start = obuf;
3829  a->zout = obuf;
3830  a->zout_end = obuf + olen;
3831  a->z_expandable = exp;
3832 
3833  return stbi__parse_zlib(a, parse_header);
3834 }
3835 
3836 STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen)
3837 {
3838  stbi__zbuf a;
3839  char *p = (char *) stbi__malloc(initial_size);
3840  if (p == NULL) return NULL;
3841  a.zbuffer = (stbi_uc *) buffer;
3842  a.zbuffer_end = (stbi_uc *) buffer + len;
3843  if (stbi__do_zlib(&a, p, initial_size, 1, 1)) {
3844  if (outlen) *outlen = (int) (a.zout - a.zout_start);
3845  return a.zout_start;
3846  } else {
3847  STBI_FREE(a.zout_start);
3848  return NULL;
3849  }
3850 }
3851 
3852 STBIDEF char *stbi_zlib_decode_malloc(char const *buffer, int len, int *outlen)
3853 {
3854  return stbi_zlib_decode_malloc_guesssize(buffer, len, 16384, outlen);
3855 }
3856 
3857 STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header)
3858 {
3859  stbi__zbuf a;
3860  char *p = (char *) stbi__malloc(initial_size);
3861  if (p == NULL) return NULL;
3862  a.zbuffer = (stbi_uc *) buffer;
3863  a.zbuffer_end = (stbi_uc *) buffer + len;
3864  if (stbi__do_zlib(&a, p, initial_size, 1, parse_header)) {
3865  if (outlen) *outlen = (int) (a.zout - a.zout_start);
3866  return a.zout_start;
3867  } else {
3868  STBI_FREE(a.zout_start);
3869  return NULL;
3870  }
3871 }
3872 
3873 STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, char const *ibuffer, int ilen)
3874 {
3875  stbi__zbuf a;
3876  a.zbuffer = (stbi_uc *) ibuffer;
3877  a.zbuffer_end = (stbi_uc *) ibuffer + ilen;
3878  if (stbi__do_zlib(&a, obuffer, olen, 0, 1))
3879  return (int) (a.zout - a.zout_start);
3880  else
3881  return -1;
3882 }
3883 
3884 STBIDEF char *stbi_zlib_decode_noheader_malloc(char const *buffer, int len, int *outlen)
3885 {
3886  stbi__zbuf a;
3887  char *p = (char *) stbi__malloc(16384);
3888  if (p == NULL) return NULL;
3889  a.zbuffer = (stbi_uc *) buffer;
3890  a.zbuffer_end = (stbi_uc *) buffer+len;
3891  if (stbi__do_zlib(&a, p, 16384, 1, 0)) {
3892  if (outlen) *outlen = (int) (a.zout - a.zout_start);
3893  return a.zout_start;
3894  } else {
3895  STBI_FREE(a.zout_start);
3896  return NULL;
3897  }
3898 }
3899 
3900 STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen)
3901 {
3902  stbi__zbuf a;
3903  a.zbuffer = (stbi_uc *) ibuffer;
3904  a.zbuffer_end = (stbi_uc *) ibuffer + ilen;
3905  if (stbi__do_zlib(&a, obuffer, olen, 0, 0))
3906  return (int) (a.zout - a.zout_start);
3907  else
3908  return -1;
3909 }
3910 #endif
3911 
3912 // public domain "baseline" PNG decoder v0.10 Sean Barrett 2006-11-18
3913 // simple implementation
3914 // - only 8-bit samples
3915 // - no CRC checking
3916 // - allocates lots of intermediate memory
3917 // - avoids problem of streaming data between subsystems
3918 // - avoids explicit window management
3919 // performance
3920 // - uses stb_zlib, a PD zlib implementation with fast huffman decoding
3921 
3922 #ifndef STBI_NO_PNG
3923 typedef struct
3924 {
3925  stbi__uint32 length;
3926  stbi__uint32 type;
3927 } stbi__pngchunk;
3928 
3929 static stbi__pngchunk stbi__get_chunk_header(stbi__context *s)
3930 {
3931  stbi__pngchunk c;
3932  c.length = stbi__get32be(s);
3933  c.type = stbi__get32be(s);
3934  return c;
3935 }
3936 
3937 static int stbi__check_png_header(stbi__context *s)
3938 {
3939  static stbi_uc png_sig[8] = { 137,80,78,71,13,10,26,10 };
3940  int i;
3941  for (i=0; i < 8; ++i)
3942  if (stbi__get8(s) != png_sig[i]) return stbi__err("bad png sig","Not a PNG");
3943  return 1;
3944 }
3945 
3946 typedef struct
3947 {
3948  stbi__context *s;
3949  stbi_uc *idata, *expanded, *out;
3950 } stbi__png;
3951 
3952 
// Scanline filter selectors. Values 0..4 are the filter bytes found in the
// decompressed PNG data; the last two are internal substitutes.
enum {
   STBI__F_none        = 0,
   STBI__F_sub         = 1,
   STBI__F_up          = 2,
   STBI__F_avg         = 3,
   STBI__F_paeth       = 4,
   // synthetic filters used for first scanline to avoid needing a dummy row of 0s
   STBI__F_avg_first   = 5,
   STBI__F_paeth_first = 6
};
3963 
3964 static stbi_uc first_row_filter[5] =
3965 {
3966  STBI__F_none,
3967  STBI__F_sub,
3968  STBI__F_none,
3969  STBI__F_avg_first,
3970  STBI__F_paeth_first
3971 };
3972 
// Paeth predictor: of a, b and c, return the one closest to a + b - c.
// Ties are resolved in the order a, then b, then c.
static int stbi__paeth(int a, int b, int c)
{
   int estimate = a + b - c;
   int dist_a = abs(estimate - a);
   int dist_b = abs(estimate - b);
   int dist_c = abs(estimate - c);

   if (dist_a > dist_b || dist_a > dist_c)
      return (dist_b <= dist_c) ? b : c;
   return a;
}
3983 
3984 static stbi_uc stbi__depth_scale_table[9] = { 0, 0xff, 0x55, 0, 0x11, 0,0,0, 0x01 };
3985 
3986 // create the png data from post-deflated data
3987 static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 raw_len, int out_n, stbi__uint32 x, stbi__uint32 y, int depth, int color)
3988 {
3989  stbi__context *s = a->s;
3990  stbi__uint32 i,j,stride = x*out_n;
3991  stbi__uint32 img_len, img_width_bytes;
3992  int k;
3993  int img_n = s->img_n; // copy it into a local for later
3994 
3995  STBI_ASSERT(out_n == s->img_n || out_n == s->img_n+1);
3996  a->out = (stbi_uc *) stbi__malloc(x * y * out_n); // extra bytes to write off the end into
3997  if (!a->out) return stbi__err("outofmem", "Out of memory");
3998 
3999  img_width_bytes = (((img_n * x * depth) + 7) >> 3);
4000  img_len = (img_width_bytes + 1) * y;
4001  if (s->img_x == x && s->img_y == y) {
4002  if (raw_len != img_len) return stbi__err("not enough pixels","Corrupt PNG");
4003  } else { // interlaced:
4004  if (raw_len < img_len) return stbi__err("not enough pixels","Corrupt PNG");
4005  }
4006 
4007  for (j=0; j < y; ++j) {
4008  stbi_uc *cur = a->out + stride*j;
4009  stbi_uc *prior = cur - stride;
4010  int filter = *raw++;
4011  int filter_bytes = img_n;
4012  int width = x;
4013  if (filter > 4)
4014  return stbi__err("invalid filter","Corrupt PNG");
4015 
4016  if (depth < 8) {
4017  STBI_ASSERT(img_width_bytes <= x);
4018  cur += x*out_n - img_width_bytes; // store output to the rightmost img_len bytes, so we can decode in place
4019  filter_bytes = 1;
4020  width = img_width_bytes;
4021  }
4022 
4023  // if first row, use special filter that doesn't sample previous row
4024  if (j == 0) filter = first_row_filter[filter];
4025 
4026  // handle first byte explicitly
4027  for (k=0; k < filter_bytes; ++k) {
4028  switch (filter) {
4029  case STBI__F_none : cur[k] = raw[k]; break;
4030  case STBI__F_sub : cur[k] = raw[k]; break;
4031  case STBI__F_up : cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break;
4032  case STBI__F_avg : cur[k] = STBI__BYTECAST(raw[k] + (prior[k]>>1)); break;
4033  case STBI__F_paeth : cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(0,prior[k],0)); break;
4034  case STBI__F_avg_first : cur[k] = raw[k]; break;
4035  case STBI__F_paeth_first: cur[k] = raw[k]; break;
4036  }
4037  }
4038 
4039  if (depth == 8) {
4040  if (img_n != out_n)
4041  cur[img_n] = 255; // first pixel
4042  raw += img_n;
4043  cur += out_n;
4044  prior += out_n;
4045  } else {
4046  raw += 1;
4047  cur += 1;
4048  prior += 1;
4049  }
4050 
4051  // this is a little gross, so that we don't switch per-pixel or per-component
4052  if (depth < 8 || img_n == out_n) {
4053  int nk = (width - 1)*img_n;
4054  #define CASE(f) \
4055  case f: \
4056  for (k=0; k < nk; ++k)
4057  switch (filter) {
4058  // "none" filter turns into a memcpy here; make that explicit.
4059  case STBI__F_none: memcpy(cur, raw, nk); break;
4060  CASE(STBI__F_sub) cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]); break;
4061  CASE(STBI__F_up) cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break;
4062  CASE(STBI__F_avg) cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1)); break;
4063  CASE(STBI__F_paeth) cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],prior[k],prior[k-filter_bytes])); break;
4064  CASE(STBI__F_avg_first) cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1)); break;
4065  CASE(STBI__F_paeth_first) cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],0,0)); break;
4066  }
4067  #undef CASE
4068  raw += nk;
4069  } else {
4070  STBI_ASSERT(img_n+1 == out_n);
4071  #define CASE(f) \
4072  case f: \
4073  for (i=x-1; i >= 1; --i, cur[img_n]=255,raw+=img_n,cur+=out_n,prior+=out_n) \
4074  for (k=0; k < img_n; ++k)
4075  switch (filter) {
4076  CASE(STBI__F_none) cur[k] = raw[k]; break;
4077  CASE(STBI__F_sub) cur[k] = STBI__BYTECAST(raw[k] + cur[k-out_n]); break;
4078  CASE(STBI__F_up) cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break;
4079  CASE(STBI__F_avg) cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-out_n])>>1)); break;
4080  CASE(STBI__F_paeth) cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-out_n],prior[k],prior[k-out_n])); break;
4081  CASE(STBI__F_avg_first) cur[k] = STBI__BYTECAST(raw[k] + (cur[k-out_n] >> 1)); break;
4082  CASE(STBI__F_paeth_first) cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-out_n],0,0)); break;
4083  }
4084  #undef CASE
4085  }
4086  }
4087 
4088  // we make a separate pass to expand bits to pixels; for performance,
4089  // this could run two scanlines behind the above code, so it won't
4090  // intefere with filtering but will still be in the cache.
4091  if (depth < 8) {
4092  for (j=0; j < y; ++j) {
4093  stbi_uc *cur = a->out + stride*j;
4094  stbi_uc *in = a->out + stride*j + x*out_n - img_width_bytes;
4095  // unpack 1/2/4-bit into a 8-bit buffer. allows us to keep the common 8-bit path optimal at minimal cost for 1/2/4-bit
4096  // png guarante byte alignment, if width is not multiple of 8/4/2 we'll decode dummy trailing data that will be skipped in the later loop
4097  stbi_uc scale = (color == 0) ? stbi__depth_scale_table[depth] : 1; // scale grayscale values to 0..255 range
4098 
4099  // note that the final byte might overshoot and write more data than desired.
4100  // we can allocate enough data that this never writes out of memory, but it
4101  // could also overwrite the next scanline. can it overwrite non-empty data
4102  // on the next scanline? yes, consider 1-pixel-wide scanlines with 1-bit-per-pixel.
4103  // so we need to explicitly clamp the final ones
4104 
4105  if (depth == 4) {
4106  for (k=x*img_n; k >= 2; k-=2, ++in) {
4107  *cur++ = scale * ((*in >> 4) );
4108  *cur++ = scale * ((*in ) & 0x0f);
4109  }
4110  if (k > 0) *cur++ = scale * ((*in >> 4) );
4111  } else if (depth == 2) {
4112  for (k=x*img_n; k >= 4; k-=4, ++in) {
4113  *cur++ = scale * ((*in >> 6) );
4114  *cur++ = scale * ((*in >> 4) & 0x03);
4115  *cur++ = scale * ((*in >> 2) & 0x03);
4116  *cur++ = scale * ((*in ) & 0x03);
4117  }
4118  if (k > 0) *cur++ = scale * ((*in >> 6) );
4119  if (k > 1) *cur++ = scale * ((*in >> 4) & 0x03);
4120  if (k > 2) *cur++ = scale * ((*in >> 2) & 0x03);
4121  } else if (depth == 1) {
4122  for (k=x*img_n; k >= 8; k-=8, ++in) {
4123  *cur++ = scale * ((*in >> 7) );
4124  *cur++ = scale * ((*in >> 6) & 0x01);
4125  *cur++ = scale * ((*in >> 5) & 0x01);
4126  *cur++ = scale * ((*in >> 4) & 0x01);
4127  *cur++ = scale * ((*in >> 3) & 0x01);
4128  *cur++ = scale * ((*in >> 2) & 0x01);
4129  *cur++ = scale * ((*in >> 1) & 0x01);
4130  *cur++ = scale * ((*in ) & 0x01);
4131  }
4132  if (k > 0) *cur++ = scale * ((*in >> 7) );
4133  if (k > 1) *cur++ = scale * ((*in >> 6) & 0x01);
4134  if (k > 2) *cur++ = scale * ((*in >> 5) & 0x01);
4135  if (k > 3) *cur++ = scale * ((*in >> 4) & 0x01);
4136  if (k > 4) *cur++ = scale * ((*in >> 3) & 0x01);
4137  if (k > 5) *cur++ = scale * ((*in >> 2) & 0x01);
4138  if (k > 6) *cur++ = scale * ((*in >> 1) & 0x01);
4139  }
4140  if (img_n != out_n) {
4141  // insert alpha = 255
4142  stbi_uc *cur = a->out + stride*j;
4143  int i;
4144  if (img_n == 1) {
4145  for (i=x-1; i >= 0; --i) {
4146  cur[i*2+1] = 255;
4147  cur[i*2+0] = cur[i];
4148  }
4149  } else {
4150  STBI_ASSERT(img_n == 3);
4151  for (i=x-1; i >= 0; --i) {
4152  cur[i*4+3] = 255;
4153  cur[i*4+2] = cur[i*3+2];
4154  cur[i*4+1] = cur[i*3+1];
4155  cur[i*4+0] = cur[i*3+0];
4156  }
4157  }
4158  }
4159  }
4160  }
4161 
4162  return 1;
4163 }
4164 
4165 static int stbi__create_png_image(stbi__png *a, stbi_uc *image_data, stbi__uint32 image_data_len, int out_n, int depth, int color, int interlaced)
4166 {
4167  stbi_uc *final;
4168  int p;
4169  if (!interlaced)
4170  return stbi__create_png_image_raw(a, image_data, image_data_len, out_n, a->s->img_x, a->s->img_y, depth, color);
4171 
4172  // de-interlacing
4173  final = (stbi_uc *) stbi__malloc(a->s->img_x * a->s->img_y * out_n);
4174  for (p=0; p < 7; ++p) {
4175  int xorig[] = { 0,4,0,2,0,1,0 };
4176  int yorig[] = { 0,0,4,0,2,0,1 };
4177  int xspc[] = { 8,8,4,4,2,2,1 };
4178  int yspc[] = { 8,8,8,4,4,2,2 };
4179  int i,j,x,y;
4180  // pass1_x[4] = 0, pass1_x[5] = 1, pass1_x[12] = 1
4181  x = (a->s->img_x - xorig[p] + xspc[p]-1) / xspc[p];
4182  y = (a->s->img_y - yorig[p] + yspc[p]-1) / yspc[p];
4183  if (x && y) {
4184  stbi__uint32 img_len = ((((a->s->img_n * x * depth) + 7) >> 3) + 1) * y;
4185  if (!stbi__create_png_image_raw(a, image_data, image_data_len, out_n, x, y, depth, color)) {
4186  STBI_FREE(final);
4187  return 0;
4188  }
4189  for (j=0; j < y; ++j) {
4190  for (i=0; i < x; ++i) {
4191  int out_y = j*yspc[p]+yorig[p];
4192  int out_x = i*xspc[p]+xorig[p];
4193  memcpy(final + out_y*a->s->img_x*out_n + out_x*out_n,
4194  a->out + (j*x+i)*out_n, out_n);
4195  }
4196  }
4197  STBI_FREE(a->out);
4198  image_data += img_len;
4199  image_data_len -= img_len;
4200  }
4201  }
4202  a->out = final;
4203 
4204  return 1;
4205 }
4206 
4207 static int stbi__compute_transparency(stbi__png *z, stbi_uc tc[3], int out_n)
4208 {
4209  stbi__context *s = z->s;
4210  stbi__uint32 i, pixel_count = s->img_x * s->img_y;
4211  stbi_uc *p = z->out;
4212 
4213  // compute color-based transparency, assuming we've
4214  // already got 255 as the alpha value in the output
4215  STBI_ASSERT(out_n == 2 || out_n == 4);
4216 
4217  if (out_n == 2) {
4218  for (i=0; i < pixel_count; ++i) {
4219  p[1] = (p[0] == tc[0] ? 0 : 255);
4220  p += 2;
4221  }
4222  } else {
4223  for (i=0; i < pixel_count; ++i) {
4224  if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2])
4225  p[3] = 0;
4226  p += 4;
4227  }
4228  }
4229  return 1;
4230 }
4231 
4232 static int stbi__expand_png_palette(stbi__png *a, stbi_uc *palette, int len, int pal_img_n)
4233 {
4234  stbi__uint32 i, pixel_count = a->s->img_x * a->s->img_y;
4235  stbi_uc *p, *temp_out, *orig = a->out;
4236 
4237  p = (stbi_uc *) stbi__malloc(pixel_count * pal_img_n);
4238  if (p == NULL) return stbi__err("outofmem", "Out of memory");
4239 
4240  // between here and free(out) below, exitting would leak
4241  temp_out = p;
4242 
4243  if (pal_img_n == 3) {
4244  for (i=0; i < pixel_count; ++i) {
4245  int n = orig[i]*4;
4246  p[0] = palette[n ];
4247  p[1] = palette[n+1];
4248  p[2] = palette[n+2];
4249  p += 3;
4250  }
4251  } else {
4252  for (i=0; i < pixel_count; ++i) {
4253  int n = orig[i]*4;
4254  p[0] = palette[n ];
4255  p[1] = palette[n+1];
4256  p[2] = palette[n+2];
4257  p[3] = palette[n+3];
4258  p += 4;
4259  }
4260  }
4261  STBI_FREE(a->out);
4262  a->out = temp_out;
4263 
4264  STBI_NOTUSED(len);
4265 
4266  return 1;
4267 }
4268 
4269 static int stbi__unpremultiply_on_load = 0;
4270 static int stbi__de_iphone_flag = 0;
4271 
4272 STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply)
4273 {
4274  stbi__unpremultiply_on_load = flag_true_if_should_unpremultiply;
4275 }
4276 
4277 STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert)
4278 {
4279  stbi__de_iphone_flag = flag_true_if_should_convert;
4280 }
4281 
4282 static void stbi__de_iphone(stbi__png *z)
4283 {
4284  stbi__context *s = z->s;
4285  stbi__uint32 i, pixel_count = s->img_x * s->img_y;
4286  stbi_uc *p = z->out;
4287 
4288  if (s->img_out_n == 3) { // convert bgr to rgb
4289  for (i=0; i < pixel_count; ++i) {
4290  stbi_uc t = p[0];
4291  p[0] = p[2];
4292  p[2] = t;
4293  p += 3;
4294  }
4295  } else {
4296  STBI_ASSERT(s->img_out_n == 4);
4297  if (stbi__unpremultiply_on_load) {
4298  // convert bgr to rgb and unpremultiply
4299  for (i=0; i < pixel_count; ++i) {
4300  stbi_uc a = p[3];
4301  stbi_uc t = p[0];
4302  if (a) {
4303  p[0] = p[2] * 255 / a;
4304  p[1] = p[1] * 255 / a;
4305  p[2] = t * 255 / a;
4306  } else {
4307  p[0] = p[2];
4308  p[2] = t;
4309  }
4310  p += 4;
4311  }
4312  } else {
4313  // convert bgr to rgb
4314  for (i=0; i < pixel_count; ++i) {
4315  stbi_uc t = p[0];
4316  p[0] = p[2];
4317  p[2] = t;
4318  p += 4;
4319  }
4320  }
4321  }
4322 }
4323 
// Pack four chunk-name bytes into the 32-bit value a big-endian read of the
// chunk type produces. The bytes are shifted as unsigned so a byte >= 0x80 in
// the top position cannot overflow a signed int.
#define STBI__PNG_TYPE(a,b,c,d)  (((unsigned) (a) << 24) + ((unsigned) (b) << 16) + ((unsigned) (c) << 8) + (unsigned) (d))
4325 
4326 static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp)
4327 {
4328  stbi_uc palette[1024], pal_img_n=0;
4329  stbi_uc has_trans=0, tc[3];
4330  stbi__uint32 ioff=0, idata_limit=0, i, pal_len=0;
4331  int first=1,k,interlace=0, color=0, depth=0, is_iphone=0;
4332  stbi__context *s = z->s;
4333 
4334  z->expanded = NULL;
4335  z->idata = NULL;
4336  z->out = NULL;
4337 
4338  if (!stbi__check_png_header(s)) return 0;
4339 
4340  if (scan == STBI__SCAN_type) return 1;
4341 
4342  for (;;) {
4343  stbi__pngchunk c = stbi__get_chunk_header(s);
4344  switch (c.type) {
4345  case STBI__PNG_TYPE('C','g','B','I'):
4346  is_iphone = 1;
4347  stbi__skip(s, c.length);
4348  break;
4349  case STBI__PNG_TYPE('I','H','D','R'): {
4350  int comp,filter;
4351  if (!first) return stbi__err("multiple IHDR","Corrupt PNG");
4352  first = 0;
4353  if (c.length != 13) return stbi__err("bad IHDR len","Corrupt PNG");
4354  s->img_x = stbi__get32be(s); if (s->img_x > (1 << 24)) return stbi__err("too large","Very large image (corrupt?)");
4355  s->img_y = stbi__get32be(s); if (s->img_y > (1 << 24)) return stbi__err("too large","Very large image (corrupt?)");
4356  depth = stbi__get8(s); if (depth != 1 && depth != 2 && depth != 4 && depth != 8) return stbi__err("1/2/4/8-bit only","PNG not supported: 1/2/4/8-bit only");
4357  color = stbi__get8(s); if (color > 6) return stbi__err("bad ctype","Corrupt PNG");
4358  if (color == 3) pal_img_n = 3; else if (color & 1) return stbi__err("bad ctype","Corrupt PNG");
4359  comp = stbi__get8(s); if (comp) return stbi__err("bad comp method","Corrupt PNG");
4360  filter= stbi__get8(s); if (filter) return stbi__err("bad filter method","Corrupt PNG");
4361  interlace = stbi__get8(s); if (interlace>1) return stbi__err("bad interlace method","Corrupt PNG");
4362  if (!s->img_x || !s->img_y) return stbi__err("0-pixel image","Corrupt PNG");
4363  if (!pal_img_n) {
4364  s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0);
4365  if ((1 << 30) / s->img_x / s->img_n < s->img_y) return stbi__err("too large", "Image too large to decode");
4366  if (scan == STBI__SCAN_header) return 1;
4367  } else {
4368  // if paletted, then pal_n is our final components, and
4369  // img_n is # components to decompress/filter.
4370  s->img_n = 1;
4371  if ((1 << 30) / s->img_x / 4 < s->img_y) return stbi__err("too large","Corrupt PNG");
4372  // if SCAN_header, have to scan to see if we have a tRNS
4373  }
4374  break;
4375  }
4376 
4377  case STBI__PNG_TYPE('P','L','T','E'): {
4378  if (first) return stbi__err("first not IHDR", "Corrupt PNG");
4379  if (c.length > 256*3) return stbi__err("invalid PLTE","Corrupt PNG");
4380  pal_len = c.length / 3;
4381  if (pal_len * 3 != c.length) return stbi__err("invalid PLTE","Corrupt PNG");
4382  for (i=0; i < pal_len; ++i) {
4383  palette[i*4+0] = stbi__get8(s);
4384  palette[i*4+1] = stbi__get8(s);
4385  palette[i*4+2] = stbi__get8(s);
4386  palette[i*4+3] = 255;
4387  }
4388  break;
4389  }
4390 
4391  case STBI__PNG_TYPE('t','R','N','S'): {
4392  if (first) return stbi__err("first not IHDR", "Corrupt PNG");
4393  if (z->idata) return stbi__err("tRNS after IDAT","Corrupt PNG");
4394  if (pal_img_n) {
4395  if (scan == STBI__SCAN_header) { s->img_n = 4; return 1; }
4396  if (pal_len == 0) return stbi__err("tRNS before PLTE","Corrupt PNG");
4397  if (c.length > pal_len) return stbi__err("bad tRNS len","Corrupt PNG");
4398  pal_img_n = 4;
4399  for (i=0; i < c.length; ++i)
4400  palette[i*4+3] = stbi__get8(s);
4401  } else {
4402  if (!(s->img_n & 1)) return stbi__err("tRNS with alpha","Corrupt PNG");
4403  if (c.length != (stbi__uint32) s->img_n*2) return stbi__err("bad tRNS len","Corrupt PNG");
4404  has_trans = 1;
4405  for (k=0; k < s->img_n; ++k)
4406  tc[k] = (stbi_uc) (stbi__get16be(s) & 255) * stbi__depth_scale_table[depth]; // non 8-bit images will be larger
4407  }
4408  break;
4409  }
4410 
4411  case STBI__PNG_TYPE('I','D','A','T'): {
4412  if (first) return stbi__err("first not IHDR", "Corrupt PNG");
4413  if (pal_img_n && !pal_len) return stbi__err("no PLTE","Corrupt PNG");
4414  if (scan == STBI__SCAN_header) { s->img_n = pal_img_n; return 1; }
4415  if ((int)(ioff + c.length) < (int)ioff) return 0;
4416  if (ioff + c.length > idata_limit) {
4417  stbi_uc *p;
4418  if (idata_limit == 0) idata_limit = c.length > 4096 ? c.length : 4096;
4419  while (ioff + c.length > idata_limit)
4420  idata_limit *= 2;
4421  p = (stbi_uc *) STBI_REALLOC(z->idata, idata_limit); if (p == NULL) return stbi__err("outofmem", "Out of memory");
4422  z->idata = p;
4423  }
4424  if (!stbi__getn(s, z->idata+ioff,c.length)) return stbi__err("outofdata","Corrupt PNG");
4425  ioff += c.length;
4426  break;
4427  }
4428 
4429  case STBI__PNG_TYPE('I','E','N','D'): {
4430  stbi__uint32 raw_len, bpl;
4431  if (first) return stbi__err("first not IHDR", "Corrupt PNG");
4432  if (scan != STBI__SCAN_load) return 1;
4433  if (z->idata == NULL) return stbi__err("no IDAT","Corrupt PNG");
4434  // initial guess for decoded data size to avoid unnecessary reallocs
4435  bpl = (s->img_x * depth + 7) / 8; // bytes per line, per component
4436  raw_len = bpl * s->img_y * s->img_n /* pixels */ + s->img_y /* filter mode per row */;
4437  z->expanded = (stbi_uc *) stbi_zlib_decode_malloc_guesssize_headerflag((char *) z->idata, ioff, raw_len, (int *) &raw_len, !is_iphone);
4438  if (z->expanded == NULL) return 0; // zlib should set error
4439  STBI_FREE(z->idata); z->idata = NULL;
4440  if ((req_comp == s->img_n+1 && req_comp != 3 && !pal_img_n) || has_trans)
4441  s->img_out_n = s->img_n+1;
4442  else
4443  s->img_out_n = s->img_n;
4444  if (!stbi__create_png_image(z, z->expanded, raw_len, s->img_out_n, depth, color, interlace)) return 0;
4445  if (has_trans)
4446  if (!stbi__compute_transparency(z, tc, s->img_out_n)) return 0;
4447  if (is_iphone && stbi__de_iphone_flag && s->img_out_n > 2)
4448  stbi__de_iphone(z);
4449  if (pal_img_n) {
4450  // pal_img_n == 3 or 4
4451  s->img_n = pal_img_n; // record the actual colors we had
4452  s->img_out_n = pal_img_n;
4453  if (req_comp >= 3) s->img_out_n = req_comp;
4454  if (!stbi__expand_png_palette(z, palette, pal_len, s->img_out_n))
4455  return 0;
4456  }
4457  STBI_FREE(z->expanded); z->expanded = NULL;
4458  return 1;
4459  }
4460 
4461  default:
4462  // if critical, fail
4463  if (first) return stbi__err("first not IHDR", "Corrupt PNG");
4464  if ((c.type & (1 << 29)) == 0) {
4465  #ifndef STBI_NO_FAILURE_STRINGS
4466  // not threadsafe
4467  static char invalid_chunk[] = "XXXX PNG chunk not known";
4468  invalid_chunk[0] = STBI__BYTECAST(c.type >> 24);
4469  invalid_chunk[1] = STBI__BYTECAST(c.type >> 16);
4470  invalid_chunk[2] = STBI__BYTECAST(c.type >> 8);
4471  invalid_chunk[3] = STBI__BYTECAST(c.type >> 0);
4472  #endif
4473  return stbi__err(invalid_chunk, "PNG not supported: unknown PNG chunk type");
4474  }
4475  stbi__skip(s, c.length);
4476  break;
4477  }
4478  // end of PNG chunk, read and skip CRC
4479  stbi__get32be(s);
4480  }
4481 }
4482 
4483 static unsigned char *stbi__do_png(stbi__png *p, int *x, int *y, int *n, int req_comp)
4484 {
4485  unsigned char *result=NULL;
4486  if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error");
4487  if (stbi__parse_png_file(p, STBI__SCAN_load, req_comp)) {
4488  result = p->out;
4489  p->out = NULL;
4490  if (req_comp && req_comp != p->s->img_out_n) {
4491  result = stbi__convert_format(result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y);
4492  p->s->img_out_n = req_comp;
4493  if (result == NULL) return result;
4494  }
4495  *x = p->s->img_x;
4496  *y = p->s->img_y;
4497  if (n) *n = p->s->img_out_n;
4498  }
4499  STBI_FREE(p->out); p->out = NULL;
4500  STBI_FREE(p->expanded); p->expanded = NULL;
4501  STBI_FREE(p->idata); p->idata = NULL;
4502 
4503  return result;
4504 }
4505 
4506 static unsigned char *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
4507 {
4508  stbi__png p;
4509  p.s = s;
4510  return stbi__do_png(&p, x,y,comp,req_comp);
4511 }
4512 
4513 static int stbi__png_test(stbi__context *s)
4514 {
4515  int r;
4516  r = stbi__check_png_header(s);
4517  stbi__rewind(s);
4518  return r;
4519 }
4520 
4521 static int stbi__png_info_raw(stbi__png *p, int *x, int *y, int *comp)
4522 {
4523  if (!stbi__parse_png_file(p, STBI__SCAN_header, 0)) {
4524  stbi__rewind( p->s );
4525  return 0;
4526  }
4527  if (x) *x = p->s->img_x;
4528  if (y) *y = p->s->img_y;
4529  if (comp) *comp = p->s->img_n;
4530  return 1;
4531 }
4532 
4533 static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp)
4534 {
4535  stbi__png p;
4536  p.s = s;
4537  return stbi__png_info_raw(&p, x, y, comp);
4538 }
4539 #endif
4540 
4541 // Microsoft/Windows BMP image
4542 
4543 #ifndef STBI_NO_BMP
4544 static int stbi__bmp_test_raw(stbi__context *s)
4545 {
4546  int r;
4547  int sz;
4548  if (stbi__get8(s) != 'B') return 0;
4549  if (stbi__get8(s) != 'M') return 0;
4550  stbi__get32le(s); // discard filesize
4551  stbi__get16le(s); // discard reserved
4552  stbi__get16le(s); // discard reserved
4553  stbi__get32le(s); // discard data offset
4554  sz = stbi__get32le(s);
4555  r = (sz == 12 || sz == 40 || sz == 56 || sz == 108 || sz == 124);
4556  return r;
4557 }
4558 
4559 static int stbi__bmp_test(stbi__context *s)
4560 {
4561  int r = stbi__bmp_test_raw(s);
4562  stbi__rewind(s);
4563  return r;
4564 }
4565 
4566 
// returns the index (0..31) of the highest set bit of z, or -1 when z is 0
static int stbi__high_bit(unsigned int z)
{
   int pos = 0;
   int step;

   if (z == 0)
      return -1;

   // binary search: test the upper half of the remaining window each round
   for (step = 16; step > 0; step >>= 1) {
      if ((z >> step) != 0) {
         pos += step;
         z >>= step;
      }
   }
   return pos;
}
4579 
/* Counts the set bits in the low 32 bits of `a`. */
static int stbi__bitcount(unsigned int a)
{
   int set_bits = 0;

   a &= 0xffffffffu;
   while (a != 0) {
      a &= a - 1;   // clear the lowest set bit
      ++set_bits;
   }
   return set_bits;
}
4589 
/* Move a masked channel value so its top bit lands on bit 7, then
 * replicate the `bits`-wide value downwards to fill an 8-bit range.
 *
 * v     - masked pixel bits (callers pass `pixel & mask`).
 * shift - right-shift amount; negative means shift left by -shift.
 * bits  - number of bits in the mask.
 *
 * The arithmetic is done on unsigned values: a mask that includes bit 31
 * would otherwise make `v` negative, and the right shift would pull
 * sign bits into the replication sum. bits <= 0 returns the shifted value
 * without replication (the replication loop could never terminate).
 */
static int stbi__shiftsigned(int v, int shift, int bits)
{
   unsigned int value = (unsigned int) v;
   unsigned int result;
   int z;

   if (shift < 0) value <<= -shift;
   else value >>= shift;
   result = value;

   if (bits <= 0)
      return (int) result;

   for (z = bits; z < 8; z += bits)
      result += value >> z;

   return (int) result;
}
4606 
4607 static stbi_uc *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
4608 {
4609  stbi_uc *out;
4610  unsigned int mr=0,mg=0,mb=0,ma=0;
4611  stbi_uc pal[256][4];
4612  int psize=0,i,j,compress=0,width;
4613  int bpp, flip_vertically, pad, target, offset, hsz;
4614  if (stbi__get8(s) != 'B' || stbi__get8(s) != 'M') return stbi__errpuc("not BMP", "Corrupt BMP");
4615  stbi__get32le(s); // discard filesize
4616  stbi__get16le(s); // discard reserved
4617  stbi__get16le(s); // discard reserved
4618  offset = stbi__get32le(s);
4619  hsz = stbi__get32le(s);
4620  if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108 && hsz != 124) return stbi__errpuc("unknown BMP", "BMP type not supported: unknown");
4621  if (hsz == 12) {
4622  s->img_x = stbi__get16le(s);
4623  s->img_y = stbi__get16le(s);
4624  } else {
4625  s->img_x = stbi__get32le(s);
4626  s->img_y = stbi__get32le(s);
4627  }
4628  if (stbi__get16le(s) != 1) return stbi__errpuc("bad BMP", "bad BMP");
4629  bpp = stbi__get16le(s);
4630  if (bpp == 1) return stbi__errpuc("monochrome", "BMP type not supported: 1-bit");
4631  flip_vertically = ((int) s->img_y) > 0;
4632  s->img_y = abs((int) s->img_y);
4633  if (hsz == 12) {
4634  if (bpp < 24)
4635  psize = (offset - 14 - 24) / 3;
4636  } else {
4637  compress = stbi__get32le(s);
4638  if (compress == 1 || compress == 2) return stbi__errpuc("BMP RLE", "BMP type not supported: RLE");
4639  stbi__get32le(s); // discard sizeof
4640  stbi__get32le(s); // discard hres
4641  stbi__get32le(s); // discard vres
4642  stbi__get32le(s); // discard colorsused
4643  stbi__get32le(s); // discard max important
4644  if (hsz == 40 || hsz == 56) {
4645  if (hsz == 56) {
4646  stbi__get32le(s);
4647  stbi__get32le(s);
4648  stbi__get32le(s);
4649  stbi__get32le(s);
4650  }
4651  if (bpp == 16 || bpp == 32)
4652  {
4653  mr = mg = mb = 0;
4654  if (compress == 0)
4655  {
4656  if (bpp == 32)
4657  {
4658  mr = 0xffu << 16;
4659  mg = 0xffu << 8;
4660  mb = 0xffu << 0;
4661  ma = 0xffu << 24;
4662  }
4663  else
4664  {
4665  mr = 31u << 10;
4666  mg = 31u << 5;
4667  mb = 31u << 0;
4668  }
4669  } else if (compress == 3) {
4670  mr = stbi__get32le(s);
4671  mg = stbi__get32le(s);
4672  mb = stbi__get32le(s);
4673  // not documented, but generated by photoshop and handled by mspaint
4674  if (mr == mg && mg == mb) {
4675  // ?!?!?
4676  return stbi__errpuc("bad BMP", "bad BMP");
4677  }
4678  } else
4679  return stbi__errpuc("bad BMP", "bad BMP");
4680  }
4681  } else {
4682  STBI_ASSERT(hsz == 108 || hsz == 124);
4683  mr = stbi__get32le(s);
4684  mg = stbi__get32le(s);
4685  mb = stbi__get32le(s);
4686  ma = stbi__get32le(s);
4687  stbi__get32le(s); // discard color space
4688  for (i=0; i < 12; ++i)
4689  stbi__get32le(s); // discard color space parameters
4690  if (hsz == 124) {
4691  stbi__get32le(s); // discard rendering intent
4692  stbi__get32le(s); // discard offset of profile data
4693  stbi__get32le(s); // discard size of profile data
4694  stbi__get32le(s); // discard reserved
4695  }
4696  }
4697  if (bpp < 16)
4698  psize = (offset - 14 - hsz) >> 2;
4699  }
4700  s->img_n = ma ? 4 : 3;
4701  if (req_comp && req_comp >= 3) // we can directly decode 3 or 4
4702  target = req_comp;
4703  else
4704  target = s->img_n; // if they want monochrome, we'll post-convert
4705  out = (stbi_uc *) stbi__malloc(target * s->img_x * s->img_y);
4706  if (!out) return stbi__errpuc("outofmem", "Out of memory");
4707  if (bpp < 16) {
4708  int z=0;
4709  if (psize == 0 || psize > 256) { STBI_FREE(out); return stbi__errpuc("invalid", "Corrupt BMP"); }
4710  for (i=0; i < psize; ++i) {
4711  pal[i][2] = stbi__get8(s);
4712  pal[i][1] = stbi__get8(s);
4713  pal[i][0] = stbi__get8(s);
4714  if (hsz != 12) stbi__get8(s);
4715  pal[i][3] = 255;
4716  }
4717  stbi__skip(s, offset - 14 - hsz - psize * (hsz == 12 ? 3 : 4));
4718  if (bpp == 4) width = (s->img_x + 1) >> 1;
4719  else if (bpp == 8) width = s->img_x;
4720  else { STBI_FREE(out); return stbi__errpuc("bad bpp", "Corrupt BMP"); }
4721  pad = (-width)&3;
4722  for (j=0; j < (int) s->img_y; ++j) {
4723  for (i=0; i < (int) s->img_x; i += 2) {
4724  int v=stbi__get8(s),v2=0;
4725  if (bpp == 4) {
4726  v2 = v & 15;
4727  v >>= 4;
4728  }
4729  out[z++] = pal[v][0];
4730  out[z++] = pal[v][1];
4731  out[z++] = pal[v][2];
4732  if (target == 4) out[z++] = 255;
4733  if (i+1 == (int) s->img_x) break;
4734  v = (bpp == 8) ? stbi__get8(s) : v2;
4735  out[z++] = pal[v][0];
4736  out[z++] = pal[v][1];
4737  out[z++] = pal[v][2];
4738  if (target == 4) out[z++] = 255;
4739  }
4740  stbi__skip(s, pad);
4741  }
4742  } else {
4743  int rshift=0,gshift=0,bshift=0,ashift=0,rcount=0,gcount=0,bcount=0,acount=0;
4744  int z = 0;
4745  int easy=0;
4746  stbi__skip(s, offset - 14 - hsz);
4747  if (bpp == 24) width = 3 * s->img_x;
4748  else if (bpp == 16) width = 2*s->img_x;
4749  else /* bpp = 32 and pad = 0 */ width=0;
4750  pad = (-width) & 3;
4751  if (bpp == 24) {
4752  easy = 1;
4753  } else if (bpp == 32) {
4754  if (mb == 0xff && mg == 0xff00 && mr == 0x00ff0000 && ma == 0xff000000)
4755  easy = 2;
4756  }
4757  if (!easy) {
4758  if (!mr || !mg || !mb) { STBI_FREE(out); return stbi__errpuc("bad masks", "Corrupt BMP"); }
4759  // right shift amt to put high bit in position #7
4760  rshift = stbi__high_bit(mr)-7; rcount = stbi__bitcount(mr);
4761  gshift = stbi__high_bit(mg)-7; gcount = stbi__bitcount(mg);
4762  bshift = stbi__high_bit(mb)-7; bcount = stbi__bitcount(mb);
4763  ashift = stbi__high_bit(ma)-7; acount = stbi__bitcount(ma);
4764  }
4765  for (j=0; j < (int) s->img_y; ++j) {
4766  if (easy) {
4767  for (i=0; i < (int) s->img_x; ++i) {
4768  unsigned char a;
4769  out[z+2] = stbi__get8(s);
4770  out[z+1] = stbi__get8(s);
4771  out[z+0] = stbi__get8(s);
4772  z += 3;
4773  a = (easy == 2 ? stbi__get8(s) : 255);
4774  if (target == 4) out[z++] = a;
4775  }
4776  } else {
4777  for (i=0; i < (int) s->img_x; ++i) {
4778  stbi__uint32 v = (bpp == 16 ? (stbi__uint32) stbi__get16le(s) : stbi__get32le(s));
4779  int a;
4780  out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mr, rshift, rcount));
4781  out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mg, gshift, gcount));
4782  out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mb, bshift, bcount));
4783  a = (ma ? stbi__shiftsigned(v & ma, ashift, acount) : 255);
4784  if (target == 4) out[z++] = STBI__BYTECAST(a);
4785  }
4786  }
4787  stbi__skip(s, pad);
4788  }
4789  }
4790  if (flip_vertically) {
4791  stbi_uc t;
4792  for (j=0; j < (int) s->img_y>>1; ++j) {
4793  stbi_uc *p1 = out + j *s->img_x*target;
4794  stbi_uc *p2 = out + (s->img_y-1-j)*s->img_x*target;
4795  for (i=0; i < (int) s->img_x*target; ++i) {
4796  t = p1[i], p1[i] = p2[i], p2[i] = t;
4797  }
4798  }
4799  }
4800 
4801  if (req_comp && req_comp != target) {
4802  out = stbi__convert_format(out, target, req_comp, s->img_x, s->img_y);
4803  if (out == NULL) return out; // stbi__convert_format frees input on failure
4804  }
4805 
4806  *x = s->img_x;
4807  *y = s->img_y;
4808  if (comp) *comp = s->img_n;
4809  return out;
4810 }
4811 #endif
4812 
4813 // Targa Truevision - TGA
4814 // by Jonathan Dummer
4815 #ifndef STBI_NO_TGA
4816 static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp)
4817 {
4818  int tga_w, tga_h, tga_comp;
4819  int sz;
4820  stbi__get8(s); // discard Offset
4821  sz = stbi__get8(s); // color type
4822  if( sz > 1 ) {
4823  stbi__rewind(s);
4824  return 0; // only RGB or indexed allowed
4825  }
4826  sz = stbi__get8(s); // image type
4827  // only RGB or grey allowed, +/- RLE
4828  if ((sz != 1) && (sz != 2) && (sz != 3) && (sz != 9) && (sz != 10) && (sz != 11)) return 0;
4829  stbi__skip(s,9);
4830  tga_w = stbi__get16le(s);
4831  if( tga_w < 1 ) {
4832  stbi__rewind(s);
4833  return 0; // test width
4834  }
4835  tga_h = stbi__get16le(s);
4836  if( tga_h < 1 ) {
4837  stbi__rewind(s);
4838  return 0; // test height
4839  }
4840  sz = stbi__get8(s); // bits per pixel
4841  // only RGB or RGBA or grey allowed
4842  if ((sz != 8) && (sz != 16) && (sz != 24) && (sz != 32)) {
4843  stbi__rewind(s);
4844  return 0;
4845  }
4846  tga_comp = sz;
4847  if (x) *x = tga_w;
4848  if (y) *y = tga_h;
4849  if (comp) *comp = tga_comp / 8;
4850  return 1; // seems to have passed everything
4851 }
4852 
4853 static int stbi__tga_test(stbi__context *s)
4854 {
4855  int res;
4856  int sz;
4857  stbi__get8(s); // discard Offset
4858  sz = stbi__get8(s); // color type
4859  if ( sz > 1 ) return 0; // only RGB or indexed allowed
4860  sz = stbi__get8(s); // image type
4861  if ( (sz != 1) && (sz != 2) && (sz != 3) && (sz != 9) && (sz != 10) && (sz != 11) ) return 0; // only RGB or grey allowed, +/- RLE
4862  stbi__get16be(s); // discard palette start
4863  stbi__get16be(s); // discard palette length
4864  stbi__get8(s); // discard bits per palette color entry
4865  stbi__get16be(s); // discard x origin
4866  stbi__get16be(s); // discard y origin
4867  if ( stbi__get16be(s) < 1 ) return 0; // test width
4868  if ( stbi__get16be(s) < 1 ) return 0; // test height
4869  sz = stbi__get8(s); // bits per pixel
4870  if ( (sz != 8) && (sz != 16) && (sz != 24) && (sz != 32) )
4871  res = 0;
4872  else
4873  res = 1;
4874  stbi__rewind(s);
4875  return res;
4876 }
4877 
4878 static stbi_uc *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
4879 {
4880  // read in the TGA header stuff
4881  int tga_offset = stbi__get8(s);
4882  int tga_indexed = stbi__get8(s);
4883  int tga_image_type = stbi__get8(s);
4884  int tga_is_RLE = 0;
4885  int tga_palette_start = stbi__get16le(s);
4886  int tga_palette_len = stbi__get16le(s);
4887  int tga_palette_bits = stbi__get8(s);
4888  int tga_x_origin = stbi__get16le(s);
4889  int tga_y_origin = stbi__get16le(s);
4890  int tga_width = stbi__get16le(s);
4891  int tga_height = stbi__get16le(s);
4892  int tga_bits_per_pixel = stbi__get8(s);
4893  int tga_comp = tga_bits_per_pixel / 8;
4894  int tga_inverted = stbi__get8(s);
4895  // image data
4896  unsigned char *tga_data;
4897  unsigned char *tga_palette = NULL;
4898  int i, j;
4899  unsigned char raw_data[4] = {0};
4900  int RLE_count = 0;
4901  int RLE_repeating = 0;
4902  int read_next_pixel = 1;
4903 
4904  // do a tiny bit of precessing
4905  if ( tga_image_type >= 8 )
4906  {
4907  tga_image_type -= 8;
4908  tga_is_RLE = 1;
4909  }
4910  /* int tga_alpha_bits = tga_inverted & 15; */
4911  tga_inverted = 1 - ((tga_inverted >> 5) & 1);
4912 
4913  // error check
4914  if ( //(tga_indexed) ||
4915  (tga_width < 1) || (tga_height < 1) ||
4916  (tga_image_type < 1) || (tga_image_type > 3) ||
4917  ((tga_bits_per_pixel != 8) && (tga_bits_per_pixel != 16) &&
4918  (tga_bits_per_pixel != 24) && (tga_bits_per_pixel != 32))
4919  )
4920  {
4921  return NULL; // we don't report this as a bad TGA because we don't even know if it's TGA
4922  }
4923 
4924  // If I'm paletted, then I'll use the number of bits from the palette
4925  if ( tga_indexed )
4926  {
4927  tga_comp = tga_palette_bits / 8;
4928  }
4929 
4930  // tga info
4931  *x = tga_width;
4932  *y = tga_height;
4933  if (comp) *comp = tga_comp;
4934 
4935  tga_data = (unsigned char*)stbi__malloc( (size_t)tga_width * tga_height * tga_comp );
4936  if (!tga_data) return stbi__errpuc("outofmem", "Out of memory");
4937 
4938  // skip to the data's starting position (offset usually = 0)
4939  stbi__skip(s, tga_offset );
4940 
4941  if ( !tga_indexed && !tga_is_RLE) {
4942  for (i=0; i < tga_height; ++i) {
4943  int y = tga_inverted ? tga_height -i - 1 : i;
4944  stbi_uc *tga_row = tga_data + y*tga_width*tga_comp;
4945  stbi__getn(s, tga_row, tga_width * tga_comp);
4946  }
4947  } else {
4948  // do I need to load a palette?
4949  if ( tga_indexed)
4950  {
4951  // any data to skip? (offset usually = 0)
4952  stbi__skip(s, tga_palette_start );
4953  // load the palette
4954  tga_palette = (unsigned char*)stbi__malloc( tga_palette_len * tga_palette_bits / 8 );
4955  if (!tga_palette) {
4956  STBI_FREE(tga_data);
4957  return stbi__errpuc("outofmem", "Out of memory");
4958  }
4959  if (!stbi__getn(s, tga_palette, tga_palette_len * tga_palette_bits / 8 )) {
4960  STBI_FREE(tga_data);
4961  STBI_FREE(tga_palette);
4962  return stbi__errpuc("bad palette", "Corrupt TGA");
4963  }
4964  }
4965  // load the data
4966  for (i=0; i < tga_width * tga_height; ++i)
4967  {
4968  // if I'm in RLE mode, do I need to get a RLE stbi__pngchunk?
4969  if ( tga_is_RLE )
4970  {
4971  if ( RLE_count == 0 )
4972  {
4973  // yep, get the next byte as a RLE command
4974  int RLE_cmd = stbi__get8(s);
4975  RLE_count = 1 + (RLE_cmd & 127);
4976  RLE_repeating = RLE_cmd >> 7;
4977  read_next_pixel = 1;
4978  } else if ( !RLE_repeating )
4979  {
4980  read_next_pixel = 1;
4981  }
4982  } else
4983  {
4984  read_next_pixel = 1;
4985  }
4986  // OK, if I need to read a pixel, do it now
4987  if ( read_next_pixel )
4988  {
4989  // load however much data we did have
4990  if ( tga_indexed )
4991  {
4992  // read in 1 byte, then perform the lookup
4993  int pal_idx = stbi__get8(s);
4994  if ( pal_idx >= tga_palette_len )
4995  {
4996  // invalid index
4997  pal_idx = 0;
4998  }
4999  pal_idx *= tga_bits_per_pixel / 8;
5000  for (j = 0; j*8 < tga_bits_per_pixel; ++j)
5001  {
5002  raw_data[j] = tga_palette[pal_idx+j];
5003  }
5004  } else
5005  {
5006  // read in the data raw
5007  for (j = 0; j*8 < tga_bits_per_pixel; ++j)
5008  {
5009  raw_data[j] = stbi__get8(s);
5010  }
5011  }
5012  // clear the reading flag for the next pixel
5013  read_next_pixel = 0;
5014  } // end of reading a pixel
5015 
5016  // copy data
5017  for (j = 0; j < tga_comp; ++j)
5018  tga_data[i*tga_comp+j] = raw_data[j];
5019 
5020  // in case we're in RLE mode, keep counting down
5021  --RLE_count;
5022  }
5023  // do I need to invert the image?
5024  if ( tga_inverted )
5025  {
5026  for (j = 0; j*2 < tga_height; ++j)
5027  {
5028  int index1 = j * tga_width * tga_comp;
5029  int index2 = (tga_height - 1 - j) * tga_width * tga_comp;
5030  for (i = tga_width * tga_comp; i > 0; --i)
5031  {
5032  unsigned char temp = tga_data[index1];
5033  tga_data[index1] = tga_data[index2];
5034  tga_data[index2] = temp;
5035  ++index1;
5036  ++index2;
5037  }
5038  }
5039  }
5040  // clear my palette, if I had one
5041  if ( tga_palette != NULL )
5042  {
5043  STBI_FREE( tga_palette );
5044  }
5045  }
5046 
5047  // swap RGB
5048  if (tga_comp >= 3)
5049  {
5050  unsigned char* tga_pixel = tga_data;
5051  for (i=0; i < tga_width * tga_height; ++i)
5052  {
5053  unsigned char temp = tga_pixel[0];
5054  tga_pixel[0] = tga_pixel[2];
5055  tga_pixel[2] = temp;
5056  tga_pixel += tga_comp;
5057  }
5058  }
5059 
5060  // convert to target component count
5061  if (req_comp && req_comp != tga_comp)
5062  tga_data = stbi__convert_format(tga_data, tga_comp, req_comp, tga_width, tga_height);
5063 
5064  // the things I do to get rid of an error message, and yet keep
5065  // Microsoft's C compilers happy... [8^(
5066  tga_palette_start = tga_palette_len = tga_palette_bits =
5067  tga_x_origin = tga_y_origin = 0;
5068  // OK, done
5069  return tga_data;
5070 }
5071 #endif
5072 
5073 /* Photoshop PSD loader -- PD by Thatcher Ulrich, integration by Nicolas Schulz, tweaked by STB */
5074 
5075 #ifndef STBI_NO_PSD
5076 static int stbi__psd_test(stbi__context *s)
5077 {
5078  int r = (stbi__get32be(s) == 0x38425053);
5079  stbi__rewind(s);
5080  return r;
5081 }
5082 
5083 static stbi_uc *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
5084 {
5085  int pixelCount;
5086  int channelCount, compression;
5087  int channel, i, count, len;
5088  int w,h;
5089  stbi_uc *out;
5090 
5091  /* Check identifier */
5092  if (stbi__get32be(s) != 0x38425053) // "8BPS"
5093  return stbi__errpuc("not PSD", "Corrupt PSD image");
5094 
5095  /* Check file type version. */
5096  if (stbi__get16be(s) != 1)
5097  return stbi__errpuc("wrong version", "Unsupported version of PSD image");
5098 
5099  /* Skip 6 reserved bytes. */
5100  stbi__skip(s, 6 );
5101 
5102  /* Read the number of channels (R, G, B, A, etc). */
5103  channelCount = stbi__get16be(s);
5104  if (channelCount < 0 || channelCount > 16)
5105  return stbi__errpuc("wrong channel count", "Unsupported number of channels in PSD image");
5106 
5107  /* Read the rows and columns of the image. */
5108  h = stbi__get32be(s);
5109  w = stbi__get32be(s);
5110 
5111  /* Make sure the depth is 8 bits. */
5112  if (stbi__get16be(s) != 8)
5113  return stbi__errpuc("unsupported bit depth", "PSD bit depth is not 8 bit");
5114 
5115  // Make sure the color mode is RGB.
5116  // Valid options are:
5117  // 0: Bitmap
5118  // 1: Grayscale
5119  // 2: Indexed color
5120  // 3: RGB color
5121  // 4: CMYK color
5122  // 7: Multichannel
5123  // 8: Duotone
5124  // 9: Lab color
5125  if (stbi__get16be(s) != 3)
5126  return stbi__errpuc("wrong color format", "PSD is not in RGB color format");
5127 
5128  /* Skip the Mode Data. (It's the palette for indexed color; other info for other modes.) */
5129  stbi__skip(s,stbi__get32be(s) );
5130 
5131  /* Skip the image resources. (resolution, pen tool paths, etc) */
5132  stbi__skip(s, stbi__get32be(s) );
5133 
5134  /* Skip the reserved data. */
5135  stbi__skip(s, stbi__get32be(s) );
5136 
5137  // Find out if the data is compressed.
5138  // Known values:
5139  // 0: no compression
5140  // 1: RLE compressed
5141  compression = stbi__get16be(s);
5142  if (compression > 1)
5143  return stbi__errpuc("bad compression", "PSD has an unknown compression format");
5144 
5145  // Create the destination image.
5146  out = (stbi_uc *) stbi__malloc(4 * w*h);
5147  if (!out) return stbi__errpuc("outofmem", "Out of memory");
5148  pixelCount = w*h;
5149 
5150  /* Finally, the image data. */
5151  if (compression)
5152  {
5153  // RLE as used by .PSD and .TIFF
5154  // Loop until you get the number of unpacked bytes you are expecting:
5155  // Read the next source byte into n.
5156  // If n is between 0 and 127 inclusive, copy the next n+1 bytes literally.
5157  // Else if n is between -127 and -1 inclusive, copy the next byte -n+1 times.
5158  // Else if n is 128, noop.
5159  // Endloop
5160 
5161  /* The RLE-compressed data is preceeded by a 2-byte data count
5162  * for each row in the data, which we're going to just skip. */
5163  stbi__skip(s, h * channelCount * 2 );
5164 
5165  // Read the RLE data by channel.
5166  for (channel = 0; channel < 4; channel++) {
5167  stbi_uc *p;
5168 
5169  p = out+channel;
5170  if (channel >= channelCount) {
5171  // Fill this channel with default data.
5172  for (i = 0; i < pixelCount; i++, p += 4)
5173  *p = (channel == 3 ? 255 : 0);
5174  } else {
5175  // Read the RLE data.
5176  count = 0;
5177  while (count < pixelCount) {
5178  len = stbi__get8(s);
5179  if (len == 128) {
5180  // No-op.
5181  } else if (len < 128) {
5182  // Copy next len+1 bytes literally.
5183  len++;
5184  count += len;
5185  while (len) {
5186  *p = stbi__get8(s);
5187  p += 4;
5188  len--;
5189  }
5190  } else if (len > 128) {
5191  stbi_uc val;
5192  // Next -len+1 bytes in the dest are replicated from next source byte.
5193  // (Interpret len as a negative 8-bit int.)
5194  len ^= 0x0FF;
5195  len += 2;
5196  val = stbi__get8(s);
5197  count += len;
5198  while (len) {
5199  *p = val;
5200  p += 4;
5201  len--;
5202  }
5203  }
5204  }
5205  }
5206  }
5207 
5208  } else {
5209  // We're at the raw image data. It's each channel in order (Red, Green, Blue, Alpha, ...)
5210  // where each channel consists of an 8-bit value for each pixel in the image.
5211 
5212  // Read the data by channel.
5213  for (channel = 0; channel < 4; channel++) {
5214  stbi_uc *p;
5215 
5216  p = out + channel;
5217  if (channel > channelCount) {
5218  // Fill this channel with default data.
5219  for (i = 0; i < pixelCount; i++, p += 4)
5220  *p = channel == 3 ? 255 : 0;
5221  } else {
5222  // Read the data.
5223  for (i = 0; i < pixelCount; i++, p += 4)
5224  *p = stbi__get8(s);
5225  }
5226  }
5227  }
5228 
5229  if (req_comp && req_comp != 4) {
5230  out = stbi__convert_format(out, 4, req_comp, w, h);
5231  if (out == NULL) return out; // stbi__convert_format frees input on failure
5232  }
5233 
5234  if (comp) *comp = 4;
5235  *y = h;
5236  *x = w;
5237 
5238  return out;
5239 }
5240 #endif
5241 
5242 /* *************************************************************************************************
5243  * Softimage PIC loader
5244  * by Tom Seddon
5245  *
5246  * See http://softimage.wiki.softimage.com/index.php/INFO:_PIC_file_format
5247  * See http://ozviz.wasp.uwa.edu.au/~pbourke/dataformats/softimagepic/
5248  */
5249 
5250 #ifndef STBI_NO_PIC
5251 static int stbi__pic_is4(stbi__context *s,const char *str)
5252 {
5253  int i;
5254  for (i=0; i<4; ++i)
5255  if (stbi__get8(s) != (stbi_uc)str[i])
5256  return 0;
5257 
5258  return 1;
5259 }
5260 
5261 static int stbi__pic_test_core(stbi__context *s)
5262 {
5263  int i;
5264 
5265  if (!stbi__pic_is4(s,"\x53\x80\xF6\x34"))
5266  return 0;
5267 
5268  for(i=0;i<84;++i)
5269  stbi__get8(s);
5270 
5271  if (!stbi__pic_is4(s,"PICT"))
5272  return 0;
5273 
5274  return 1;
5275 }
5276 
5277 typedef struct
5278 {
5280 } stbi__pic_packet;
5281 
5282 static stbi_uc *stbi__readval(stbi__context *s, int channel, stbi_uc *dest)
5283 {
5284  int mask=0x80, i;
5285 
5286  for (i=0; i<4; ++i, mask>>=1) {
5287  if (channel & mask) {
5288  if (stbi__at_eof(s)) return stbi__errpuc("bad file","PIC file too short");
5289  dest[i]=stbi__get8(s);
5290  }
5291  }
5292 
5293  return dest;
5294 }
5295 
5296 static void stbi__copyval(int channel,stbi_uc *dest,const stbi_uc *src)
5297 {
5298  int mask=0x80,i;
5299 
5300  for (i=0;i<4; ++i, mask>>=1)
5301  if (channel&mask)
5302  dest[i]=src[i];
5303 }
5304 
5305 static stbi_uc *stbi__pic_load_core(stbi__context *s,int width,int height,int *comp, stbi_uc *result)
5306 {
5307  int act_comp=0,num_packets=0,y,chained;
5308  stbi__pic_packet packets[10];
5309 
5310  /* this will (should...) cater for even some bizarre stuff like having data
5311  * for the same channel in multiple packets.
5312  */
5313  do
5314  {
5315  stbi__pic_packet *packet;
5316 
5317  if (num_packets==sizeof(packets)/sizeof(packets[0]))
5318  return stbi__errpuc("bad format","too many packets");
5319 
5320  packet = &packets[num_packets++];
5321 
5322  chained = stbi__get8(s);
5323  packet->size = stbi__get8(s);
5324  packet->type = stbi__get8(s);
5325  packet->channel = stbi__get8(s);
5326 
5327  act_comp |= packet->channel;
5328 
5329  if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (reading packets)");
5330  if (packet->size != 8) return stbi__errpuc("bad format","packet isn't 8bpp");
5331  } while (chained);
5332 
5333  *comp = (act_comp & 0x10 ? 4 : 3); /* has alpha channel? */
5334 
5335  for(y=0; y<height; ++y) {
5336  int packet_idx;
5337 
5338  for(packet_idx=0; packet_idx < num_packets; ++packet_idx) {
5339  stbi__pic_packet *packet = &packets[packet_idx];
5340  stbi_uc *dest = result+y*width*4;
5341 
5342  switch (packet->type) {
5343  default:
5344  return stbi__errpuc("bad format","packet has bad compression type");
5345 
5346  case 0: {//uncompressed
5347  int x;
5348 
5349  for(x=0;x<width;++x, dest+=4)
5350  if (!stbi__readval(s,packet->channel,dest))
5351  return 0;
5352  break;
5353  }
5354 
5355  case 1://Pure RLE
5356  {
5357  int left=width, i;
5358 
5359  while (left>0) {
5360  stbi_uc count,value[4];
5361 
5362  count=stbi__get8(s);
5363  if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (pure read count)");
5364 
5365  if (count > left)
5366  count = (stbi_uc) left;
5367 
5368  if (!stbi__readval(s,packet->channel,value)) return 0;
5369 
5370  for(i=0; i<count; ++i,dest+=4)
5371  stbi__copyval(packet->channel,dest,value);
5372  left -= count;
5373  }
5374  }
5375  break;
5376 
5377  case 2: {//Mixed RLE
5378  int left=width;
5379  while (left>0) {
5380  int count = stbi__get8(s), i;
5381  if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (mixed read count)");
5382 
5383  if (count >= 128) { // Repeated
5384  stbi_uc value[4];
5385  int i;
5386 
5387  if (count==128)
5388  count = stbi__get16be(s);
5389  else
5390  count -= 127;
5391  if (count > left)
5392  return stbi__errpuc("bad file","scanline overrun");
5393 
5394  if (!stbi__readval(s,packet->channel,value))
5395  return 0;
5396 
5397  for(i=0;i<count;++i, dest += 4)
5398  stbi__copyval(packet->channel,dest,value);
5399  } else { // Raw
5400  ++count;
5401  if (count>left) return stbi__errpuc("bad file","scanline overrun");
5402 
5403  for(i=0;i<count;++i, dest+=4)
5404  if (!stbi__readval(s,packet->channel,dest))
5405  return 0;
5406  }
5407  left-=count;
5408  }
5409  break;
5410  }
5411  }
5412  }
5413  }
5414 
5415  return result;
5416 }
5417 
5418 static stbi_uc *stbi__pic_load(stbi__context *s,int *px,int *py,int *comp,int req_comp)
5419 {
5420  stbi_uc *result;
5421  int i, x,y;
5422 
5423  for (i=0; i<92; ++i)
5424  stbi__get8(s);
5425 
5426  x = stbi__get16be(s);
5427  y = stbi__get16be(s);
5428  if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (pic header)");
5429  if ((1 << 28) / x < y) return stbi__errpuc("too large", "Image too large to decode");
5430 
5431  stbi__get32be(s); /* skip `ratio' */
5432  stbi__get16be(s); /* skip `fields' */
5433  stbi__get16be(s); /* skip `pad' */
5434 
5435  /* intermediate buffer is RGBA */
5436  result = (stbi_uc *) stbi__malloc(x*y*4);
5437  memset(result, 0xff, x*y*4);
5438 
5439  if (!stbi__pic_load_core(s,x,y,comp, result))
5440  {
5441  STBI_FREE(result);
5442  result=0;
5443  }
5444  *px = x;
5445  *py = y;
5446  if (req_comp == 0) req_comp = *comp;
5447  result=stbi__convert_format(result,4,req_comp,x,y);
5448 
5449  return result;
5450 }
5451 
5452 static int stbi__pic_test(stbi__context *s)
5453 {
5454  int r = stbi__pic_test_core(s);
5455  stbi__rewind(s);
5456  return r;
5457 }
5458 #endif
5459 
5460 /* *************************************************************************************************
5461  * GIF loader -- public domain by Jean-Marc Lienher -- simplified/shrunk by stb
5462  */
5463 
5464 #ifndef STBI_NO_GIF
5465 typedef struct
5466 {
5467  stbi__int16 prefix;
5468  stbi_uc first;
5469  stbi_uc suffix;
5470 } stbi__gif_lzw;
5471 
5472 typedef struct
5473 {
5474  int w,h;
5475  stbi_uc *out; /* output buffer (always 4 components) */
5476  int flags, bgindex, ratio, transparent, eflags;
5477  stbi_uc pal[256][4];
5478  stbi_uc lpal[256][4];
5479  stbi__gif_lzw codes[4096];
5481  int parse, step;
5482  int lflags;
5483  int start_x, start_y;
5484  int max_x, max_y;
5485  int cur_x, cur_y;
5486  int line_size;
5487 } stbi__gif;
5488 
5489 static int stbi__gif_test_raw(stbi__context *s)
5490 {
5491  int sz;
5492  if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8') return 0;
5493  sz = stbi__get8(s);
5494  if (sz != '9' && sz != '7') return 0;
5495  if (stbi__get8(s) != 'a') return 0;
5496  return 1;
5497 }
5498 
5499 static int stbi__gif_test(stbi__context *s)
5500 {
5501  int r = stbi__gif_test_raw(s);
5502  stbi__rewind(s);
5503  return r;
5504 }
5505 
5506 static void stbi__gif_parse_colortable(stbi__context *s, stbi_uc pal[256][4], int num_entries, int transp)
5507 {
5508  int i;
5509  for (i=0; i < num_entries; ++i) {
5510  pal[i][2] = stbi__get8(s);
5511  pal[i][1] = stbi__get8(s);
5512  pal[i][0] = stbi__get8(s);
5513  pal[i][3] = transp == i ? 0 : 255;
5514  }
5515 }
5516 
5517 static int stbi__gif_header(stbi__context *s, stbi__gif *g, int *comp, int is_info)
5518 {
5519  stbi_uc version;
5520  if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8')
5521  return stbi__err("not GIF", "Corrupt GIF");
5522 
5523  version = stbi__get8(s);
5524  if (version != '7' && version != '9') return stbi__err("not GIF", "Corrupt GIF");
5525  if (stbi__get8(s) != 'a') return stbi__err("not GIF", "Corrupt GIF");
5526 
5527  stbi__g_failure_reason = "";
5528  g->w = stbi__get16le(s);
5529  g->h = stbi__get16le(s);
5530  g->flags = stbi__get8(s);
5531  g->bgindex = stbi__get8(s);
5532  g->ratio = stbi__get8(s);
5533  g->transparent = -1;
5534 
5535  if (comp != 0) *comp = 4; /* can't actually tell whether it's 3 or 4 until we parse the comments */
5536 
5537  if (is_info) return 1;
5538 
5539  if (g->flags & 0x80)
5540  stbi__gif_parse_colortable(s,g->pal, 2 << (g->flags & 7), -1);
5541 
5542  return 1;
5543 }
5544 
5545 static int stbi__gif_info_raw(stbi__context *s, int *x, int *y, int *comp)
5546 {
5547  stbi__gif g;
5548  if (!stbi__gif_header(s, &g, comp, 1)) {
5549  stbi__rewind( s );
5550  return 0;
5551  }
5552  if (x) *x = g.w;
5553  if (y) *y = g.h;
5554  return 1;
5555 }
5556 
5557 static void stbi__out_gif_code(stbi__gif *g, stbi__uint16 code)
5558 {
5559  stbi_uc *p, *c;
5560 
5561  /* recurse to decode the prefixes, since the linked-list is backwards,
5562  * and working backwards through an interleaved image would be nasty
5563  */
5564  if (g->codes[code].prefix >= 0)
5565  stbi__out_gif_code(g, g->codes[code].prefix);
5566 
5567  if (g->cur_y >= g->max_y)
5568  return;
5569 
5570  p = &g->out[g->cur_x + g->cur_y];
5571  c = &g->color_table[g->codes[code].suffix * 4];
5572 
5573  if (c[3] >= 128) {
5574  p[0] = c[2];
5575  p[1] = c[1];
5576  p[2] = c[0];
5577  p[3] = c[3];
5578  }
5579  g->cur_x += 4;
5580 
5581  if (g->cur_x >= g->max_x) {
5582  g->cur_x = g->start_x;
5583  g->cur_y += g->step;
5584 
5585  while (g->cur_y >= g->max_y && g->parse > 0) {
5586  g->step = (1 << g->parse) * g->line_size;
5587  g->cur_y = g->start_y + (g->step >> 1);
5588  --g->parse;
5589  }
5590  }
5591 }
5592 
5593 static stbi_uc *stbi__process_gif_raster(stbi__context *s, stbi__gif *g)
5594 {
5595  stbi_uc lzw_cs;
5596  stbi__int32 len, code;
5597  stbi__uint32 first;
5598  stbi__int32 codesize, codemask, avail, oldcode, bits, valid_bits, clear;
5599  stbi__gif_lzw *p;
5600 
5601  lzw_cs = stbi__get8(s);
5602  if (lzw_cs > 12) return NULL;
5603  clear = 1 << lzw_cs;
5604  first = 1;
5605  codesize = lzw_cs + 1;
5606  codemask = (1 << codesize) - 1;
5607  bits = 0;
5608  valid_bits = 0;
5609  for (code = 0; code < clear; code++) {
5610  g->codes[code].prefix = -1;
5611  g->codes[code].first = (stbi_uc) code;
5612  g->codes[code].suffix = (stbi_uc) code;
5613  }
5614 
5615  /* support no starting clear code */
5616  avail = clear+2;
5617  oldcode = -1;
5618 
5619  len = 0;
5620  for(;;) {
5621  if (valid_bits < codesize) {
5622  if (len == 0) {
5623  len = stbi__get8(s); /* start new block */
5624  if (len == 0)
5625  return g->out;
5626  }
5627  --len;
5628  bits |= (stbi__int32) stbi__get8(s) << valid_bits;
5629  valid_bits += 8;
5630  } else {
5631  stbi__int32 code = bits & codemask;
5632  bits >>= codesize;
5633  valid_bits -= codesize;
5634  // @OPTIMIZE: is there some way we can accelerate the non-clear path?
5635  if (code == clear) { // clear code
5636  codesize = lzw_cs + 1;
5637  codemask = (1 << codesize) - 1;
5638  avail = clear + 2;
5639  oldcode = -1;
5640  first = 0;
5641  } else if (code == clear + 1) { // end of stream code
5642  stbi__skip(s, len);
5643  while ((len = stbi__get8(s)) > 0)
5644  stbi__skip(s,len);
5645  return g->out;
5646  } else if (code <= avail) {
5647  if (first) return stbi__errpuc("no clear code", "Corrupt GIF");
5648 
5649  if (oldcode >= 0) {
5650  p = &g->codes[avail++];
5651  if (avail > 4096) return stbi__errpuc("too many codes", "Corrupt GIF");
5652  p->prefix = (stbi__int16) oldcode;
5653  p->first = g->codes[oldcode].first;
5654  p->suffix = (code == avail) ? p->first : g->codes[code].first;
5655  } else if (code == avail)
5656  return stbi__errpuc("illegal code in raster", "Corrupt GIF");
5657 
5658  stbi__out_gif_code(g, (stbi__uint16) code);
5659 
5660  if ((avail & codemask) == 0 && avail <= 0x0FFF) {
5661  codesize++;
5662  codemask = (1 << codesize) - 1;
5663  }
5664 
5665  oldcode = code;
5666  } else {
5667  return stbi__errpuc("illegal code in raster", "Corrupt GIF");
5668  }
5669  }
5670  }
5671 }
5672 
5673 static void stbi__fill_gif_background(stbi__gif *g)
5674 {
5675  int i;
5676  stbi_uc *c = g->pal[g->bgindex];
5677  /* @OPTIMIZE: write a dword at a time */
5678  for (i = 0; i < g->w * g->h * 4; i += 4)
5679  {
5680  stbi_uc *p = &g->out[i];
5681  p[0] = c[2];
5682  p[1] = c[1];
5683  p[2] = c[0];
5684  p[3] = c[3];
5685  }
5686 }
5687 
5688 /* this function is designed to support animated gifs, although stb_image doesn't support it */
5689 static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, int req_comp)
5690 {
5691  int i;
5692  stbi_uc *old_out = 0;
5693 
5694  if (g->out == 0) {
5695  if (!stbi__gif_header(s, g, comp,0))
5696  return 0;
5697 
5698  g->out = (stbi_uc *) stbi__malloc(4 * g->w * g->h);
5699  if (g->out == 0) return stbi__errpuc("outofmem", "Out of memory");
5700  stbi__fill_gif_background(g);
5701  } else {
5702  // animated-gif-only path
5703  if (((g->eflags & 0x1C) >> 2) == 3) {
5704  old_out = g->out;
5705  g->out = (stbi_uc *) stbi__malloc(4 * g->w * g->h);
5706  if (g->out == 0) return stbi__errpuc("outofmem", "Out of memory");
5707  memcpy(g->out, old_out, g->w*g->h*4);
5708  }
5709  }
5710 
5711  for (;;) {
5712  switch (stbi__get8(s)) {
5713  case 0x2C: /* Image Descriptor */
5714  {
5715  stbi__int32 x, y, w, h;
5716  stbi_uc *o;
5717 
5718  x = stbi__get16le(s);
5719  y = stbi__get16le(s);
5720  w = stbi__get16le(s);
5721  h = stbi__get16le(s);
5722  if (((x + w) > (g->w)) || ((y + h) > (g->h)))
5723  return stbi__errpuc("bad Image Descriptor", "Corrupt GIF");
5724 
5725  g->line_size = g->w * 4;
5726  g->start_x = x * 4;
5727  g->start_y = y * g->line_size;
5728  g->max_x = g->start_x + w * 4;
5729  g->max_y = g->start_y + h * g->line_size;
5730  g->cur_x = g->start_x;
5731  g->cur_y = g->start_y;
5732 
5733  g->lflags = stbi__get8(s);
5734 
5735  if (g->lflags & 0x40) {
5736  g->step = 8 * g->line_size; // first interlaced spacing
5737  g->parse = 3;
5738  } else {
5739  g->step = g->line_size;
5740  g->parse = 0;
5741  }
5742 
5743  if (g->lflags & 0x80) {
5744  stbi__gif_parse_colortable(s,g->lpal, 2 << (g->lflags & 7), g->eflags & 0x01 ? g->transparent : -1);
5745  g->color_table = (stbi_uc *) g->lpal;
5746  } else if (g->flags & 0x80) {
5747  for (i=0; i < 256; ++i) // @OPTIMIZE: stbi__jpeg_reset only the previous transparent
5748  g->pal[i][3] = 255;
5749  if (g->transparent >= 0 && (g->eflags & 0x01))
5750  g->pal[g->transparent][3] = 0;
5751  g->color_table = (stbi_uc *) g->pal;
5752  } else
5753  return stbi__errpuc("missing color table", "Corrupt GIF");
5754 
5755  o = stbi__process_gif_raster(s, g);
5756  if (o == NULL) return NULL;
5757 
5758  if (req_comp && req_comp != 4)
5759  o = stbi__convert_format(o, 4, req_comp, g->w, g->h);
5760  return o;
5761  }
5762 
5763  case 0x21: // Comment Extension.
5764  {
5765  int len;
5766  if (stbi__get8(s) == 0xF9) { // Graphic Control Extension.
5767  len = stbi__get8(s);
5768  if (len == 4) {
5769  g->eflags = stbi__get8(s);
5770  stbi__get16le(s); // delay
5771  g->transparent = stbi__get8(s);
5772  } else {
5773  stbi__skip(s, len);
5774  break;
5775  }
5776  }
5777  while ((len = stbi__get8(s)) != 0)
5778  stbi__skip(s, len);
5779  break;
5780  }
5781 
5782  case 0x3B: // gif stream termination code
5783  return (stbi_uc *) s; // using '1' causes warning on some compilers
5784 
5785  default:
5786  return stbi__errpuc("unknown code", "Corrupt GIF");
5787  }
5788  }
5789 }
5790 
5791 static stbi_uc *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
5792 {
5793  stbi_uc *u = 0;
5794  stbi__gif g;
5795  memset(&g, 0, sizeof(g));
5796 
5797  u = stbi__gif_load_next(s, &g, comp, req_comp);
5798  if (u == (stbi_uc *) s) u = 0; // end of animated gif marker
5799  if (u) {
5800  *x = g.w;
5801  *y = g.h;
5802  }
5803 
5804  return u;
5805 }
5806 
5807 static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp)
5808 {
5809  return stbi__gif_info_raw(s,x,y,comp);
5810 }
5811 #endif
5812 
5813 // *************************************************************************************************
5814 // Radiance RGBE HDR loader
5815 // originally by Nicolas Schulz
5816 #ifndef STBI_NO_HDR
5817 static int stbi__hdr_test_core(stbi__context *s)
5818 {
5819  const char *signature = "#?RADIANCE\n";
5820  int i;
5821  for (i=0; signature[i]; ++i)
5822  if (stbi__get8(s) != signature[i])
5823  return 0;
5824  return 1;
5825 }
5826 
5827 static int stbi__hdr_test(stbi__context* s)
5828 {
5829  int r = stbi__hdr_test_core(s);
5830  stbi__rewind(s);
5831  return r;
5832 }
5833 
5834 #define STBI__HDR_BUFLEN 1024
5835 static char *stbi__hdr_gettoken(stbi__context *z, char *buffer)
5836 {
5837  int len=0;
5838  char c = '\0';
5839 
5840  c = (char) stbi__get8(z);
5841 
5842  while (!stbi__at_eof(z) && c != '\n') {
5843  buffer[len++] = c;
5844  if (len == STBI__HDR_BUFLEN-1) {
5845  // flush to end of line
5846  while (!stbi__at_eof(z) && stbi__get8(z) != '\n')
5847  ;
5848  break;
5849  }
5850  c = (char) stbi__get8(z);
5851  }
5852 
5853  buffer[len] = 0;
5854  return buffer;
5855 }
5856 
5857 static void stbi__hdr_convert(float *output, stbi_uc *input, int req_comp)
5858 {
5859  if ( input[3] != 0 ) {
5860  float f1;
5861  // Exponent
5862  f1 = (float) ldexp(1.0f, input[3] - (int)(128 + 8));
5863  if (req_comp <= 2)
5864  output[0] = (input[0] + input[1] + input[2]) * f1 / 3;
5865  else {
5866  output[0] = input[0] * f1;
5867  output[1] = input[1] * f1;
5868  output[2] = input[2] * f1;
5869  }
5870  if (req_comp == 2) output[1] = 1;
5871  if (req_comp == 4) output[3] = 1;
5872  } else {
5873  switch (req_comp) {
5874  case 4: output[3] = 1; /* fallthrough */
5875  case 3: output[0] = output[1] = output[2] = 0;
5876  break;
5877  case 2: output[1] = 1; /* fallthrough */
5878  case 1: output[0] = 0;
5879  break;
5880  }
5881  }
5882 }
5883 
5884 static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
5885 {
5886  char buffer[STBI__HDR_BUFLEN];
5887  char *token;
5888  int valid = 0;
5889  int width, height;
5890  stbi_uc *scanline;
5891  float *hdr_data;
5892  int len;
5893  unsigned char count, value;
5894  int i, j, k, c1,c2, z;
5895 
5896 
5897  // Check identifier
5898  if (strcmp(stbi__hdr_gettoken(s,buffer), "#?RADIANCE") != 0)
5899  return stbi__errpf("not HDR", "Corrupt HDR image");
5900 
5901  // Parse header
5902  for(;;) {
5903  token = stbi__hdr_gettoken(s,buffer);
5904  if (token[0] == 0) break;
5905  if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1;
5906  }
5907 
5908  if (!valid) return stbi__errpf("unsupported format", "Unsupported HDR format");
5909 
5910  // Parse width and height
5911  // can't use sscanf() if we're not using stdio!
5912  token = stbi__hdr_gettoken(s,buffer);
5913  if (strncmp(token, "-Y ", 3)) return stbi__errpf("unsupported data layout", "Unsupported HDR format");
5914  token += 3;
5915  height = (int) strtol(token, &token, 10);
5916  while (*token == ' ') ++token;
5917  if (strncmp(token, "+X ", 3)) return stbi__errpf("unsupported data layout", "Unsupported HDR format");
5918  token += 3;
5919  width = (int) strtol(token, NULL, 10);
5920 
5921  *x = width;
5922  *y = height;
5923 
5924  if (comp) *comp = 3;
5925  if (req_comp == 0) req_comp = 3;
5926 
5927  // Read data
5928  hdr_data = (float *) stbi__malloc(height * width * req_comp * sizeof(float));
5929 
5930  // Load image data
5931  // image data is stored as some number of sca
5932  if ( width < 8 || width >= 32768) {
5933  // Read flat data
5934  for (j=0; j < height; ++j) {
5935  for (i=0; i < width; ++i) {
5936  stbi_uc rgbe[4];
5937  main_decode_loop:
5938  stbi__getn(s, rgbe, 4);
5939  stbi__hdr_convert(hdr_data + j * width * req_comp + i * req_comp, rgbe, req_comp);
5940  }
5941  }
5942  } else {
5943  /* Read RLE-encoded data */
5944  scanline = NULL;
5945 
5946  for (j = 0; j < height; ++j) {
5947  c1 = stbi__get8(s);
5948  c2 = stbi__get8(s);
5949  len = stbi__get8(s);
5950  if (c1 != 2 || c2 != 2 || (len & 0x80)) {
5951  /* not run-length encoded, so we have to
5952  * actually use THIS data as a decoded
5953  * pixel (note this can't be a valid pixel
5954  * --one of RGB must be >= 128) */
5955  stbi_uc rgbe[4];
5956  rgbe[0] = (stbi_uc) c1;
5957  rgbe[1] = (stbi_uc) c2;
5958  rgbe[2] = (stbi_uc) len;
5959  rgbe[3] = (stbi_uc) stbi__get8(s);
5960  stbi__hdr_convert(hdr_data, rgbe, req_comp);
5961  i = 1;
5962  j = 0;
5963  STBI_FREE(scanline);
5964  goto main_decode_loop; // yes, this makes no sense
5965  }
5966  len <<= 8;
5967  len |= stbi__get8(s);
5968  if (len != width) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("invalid decoded scanline length", "corrupt HDR"); }
5969  if (scanline == NULL) scanline = (stbi_uc *) stbi__malloc(width * 4);
5970 
5971  for (k = 0; k < 4; ++k) {
5972  i = 0;
5973  while (i < width) {
5974  count = stbi__get8(s);
5975  if (count > 128) {
5976  // Run
5977  value = stbi__get8(s);
5978  count -= 128;
5979  for (z = 0; z < count; ++z)
5980  scanline[i++ * 4 + k] = value;
5981  } else {
5982  // Dump
5983  for (z = 0; z < count; ++z)
5984  scanline[i++ * 4 + k] = stbi__get8(s);
5985  }
5986  }
5987  }
5988  for (i=0; i < width; ++i)
5989  stbi__hdr_convert(hdr_data+(j*width + i)*req_comp, scanline + i*4, req_comp);
5990  }
5991  STBI_FREE(scanline);
5992  }
5993 
5994  return hdr_data;
5995 }
5996 
5997 static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp)
5998 {
5999  char buffer[STBI__HDR_BUFLEN];
6000  char *token;
6001  int valid = 0;
6002 
6003  if (strcmp(stbi__hdr_gettoken(s,buffer), "#?RADIANCE") != 0) {
6004  stbi__rewind( s );
6005  return 0;
6006  }
6007 
6008  for(;;) {
6009  token = stbi__hdr_gettoken(s,buffer);
6010  if (token[0] == 0) break;
6011  if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1;
6012  }
6013 
6014  if (!valid) {
6015  stbi__rewind( s );
6016  return 0;
6017  }
6018  token = stbi__hdr_gettoken(s,buffer);
6019  if (strncmp(token, "-Y ", 3)) {
6020  stbi__rewind( s );
6021  return 0;
6022  }
6023  token += 3;
6024  *y = (int) strtol(token, &token, 10);
6025  while (*token == ' ') ++token;
6026  if (strncmp(token, "+X ", 3)) {
6027  stbi__rewind( s );
6028  return 0;
6029  }
6030  token += 3;
6031  *x = (int) strtol(token, NULL, 10);
6032  *comp = 3;
6033  return 1;
6034 }
6035 #endif /* STBI_NO_HDR */
6036 
6037 #ifndef STBI_NO_BMP
6038 static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp)
6039 {
6040  int hsz;
6041  if (stbi__get8(s) != 'B' || stbi__get8(s) != 'M') {
6042  stbi__rewind( s );
6043  return 0;
6044  }
6045  stbi__skip(s,12);
6046  hsz = stbi__get32le(s);
6047  if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108 && hsz != 124) {
6048  stbi__rewind( s );
6049  return 0;
6050  }
6051  if (hsz == 12) {
6052  *x = stbi__get16le(s);
6053  *y = stbi__get16le(s);
6054  } else {
6055  *x = stbi__get32le(s);
6056  *y = stbi__get32le(s);
6057  }
6058  if (stbi__get16le(s) != 1) {
6059  stbi__rewind( s );
6060  return 0;
6061  }
6062  *comp = stbi__get16le(s) / 8;
6063  return 1;
6064 }
6065 #endif
6066 
6067 #ifndef STBI_NO_PSD
6068 static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp)
6069 {
6070  int channelCount;
6071  if (stbi__get32be(s) != 0x38425053) {
6072  stbi__rewind( s );
6073  return 0;
6074  }
6075  if (stbi__get16be(s) != 1) {
6076  stbi__rewind( s );
6077  return 0;
6078  }
6079  stbi__skip(s, 6);
6080  channelCount = stbi__get16be(s);
6081  if (channelCount < 0 || channelCount > 16) {
6082  stbi__rewind( s );
6083  return 0;
6084  }
6085  *y = stbi__get32be(s);
6086  *x = stbi__get32be(s);
6087  if (stbi__get16be(s) != 8) {
6088  stbi__rewind( s );
6089  return 0;
6090  }
6091  if (stbi__get16be(s) != 3) {
6092  stbi__rewind( s );
6093  return 0;
6094  }
6095  *comp = 4;
6096  return 1;
6097 }
6098 #endif
6099 
6100 #ifndef STBI_NO_PIC
6101 static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp)
6102 {
6103  int act_comp=0,num_packets=0,chained;
6104  stbi__pic_packet packets[10];
6105 
6106  stbi__skip(s, 92);
6107 
6108  *x = stbi__get16be(s);
6109  *y = stbi__get16be(s);
6110  if (stbi__at_eof(s)) return 0;
6111  if ( (*x) != 0 && (1 << 28) / (*x) < (*y)) {
6112  stbi__rewind( s );
6113  return 0;
6114  }
6115 
6116  stbi__skip(s, 8);
6117 
6118  do {
6119  stbi__pic_packet *packet;
6120 
6121  if (num_packets==sizeof(packets)/sizeof(packets[0]))
6122  return 0;
6123 
6124  packet = &packets[num_packets++];
6125  chained = stbi__get8(s);
6126  packet->size = stbi__get8(s);
6127  packet->type = stbi__get8(s);
6128  packet->channel = stbi__get8(s);
6129  act_comp |= packet->channel;
6130 
6131  if (stbi__at_eof(s)) {
6132  stbi__rewind( s );
6133  return 0;
6134  }
6135  if (packet->size != 8) {
6136  stbi__rewind( s );
6137  return 0;
6138  }
6139  } while (chained);
6140 
6141  *comp = (act_comp & 0x10 ? 4 : 3);
6142 
6143  return 1;
6144 }
6145 #endif
6146 
6147 // *************************************************************************************************
6148 // Portable Gray Map and Portable Pixel Map loader
6149 // by Ken Miller
6150 //
6151 // PGM: http://netpbm.sourceforge.net/doc/pgm.html
6152 // PPM: http://netpbm.sourceforge.net/doc/ppm.html
6153 //
6154 // Known limitations:
6155 // Does not support comments in the header section
6156 // Does not support ASCII image data (formats P2 and P3)
6157 // Does not support 16-bit-per-channel
6158 
6159 #ifndef STBI_NO_PNM
6160 
6161 static int stbi__pnm_test(stbi__context *s)
6162 {
6163  char p, t;
6164  p = (char) stbi__get8(s);
6165  t = (char) stbi__get8(s);
6166  if (p != 'P' || (t != '5' && t != '6')) {
6167  stbi__rewind( s );
6168  return 0;
6169  }
6170  return 1;
6171 }
6172 
6173 static stbi_uc *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
6174 {
6175  stbi_uc *out;
6176  if (!stbi__pnm_info(s, (int *)&s->img_x, (int *)&s->img_y, (int *)&s->img_n))
6177  return 0;
6178  *x = s->img_x;
6179  *y = s->img_y;
6180  *comp = s->img_n;
6181 
6182  out = (stbi_uc *) stbi__malloc(s->img_n * s->img_x * s->img_y);
6183  if (!out) return stbi__errpuc("outofmem", "Out of memory");
6184  stbi__getn(s, out, s->img_n * s->img_x * s->img_y);
6185 
6186  if (req_comp && req_comp != s->img_n) {
6187  out = stbi__convert_format(out, s->img_n, req_comp, s->img_x, s->img_y);
6188  if (out == NULL) return out; // stbi__convert_format frees input on failure
6189  }
6190  return out;
6191 }
6192 
/* Returns 1 if c is space, tab, newline, vertical tab, form feed or
 * carriage return; 0 otherwise. */
static int stbi__pnm_isspace(char c)
{
   switch (c) {
      case ' ':
      case '\t':
      case '\n':
      case '\v':
      case '\f':
      case '\r':
         return 1;
      default:
         return 0;
   }
}
6197 
6198 static void stbi__pnm_skip_whitespace(stbi__context *s, char *c)
6199 {
6200  while (!stbi__at_eof(s) && stbi__pnm_isspace(*c))
6201  *c = (char) stbi__get8(s);
6202 }
6203 
/* Returns 1 if c is an ASCII decimal digit '0'..'9', 0 otherwise. */
static int stbi__pnm_isdigit(char c)
{
   if (c < '0' || c > '9')
      return 0;
   return 1;
}
6208 
6209 static int stbi__pnm_getinteger(stbi__context *s, char *c)
6210 {
6211  int value = 0;
6212 
6213  while (!stbi__at_eof(s) && stbi__pnm_isdigit(*c)) {
6214  value = value*10 + (*c - '0');
6215  *c = (char) stbi__get8(s);
6216  }
6217 
6218  return value;
6219 }
6220 
6221 static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp)
6222 {
6223  int maxv;
6224  char c, p, t;
6225 
6226  stbi__rewind( s );
6227 
6228  // Get identifier
6229  p = (char) stbi__get8(s);
6230  t = (char) stbi__get8(s);
6231  if (p != 'P' || (t != '5' && t != '6')) {
6232  stbi__rewind( s );
6233  return 0;
6234  }
6235 
6236  *comp = (t == '6') ? 3 : 1; // '5' is 1-component .pgm; '6' is 3-component .ppm
6237 
6238  c = (char) stbi__get8(s);
6239  stbi__pnm_skip_whitespace(s, &c);
6240 
6241  *x = stbi__pnm_getinteger(s, &c); // read width
6242  stbi__pnm_skip_whitespace(s, &c);
6243 
6244  *y = stbi__pnm_getinteger(s, &c); // read height
6245  stbi__pnm_skip_whitespace(s, &c);
6246 
6247  maxv = stbi__pnm_getinteger(s, &c); // read max value
6248 
6249  if (maxv > 255)
6250  return stbi__err("max value > 255", "PPM image not 8-bit");
6251  else
6252  return 1;
6253 }
6254 #endif
6255 
6256 static int stbi__info_main(stbi__context *s, int *x, int *y, int *comp)
6257 {
6258  #ifndef STBI_NO_JPEG
6259  if (stbi__jpeg_info(s, x, y, comp)) return 1;
6260  #endif
6261 
6262  #ifndef STBI_NO_PNG
6263  if (stbi__png_info(s, x, y, comp)) return 1;
6264  #endif
6265 
6266  #ifndef STBI_NO_GIF
6267  if (stbi__gif_info(s, x, y, comp)) return 1;
6268  #endif
6269 
6270  #ifndef STBI_NO_BMP
6271  if (stbi__bmp_info(s, x, y, comp)) return 1;
6272  #endif
6273 
6274  #ifndef STBI_NO_PSD
6275  if (stbi__psd_info(s, x, y, comp)) return 1;
6276  #endif
6277 
6278  #ifndef STBI_NO_PIC
6279  if (stbi__pic_info(s, x, y, comp)) return 1;
6280  #endif
6281 
6282  #ifndef STBI_NO_PNM
6283  if (stbi__pnm_info(s, x, y, comp)) return 1;
6284  #endif
6285 
6286  #ifndef STBI_NO_HDR
6287  if (stbi__hdr_info(s, x, y, comp)) return 1;
6288  #endif
6289 
6290  // test tga last because it's a crappy test!
6291  #ifndef STBI_NO_TGA
6292  if (stbi__tga_info(s, x, y, comp))
6293  return 1;
6294  #endif
6295  return stbi__err("unknown image type", "Image not of any known type, or corrupt");
6296 }
6297 
6298 #ifndef STBI_NO_STDIO
6299 STBIDEF int stbi_info(char const *filename, int *x, int *y, int *comp)
6300 {
6301  FILE *f = stbi__fopen(filename, "rb");
6302  int result;
6303  if (!f) return stbi__err("can't fopen", "Unable to open file");
6304  result = stbi_info_from_file(f, x, y, comp);
6305  fclose(f);
6306  return result;
6307 }
6308 
6309 STBIDEF int stbi_info_from_file(FILE *f, int *x, int *y, int *comp)
6310 {
6311  int r;
6312  stbi__context s;
6313  long pos = ftell(f);
6314  stbi__start_file(&s, f);
6315  r = stbi__info_main(&s,x,y,comp);
6316  fseek(f,pos,SEEK_SET);
6317  return r;
6318 }
6319 #endif /* !STBI_NO_STDIO */
6320 
6321 STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp)
6322 {
6323  stbi__context s;
6324  stbi__start_mem(&s,buffer,len);
6325  return stbi__info_main(&s,x,y,comp);
6326 }
6327 
6328 STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *c, void *user, int *x, int *y, int *comp)
6329 {
6330  stbi__context s;
6331  stbi__start_callbacks(&s, (stbi_io_callbacks *) c, user);
6332  return stbi__info_main(&s,x,y,comp);
6333 }
6334 
6335 #endif /* STB_IMAGE_IMPLEMENTATION */
6336 
6337 /*
6338  revision history:
6339  2.06 (2015-04-19) fix bug where PSD returns wrong '*comp' value
6340  2.05 (2015-04-19) fix bug in progressive JPEG handling, fix warning
6341  2.04 (2015-04-15) try to re-enable SIMD on MinGW 64-bit
6342  2.03 (2015-04-12) extra corruption checking (mmozeiko)
6343  stbi_set_flip_vertically_on_load (nguillemot)
6344  fix NEON support; fix mingw support
6345  2.02 (2015-01-19) fix incorrect assert, fix warning
6346  2.01 (2015-01-17) fix various warnings; suppress SIMD on gcc 32-bit without -msse2
6347  2.00b (2014-12-25) fix STBI_MALLOC in progressive JPEG
6348  2.00 (2014-12-25) optimize JPG, including x86 SSE2 & NEON SIMD (ryg)
6349  progressive JPEG (stb)
6350  PGM/PPM support (Ken Miller)
6351  STBI_MALLOC,STBI_REALLOC,STBI_FREE
6352  GIF bugfix -- seemingly never worked
6353  STBI_NO_*, STBI_ONLY_*
6354  1.48 (2014-12-14) fix incorrectly-named assert()
6355  1.47 (2014-12-14) 1/2/4-bit PNG support, both direct and paletted (Omar Cornut & stb)
6356  optimize PNG (ryg)
6357  fix bug in interlaced PNG with user-specified channel count (stb)
6358  1.46 (2014-08-26)
6359  fix broken tRNS chunk (colorkey-style transparency) in non-paletted PNG
6360  1.45 (2014-08-16)
6361  fix MSVC-ARM internal compiler error by wrapping malloc
6362  1.44 (2014-08-07)
6363  various warning fixes from Ronny Chevalier
6364  1.43 (2014-07-15)
6365  fix MSVC-only compiler problem in code changed in 1.42
6366  1.42 (2014-07-09)
6367  don't define _CRT_SECURE_NO_WARNINGS (affects user code)
6368  fixes to stbi__cleanup_jpeg path
6369  added STBI_ASSERT to avoid requiring assert.h
6370  1.41 (2014-06-25)
6371  fix search&replace from 1.36 that messed up comments/error messages
6372  1.40 (2014-06-22)
6373  fix gcc struct-initialization warning
6374  1.39 (2014-06-15)
6375  fix to TGA optimization when req_comp != number of components in TGA;
6376  fix to GIF loading because BMP wasn't rewinding (whoops, no GIFs in my test suite)
6377  add support for BMP version 5 (more ignored fields)
6378  1.38 (2014-06-06)
6379  suppress MSVC warnings on integer casts truncating values
6380  fix accidental rename of 'skip' field of I/O
6381  1.37 (2014-06-04)
6382  remove duplicate typedef
6383  1.36 (2014-06-03)
6384  convert to header file single-file library
6385  if de-iphone isn't set, load iphone images color-swapped instead of returning NULL
6386  1.35 (2014-05-27)
6387  various warnings
6388  fix broken STBI_SIMD path
6389  fix bug where stbi_load_from_file no longer left file pointer in correct place
6390  fix broken non-easy path for 32-bit BMP (possibly never used)
6391  TGA optimization by Arseny Kapoulkine
6392  1.34 (unknown)
6393  use STBI_NOTUSED in stbi__resample_row_generic(), fix one more leak in tga failure case
6394  1.33 (2011-07-14)
6395  make stbi_is_hdr work in STBI_NO_HDR (as specified), minor compiler-friendly improvements
6396  1.32 (2011-07-13)
6397  support for "info" function for all supported filetypes (SpartanJ)
6398  1.31 (2011-06-20)
6399  a few more leak fixes, bug in PNG handling (SpartanJ)
6400  1.30 (2011-06-11)
6401  added ability to load files via callbacks to accommodate custom input streams (Ben Wenger)
6402  removed deprecated format-specific test/load functions
6403  removed support for installable file formats (stbi_loader) -- would have been broken for IO callbacks anyway
6404  error cases in bmp and tga give messages and don't leak (Raymond Barbiero, grisha)
6405  fix inefficiency in decoding 32-bit BMP (David Woo)
6406  1.29 (2010-08-16)
6407  various warning fixes from Aurelien Pocheville
6408  1.28 (2010-08-01)
6409  fix bug in GIF palette transparency (SpartanJ)
6410  1.27 (2010-08-01)
6411  cast-to-stbi_uc to fix warnings
6412  1.26 (2010-07-24)
6413  fix bug in file buffering for PNG reported by SpartanJ
6414  1.25 (2010-07-17)
6415  refix trans_data warning (Won Chun)
6416  1.24 (2010-07-12)
6417  perf improvements reading from files on platforms with lock-heavy fgetc()
6418  minor perf improvements for jpeg
6419  deprecated type-specific functions so we'll get feedback if they're needed
6420  attempt to fix trans_data warning (Won Chun)
6421  1.23 fixed bug in iPhone support
6422  1.22 (2010-07-10)
6423  removed image *writing* support
6424  stbi_info support from Jetro Lauha
6425  GIF support from Jean-Marc Lienher
6426  iPhone PNG-extensions from James Brown
6427  warning-fixes from Nicolas Schulz and Janez Zemva (i.e. Janez Žemva)
6428  1.21 fix use of 'stbi_uc' in header (reported by jon blow)
6429  1.20 added support for Softimage PIC, by Tom Seddon
6430  1.19 bug in interlaced PNG corruption check (found by ryg)
6431  1.18 (2008-08-02)
6432  fix a threading bug (local mutable static)
6433  1.17 support interlaced PNG
6434  1.16 major bugfix - stbi__convert_format converted one too many pixels
6435  1.15 initialize some fields for thread safety
6436  1.14 fix threadsafe conversion bug
6437  header-file-only version (#define STBI_HEADER_FILE_ONLY before including)
6438  1.13 threadsafe
6439  1.12 const qualifiers in the API
6440  1.11 Support installable IDCT, colorspace conversion routines
6441  1.10 Fixes for 64-bit (don't use "unsigned long")
6442  optimized upsampling by Fabian "ryg" Giesen
6443  1.09 Fix format-conversion for PSD code (bad global variables!)
6444  1.08 Thatcher Ulrich's PSD code integrated by Nicolas Schulz
6445  1.07 attempt to fix C++ warning/errors again
6446  1.06 attempt to fix C++ warning/errors again
6447  1.05 fix TGA loading to return correct *comp and use good luminance calc
6448  1.04 default float alpha is 1, not 255; use 'void *' for stbi_image_free
6449  1.03 bugfixes to STBI_NO_STDIO, STBI_NO_HDR
6450  1.02 support for (subset of) HDR files, float interface for preferred access to them
6451  1.01 fix bug: possible bug in handling right-side up bmps... not sure
6452  fix bug: the stbi__bmp_load() and stbi__tga_load() functions didn't work at all
6453  1.00 interface to zlib that skips zlib header
6454  0.99 correct handling of alpha in palette
6455  0.98 TGA loader by lonesock; dynamically add loaders (untested)
6456  0.97 jpeg errors on too large a file; also catch another malloc failure
6457  0.96 fix detection of invalid v value - particleman@mollyrocket forum
6458  0.95 during header scan, seek to markers in case of padding
6459  0.94 STBI_NO_STDIO to disable stdio usage; rename all #defines the same
6460  0.93 handle jpegtran output; verbose errors
6461  0.92 read 4,8,16,24,32-bit BMP files of several formats
6462  0.91 output 24-bit Windows 3.0 BMP files
6463  0.90 fix a few more warnings; bump version number to approach 1.0
6464  0.61 bugfixes due to Marc LeBlanc, Christopher Lloyd
6465  0.60 fix compiling as c++
6466  0.59 fix warnings: merge Dave Moore's -Wall fixes
6467  0.58 fix bug: zlib uncompressed mode len/nlen was wrong endian
6468  0.57 fix bug: jpg last huffman symbol before marker was >9 bits but less than 16 available
6469  0.56 fix bug: zlib uncompressed mode len vs. nlen
6470  0.55 fix bug: restart_interval not initialized to 0
6471  0.54 allow NULL for 'int *comp'
6472  0.53 fix bug in png 3->4; speedup png decoding
6473  0.52 png handles req_comp=3,4 directly; minor cleanup; jpeg comments
6474  0.51 obey req_comp requests, 1-component jpegs return as 1-component,
6475  on 'test' only check type, not whether we support this variant
6476  0.50 (2006-11-19)
6477  first released version
6478 */
STBIDEF stbi_uc * stbi_load_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
STBIDEF int stbi_is_hdr(char const *filename)
#define ftell
Definition: file_stream_transforms.h:54
const GLint * first
Definition: glext.h:6478
STBIDEF char * stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header)
GLenum mode
Definition: glext.h:6857
GLuint GLfloat * val
Definition: glext.h:7847
static const unsigned int color_table[]
Definition: console.c:49
GLboolean GLenum GLenum GLvoid * values
Definition: glext.h:6318
GLuint buffer
Definition: glext.h:6555
int strncmp(const char *s1, const char *s2, size_t n)
Definition: compat_ctype.c:179
f32 py
Definition: gx_regdef.h:5101
GLenum target
Definition: glext.h:7982
Definition: stb_image.h:396
GLfixed GLfixed x2
Definition: glsym_gl.h:1051
const GLfloat * tc
Definition: glext.h:9832
Definition: libretro.h:2275
GLdouble GLdouble GLdouble r
Definition: glext.h:6406
GLuint res
Definition: glext.h:10520
GLdouble GLdouble t
Definition: glext.h:6398
static overlayled_t * cur
Definition: led_overlay.c:18
GLenum GLsizei len
Definition: glext.h:7389
STBIDEF void stbi_hdr_to_ldr_scale(float scale)
Definition: stb_image.h:398
GLsizeiptr size
Definition: glext.h:6559
STBIDEF int stbi_info(char const *filename, int *x, int *y, int *comp)
GLint limit
Definition: glext.h:11233
#define SEEK_CUR
Definition: zconf.h:439
GLfloat f
Definition: glext.h:8207
const portMappingElt code
Definition: portlistingparse.c:17
STBIDEF char * stbi_zlib_decode_malloc(const char *buffer, int len, int *outlen)
GLenum GLuint id
Definition: glext.h:6233
STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, const char *ibuffer, int ilen)
STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen)
GLenum GLenum GLenum GLenum GLenum scale
Definition: glext.h:9939
#define next(ls)
Definition: llex.c:32
GLdouble s
Definition: glext.h:6390
struct passwd out
Definition: missing_libc_functions.c:51
GLdouble GLdouble z
Definition: glext.h:6514
STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user)
typedef void(__stdcall *PFN_DESTRUCTION_CALLBACK)(void *pData)
#define exp(a)
Definition: math.h:32
GLenum GLenum GLenum input
Definition: glext.h:9938
Definition: ibxm.h:9
static bool read(ElfFile &file, const std::string &filename)
Definition: main.cpp:146
STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp)
GLsizei GLsizei GLenum GLenum const GLvoid * data
Definition: glext.h:6303
STBIDEF int stbi_is_hdr_from_file(FILE *f)
const GLubyte * c
Definition: glext.h:9812
GLboolean GLboolean GLboolean b
Definition: glext.h:6844
GLuint GLuint GLsizei count
Definition: glext.h:6292
float4 p3
Definition: remote.h:1
int strcmp(const char *s1, const char *s2)
Definition: compat_ctype.c:116
GLuint GLuint num
Definition: glext.h:10525
GLfloat GLfloat GLfloat v2
Definition: glext.h:6703
STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply)
STBIDEF stbi_uc * stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
#define NULL
Pointer to 0.
Definition: gctypes.h:65
GLenum type
Definition: glext.h:6233
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro pixld1_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl mov asr adds SRC_WIDTH_FIXED bpl add asl else error unsupported endif endm macro pixld2_s mem_operand if mov asr add asl add asl mov asr sub UNIT_X add asl mov asr add asl add asl mov asr add UNIT_X add asl else pixld1_s mem_operand pixld1_s mem_operand endif endm macro pixld0_s mem_operand if asr adds SRC_WIDTH_FIXED bpl add asl elseif asr adds SRC_WIDTH_FIXED bpl add asl endif endm macro pixld_s_internal mem_operand if mem_operand pixld2_s mem_operand pixdeinterleave basereg elseif mem_operand elseif mem_operand elseif mem_operand elseif mem_operand pixld0_s mem_operand else pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else pixld0_s mem_operand pixld0_s mem_operand endif elseif mem_operand else error unsupported mem_operand if bpp mem_operand endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif 
if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld [DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld if[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro fetch_mask_pixblock pixld mask_basereg pixblock_size MASK endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp local skip1(dst_w_bpp<=(lowbit *8)) &&((lowbit *8)<(pixblock_size *dst_w_bpp)) .if lowbit< 16 tst DST_R
Definition: pixman-arm-neon-asm.h:469
#define fclose
Definition: file_stream_transforms.h:53
version
Definition: setup.py:6
#define SEEK_SET
Definition: zconf.h:438
STBIDEF float * stbi_loadf_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
signed short int16_t
Definition: stdint.h:122
float4 p1
Definition: notHere.h:1
#define float2fixed(x)
Definition: rjpeg.c:2185
GLfloat bias
Definition: glext.h:8812
STBIDEF void stbi_ldr_to_hdr_gamma(float gamma)
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp
Definition: pixman-arm-neon-asm.h:139
GLint GLint GLint GLint GLint GLint y
Definition: glext.h:6295
#define feof
Definition: file_stream_transforms.h:64
Definition: stb_image.h:399
GLenum src
Definition: glext.h:6980
f32 px
Definition: gx_regdef.h:5100
GLenum GLint GLuint mask
Definition: glext.h:6668
GLint GLint GLint GLint GLint x
Definition: glext.h:6295
GLuint in
Definition: glext.h:10523
GLuint64EXT * result
Definition: glext.h:12211
GLdouble GLdouble GLdouble GLdouble q
Definition: glext.h:6414
GLenum GLenum GLvoid * row
Definition: glext.h:6316
GLuint GLdouble GLdouble GLint GLint order
Definition: glext.h:12101
GLint GLint GLsizei GLsizei GLsizei depth
Definition: glext.h:6293
STBIDEF char * stbi_zlib_decode_noheader_malloc(const char *buffer, int len, int *outlen)
Definition: inftrees.h:27
GLfloat GLfloat p
Definition: glext.h:9809
STBIDEF int stbi_info_from_file(FILE *f, int *x, int *y, int *comp)
signed int int32_t
Definition: stdint.h:123
STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert)
Definition: stb_image.h:395
GLfixed GLfixed GLfixed y2
Definition: glsym_gl.h:1051
STBIDEF void stbi_hdr_to_ldr_gamma(float gamma)
#define FILE
Definition: file_stream_transforms.h:35
STBIDEF float * stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp)
#define pow(x, y)
Definition: math.h:22
const GLdouble * v
Definition: glext.h:6391
GLenum GLint GLenum GLsizei GLsizei GLsizei GLint GLsizei const GLvoid * bits
Definition: glext.h:11836
std::string output
Definition: Config.FromFile.cpp:44
u32 col
Definition: gx_regdef.h:5093
STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip)
GLboolean GLboolean g
Definition: glext.h:6844
Definition: ibxm.c:40
GLuint color
Definition: glext.h:6883
STBIDEF void stbi_ldr_to_hdr_scale(float scale)
GLint j
Definition: nx_glsym.h:307
STBIDEF stbi_uc * stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp)
float4 p2
Definition: local.h:1
STBIDEF stbi_uc * stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
STBIDEF char * stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen)
GLint GLint GLint GLint GLint GLint GLint GLbitfield GLenum filter
Definition: glext.h:7672
GLint GLint GLsizei width
Definition: glext.h:6293
Definition: ffmpeg_fft.c:36
#define STBIDEF
Definition: stb_image.h:411
Ιστορικό Εικόνα Πληροφορίες Όλοι Οι Χρήστες Χειρίζονται Το Μενού Αριστερό Αναλογικό Αριστερό Αναλογικό Αριστερό Αναλογικό Y Αριστερό Αναλογικό Δεξί Αναλογικό X Δεξί Αναλογικό Δεξί Αναλογικό Y Δεξί Αναλογικό Σκανδάλη Όπλου Όπλο Aux A Όπλο Aux C Όπλο Select Όπλο D pad Κάτω Όπλο D pad Δεξιά Νεκρή Ζώνη Αναλογικού Σύνδεση Όλων Λήξη Χρόνου Σύνδεσης Hide Unbound Core Input Descriptors Κατάλογος Συσκευών Κατάλογος Ποντικιού Duty Cycle Keyboard Gamepad Mapping Enable Κουμπί D pad κάτω Κουμπί Κουμπί L(πίσω)" ) MSG_HASH( MENU_ENUM_LABEL_VALUE_INPUT_JOYPAD_LEFT
#define CASE(round_direction)
GLdouble GLdouble GLint GLint GLdouble GLdouble GLint GLint GLdouble GLdouble w2
Definition: glext.h:9211
GLsizei GLfixed GLfixed yorig
Definition: glsym_gl.h:1002
static char * skip(char **buf, const char *delimiters)
Definition: civetweb.c:2018
GLsizei const GLfloat * value
Definition: glext.h:6709
#define fread
Definition: file_stream_transforms.h:56
GLuint GLsizei const GLuint const GLintptr const GLsizeiptr * sizes
Definition: glsym_gl.h:634
unsigned char stbi_uc
Definition: stb_image.h:402
int compress(Bytef *dest, uLongf *destLen, const Bytef *source, uLong sourceLen)
Definition: compress.c:57
const GLchar * marker
Definition: glsym_es2.h:111
GLsizei stride
Definition: glext.h:6488
#define fopen
Definition: file_stream_transforms.h:52
GLsizei GLfixed xorig
Definition: glsym_gl.h:1002
Definition: stb_image.h:421
Definition: stb_image.h:397
GLubyte GLubyte GLubyte GLubyte w
Definition: glext.h:6742
GLfloat GLfloat GLfloat GLfloat h
Definition: glext.h:8390
Definition: video4linux2.c:51
#define FAST_BITS
Definition: rjpeg.c:172
GLintptr offset
Definition: glext.h:6560
GLint left
Definition: glext.h:8393
GLbitfield flags
Definition: glext.h:7828
STBIDEF void stbi_image_free(void *retval_from_stbi_load)
unsigned short uint16_t
Definition: stdint.h:125
STBIDEF float * stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
void * memset(void *b, int c, size_t len)
Definition: string.c:7
GLenum GLuint GLenum GLsizei length
Definition: glext.h:6233
GLdouble n
Definition: glext.h:8396
unsigned int uint32_t
Definition: stdint.h:126
STBIDEF float * stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
int fopen_s(FILE **pFile, const char *filename, const char *mode)
Definition: StandAlone.cpp:1437
const GLfloat * m
Definition: glext.h:11755
STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp)
const char *const str
Definition: portlistingparse.c:18
GLboolean GLboolean GLboolean GLboolean a
Definition: glext.h:6844
STBIDEF const char * stbi_failure_reason(void)
#define fseek
Definition: file_stream_transforms.h:55
GLint GLint GLsizei GLsizei height
Definition: glext.h:6293
void * memcpy(void *dst, const void *src, size_t len)
Definition: string.c:26
STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len)