1 // stb_c_lexer.h - v0.08 - public domain Sean Barrett 2013
2 // lexer for making little C-like languages with recursive-descent parsers
4 // This file provides both the interface and the implementation.
5 // To instantiate the implementation,
6 // #define STB_C_LEXER_IMPLEMENTATION
7 // in *ONE* source file, before #including this file.
9 // The default configuration is fairly close to a C lexer, although
10 // suffixes on integer constants are not handled (you can override this).
13 // 0.08 fix bad pointer comparison
14 // 0.07 fix mishandling of hexadecimal constants parsed by strtol
15 // 0.06 fix missing next character after ending quote mark (Andreas Fredriksson)
16 // 0.05 refixed get_location because github version had lost the fix
17 // 0.04 fix octal parsing bug
18 // 0.03 added STB_C_LEX_DISCARD_PREPROCESSOR option
19 // refactor API to simplify (only one struct instead of two)
20 // change literal enum names to have 'lit' at the end
21 // 0.02 first public release
24 // - haven't tested compiling as C++
25 // - haven't tested the float parsing path
26 // - haven't tested the non-default-config paths (e.g. non-stdlib)
27 // - only tested default-config paths by eyeballing output of self-parse
29 // - haven't implemented multiline strings
30 // - haven't implemented octal/hex character constants
31 // - haven't implemented support for unicode CLEX_char
32 // - need to expand error reporting so you don't just get "CLEX_parse_error"
35 // Arpad Goretity (bugfix)
39 // This software is dual-licensed to the public domain and under the following
40 // license: you are granted a perpetual, irrevocable license to copy, modify,
41 // publish, and distribute this file as you see fit.
43 #ifndef STB_C_LEXER_DEFINITIONS
44 // to change the default parsing rules, copy the following lines
45 // into your C/C++ file *before* including this, and then replace
46 // the Y's with N's for the ones you don't want.
#define STB_C_LEX_C_DECIMAL_INTS    Y   //  "0|[1-9][0-9]*"                        CLEX_intlit
#define STB_C_LEX_C_HEX_INTS        Y   //  "0x[0-9a-fA-F]+"                       CLEX_intlit
#define STB_C_LEX_C_OCTAL_INTS      Y   //  "[0-7]+"                               CLEX_intlit
#define STB_C_LEX_C_DECIMAL_FLOATS  Y   //  "[0-9]*(.[0-9]*([eE]-?[0-9]+)?)"       CLEX_floatlit
#define STB_C_LEX_C_IDENTIFIERS     Y   //  "[_a-zA-Z][_a-zA-Z0-9]*"               CLEX_id
#define STB_C_LEX_C_DQ_STRINGS      Y   //  double-quote-delimited strings with escapes  CLEX_dqstring
#define STB_C_LEX_C_SQ_STRINGS      N   //  single-quote-delimited strings with escapes  CLEX_sqstring
#define STB_C_LEX_C_CHARS           Y   //  single-quote-delimited character with escape CLEX_charlit
#define STB_C_LEX_C_COMMENTS        Y   //  "/* comment */"
#define STB_C_LEX_CPP_COMMENTS      Y   //  "// comment to end of line\n"
#define STB_C_LEX_C_COMPARISONS     Y   //  "==" CLEX_eq  "!=" CLEX_noteq   "<=" CLEX_lesseq  ">=" CLEX_greatereq
#define STB_C_LEX_C_LOGICAL         Y   //  "&&"  CLEX_andand   "||"  CLEX_oror
#define STB_C_LEX_C_SHIFTS          Y   //  "<<"  CLEX_shl      ">>"  CLEX_shr
#define STB_C_LEX_C_INCREMENTS      Y   //  "++"  CLEX_plusplus "--"  CLEX_minusminus
#define STB_C_LEX_C_ARROW           Y   //  "->"  CLEX_arrow
#define STB_C_LEX_EQUAL_ARROW       N   //  "=>"  CLEX_eqarrow
#define STB_C_LEX_C_BITWISEEQ       Y   //  "&="  CLEX_andeq    "|="  CLEX_oreq     "^="  CLEX_xoreq
#define STB_C_LEX_C_ARITHEQ         Y   //  "+="  CLEX_pluseq   "-="  CLEX_minuseq
                                        //  "*="  CLEX_muleq    "/="  CLEX_diveq    "%="  CLEX_modeq
                                        //  if both STB_C_LEX_C_SHIFTS & STB_C_LEX_C_ARITHEQ:
                                        //                      "<<=" CLEX_shleq    ">>=" CLEX_shreq
#define STB_C_LEX_PARSE_SUFFIXES    N   // letters after numbers are parsed as part of those numbers, and must be in suffix list below
#define STB_C_LEX_DECIMAL_SUFFIXES  ""  // decimal integer suffixes e.g. "uUlL" -- these are returned as-is in string storage
#define STB_C_LEX_HEX_SUFFIXES      ""  // e.g. "uUlL"
#define STB_C_LEX_OCTAL_SUFFIXES    ""  // e.g. "uUlL"
#define STB_C_LEX_FLOAT_SUFFIXES    ""  //
#define STB_C_LEX_0_IS_EOF             N  // if Y, ends parsing at '\0'; if N, returns '\0' as token
#define STB_C_LEX_INTEGERS_AS_DOUBLES  N  // parses integers as doubles so they can be larger than 'int', but only if STB_C_LEX_USE_STDLIB==N
#define STB_C_LEX_MULTILINE_DSTRINGS   N  // allow newlines in double-quoted strings
#define STB_C_LEX_MULTILINE_SSTRINGS   N  // allow newlines in single-quoted strings
#define STB_C_LEX_USE_STDLIB           Y  // use strtod,strtol for parsing #s; otherwise inaccurate hack
#define STB_C_LEX_DOLLAR_IDENTIFIER    Y  // allow $ as an identifier character
#define STB_C_LEX_FLOAT_NO_DECIMAL     Y  // allow floats that have no decimal point if they have an exponent
#define STB_C_LEX_DEFINE_ALL_TOKEN_NAMES  N   // if Y, all CLEX_ token names are defined, even if never returned
                                              // leaving it as N should help you catch config bugs
#define STB_C_LEX_DISCARD_PREPROCESSOR    Y   // discard C-preprocessor directives (e.g. after preprocessing
                                              // you may still have #line, #pragma, etc.)
//#define STB_C_LEX_ISWHITE(str)    ... // return length in bytes of whitespace characters if first char is whitespace
#define STB_C_LEXER_DEFINITIONS         // This line prevents the header file from replacing your definitions
98 #ifndef INCLUDE_STB_C_LEXER_H
99 #define INCLUDE_STB_C_LEXER_H
107 char *string_storage
;
108 int string_storage_len
;
110 // lexer parse location for error messages
111 char *where_firstchar
;
112 char *where_lastchar
;
114 // lexer token variables
132 extern void stb_c_lexer_init(stb_lexer
*lexer
, const char *input_stream
, const char *input_stream_end
, char *string_store
, int store_length
);
133 // this function initialize the 'lexer' structure
135 // - input_stream points to the file to parse, loaded into memory
136 // - input_stream_end points to the end of the file, or NULL if you use 0-for-EOF
137 // - string_store is storage the lexer can use for storing parsed strings and identifiers
138 // - store_length is the length of that storage
140 extern int stb_c_lexer_get_token(stb_lexer
*lexer
);
141 // this function returns non-zero if a token is parsed, or 0 if at EOF
143 // - lexer->token is the token ID, which is unicode code point for a single-char token, < 0 for a multichar or eof or error
144 // - lexer->real_number is a double constant value for CLEX_floatlit, or CLEX_intlit if STB_C_LEX_INTEGERS_AS_DOUBLES
145 // - lexer->int_number is an integer constant for CLEX_intlit if !STB_C_LEX_INTEGERS_AS_DOUBLES, or character for CLEX_charlit
146 // - lexer->string is a 0-terminated string for CLEX_dqstring or CLEX_sqstring or CLEX_identifier
147 // - lexer->string_len is the byte length of lexer->string
149 extern void stb_c_lexer_get_location(const stb_lexer
*lexer
, const char *where
, stb_lex_location
*loc
);
150 // this inefficient function returns the line number and character offset of a
151 // given location in the file as returned by stb_lex_token. Because it's inefficient,
152 // you should only call it for errors, not for every token.
153 // For error messages of invalid tokens, you typically want the location of the start
154 // of the token (which caused the token to be invalid). For bugs involving legit
155 // tokens, you can report the first or the range.
157 // - loc->line_number is the line number in the file, counting from 1, of the location
158 // - loc->line_offset is the char-offset in the line, counting from 0, of the location
165 #endif // INCLUDE_STB_C_LEXER_H
167 #ifdef STB_C_LEXER_IMPLEMENTATION
169 #if defined(Y) || defined(N)
170 #error "Can only use stb_c_lexer in contexts where the preprocessor symbols 'Y' and 'N' are not defined"
174 // Hacky definitions so we can easily #if on them
178 #if STB_C_LEX_USE_STDLIB(x)
179 #define STB__CLEX_use_stdlib
183 #if STB_C_LEX_INTEGERS_AS_DOUBLES(x)
184 typedef double stb__clex_int
;
185 #define intfield real_number
186 #define STB__clex_int_as_double
188 typedef long stb__clex_int
;
189 #define intfield int_number
// Convert these config options to simple conditional #defines so we can more
// easily test them once we've changed the meaning of Y/N
195 #if STB_C_LEX_PARSE_SUFFIXES(x)
196 #define STB__clex_parse_suffixes
199 #if STB_C_LEX_C_DECIMAL_INTS(x) || STB_C_LEX_C_HEX_INTS(x) || STB_C_LEX_DEFINE_ALL_TOKEN_NAMES(x)
200 #define STB__clex_define_int
203 #if (STB_C_LEX_C_ARITHEQ(x) && STB_C_LEX_C_SHIFTS(x)) || STB_C_LEX_DEFINE_ALL_TOKEN_NAMES(x)
204 #define STB__clex_define_shifts
207 #if STB_C_LEX_C_HEX_INTS(x)
208 #define STB__clex_hex_ints
211 #if STB_C_LEX_C_DECIMAL_INTS(x)
212 #define STB__clex_decimal_ints
215 #if STB_C_LEX_C_OCTAL_INTS(x)
216 #define STB__clex_octal_ints
219 #if STB_C_LEX_C_DECIMAL_FLOATS(x)
220 #define STB__clex_decimal_floats
223 #if STB_C_LEX_DISCARD_PREPROCESSOR(x)
224 #define STB__clex_discard_preprocessor
227 // Now pick a definition of Y/N that's conducive to
228 // defining the enum of token names.
229 #if STB_C_LEX_DEFINE_ALL_TOKEN_NAMES(x) || defined(STB_C_LEXER_SELF_TEST)
245 #ifdef STB__clex_define_int
249 STB_C_LEX_C_DECIMAL_FLOATS( CLEX_floatlit
)
250 STB_C_LEX_C_IDENTIFIERS( CLEX_id
)
251 STB_C_LEX_C_DQ_STRINGS( CLEX_dqstring
)
252 STB_C_LEX_C_SQ_STRINGS( CLEX_sqstring
)
253 STB_C_LEX_C_CHARS( CLEX_charlit
)
254 STB_C_LEX_C_COMPARISONS( CLEX_eq
)
255 STB_C_LEX_C_COMPARISONS( CLEX_noteq
)
256 STB_C_LEX_C_COMPARISONS( CLEX_lesseq
)
257 STB_C_LEX_C_COMPARISONS( CLEX_greatereq
)
258 STB_C_LEX_C_LOGICAL( CLEX_andand
)
259 STB_C_LEX_C_LOGICAL( CLEX_oror
)
260 STB_C_LEX_C_SHIFTS( CLEX_shl
)
261 STB_C_LEX_C_SHIFTS( CLEX_shr
)
262 STB_C_LEX_C_INCREMENTS( CLEX_plusplus
)
263 STB_C_LEX_C_INCREMENTS( CLEX_minusminus
)
264 STB_C_LEX_C_ARITHEQ( CLEX_pluseq
)
265 STB_C_LEX_C_ARITHEQ( CLEX_minuseq
)
266 STB_C_LEX_C_ARITHEQ( CLEX_muleq
)
267 STB_C_LEX_C_ARITHEQ( CLEX_diveq
)
268 STB_C_LEX_C_ARITHEQ( CLEX_modeq
)
269 STB_C_LEX_C_BITWISEEQ( CLEX_andeq
)
270 STB_C_LEX_C_BITWISEEQ( CLEX_oreq
)
271 STB_C_LEX_C_BITWISEEQ( CLEX_xoreq
)
272 STB_C_LEX_C_ARROW( CLEX_arrow
)
273 STB_C_LEX_EQUAL_ARROW( CLEX_eqarrow
)
275 #ifdef STB__clex_define_shifts
276 CLEX_shleq
, CLEX_shreq
,
279 CLEX_first_unused_token
// Now for the rest of the file we'll use the basic definition
// where Y expands to its contents and N expands to nothing
291 void stb_c_lexer_init(stb_lexer
*lexer
, const char *input_stream
, const char *input_stream_end
, char *string_store
, int store_length
)
293 lexer
->input_stream
= (char *) input_stream
;
294 lexer
->eof
= (char *) input_stream_end
;
295 lexer
->parse_point
= (char *) input_stream
;
296 lexer
->string_storage
= string_store
;
297 lexer
->string_storage_len
= store_length
;
301 void stb_c_lexer_get_location(const stb_lexer
*lexer
, const char *where
, stb_lex_location
*loc
)
303 char *p
= lexer
->input_stream
;
306 while (*p
&& p
< where
) {
307 if (*p
== '\n' || *p
== '\r') {
308 p
+= (p
[0]+p
[1] == '\r'+'\n' ? 2 : 1); // skip newline
316 loc
->line_number
= line_number
;
317 loc
->line_offset
= char_offset
;
320 // main helper function for returning a parsed token
321 static int stb__clex_token(stb_lexer
*lexer
, int token
, char *start
, char *end
)
323 lexer
->token
= token
;
324 lexer
->where_firstchar
= start
;
325 lexer
->where_lastchar
= end
;
326 lexer
->parse_point
= end
+1;
330 // helper function for returning eof
331 static int stb__clex_eof(stb_lexer
*lexer
)
333 lexer
->token
= CLEX_eof
;
// Returns non-zero if x is one of the whitespace characters the lexer skips:
// space, tab, carriage return, newline or form feed.
static int stb__clex_iswhite(int x)
{
   switch (x) {
      case ' ':
      case '\t':
      case '\r':
      case '\n':
      case '\f':
         return 1;
      default:
         return 0;
   }
}
// Minimal strchr replacement (avoids depending on <string.h>).
// Returns a pointer to the first occurrence of ch in the 0-terminated string
// str, or 0 if ch does not occur before the terminator.
static const char *stb__strchr(const char *str, int ch)
{
   for (; *str; ++str)
      if (*str == ch)
         return str;
   return 0;
}
350 // parse suffixes at the end of a number
351 static int stb__clex_parse_suffixes(stb_lexer
*lexer
, long tokenid
, char *start
, char *cur
, const char *suffixes
)
353 #ifdef STB__clex_parse_suffixes
354 lexer
->string
= lexer
->string_storage
;
355 lexer
->string_len
= 0;
357 while ((*cur
>= 'a' && *cur
<= 'z') || (*cur
>= 'A' && *cur
<= 'Z')) {
358 if (stb__strchr(suffixes
, *cur
) == 0)
359 return stb__clex_token(lexer
, CLEX_parse_error
, start
, cur
);
360 if (lexer
->string_len
+1 >= lexer
->string_storage_len
)
361 return stb__clex_token(lexer
, CLEX_parse_error
, start
, cur
);
362 lexer
->string
[lexer
->string_len
++] = *cur
++;
365 suffixes
= suffixes
; // attempt to suppress warnings
367 return stb__clex_token(lexer
, tokenid
, start
, cur
-1);
#ifndef STB__CLEX_use_stdlib
// Approximate replacement for strtod, used when the stdlib is disabled.
// Parses "[0-9]*(.[0-9]*)?([eE][+-]?[0-9]+)?" starting at p, stores the
// address of the first unconsumed character in *q and returns the value.
// Accuracy is limited: the result is built with repeated multiplies/divides.
static double stb__clex_parse_float(char *p, char **q)
{
   double value=0;

   // integer part
   while (*p >= '0' && *p <= '9')
      value = value*10 + (*p++ - '0');

   // fractional part: collect the digits as an integer, then scale down
   if (*p == '.') {
      double powten=1, addend = 0;
      ++p;
      while (*p >= '0' && *p <= '9') {
         addend = addend*10 + (*p++ - '0');
         powten *= 10;
      }
      value += addend / powten;
   }

   // exponent
   if (*p == 'e' || *p == 'E') {
      int negative = p[1] == '-';
      int has_sign = negative || p[1] == '+';
      int exponent=0;
      double pow10=1;
      p += 1+has_sign;
      while (*p >= '0' && *p <= '9')
         exponent = exponent*10 + (*p++ - '0');
      // can't use pow() from stdlib, so do it slow way
      while (exponent-- > 0)
         pow10 *= 10;
      if (negative)
         value /= pow10;
      else
         value *= pow10;
   }
   *q = p;
   return value;
}
#endif
// Decode one (possibly backslash-escaped) character starting at p.
// Stores the address of the first unconsumed character in *q.
// Returns the character value (0..255), or -1 for escape forms that are
// not implemented yet (hex and unicode).
// A backslash followed by an unrecognized character is returned as a plain
// backslash, consuming only the backslash.
static int stb__clex_parse_char(char *p, char **q)
{
   if (*p == '\\') {
      *q = p+2; // tentatively guess we'll parse two characters
      switch(p[1]) {
         case '\\': return '\\';
         case '\'': return '\'';
         case '"': return '"';
         case '?': return '?';
         case 'a': return '\a';
         case 'b': return '\b';
         case 't': return '\t';
         case 'f': return '\f';
         case 'n': return '\n';
         case 'r': return '\r';
         case 'v': return '\v';
         case '0': return '\0'; // @TODO octal constants
         case 'x': case 'X': return -1; // @TODO hex constants
         case 'u': return -1; // @TODO unicode constants
      }
   }
   *q = p+1;
   return (unsigned char) *p;
}
426 static int stb__clex_parse_string(stb_lexer
*lexer
, char *p
, int type
)
429 char delim
= *p
++; // grab the " or ' for later matching
430 char *out
= lexer
->string_storage
;
431 char *outend
= lexer
->string_storage
+ lexer
->string_storage_len
;
432 while (*p
!= delim
) {
436 n
= stb__clex_parse_char(p
, &q
);
438 return stb__clex_token(lexer
, CLEX_parse_error
, start
, q
);
441 // @OPTIMIZE: could speed this up by looping-while-not-backslash
442 n
= (unsigned char) *p
++;
445 return stb__clex_token(lexer
, CLEX_parse_error
, start
, p
);
446 // @TODO expand unicode escapes to UTF8
450 lexer
->string
= lexer
->string_storage
;
451 lexer
->string_len
= out
- lexer
->string_storage
;
452 return stb__clex_token(lexer
, type
, start
, p
);
455 int stb_c_lexer_get_token(stb_lexer
*lexer
)
457 char *p
= lexer
->parse_point
;
459 // skip whitespace and comments
461 #ifdef STB_C_LEX_ISWHITE
462 while (p
!= lexer
->stream_end
) {
464 n
= STB_C_LEX_ISWHITE(p
);
466 if (lexer
->eof
&& lexer
->eof
- lexer
->parse_point
< n
)
467 return stb__clex_token(tok
, CLEX_parse_error
, p
,lexer
->eof
-1);
471 while (p
!= lexer
->eof
&& stb__clex_iswhite(*p
))
475 STB_C_LEX_CPP_COMMENTS(
476 if (p
!= lexer
->eof
&& p
[0] == '/' && p
[1] == '/') {
477 while (p
!= lexer
->eof
&& *p
!= '\r' && *p
!= '\n')
483 STB_C_LEX_C_COMMENTS(
484 if (p
!= lexer
->eof
&& p
[0] == '/' && p
[1] == '*') {
487 while (p
!= lexer
->eof
&& (p
[0] != '*' || p
[1] != '/'))
490 return stb__clex_token(lexer
, CLEX_parse_error
, start
, p
-1);
496 #ifdef STB__clex_discard_preprocessor
497 // @TODO this discards everything after a '#', regardless
498 // of where in the line the # is, rather than requiring it
499 // be at the start. (because this parser doesn't otherwise
500 // check for line breaks!)
501 if (p
!= lexer
->eof
&& p
[0] == '#') {
502 while (p
!= lexer
->eof
&& *p
!= '\r' && *p
!= '\n')
512 return stb__clex_eof(lexer
);
516 if ( (*p
>= 'a' && *p
<= 'z')
517 || (*p
>= 'A' && *p
<= 'Z')
518 || *p
== '_' || (unsigned char) *p
>= 128 // >= 128 is UTF8 char
519 STB_C_LEX_DOLLAR_IDENTIFIER( || *p
== '$' ) )
522 lexer
->string
= lexer
->string_storage
;
523 lexer
->string_len
= n
;
525 if (n
+1 >= lexer
->string_storage_len
)
526 return stb__clex_token(lexer
, CLEX_parse_error
, p
, p
+n
);
527 lexer
->string
[n
] = p
[n
];
530 (p
[n
] >= 'a' && p
[n
] <= 'z')
531 || (p
[n
] >= 'A' && p
[n
] <= 'Z')
532 || (p
[n
] >= '0' && p
[n
] <= '9') // allow digits in middle of identifier
533 || p
[n
] == '_' || (unsigned char) p
[n
] >= 128
534 STB_C_LEX_DOLLAR_IDENTIFIER( || p
[n
] == '$' )
536 lexer
->string
[n
] = 0;
537 return stb__clex_token(lexer
, CLEX_id
, p
, p
+n
-1);
543 return stb__clex_eof(tok
);
547 // not an identifier, return the character as itself
548 return stb__clex_token(lexer
, *p
, p
, p
);
551 if (p
+1 != lexer
->eof
) {
552 STB_C_LEX_C_INCREMENTS(if (p
[1] == '+') return stb__clex_token(lexer
, CLEX_plusplus
, p
,p
+1);)
553 STB_C_LEX_C_ARITHEQ( if (p
[1] == '=') return stb__clex_token(lexer
, CLEX_pluseq
, p
,p
+1);)
557 if (p
+1 != lexer
->eof
) {
558 STB_C_LEX_C_INCREMENTS(if (p
[1] == '-') return stb__clex_token(lexer
, CLEX_minusminus
, p
,p
+1);)
559 STB_C_LEX_C_ARITHEQ( if (p
[1] == '=') return stb__clex_token(lexer
, CLEX_minuseq
, p
,p
+1);)
560 STB_C_LEX_C_ARROW( if (p
[1] == '>') return stb__clex_token(lexer
, CLEX_arrow
, p
,p
+1);)
564 if (p
+1 != lexer
->eof
) {
565 STB_C_LEX_C_LOGICAL( if (p
[1] == '&') return stb__clex_token(lexer
, CLEX_andand
, p
,p
+1);)
566 STB_C_LEX_C_BITWISEEQ(if (p
[1] == '=') return stb__clex_token(lexer
, CLEX_andeq
, p
,p
+1);)
570 if (p
+1 != lexer
->eof
) {
571 STB_C_LEX_C_LOGICAL( if (p
[1] == '|') return stb__clex_token(lexer
, CLEX_oror
, p
,p
+1);)
572 STB_C_LEX_C_BITWISEEQ(if (p
[1] == '=') return stb__clex_token(lexer
, CLEX_oreq
, p
,p
+1);)
576 if (p
+1 != lexer
->eof
) {
577 STB_C_LEX_C_COMPARISONS(if (p
[1] == '=') return stb__clex_token(lexer
, CLEX_eq
, p
,p
+1);)
578 STB_C_LEX_EQUAL_ARROW( if (p
[1] == '>') return stb__clex_token(lexer
, CLEX_eqarrow
, p
,p
+1);)
582 STB_C_LEX_C_COMPARISONS(if (p
+1 != lexer
->eof
&& p
[1] == '=') return stb__clex_token(lexer
, CLEX_noteq
, p
,p
+1);)
585 STB_C_LEX_C_BITWISEEQ(if (p
+1 != lexer
->eof
&& p
[1] == '=') return stb__clex_token(lexer
, CLEX_xoreq
, p
,p
+1));
588 STB_C_LEX_C_ARITHEQ(if (p
+1 != lexer
->eof
&& p
[1] == '=') return stb__clex_token(lexer
, CLEX_modeq
, p
,p
+1));
591 STB_C_LEX_C_ARITHEQ(if (p
+1 != lexer
->eof
&& p
[1] == '=') return stb__clex_token(lexer
, CLEX_muleq
, p
,p
+1));
594 STB_C_LEX_C_ARITHEQ(if (p
+1 != lexer
->eof
&& p
[1] == '=') return stb__clex_token(lexer
, CLEX_diveq
, p
,p
+1));
597 if (p
+1 != lexer
->eof
) {
598 STB_C_LEX_C_COMPARISONS(if (p
[1] == '=') return stb__clex_token(lexer
, CLEX_lesseq
, p
,p
+1);)
599 STB_C_LEX_C_SHIFTS( if (p
[1] == '<') {
600 STB_C_LEX_C_ARITHEQ(if (p
+2 != lexer
->eof
&& p
[2] == '=')
601 return stb__clex_token(lexer
, CLEX_shleq
, p
,p
+2);)
602 return stb__clex_token(lexer
, CLEX_shl
, p
,p
+1);
608 if (p
+1 != lexer
->eof
) {
609 STB_C_LEX_C_COMPARISONS(if (p
[1] == '=') return stb__clex_token(lexer
, CLEX_greatereq
, p
,p
+1);)
610 STB_C_LEX_C_SHIFTS( if (p
[1] == '>') {
611 STB_C_LEX_C_ARITHEQ(if (p
+2 != lexer
->eof
&& p
[2] == '=')
612 return stb__clex_token(lexer
, CLEX_shreq
, p
,p
+2);)
613 return stb__clex_token(lexer
, CLEX_shr
, p
,p
+1);
620 STB_C_LEX_C_DQ_STRINGS(return stb__clex_parse_string(lexer
, p
, CLEX_dqstring
);)
623 STB_C_LEX_C_SQ_STRINGS(return stb__clex_parse_string(lexer
, p
, CLEX_sqstring
);)
627 lexer
->int_number
= stb__clex_parse_char(p
+1, &p
);
628 if (lexer
->int_number
< 0)
629 return stb__clex_token(lexer
, CLEX_parse_error
, start
,start
);
630 if (p
== lexer
->eof
|| *p
!= '\'')
631 return stb__clex_token(lexer
, CLEX_parse_error
, start
,p
);
632 return stb__clex_token(lexer
, CLEX_charlit
, start
, p
+1);
637 #ifdef STB__clex_hex_ints
638 if (p
+1 != lexer
->eof
) {
639 if (p
[1] == 'x' || p
[1] == 'X') {
641 #ifdef STB__CLEX_use_stdlib
642 lexer
->int_number
= strtol((char *) p
, (char **) &q
, 16);
645 while (q
!= lexer
->eof
) {
646 if (*q
>= '0' && *q
<= '9')
647 n
= n
*16 + (*q
- '0');
648 else if (*q
>= 'a' && *q
<= 'f')
649 n
= n
*16 + (*q
- 'a') + 10;
650 else if (*q
>= 'A' && *q
<= 'F')
651 n
= n
*16 + (*q
- 'A') + 10;
656 lexer
->int_field
= n
; // int_field is macro that expands to real_number/int_number depending on type of n
659 return stb__clex_token(lexer
, CLEX_parse_error
, p
-2,p
-1);
660 return stb__clex_parse_suffixes(lexer
, CLEX_intlit
, p
,q
, STB_C_LEX_HEX_SUFFIXES
);
663 #endif // STB__clex_hex_ints
664 // can't test for octal because we might parse '0.0' as float or as '0' '.' '0',
665 // so have to do float first
668 case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
670 #ifdef STB__clex_decimal_floats
673 while (q
!= lexer
->eof
&& (*q
>= '0' && *q
<= '9'))
675 if (q
!= lexer
->eof
) {
676 if (*q
== '.' STB_C_LEX_FLOAT_NO_DECIMAL(|| *q
== 'e' || *q
== 'E')) {
677 #ifdef STB__CLEX_use_stdlib
678 lexer
->real_number
= strtod((char *) p
, (char**) &q
);
680 lexer
->real_number
= stb__clex_parse_float(p
, &q
);
683 return stb__clex_parse_suffixes(lexer
, CLEX_floatlit
, p
,q
, STB_C_LEX_FLOAT_SUFFIXES
);
688 #endif // STB__clex_decimal_floats
690 #ifdef STB__clex_octal_ints
693 #ifdef STB__CLEX_use_stdlib
694 lexer
->int_number
= strtol((char *) p
, (char **) &q
, 8);
697 while (q
!= lexer
->eof
) {
698 if (*q
>= '0' && *q
<= '7')
704 if (q
!= lexer
->eof
&& (*q
== '8' || *q
=='9'))
705 return stb__clex_token(tok
, CLEX_parse_error
, p
, q
);
706 lexer
->int_field
= n
;
708 return stb__clex_parse_suffixes(lexer
, CLEX_intlit
, p
,q
, STB_C_LEX_OCTAL_SUFFIXES
);
710 #endif // STB__clex_octal_ints
712 #ifdef STB__clex_decimal_ints
715 #ifdef STB__CLEX_use_stdlib
716 lexer
->int_number
= strtol((char *) p
, (char **) &q
, 10);
719 while (q
!= lexer
->eof
) {
720 if (*q
>= '0' && *q
<= '9')
721 n
= n
*10 + (q
- '0');
726 lexer
->int_field
= n
;
728 return stb__clex_parse_suffixes(lexer
, CLEX_intlit
, p
,q
, STB_C_LEX_OCTAL_SUFFIXES
);
730 #endif // STB__clex_decimal_ints
734 #endif // STB_C_LEXER_IMPLEMENTATION
736 #ifdef STB_C_LEXER_SELF_TEST
740 static void print_token(stb_lexer
*lexer
)
742 switch (lexer
->token
) {
743 case CLEX_id
: printf("_%s", lexer
->string
); break;
744 case CLEX_eq
: printf("=="); break;
745 case CLEX_noteq
: printf("!="); break;
746 case CLEX_lesseq
: printf("<="); break;
747 case CLEX_greatereq
: printf(">="); break;
748 case CLEX_andand
: printf("&&"); break;
749 case CLEX_oror
: printf("||"); break;
750 case CLEX_shl
: printf("<<"); break;
751 case CLEX_shr
: printf(">>"); break;
752 case CLEX_plusplus
: printf("++"); break;
753 case CLEX_minusminus
: printf("--"); break;
754 case CLEX_arrow
: printf("->"); break;
755 case CLEX_andeq
: printf("&="); break;
756 case CLEX_oreq
: printf("|="); break;
757 case CLEX_xoreq
: printf("^="); break;
758 case CLEX_pluseq
: printf("+="); break;
759 case CLEX_minuseq
: printf("-="); break;
760 case CLEX_muleq
: printf("*="); break;
761 case CLEX_diveq
: printf("/="); break;
762 case CLEX_modeq
: printf("%%="); break;
763 case CLEX_shleq
: printf("<<="); break;
764 case CLEX_shreq
: printf(">>="); break;
765 case CLEX_eqarrow
: printf("=>"); break;
766 case CLEX_dqstring
: printf("\"%s\"", lexer
->string
); break;
767 case CLEX_sqstring
: printf("'\"%s\"'", lexer
->string
); break;
768 case CLEX_charlit
: printf("'%s'", lexer
->string
); break;
769 #if defined(STB__clex_int_as_double) && !defined(STB__CLEX_use_stdlib)
770 case CLEX_intlit
: printf("#%g", lexer
->real_number
); break;
772 case CLEX_intlit
: printf("#%ld", lexer
->int_number
); break;
774 case CLEX_floatlit
: printf("%g", lexer
->real_number
); break;
776 if (lexer
->token
>= 0 && lexer
->token
< 256)
777 printf("%c", (int) lexer
->token
);
779 printf("<<<UNKNOWN TOKEN %ld >>>\n", lexer
->token
);
787 multiline comments */
794 printf("test",1); // https://github.com/nothings/stb/issues/13
797 int main(int argc
, char **argv
)
799 FILE *f
= fopen("stb_c_lexer.h","rb");
800 char *text
= (char *) malloc(1 << 20);
801 int len
= f
? fread(text
, 1, 1<<20, f
) : -1;
804 fprintf(stderr
, "Error opening file\n");
809 stb_c_lexer_init(&lex
, text
, text
+len
, (char *) malloc(1<<16), 1<<16);
810 while (stb_c_lexer_get_token(&lex
)) {
811 if (lex
.token
== CLEX_parse_error
) {
812 printf("\n<<<PARSE ERROR>>>\n");