Lexer actually lexes filenames now, and odats are made out of mapvariants
[henge/webcc.git] / src / apc / lexer.c
1 /*!@file
2 \brief lexical analyzer implementation for APC
3 \details The lexer manages two FIFO stacks. One for maintaining tokens, the
4 other for maintaining a list of files to be scanned. During
5 execution, the lexer will return a token from its token queue if any
6 are present. If not, the lexer will pop an element from its
7 file queue to 'scanner' to be tokenized. If the file queue is empty,
8 the lexer will instead call 'parsedir' to traverse the directory tree
9 and tokenize the results. If 'parsedir' does not generate any new
10 tokens, we are done.
11 \author Jordan Lavatai
12 \date Aug 2016
13 ----------------------------------------------------------------------------*/
14 /* Standard */
15 #include <stdio.h>
16 #include <string.h>
17 #include <stdint.h>
18 #include <errno.h>
19 /* Posix */
20 #include <unistd.h>
21 #include <unitypes.h>
22 #include <unistr.h>
23 #include <uniconv.h>
24 #include <uniname.h>
25 #include <unistdio.h>
26 #include <stdlib.h>
27 #include <limits.h> //realpath, NAME_MAX, FPATH_MAX
28 #include <dirent.h>
29
30 /* Local */
31 #include "parser.tab.h"
32 #ifndef DE_STACKSIZE
33 #define DE_STACKSIZE 1024
34 #endif
35 #ifndef TK_STACKSIZE
36 #define TK_STACKSIZE 1024
37 #endif
38 #ifndef MAX_SETNAME_LEN //max setname length
39 #define MAX_SETNAME_LEN 32
40 #endif
41 #ifndef MAX_ELENAME_LEN //max setname length
42 #define MAX_ELENAME_LEN 32
43 #endif
44 #define FNAME_MAX 1024
45 #define FPATH_MAX 8192
46
47 /* Public */
48 int lexer_init(void);
49 int lexer(void);
50 int lexer_lexfile(const uint8_t*);
51 void lexer_pushtok(int, YYSTYPE);
52 uint8_t const* lexer_get_current_filepath(void);
53 int lexer_lexfilename(uint8_t*);
54 struct dirent* lexer_direntpa[DE_STACKSIZE],** lexer_direntpp,** lexer_direntpb;
55 /* Private */
56 extern //lexer_fsm.rl
57 int lexer_lexstring(uint8_t*, int);
58 extern //lexer_fsm.rl
59 int lexer_setstr(uint8_t*, int);
60 extern //scanner.c
61 int scanner_init(void);
62 extern //scanner.c
63 int scanner(void);
64 static inline
65 int dredge_current_depth(void);
66 extern //bison
67 YYSTYPE yylval;
68 static
69 uint8_t const* current_filename;
70 static
71 uint8_t prev_setname[MAX_SETNAME_LEN];
72 static
73 uint8_t prev_elename[MAX_ELENAME_LEN];
74 static
75 uint8_t map_key[] = "~";
76 static
77 struct tok
78 { YYSTYPE lval; //token val
79 int tok_t; //token type
80 } token_stack[TK_STACKSIZE], *tsp, *tsx;
81
82 /* Directory Entity Array/Stack
83 Simple array for keeping track of dirents yet to be processed by the scanner.
84 If this list is empty and there are no tokens, the lexer is done.
85 This array is populated by the scanner as a plain array and consumed locally by the
86 lexer in FIFO order (entries are popped from the base pointer).
87 */
88 #define DE_STACK (lexer_direntpa)
89 #define DE_STACKP (lexer_direntpp)
90 #define DE_STACKB (lexer_direntpb)
91 #define DE_LEN() (DE_STACKP - DE_STACKB)
92 #define DE_INIT() (DE_STACKP = DE_STACKB = DE_STACK)
93 #define DE_POP() (*DE_STACKB++)
94
95 /* Token Stack
96 This is a FIFO stack whose pointers are a union of either a pointer to an
97 integer, or a pointer to two integers (a struct tok). This way, integers may
98 be added or removed from the stack either singularly (IPUSH/IPOP), or as a
99 full token of two integers (PUSH/POP).
100 An alignment error will occur if IPOP or IPUSH are used a non-even number of
101 times in a sequence!
102 */
103 #define TK_STACK (token_stack)
104 #define TK_STACKP (tsp)
105 #define TK_STACKX (tsx)
106 #define TK_LEN() (TK_STACKX - TK_STACKP)
107 #define TK_INIT() (TK_STACKP = TK_STACKX = TK_STACK)
108 #define TK_POP() (*TK_STACKP++)
109 #define TK_PUSH(T,L) (*TK_STACKX++ = (struct tok){L,T})
110
/* Initializer
   Empties the token stack and the directory-entry stack, then delegates to
   the scanner's initializer and forwards its result unchanged.  Per the
   file's stated contract a true (non-zero) result means an error occurred,
   which the caller may inspect through errno.
*/
int lexer_init(void)
{
  int scanner_status;

  TK_INIT();                      /* tsp == tsx == start of token_stack */
  DE_INIT();                      /* dirent base/top == start of array  */
  scanner_status = scanner_init();
  return scanner_status;
}
121
122 /* Lexer
123 If the token buffer is empty, 'lexer' will initialize the token buffer and
124 call 'lexer_scandir'. If SCAN_ERROR is returned, an error is printed
125 before sending a null return to bison. If 0 tokens are generated, the error
126 printing is skipped. In all other cases, 'yylval' is set, and the token's
127 integer representation is returned.
128 */
129 int lexer
130 #define $($)#$
131 #define SCAN_ERROR -1
132 #define TK_EMPTY (TK_STACKP == TK_STACKX)
133 #define FAIL(...) \
134 do { \
135 fprintf(stderr,__VA_ARGS__); \
136 goto done; \
137 } while (0)
138 ()
139 { struct tok token;
140 start:
141 while (DE_LEN() > 0)//lex any directory entries in our stack
142 {
143 if (lexer_lexfile(DE_POP()->d_name) == 0)
144 FAIL("Lexer failed to tokenize [%s]\n",(*DE_STACKB)->d_name);
145 }
146 if (TK_EMPTY) //if there are no tokens,
147 { TK_INIT(); //initialize the token stack back to 0
148 switch (scanner())
149 { case SCAN_ERROR: //if an error occurred,
150 FAIL("Scanner error\n");
151 case 0: //if the the scanner finds no dirents,
152 goto done; //then we are done
153 default: //if we found some elements to scan,
154 goto start; //start over and lex them
155 }
156 }
157 token = TK_POP();
158 yylval = token.lval;
159 return token.tok_t;
160 done:
161 yylval.val = 0;
162 return 0;
163 }
164
165
166 /* Token Receiver
167 This receiver takes a struct tok and pushes it to the FIFO stack.
168 */
169 void lexer_pushtok
170 #define $($)#$ //stringifier
171 #define ERR_TK "Fatal: Generated over " $(TK_STACKSIZE) " tokens in one pass."
172 ( int tok, YYSTYPE lval )
173 { if (TK_LEN() >= TK_STACKSIZE)
174 { fprintf(stderr, ERR_TK);
175 exit(EXIT_FAILURE);
176 }
177 TK_PUSH(tok, lval);
178 }
179
180 /* Lexical analysis of a file
181 Strips a filename to its base name, then sends it to lexer_lex
182 */
183 int lexer_lexfile
184 #define HIDDEN_WARNING "%s is hidden and will not be parsed!\n", filename
185 ( const uint8_t *filename
186 )
187 { static uint8_t fname[FNAME_MAX];
188 uint8_t *last_period = NULL, *iter;
189
190 if (*filename == '.')
191 { fprintf (stderr, HIDDEN_WARNING);
192 return 0;
193 }
194 /* Copy the filename and remove its suffix */
195 u8_strncpy(fname,filename,FNAME_MAX);
196 last_period = NULL;
197 for (iter = fname; *iter; iter++) //find the last '.' char
198 if (*iter == '.')
199 last_period = iter;
200 if (last_period) //if we found one,
201 *last_period = 0; //truncate the string there
202 /* Register the current_filename */
203 current_filename = filename;
204 printf("lexer_lexfilename(%s)\n",fname);
205 return lexer_lexfilename(fname);
206 }
207
208 uint8_t const* lexer_get_current_filepath
209 ()
210 { static uint8_t current_path[FPATH_MAX];
211 static uint8_t const* last_filename;
212 if ((!last_filename || last_filename != current_filename) &&
213 ((uint8_t*) realpath(current_filename, current_path) != (uint8_t*) current_path))
214 { perror("realpath: ");
215 return NULL;
216 }
217 return (const uint8_t*)current_path;
218 }
219
220 /* Returns 1 on success, 0 on failure */
221 int
222 lexer_ismapfile(uint8_t* str)
223 {
224 int i, len;
225
226 len = u8_strlen(str);
227 for(i = 0; i < len; i++)
228 if(str[i] == '~')
229 return 1;
230 }
231
232
233 /* Scan filename and push the its tokens
234 onto the stack */
235 int lexer_lexfilename
236 (uint8_t* str)
237 #define REF(STR) (STR[0] <= 0x39 && STR[0] >= 0x30)
238 #define DEL_FTOK(STR) (STR = u8_strchr(STR, '_') + 1)
239 #define NEXT_TOK(STR) (u8_strchr(STR, '_') + 1)
240 #define SET_CURR_SETNAME(STR) \
241 do { \
242 printf("setting curr_setname of str(%s)\n", STR); \
243 setname_end = u8_chr(STR, FNAME_MAX, '_'); \
244 setname_len = setname_end - str; \
245 u8_move(curr_setname, STR, setname_len); \
246 printf("curr_setname is now %s\n",curr_setname); \
247 } while (0)
248 #define SET_CURR_ELENAME(STR) \
249 do { \
250 printf("setting curr_elename of str(%s)\n", STR); \
251 setname_end = u8_chr(STR, FNAME_MAX, '_') + 1; \
252 if(REF(setname_end)) \
253 setname_end = u8_chr(setname_end, FNAME_MAX, '_') + 1; \
254 elename_end = u8_chr(setname_end, FNAME_MAX, '_'); \
255 elename_len = elename_end - setname_end; \
256 u8_move(curr_elename, setname_end, elename_len); \
257 printf("curr_elename is now %s\n", curr_elename); \
258 } while (0)
259
260 #define SETNAME_MATCHES() (u8_strcmp(curr_setname, prev_setname) == 0)
261 #define ELENAME_MATCHES() (u8_strcmp(curr_elename, prev_elename) == 0)
262 #define UPDATE_PREV_SETNAME(STR) \
263 do { \
264 printf("updating prev_setname from (%s)", prev_setname); \
265 u8_set(prev_setname , (ucs4_t) 0, MAX_SETNAME_LEN ); \
266 u8_move(prev_setname, curr_setname, setname_len); \
267 printf(" to %s\n", prev_setname); \
268 } while (0)
269 #define UPDATE_PREV_ELENAME(STR) \
270 do { \
271 u8_set(prev_elename , (ucs4_t) 0, MAX_ELENAME_LEN ); \
272 u8_move(prev_elename, curr_elename, elename_len); \
273 } while (0)
274 #define PREV_MAPFILE() (TK_STACKX - 5)->tok_t == MOPEN || (TK_STACKX-3)->tok_t == MOPEN
275 #define SET_MAPSTR(STR) (STR = u8_strstr(STR, map_key))
276
277 { int ntok, len, newstrt;
278 uint8_t *filepath;
279 typedef enum filetypes {
280 error = 0,
281 set_model,
282 set_map,
283 ele_model,
284 ele_map,
285 ele_vlink,
286 set_olink,
287 set_vlink
288 } filetypes;
289
290 ntok = 0;
291
292 printf("|---- Begin lexerfilename on %s ----|\n", str);
293
294 if(*str == 0)
295 perror("Lexfilename:: str is NULL so fail\n");
296
297 /* Determine the filetype of str */
298 len = u8_strlen(str);
299 newstrt = lexer_setstr(str,len);
300
301 str = str + newstrt;
302
303 len = u8_strlen(str);
304
305 ntok += lexer_lexstring(str, len);
306
307 /* Need to add map variant name 'default' if user did not specify a
308 map variant name */
309 /* if(filetype == ele_map) */
310 /* { if(!u8_strchr(str, '_')) //map variant name not provided */
311 /* { yylval.str = "default"; */
312 /* lexer_pushtok(NAME, yylval); */
313 /* ntok++; */
314 /* printf("Pushing default ele_map name\n"); */
315 /* } */
316 /* } */
317
318 /* Pass back filepath as end of statment operator */
319 filepath = u8_strdup(lexer_get_current_filepath());
320 yylval.str = filepath;
321 lexer_pushtok(NAME, yylval);
322 printf("Pushing filepath %s\n", filepath);
323 ntok++;
324
325 printf("|---- Ending lexer_lexfilename on %s, %d tokens were lexed ----|\n", str, ntok);
326 return ntok;
327 }
328
329 int
330 lexer_lexelemap
331 ( uint8_t* str)
332 { int setname_len, elename_len, strlen;
333 uint8_t* setname_end, *elename_end, *newstrt;
334 uint8_t curr_setname[MAX_SETNAME_LEN] = {0};
335 uint8_t curr_elename[MAX_ELENAME_LEN] = {0};
336
337 newstrt = str;
338
339 SET_CURR_SETNAME(newstrt);
340 SET_CURR_ELENAME(newstrt);
341 if(PREV_MAPFILE())
342 { printf("*previous file was mapfile*\n");
343 SET_MAPSTR(newstrt);
344 }
345 else
346 {
347 if(SETNAME_MATCHES())
348 { DEL_FTOK(newstrt);
349 if(REF(newstrt))
350 DEL_FTOK(newstrt);
351 printf("setname matches\n");
352 if(ELENAME_MATCHES())
353 DEL_FTOK(newstrt);
354 if(REF(str))
355 DEL_FTOK(newstrt);
356 }
357 }
358 UPDATE_PREV_ELENAME(newstrt);
359 UPDATE_PREV_SETNAME(newstrt);
360
361 return newstrt - str;
362
363
364 }
365
366 int
367 lexer_lexelemodel
368 (uint8_t* str)
369 { int setname_len, elename_len;
370 uint8_t* setname_end, *elename_end, *newstrt;
371 uint8_t curr_setname[MAX_SETNAME_LEN] = {0};
372 uint8_t curr_elename[MAX_ELENAME_LEN] = {0};
373
374 printf("In lexelemodel, str is %s\n", str);
375
376 newstrt = str;
377
378 SET_CURR_SETNAME(newstrt);
379 SET_CURR_ELENAME(newstrt);
380 if(SETNAME_MATCHES())
381 { printf("in ele_model: setname matches\n");
382 DEL_FTOK(newstrt);
383 printf("newstrt is now %s\n", newstrt);
384 if(REF(newstrt))
385 DEL_FTOK(newstrt);
386 if(ELENAME_MATCHES())
387 { printf("in ele_model: elename matches\n");
388 DEL_FTOK(newstrt);
389 if(REF(newstrt))
390 DEL_FTOK(newstrt);
391 }
392 }
393 UPDATE_PREV_ELENAME(newstrt);
394 UPDATE_PREV_SETNAME(newstrt);
395
396 return newstrt - str;
397 }
398
399 int
400 lexer_lexsetmap
401 (uint8_t* str)
402 { int setname_len, elename_len;
403 uint8_t* setname_end, *elename_end, *newstrt;
404 uint8_t curr_setname[MAX_SETNAME_LEN] = {0};
405 uint8_t curr_elename[MAX_ELENAME_LEN] = {0};
406
407 newstrt = str;
408
409 SET_CURR_SETNAME(newstrt);
410 if(PREV_MAPFILE())
411 SET_MAPSTR(newstrt);
412 else
413 if( SETNAME_MATCHES())
414 DEL_FTOK(newstrt);
415 if(REF(newstrt))
416 DEL_FTOK(newstrt);
417
418 UPDATE_PREV_SETNAME(newstrt);
419
420 return newstrt - str;
421 }
422
423 int
424 lexer_lexsetmodel
425 (uint8_t* str)
426 { int setname_len, elename_len;
427 uint8_t* setname_end, *elename_end, *newstrt;
428 uint8_t curr_setname[MAX_SETNAME_LEN] = {0};
429 uint8_t curr_elename[MAX_ELENAME_LEN] = {0};
430
431 newstrt = str;
432
433 SET_CURR_SETNAME(newstrt);
434 if( SETNAME_MATCHES())
435 DEL_FTOK(newstrt);
436 if(REF(newstrt))
437 DEL_FTOK(newstrt);
438 UPDATE_PREV_SETNAME(newstrt);
439
440 return newstrt - str;
441
442 }
443
444 int
445 lexer_lexsetvlink
446 (uint8_t* str)
447 { int setname_len, elename_len;
448 uint8_t* setname_end, *elename_end, *newstrt;
449 uint8_t curr_setname[MAX_SETNAME_LEN] = {0};
450 uint8_t curr_elename[MAX_ELENAME_LEN] = {0};
451
452 newstrt = str;
453
454 SET_CURR_SETNAME(newstrt);
455 if( SETNAME_MATCHES())
456 DEL_FTOK(newstrt);
457 if(REF((NEXT_TOK(newstrt)))) //if NAME REF REF
458 DEL_FTOK(newstrt);
459 UPDATE_PREV_SETNAME(newstrt);
460
461 return newstrt - str;
462
463 }
464
465 int
466 lexer_lexelevlink
467 (uint8_t* str)
468 { int setname_len, elename_len;
469 uint8_t* setname_end, *elename_end, *newstrt;
470 uint8_t curr_setname[MAX_SETNAME_LEN] = {0};
471 uint8_t curr_elename[MAX_ELENAME_LEN] = {0};
472
473 newstrt = str;
474
475 SET_CURR_SETNAME(newstrt);
476 SET_CURR_ELENAME(newstrt);
477 if(SETNAME_MATCHES())
478 { DEL_FTOK(newstrt);
479 if(REF(NEXT_TOK(newstrt))) //NAME REF REF, where is set_label
480 DEL_FTOK(newstrt);
481 }
482
483 return newstrt - str;
484 }
485
/* Set-olink filenames need no prefix stripping: nothing is inspected and
   the start offset is always 0. */
int
lexer_lexsetolink
(uint8_t* str)
{
  (void) str;   /* intentionally unused */
  return 0;
}
498
499 int
500 lexer_lexeleolink
501 (uint8_t* str)
502 { int setname_len, elename_len;
503 uint8_t* setname_end, *elename_end, *newstrt;
504 uint8_t curr_setname[MAX_SETNAME_LEN] = {0};
505 uint8_t curr_elename[MAX_ELENAME_LEN] = {0};
506
507 newstrt = str;
508
509 SET_CURR_SETNAME(newstrt);
510 printf("prev_setname %s, curr_setname %s\n", prev_setname, curr_setname);
511 if(SETNAME_MATCHES())
512 { DEL_FTOK(newstrt);
513 if(REF(newstrt))
514 DEL_FTOK(newstrt);
515 }
516
517 return newstrt - str;
518
519
520 }
521
522
523 /**************************/
524 /****Abandon All Hope******/
525 /**************************/
526 /*** ***/
527 /*** ***/
528 /*** ***/
529 /*** ***/
530
531
532
533 /* int lexer_lexmapfile */
534 /* #define INC_X() */
535 /* (int height, int width) */
536 /* { */
537 /* int x, y; */
538
539 /* /\* Give scanner_scanpixels a buffer and a len. Iterate through */
540 /* buf with buf[n]. If n == 0, do nothing. if n has a value, push x, */
541 /* push y, push (z = n << 24), push (ref_id = n >> 8) *\/ */
542 /* //scanner_scanpixels() */
543
544 /* for(i = 0; i < len; i++) */
545 /* if(buf[i] == 0) */
546 /* if(x == width) */
547 /* x = 0; */
548 /* else */
549
550
551
552
553 /* } */
554 /* fname_bytes = (uint8_t*)(DE_POP()->d_name); */
555 /* printf("d_name is %s\n", fname_bytes); */
556 /* for (fnp = filename, i = 0; i < FNAME_MAX; i += unit_size, fnp++) */
557 /* { unit_size = u8_mblen(fname_bytes + i, min(4, FNAME_MAX - i)); */
558 /* if (u8_mbtouc(fnp, fname_bytes + i, unit_size) == -1) //add ucs4 char to the filename */
559 /* FAIL("Lexer failed to convert ^%s to unicode\n", (fname_bytes + i)); */
560 /* if (*fnp == 0) //added a terminating char */
561 /* break; */
562 /* } */
563 /* if(u8_mbtouc(filename, DE_POP()->d_name, FNAME_MAXy) == -1) */
564 /* FAIL("Lexer failed to convert d_name into uint8_t\n"); */
565 /* ulc_fprintf(stdout, "filename is %11U\n c", filename); */