minor changes to assignment to uninitialized variable
[henge/apc.git] / src / lexer.c
1 /*!@file
2 \brief lexical analyzer implementation for APC
3 \details The lexer manages two FIFO stacks. One for maintaining tokens, the
4 other for maintaining a list of files to be scanned. During
5 execution, the lexer will return a token from its token queue if any
6 are present. If not, the lexer will pop an element from its
7 file queue to 'scanner' to be tokenized. If the file queue is empty,
8 the lexer will instead call 'parsedir' to traverse the directory tree
9 and tokenize the results. If 'parsedir' does not generate any new
10 tokens, we are done.
11 \author Jordan Lavatai
12 \date Aug 2016
13 ----------------------------------------------------------------------------*/
14 /* Standard */
15 #include <stdio.h>
16 #include <string.h>
17 #include <stdint.h>
18 #include <errno.h>
19 /* Posix */
20 #include <unistd.h>
21 #include <unitypes.h>
22 #include <unistr.h>
23 #include <uniconv.h>
24 #include <uniname.h>
25 #include <unistdio.h>
26 #include <stdlib.h>
27 #include <limits.h> //realpath, NAME_MAX, FPATH_MAX
28 #include <dirent.h>
29
30 /* Local */
31 #include "parser.tab.h"
32 #ifndef DE_STACKSIZE
33 #define DE_STACKSIZE 1024
34 #endif
35 #ifndef TK_STACKSIZE
36 #define TK_STACKSIZE 1024
37 #endif
38
39
40 /* Public: entry points defined in this file, plus the dirent queue storage */
41 int lexer_init(void);
42 int lexer(void);
43 int lexer_lexfile(const uint8_t*);
44 void lexer_pushtok(int, YYSTYPE);
45 uint8_t const* lexer_get_current_filepath(void);
46 int lexer_lexfilename(uint8_t*);
/* Backing array for the dirent queue, followed by the two cursors that the
   DE_STACKP and DE_STACKB macros alias (see the DE_* macros below). */
47 struct dirent* lexer_direntpa[DE_STACKSIZE],** lexer_direntpp,** lexer_direntpb;
48 /* Private: symbols provided by other translation units (named per line) */
49 extern //lexer_fsm.rl
50 int lexer_lexstring(uint8_t*, int);
51 extern //lexer_fsm.rl
52 int lexer_setstr(uint8_t*, int);
53 extern //scanner.c
54 int scanner_init(void);
55 extern //scanner.c
56 int scanner(void);
57 extern //bison
58 YYSTYPE yylval;
/* Name most recently handed to lexer_lexfile; it is stored as a borrowed
   pointer (no copy is made) and read by lexer_get_current_filepath. */
59 static
60 uint8_t const* current_filename;
61
/* One queued token, and the token queue itself: 'token_stack' is the storage,
   'tsp' is the read cursor (TK_STACKP) and 'tsx' the write cursor
   (TK_STACKX). */
62 static
63 struct tok
64 { YYSTYPE lval; //token val
65 int tok_t; //token type
66 } token_stack[TK_STACKSIZE], *tsp, *tsx;
67
68 /* Directory Entry Queue
   Array of dirents that have not been lexed yet. If this queue is empty and
   there are no tokens, the lexer is done.
   Entries are consumed first-in, first-out: DE_POP reads from the front
   cursor (DE_STACKB) and advances it. Nothing in this file advances the back
   cursor (DE_STACKP); according to the original note the scanner fills the
   array, so presumably it moves DE_STACKP as well -- confirm in scanner.c.
*/
74 #define DE_STACK (lexer_direntpa) /* backing array */
75 #define DE_STACKP (lexer_direntpp) /* back cursor: one past the last queued entry */
76 #define DE_STACKB (lexer_direntpb) /* front cursor: next entry to pop */
77 #define DE_LEN() (DE_STACKP - DE_STACKB) /* number of entries still queued */
78 #define DE_INIT() (DE_STACKP = DE_STACKB = DE_STACK) /* empty the queue */
79 #define DE_POP() (*DE_STACKB++) /* take the front entry; no emptiness check */
80
81 /* Token Queue
   First-in, first-out queue of 'struct tok' values stored in 'token_stack'.
   TK_PUSH writes at the write cursor (TK_STACKX) and advances it; TK_POP
   reads at the read cursor (TK_STACKP) and advances it. Neither macro checks
   bounds: callers must make sure there is room before pushing and at least
   one token before popping.
   NOTE(review): an earlier version of this comment described a pointer union
   with single-integer IPUSH/IPOP variants; no such macros are defined here.
*/
89 #define TK_STACK (token_stack) /* backing array */
90 #define TK_STACKP (tsp) /* read cursor: next token to pop */
91 #define TK_STACKX (tsx) /* write cursor: next free slot */
92 #define TK_LEN() (TK_STACKX - TK_STACKP) /* number of unread tokens */
93 #define TK_INIT() (TK_STACKP = TK_STACKX = TK_STACK) /* empty the queue */
94 #define TK_POP() (*TK_STACKP++) /* take the oldest token */
95 #define TK_PUSH(T,L) (*TK_STACKX++ = (struct tok){L,T}) /* append token of type T with value L */
96
/* Initializer
   Empties the dirent queue and the token queue, then initializes the scanner.
   The return value is whatever 'scanner_init' returns; per the original note
   a true (non-zero) value means an error occurred, which may be inspected
   through errno.
*/
int lexer_init
()
{ int scanner_status;

  DE_INIT();
  TK_INIT();
  scanner_status = scanner_init();
  return scanner_status;
}
107
108 /* Lexer
109 If the token buffer is empty, 'lexer' will initialize the token buffer and
110 call 'lexer_scandir'. If SCAN_ERROR is returned, an error is printed
111 before sending a null return to bison. If 0 tokens are generated, the error
112 printing is skipped. In all other cases, 'yylval' is set, and the token's
113 integer representation is returned.
114 */
115 int lexer
116 #define $($)#$
117 #define SCAN_ERROR -1
118 #define TK_EMPTY (TK_STACKP == TK_STACKX)
119 #define FAIL(...) \
120 do { \
121 fprintf(stderr,__VA_ARGS__); \
122 goto done; \
123 } while (0)
124 ()
125 { struct tok token;
126 start:
127 while (DE_LEN() > 0)//lex any directory entries in our stack
128 {
129 if (lexer_lexfile(DE_POP()->d_name) == 0)
130 FAIL("Lexer failed to tokenize [%s]\n",(*DE_STACKB)->d_name);
131 }
132 if (TK_EMPTY) //if there are no tokens,
133 { TK_INIT(); //initialize the token stack back to 0
134 switch (scanner())
135 { case SCAN_ERROR: //if an error occurred,
136 FAIL("Scanner error\n");
137 case 0: //if the the scanner finds no dirents,
138 goto done; //then we are done
139 default: //if we found some elements to scan,
140 goto start; //start over and lex them
141 }
142 }
143 token = TK_POP();
144 yylval = token.lval;
145 return token.tok_t;
146 done:
147 yylval.val = 0;
148 return 0;
149 }
150
151
152 /* Token Receiver
153 This receiver takes a struct tok and pushes it to the FIFO stack.
154 */
155 void lexer_pushtok
156 #define $($)#$ //stringifier
157 #define ERR_TK "Fatal: Generated over " $(TK_STACKSIZE) " tokens in one pass."
158 ( int tok, YYSTYPE lval )
159 { if (TK_LEN() >= TK_STACKSIZE)
160 { fprintf(stderr, ERR_TK);
161 exit(EXIT_FAILURE);
162 }
163 TK_PUSH(tok, lval);
164 }
165
166 /* Lexical analysis of a file
167 Strips a filename to its base name, then sends it to lexer_lex
168 */
169 int lexer_lexfile
170 #define HIDDEN_WARNING "%s is hidden and will not be parsed!\n", filename
171 ( const uint8_t *filename
172 )
173 { static uint8_t fname[FNAME_MAX];
174 uint8_t *last_period = NULL, *iter;
175
176 if (*filename == '.')
177 { fprintf (stderr, HIDDEN_WARNING);
178 return 0;
179 }
180 /* Copy the filename and remove its suffix */
181 u8_strncpy(fname,filename,FNAME_MAX);
182 last_period = NULL;
183 for (iter = fname; *iter; iter++) //find the last '.' char
184 if (*iter == '.')
185 last_period = iter;
186 if (last_period) //if we found one,
187 *last_period = 0; //truncate the string there
188 /* Register the current_filename */
189 current_filename = filename;
190 printf("lexer_lexfilename(%s)\n",fname);
191 return lexer_lexfilename(fname);
192 }
193
194 uint8_t const* lexer_get_current_filepath
195 ()
196 { static uint8_t current_path[FPATH_MAX];
197 static uint8_t const* last_filename;
198 if ((!last_filename || last_filename != current_filename) &&
199 ((uint8_t*) realpath(current_filename, current_path) != (uint8_t*) current_path))
200 { perror("realpath: ");
201 return NULL;
202 }
203 return (const uint8_t*)current_path;
204 }
205
206 /* Scan filename and push the its tokens
207 onto the stack */
208 int lexer_lexfilename
209 (uint8_t* str)
210 { int ntok, len;
211 uint8_t *filepath;
212
213
214 printf("|---- Begin lexerfilename on %s ----|\n", str);
215
216 if(*str == 0)
217 perror("Lexfilename:: str is NULL so fail\n");
218
219 /* Determine the filetype of str */
220 len = u8_strlen(str);
221
222 ntok = lexer_lexstring(str, len);
223
224 /* Pass back filepath as end of statment operator */
225 filepath = u8_strdup(lexer_get_current_filepath());
226 yylval.str = filepath;
227 lexer_pushtok(NAME, yylval);
228 printf("Pushing filepath %s\n", filepath);
229 ntok++;
230
231 printf("|---- Ending lexer_lexfilename on %s, %d tokens were lexed ----|\n", str, ntok);
232 return ntok;
233 }
234
235 /**************************/
236 /****Abandon All Hope******/
237 /**************************/
238 /*** ***/
239 /*** ***/
240 /*** ***/
241 /*** ***/
242
/* Everything from here to the matching #endif is compiled out. */
243 #if 0
/* lexer_lexelemap (disabled)
   Returns how many units at the front of 'str' should be skipped, computed as
   the distance between the advanced cursor 'newstrt' and 'str'.
   The current set name and element name are extracted from 'str'. If
   PREV_MAPFILE() holds, the cursor is moved with SET_MAPSTR; otherwise, when
   the set name matches the previous one, leading '_'-separated tokens are
   dropped with DEL_FTOK. The previous names are then updated.
   NOTE(review): the helper macros are defined after this function, and
   prev_setname, prev_elename, map_key, MAX_SETNAME_LEN and MAX_ELENAME_LEN
   are not defined anywhere in view. */
244 int
245 lexer_lexelemap
246 ( uint8_t* str)
/* NOTE(review): the local 'strlen' is never used and shadows the library
   function of the same name. */
247 { int setname_len, elename_len, strlen;
248 uint8_t* setname_end, *elename_end, *newstrt;
249 uint8_t curr_setname[MAX_SETNAME_LEN] = {0};
250 uint8_t curr_elename[MAX_ELENAME_LEN] = {0};
251
252 newstrt = str;
253
254 SET_CURR_SETNAME(newstrt);
255 SET_CURR_ELENAME(newstrt);
256 if(PREV_MAPFILE())
257 { printf("Lexer_lexelemap:: previous file was mapfile*\n");
258 SET_MAPSTR(newstrt);
259 }
260 else
261 {
262 if(SETNAME_MATCHES())
263 { DEL_FTOK(newstrt);
264 if(REF(newstrt))
265 DEL_FTOK(newstrt);
266 printf("Lexer_lexelemap:: setname matches\n");
267 if(ELENAME_MATCHES())
268 DEL_FTOK(newstrt);
/* There are no braces on the element-name test above, so the reference
   check below runs whether or not the element name matched. */
269 if(REF(newstrt))
270 DEL_FTOK(newstrt);
271 }
272 }
273
274 UPDATE_PREV_ELENAME(newstrt);
275 UPDATE_PREV_SETNAME(newstrt);
276
277 return newstrt - str;
278
279
280 }
281
/* lexer_lexelemodel (disabled)
   Returns the offset into 'str' reached after dropping leading tokens.
   The current set and element names are extracted; when the set name matches
   the previous one it is dropped (plus a following numeric reference token,
   if any), and then the same is done for the element name when it matches the
   previous element name. The previous names are updated before returning.
   Progress is traced on stdout. */
282 int
283 lexer_lexelemodel
284 (uint8_t* str)
285 { int setname_len, elename_len;
286 uint8_t* setname_end, *elename_end, *newstrt;
287 uint8_t curr_setname[MAX_SETNAME_LEN] = {0};
288 uint8_t curr_elename[MAX_ELENAME_LEN] = {0};
289
290 printf("Lexer_lexelemodel:: Begin str is %s\n", str);
291
292 newstrt = str;
293
294 SET_CURR_SETNAME(newstrt);
295 SET_CURR_ELENAME(newstrt);
296 if(SETNAME_MATCHES())
297 { printf("Lexer_lexelemodel:: curr_setname(%s) matches prev_setname (%s)\n", curr_setname, prev_setname);
298 DEL_FTOK(newstrt);
299 printf("Lexer_lexelemodel:: Deleted setname, newstrt is now %s\n", newstrt);
300 if(REF(newstrt))
301 DEL_FTOK(newstrt);
/* The element name is only compared when the set name matched. */
302 if(ELENAME_MATCHES())
303 { printf("Lexer_lexelemodel:: elename matches\n");
304 DEL_FTOK(newstrt);
305 if(REF(newstrt))
306 DEL_FTOK(newstrt);
307 }
308 }
309 UPDATE_PREV_ELENAME(newstrt);
310 UPDATE_PREV_SETNAME(newstrt);
311
312 return newstrt - str;
313 }
314
/* lexer_lexsetmap (disabled)
   Returns the offset into 'str' reached after repositioning the cursor.
   The current set name is extracted; if PREV_MAPFILE() holds the cursor is
   moved with SET_MAPSTR, otherwise a matching set name is dropped. The
   previous set name is updated before returning.
   elename_len, elename_end and curr_elename are declared but not used. */
315 int
316 lexer_lexsetmap
317 (uint8_t* str)
318 { int setname_len, elename_len;
319 uint8_t* setname_end, *elename_end, *newstrt;
320 uint8_t curr_setname[MAX_SETNAME_LEN] = {0};
321 uint8_t curr_elename[MAX_ELENAME_LEN] = {0};
322
323 newstrt = str;
324
325 SET_CURR_SETNAME(newstrt);
326 if(PREV_MAPFILE())
327 SET_MAPSTR(newstrt);
328 else
329 if( SETNAME_MATCHES())
330 DEL_FTOK(newstrt);
/* The if/else chain above ends with the DEL_FTOK statement, so the reference
   check below is unconditional: it also runs after SET_MAPSTR. */
331 if(REF(newstrt))
332 DEL_FTOK(newstrt);
333
334 UPDATE_PREV_SETNAME(newstrt);
335
336 return newstrt - str;
337 }
338
/* lexer_lexsetmodel (disabled)
   Returns the offset into 'str' reached after dropping leading tokens.
   The current set name is extracted and dropped when it matches the previous
   one; a leading numeric reference token is then dropped regardless of
   whether the set name matched. The previous set name is updated before
   returning.
   elename_len, elename_end and curr_elename are declared but not used. */
339 int
340 lexer_lexsetmodel
341 (uint8_t* str)
342 { int setname_len, elename_len;
343 uint8_t* setname_end, *elename_end, *newstrt;
344 uint8_t curr_setname[MAX_SETNAME_LEN] = {0};
345 uint8_t curr_elename[MAX_ELENAME_LEN] = {0};
346
347 newstrt = str;
348
349 SET_CURR_SETNAME(newstrt);
350 if( SETNAME_MATCHES())
351 DEL_FTOK(newstrt);
352 if(REF(newstrt))
353 DEL_FTOK(newstrt);
354 UPDATE_PREV_SETNAME(newstrt);
355
356 return newstrt - str;
357
358 }
359
/* lexer_lexsetvlink (disabled)
   Returns the offset into 'str' reached after dropping leading tokens.
   The current set name is extracted and dropped when it matches the previous
   one. Then, if the token *after* the one at the cursor starts with a digit,
   the token at the cursor is dropped. The previous set name is updated before
   returning.
   elename_len, elename_end and curr_elename are declared but not used. */
360 int
361 lexer_lexsetvlink
362 (uint8_t* str)
363 { int setname_len, elename_len;
364 uint8_t* setname_end, *elename_end, *newstrt;
365 uint8_t curr_setname[MAX_SETNAME_LEN] = {0};
366 uint8_t curr_elename[MAX_ELENAME_LEN] = {0};
367
368 newstrt = str;
369
370 SET_CURR_SETNAME(newstrt);
371 if( SETNAME_MATCHES())
372 DEL_FTOK(newstrt);
373 if(REF((NEXT_TOK(newstrt)))) //if NAME REF REF
374 DEL_FTOK(newstrt);
375 UPDATE_PREV_SETNAME(newstrt);
376
377 return newstrt - str;
378
379 }
380
/* lexer_lexelevlink (disabled)
   Returns the offset into 'str' reached after dropping leading tokens.
   The current set and element names are extracted. When the set name matches
   the previous one it is dropped, and if the token after the new cursor
   position starts with a digit, the token at the cursor is dropped too.
   Unlike the other variants, the previous names are not updated here. */
381 int
382 lexer_lexelevlink
383 (uint8_t* str)
384 { int setname_len, elename_len;
385 uint8_t* setname_end, *elename_end, *newstrt;
386 uint8_t curr_setname[MAX_SETNAME_LEN] = {0};
387 uint8_t curr_elename[MAX_ELENAME_LEN] = {0};
388
389 newstrt = str;
390
391 SET_CURR_SETNAME(newstrt);
392 SET_CURR_ELENAME(newstrt);
393 if(SETNAME_MATCHES())
394 { DEL_FTOK(newstrt);
395 if(REF(NEXT_TOK(newstrt))) //NAME REF REF, where is set_label
396 DEL_FTOK(newstrt);
397 }
398
399 return newstrt - str;
400 }
401
/* lexer_lexsetolink (disabled)
   Placeholder: always returns 0 and leaves 'str' untouched. All locals are
   declared but unused. */
402 int
403 lexer_lexsetolink
404 (uint8_t* str)
405 { int setname_len, elename_len;
406 uint8_t* setname_end, *elename_end;
407 uint8_t curr_setname[MAX_SETNAME_LEN] = {0};
408 uint8_t curr_elename[MAX_ELENAME_LEN] = {0};
409
410 return 0;
411
412 //do nothing
413 }
414
/* lexer_lexeleolink (disabled)
   Returns the offset into 'str' reached after dropping leading tokens.
   The current set name is extracted and printed next to the previous one.
   When they match, the set name is dropped, followed by a numeric reference
   token if one is next. The previous names are not updated here.
   elename_len, elename_end and curr_elename are declared but not used. */
415 int
416 lexer_lexeleolink
417 (uint8_t* str)
418 { int setname_len, elename_len;
419 uint8_t* setname_end, *elename_end, *newstrt;
420 uint8_t curr_setname[MAX_SETNAME_LEN] = {0};
421 uint8_t curr_elename[MAX_ELENAME_LEN] = {0};
422
423 newstrt = str;
424
425 SET_CURR_SETNAME(newstrt);
426 printf("prev_setname %s, curr_setname %s\n", prev_setname, curr_setname);
427 if(SETNAME_MATCHES())
428 { DEL_FTOK(newstrt);
429 if(REF(newstrt))
430 DEL_FTOK(newstrt);
431 }
432
433 return newstrt - str;
434
435
436 }
437
438
439
/* Helper macros for the disabled filename lexers above. Several of them rely
   on locals of the calling function (str, setname_end, setname_len,
   elename_end, elename_len, curr_setname, curr_elename) and on prev_setname,
   prev_elename and map_key, which are not defined anywhere in view.
   NOTE(review): they are defined after the functions that use them. */

/* True when the first unit of STR is an ASCII digit (0x30..0x39). */
440 #define REF(STR) (STR[0] <= 0x39 && STR[0] >= 0x30)
/* Advance STR to the unit after its first '_'.
   NOTE(review): there is no handling for a string without '_'; presumably
   u8_strchr returns NULL in that case -- confirm before re-enabling. */
441 #define DEL_FTOK(STR) (STR = u8_strchr(STR, '_') + 1)
/* Same position as DEL_FTOK would move to, without modifying STR. */
442 #define NEXT_TOK(STR) (u8_strchr(STR, '_') + 1)
/* Copy the text before the first '_' of STR into curr_setname. The length is
   measured from the caller's 'str', not from the macro argument STR. */
443 #define SET_CURR_SETNAME(STR) \
444 do { \
445 printf("Lexer_lexX:: setting curr_setname of str(%s)\n", STR); \
446 setname_end = u8_chr(STR, FNAME_MAX, '_'); \
447 setname_len = setname_end - str; \
448 u8_move(curr_setname, STR, setname_len); \
449 printf("Lexer_lexX:: curr_setname is now %s\n",curr_setname); \
450 } while (0)
/* Copy the element name into curr_elename: skip past the first '_', skip one
   more token if what follows starts with a digit, then copy up to the next
   '_'. setname_end is reused as the scan cursor. */
451 #define SET_CURR_ELENAME(STR) \
452 do { \
453 printf("Lexer_lexX:: setting curr_elename of str(%s)\n", STR); \
454 setname_end = u8_chr(STR, FNAME_MAX, '_') + 1; \
455 if(REF(setname_end)) \
456 setname_end = u8_chr(setname_end, FNAME_MAX, '_') + 1; \
457 elename_end = u8_chr(setname_end, FNAME_MAX, '_'); \
458 elename_len = elename_end - setname_end; \
459 u8_move(curr_elename, setname_end, elename_len); \
460 printf("Lexer_lexX:: curr_elename is now %s\n", curr_elename); \
461 } while (0)
462
/* True when the current name equals the previously recorded one. */
463 #define SETNAME_MATCHES() (u8_strcmp(curr_setname, prev_setname) == 0)
464 #define ELENAME_MATCHES() (u8_strcmp(curr_elename, prev_elename) == 0)
/* Clear prev_setname, then copy setname_len units of curr_setname into it.
   The STR argument is not used. */
465 #define UPDATE_PREV_SETNAME(STR) \
466 do { \
467 printf("Lexer_lexX:: updating prev_setname from (%s)", prev_setname); \
468 u8_set(prev_setname , (ucs4_t) 0, MAX_SETNAME_LEN ); \
469 u8_move(prev_setname, curr_setname, setname_len); \
470 printf(" to %s\n", prev_setname); \
471 } while (0)
/* Clear prev_elename, then copy elename_len units of curr_elename into it.
   The STR argument is not used. */
472 #define UPDATE_PREV_ELENAME(STR) \
473 do { \
474 u8_set(prev_elename , (ucs4_t) 0, MAX_ELENAME_LEN ); \
475 u8_move(prev_elename, curr_elename, elename_len); \
476 } while (0)
/* True when the token 5 or 3 slots below the token write cursor has type
   MOPEN. NOTE(review): the expansion is not parenthesized as a whole, and
   nothing checks that that many tokens have been pushed. */
477 #define PREV_MAPFILE() (TK_STACKX - 5)->tok_t == MOPEN || (TK_STACKX-3)->tok_t == MOPEN
/* Move STR to the first occurrence of map_key inside it, as located by
   u8_strstr. */
478 #define SET_MAPSTR(STR) (STR = u8_strstr(STR, map_key))
479
480
481 #endif
482
483
484 /* int lexer_lexmapfile */
485 /* #define INC_X() */
486 /* (int height, int width) */
487 /* { */
488 /* int x, y; */
489
490 /* /\* Give scanner_scanpixels a buffer and a len. Iterate through */
491 /* buf with buf[n]. If n == 0, do nothing. if n has a value, push x, */
492 /* push y, push (z = n << 24), push (ref_id = n >> 8) *\/ */
493 /* //scanner_scanpixels() */
494
495 /* for(i = 0; i < len; i++) */
496 /* if(buf[i] == 0) */
497 /* if(x == width) */
498 /* x = 0; */
499 /* else */
500
501
502
503
504 /* } */
505 /* fname_bytes = (uint8_t*)(DE_POP()->d_name); */
506 /* printf("d_name is %s\n", fname_bytes); */
507 /* for (fnp = filename, i = 0; i < FNAME_MAX; i += unit_size, fnp++) */
508 /* { unit_size = u8_mblen(fname_bytes + i, min(4, FNAME_MAX - i)); */
509 /* if (u8_mbtouc(fnp, fname_bytes + i, unit_size) == -1) //add ucs4 char to the filename */
510 /* FAIL("Lexer failed to convert ^%s to unicode\n", (fname_bytes + i)); */
511 /* if (*fnp == 0) //added a terminating char */
512 /* break; */
513 /* } */
514 /* if(u8_mbtouc(filename, DE_POP()->d_name, FNAME_MAXy) == -1) */
515 /* FAIL("Lexer failed to convert d_name into uint8_t\n"); */
516 /* ulc_fprintf(stdout, "filename is %11U\n c", filename); */