added everything to src
[henge/apc.git] / src / lexer.c
1 /*!@file
2 \brief lexical analyzer implementation for APC
3 \details The lexer manages two FIFO stacks. One for maintaining tokens, the
4 other for maintaining a list of files to be scanned. During
5 execution, the lexer will return a token from its token queue if any
6 are present. If not, the lexer will pop an element from its
7 file queue to 'scanner' to be tokenized. If the file queue is empty,
8 the lexer will instead call 'parsedir' to traverse the directory tree
9 and tokenize the results. If 'parsedir' does not generate any new
10 tokens, we are done.
11 \author Jordan Lavatai
12 \date Aug 2016
13 ----------------------------------------------------------------------------*/
14 /* Standard */
15 #include <stdio.h>
16 #include <string.h>
17 #include <stdint.h>
18 #include <errno.h>
19 /* Posix */
20 #include <unistd.h>
21 #include <unitypes.h>
22 #include <unistr.h>
23 #include <uniconv.h>
24 #include <uniname.h>
25 #include <unistdio.h>
26 #include <stdlib.h>
27 #include <limits.h> //realpath, NAME_MAX, FPATH_MAX
28 #include <dirent.h>
29
30 /* Local */
31 #include "parser.tab.h"
32 #ifndef DE_STACKSIZE
33 #define DE_STACKSIZE 1024
34 #endif
35 #ifndef TK_STACKSIZE
36 #define TK_STACKSIZE 1024
37 #endif
38
39
40 /* Public */
41 int lexer_init(void);
42 int lexer(void);
43 int lexer_lexfile(const uint8_t*);
44 void lexer_pushtok(int, YYSTYPE);
45 uint8_t const* lexer_get_current_filepath(void);
46 int lexer_lexfilename(uint8_t*);
47 struct dirent* lexer_direntpa[DE_STACKSIZE],** lexer_direntpp,** lexer_direntpb;
48 /* Private */
49 extern //lexer_fsm.rl
50 int lexer_lexstring(uint8_t*, int);
51 extern //lexer_fsm.rl
52 int lexer_setstr(uint8_t*, int);
53 extern //scanner.c
54 int scanner_init(void);
55 extern //scanner.c
56 int scanner(void);
57 static inline
58 int dredge_current_depth(void);
59 extern //bison
60 YYSTYPE yylval;
61 static
62 uint8_t const* current_filename;
63
64 static
65 struct tok
66 { YYSTYPE lval; //token val
67 int tok_t; //token type
68 } token_stack[TK_STACKSIZE], *tsp, *tsx;
69
/* Directory Entity Queue
   Array of dirents that have not yet been tokenized.  If this queue is empty
   and there are no tokens, the lexer is done.  The scanner is expected to
   fill the array (advancing DE_STACKP); the lexer consumes the entries in
   the order they were stored (FIFO) through DE_POP.
*/
#define DE_STACK  (lexer_direntpa)               /* backing array            */
#define DE_STACKP (lexer_direntpp)               /* end of pending entries   */
#define DE_STACKB (lexer_direntpb)               /* next entry to pop        */
#define DE_LEN()  (DE_STACKP - DE_STACKB)        /* entries still pending    */
#define DE_INIT() (DE_STACKP = DE_STACKB = DE_STACK) /* empty the queue      */
#define DE_POP()  (*DE_STACKB++)                 /* take the oldest entry    */
82
/* Token Queue
   FIFO queue of 'struct tok' entries stored in 'token_stack'.  TK_PUSH writes
   at TK_STACKX and advances it; TK_POP reads at TK_STACKP and advances it, so
   tokens come out in the order they went in.  TK_LEN is the number of tokens
   pushed but not yet popped, and TK_INIT rewinds both cursors to the start of
   the array.  None of these macros checks bounds; that is the caller's job.
*/
#define TK_STACK     (token_stack)
#define TK_STACKP    (tsp)
#define TK_STACKX    (tsx)
#define TK_LEN()     (TK_STACKX - TK_STACKP)
#define TK_INIT()    (TK_STACKP = TK_STACKX = TK_STACK)
#define TK_POP()     (*TK_STACKP++)
/* Arguments are parenthesized and bound by member name so that the macro
   stays correct for arbitrary argument expressions and member order. */
#define TK_PUSH(T,L) (*TK_STACKX++ = (struct tok){ .lval = (L), .tok_t = (T) })
98
/* Initializer
   Empties the directory-entry queue and the token queue, then initializes the
   scanner.  The result of 'scanner_init' is passed straight through; by this
   file's convention a true (non-zero) value means an error occurred, which
   may be handled with standard errno.
*/
int lexer_init
()
{ int scanner_status;

  DE_INIT();
  TK_INIT();
  scanner_status = scanner_init();
  return scanner_status;
}
109
110 /* Lexer
111 If the token buffer is empty, 'lexer' will initialize the token buffer and
112 call 'lexer_scandir'. If SCAN_ERROR is returned, an error is printed
113 before sending a null return to bison. If 0 tokens are generated, the error
114 printing is skipped. In all other cases, 'yylval' is set, and the token's
115 integer representation is returned.
116 */
117 int lexer
118 #define $($)#$
119 #define SCAN_ERROR -1
120 #define TK_EMPTY (TK_STACKP == TK_STACKX)
121 #define FAIL(...) \
122 do { \
123 fprintf(stderr,__VA_ARGS__); \
124 goto done; \
125 } while (0)
126 ()
127 { struct tok token;
128 start:
129 while (DE_LEN() > 0)//lex any directory entries in our stack
130 {
131 if (lexer_lexfile(DE_POP()->d_name) == 0)
132 FAIL("Lexer failed to tokenize [%s]\n",(*DE_STACKB)->d_name);
133 }
134 if (TK_EMPTY) //if there are no tokens,
135 { TK_INIT(); //initialize the token stack back to 0
136 switch (scanner())
137 { case SCAN_ERROR: //if an error occurred,
138 FAIL("Scanner error\n");
139 case 0: //if the the scanner finds no dirents,
140 goto done; //then we are done
141 default: //if we found some elements to scan,
142 goto start; //start over and lex them
143 }
144 }
145 token = TK_POP();
146 yylval = token.lval;
147 return token.tok_t;
148 done:
149 yylval.val = 0;
150 return 0;
151 }
152
153
154 /* Token Receiver
155 This receiver takes a struct tok and pushes it to the FIFO stack.
156 */
157 void lexer_pushtok
158 #define $($)#$ //stringifier
159 #define ERR_TK "Fatal: Generated over " $(TK_STACKSIZE) " tokens in one pass."
160 ( int tok, YYSTYPE lval )
161 { if (TK_LEN() >= TK_STACKSIZE)
162 { fprintf(stderr, ERR_TK);
163 exit(EXIT_FAILURE);
164 }
165 TK_PUSH(tok, lval);
166 }
167
168 /* Lexical analysis of a file
169 Strips a filename to its base name, then sends it to lexer_lex
170 */
171 int lexer_lexfile
172 #define HIDDEN_WARNING "%s is hidden and will not be parsed!\n", filename
173 ( const uint8_t *filename
174 )
175 { static uint8_t fname[FNAME_MAX];
176 uint8_t *last_period = NULL, *iter;
177
178 if (*filename == '.')
179 { fprintf (stderr, HIDDEN_WARNING);
180 return 0;
181 }
182 /* Copy the filename and remove its suffix */
183 u8_strncpy(fname,filename,FNAME_MAX);
184 last_period = NULL;
185 for (iter = fname; *iter; iter++) //find the last '.' char
186 if (*iter == '.')
187 last_period = iter;
188 if (last_period) //if we found one,
189 *last_period = 0; //truncate the string there
190 /* Register the current_filename */
191 current_filename = filename;
192 printf("lexer_lexfilename(%s)\n",fname);
193 return lexer_lexfilename(fname);
194 }
195
196 uint8_t const* lexer_get_current_filepath
197 ()
198 { static uint8_t current_path[FPATH_MAX];
199 static uint8_t const* last_filename;
200 if ((!last_filename || last_filename != current_filename) &&
201 ((uint8_t*) realpath(current_filename, current_path) != (uint8_t*) current_path))
202 { perror("realpath: ");
203 return NULL;
204 }
205 return (const uint8_t*)current_path;
206 }
207
208 /* Scan filename and push the its tokens
209 onto the stack */
210 int lexer_lexfilename
211 (uint8_t* str)
212 { int ntok, len;
213 uint8_t *filepath;
214
215
216 printf("|---- Begin lexerfilename on %s ----|\n", str);
217
218 if(*str == 0)
219 perror("Lexfilename:: str is NULL so fail\n");
220
221 /* Determine the filetype of str */
222 len = u8_strlen(str);
223
224 ntok += lexer_lexstring(str, len);
225
226 /* Pass back filepath as end of statment operator */
227 filepath = u8_strdup(lexer_get_current_filepath());
228 yylval.str = filepath;
229 lexer_pushtok(NAME, yylval);
230 printf("Pushing filepath %s\n", filepath);
231 ntok++;
232
233 printf("|---- Ending lexer_lexfilename on %s, %d tokens were lexed ----|\n", str, ntok);
234 return ntok;
235 }
236
237 /**************************/
238 /****Abandon All Hope******/
239 /**************************/
240 /*** ***/
241 /*** ***/
242 /*** ***/
243 /*** ***/
244
245 #if 0
246 int
247 lexer_lexelemap
248 ( uint8_t* str)
249 { int setname_len, elename_len, strlen;
250 uint8_t* setname_end, *elename_end, *newstrt;
251 uint8_t curr_setname[MAX_SETNAME_LEN] = {0};
252 uint8_t curr_elename[MAX_ELENAME_LEN] = {0};
253
254 newstrt = str;
255
256 SET_CURR_SETNAME(newstrt);
257 SET_CURR_ELENAME(newstrt);
258 if(PREV_MAPFILE())
259 { printf("Lexer_lexelemap:: previous file was mapfile*\n");
260 SET_MAPSTR(newstrt);
261 }
262 else
263 {
264 if(SETNAME_MATCHES())
265 { DEL_FTOK(newstrt);
266 if(REF(newstrt))
267 DEL_FTOK(newstrt);
268 printf("Lexer_lexelemap:: setname matches\n");
269 if(ELENAME_MATCHES())
270 DEL_FTOK(newstrt);
271 if(REF(newstrt))
272 DEL_FTOK(newstrt);
273 }
274 }
275
276 UPDATE_PREV_ELENAME(newstrt);
277 UPDATE_PREV_SETNAME(newstrt);
278
279 return newstrt - str;
280
281
282 }
283
284 int
285 lexer_lexelemodel
286 (uint8_t* str)
287 { int setname_len, elename_len;
288 uint8_t* setname_end, *elename_end, *newstrt;
289 uint8_t curr_setname[MAX_SETNAME_LEN] = {0};
290 uint8_t curr_elename[MAX_ELENAME_LEN] = {0};
291
292 printf("Lexer_lexelemodel:: Begin str is %s\n", str);
293
294 newstrt = str;
295
296 SET_CURR_SETNAME(newstrt);
297 SET_CURR_ELENAME(newstrt);
298 if(SETNAME_MATCHES())
299 { printf("Lexer_lexelemodel:: curr_setname(%s) matches prev_setname (%s)\n", curr_setname, prev_setname);
300 DEL_FTOK(newstrt);
301 printf("Lexer_lexelemodel:: Deleted setname, newstrt is now %s\n", newstrt);
302 if(REF(newstrt))
303 DEL_FTOK(newstrt);
304 if(ELENAME_MATCHES())
305 { printf("Lexer_lexelemodel:: elename matches\n");
306 DEL_FTOK(newstrt);
307 if(REF(newstrt))
308 DEL_FTOK(newstrt);
309 }
310 }
311 UPDATE_PREV_ELENAME(newstrt);
312 UPDATE_PREV_SETNAME(newstrt);
313
314 return newstrt - str;
315 }
316
317 int
318 lexer_lexsetmap
319 (uint8_t* str)
320 { int setname_len, elename_len;
321 uint8_t* setname_end, *elename_end, *newstrt;
322 uint8_t curr_setname[MAX_SETNAME_LEN] = {0};
323 uint8_t curr_elename[MAX_ELENAME_LEN] = {0};
324
325 newstrt = str;
326
327 SET_CURR_SETNAME(newstrt);
328 if(PREV_MAPFILE())
329 SET_MAPSTR(newstrt);
330 else
331 if( SETNAME_MATCHES())
332 DEL_FTOK(newstrt);
333 if(REF(newstrt))
334 DEL_FTOK(newstrt);
335
336 UPDATE_PREV_SETNAME(newstrt);
337
338 return newstrt - str;
339 }
340
341 int
342 lexer_lexsetmodel
343 (uint8_t* str)
344 { int setname_len, elename_len;
345 uint8_t* setname_end, *elename_end, *newstrt;
346 uint8_t curr_setname[MAX_SETNAME_LEN] = {0};
347 uint8_t curr_elename[MAX_ELENAME_LEN] = {0};
348
349 newstrt = str;
350
351 SET_CURR_SETNAME(newstrt);
352 if( SETNAME_MATCHES())
353 DEL_FTOK(newstrt);
354 if(REF(newstrt))
355 DEL_FTOK(newstrt);
356 UPDATE_PREV_SETNAME(newstrt);
357
358 return newstrt - str;
359
360 }
361
362 int
363 lexer_lexsetvlink
364 (uint8_t* str)
365 { int setname_len, elename_len;
366 uint8_t* setname_end, *elename_end, *newstrt;
367 uint8_t curr_setname[MAX_SETNAME_LEN] = {0};
368 uint8_t curr_elename[MAX_ELENAME_LEN] = {0};
369
370 newstrt = str;
371
372 SET_CURR_SETNAME(newstrt);
373 if( SETNAME_MATCHES())
374 DEL_FTOK(newstrt);
375 if(REF((NEXT_TOK(newstrt)))) //if NAME REF REF
376 DEL_FTOK(newstrt);
377 UPDATE_PREV_SETNAME(newstrt);
378
379 return newstrt - str;
380
381 }
382
383 int
384 lexer_lexelevlink
385 (uint8_t* str)
386 { int setname_len, elename_len;
387 uint8_t* setname_end, *elename_end, *newstrt;
388 uint8_t curr_setname[MAX_SETNAME_LEN] = {0};
389 uint8_t curr_elename[MAX_ELENAME_LEN] = {0};
390
391 newstrt = str;
392
393 SET_CURR_SETNAME(newstrt);
394 SET_CURR_ELENAME(newstrt);
395 if(SETNAME_MATCHES())
396 { DEL_FTOK(newstrt);
397 if(REF(NEXT_TOK(newstrt))) //NAME REF REF, where is set_label
398 DEL_FTOK(newstrt);
399 }
400
401 return newstrt - str;
402 }
403
/* lexer_lexsetolink (compiled out by the surrounding #if 0)
   Nothing is skipped for this kind of name: 'str' is not inspected and the
   function always returns 0.
*/
int
lexer_lexsetolink
(uint8_t* str)
{ (void) str;   //intentionally unused
  return 0;
}
416
417 int
418 lexer_lexeleolink
419 (uint8_t* str)
420 { int setname_len, elename_len;
421 uint8_t* setname_end, *elename_end, *newstrt;
422 uint8_t curr_setname[MAX_SETNAME_LEN] = {0};
423 uint8_t curr_elename[MAX_ELENAME_LEN] = {0};
424
425 newstrt = str;
426
427 SET_CURR_SETNAME(newstrt);
428 printf("prev_setname %s, curr_setname %s\n", prev_setname, curr_setname);
429 if(SETNAME_MATCHES())
430 { DEL_FTOK(newstrt);
431 if(REF(newstrt))
432 DEL_FTOK(newstrt);
433 }
434
435 return newstrt - str;
436
437
438 }
439
440
441
/* Helper macros for the lexer_lexX routines (compiled out with them).
   Several of them deliberately use the caller's locals by name:
   setname_end, setname_len, curr_setname, elename_end, elename_len and
   curr_elename, plus prev_setname, prev_elename and map_key, which are not
   declared in this part of the file.
   NOTE(review): these definitions follow the functions that use them; they
   have to be moved above those functions before the code is re-enabled.
   NOTE(review): DEL_FTOK, NEXT_TOK and SET_CURR_ELENAME add 1 to the result
   of u8_strchr/u8_chr without checking it; a name without the expected '_'
   yields an invalid pointer.
*/
/* True if STR starts with an ASCII digit ('0'..'9') */
#define REF(STR) ((STR)[0] <= 0x39 && (STR)[0] >= 0x30)
/* Advance STR to just past its next '_' */
#define DEL_FTOK(STR) ((STR) = u8_strchr((STR), '_') + 1)
/* Pointer to just past the next '_' in STR, without modifying STR */
#define NEXT_TOK(STR) (u8_strchr((STR), '_') + 1)
/* Copy everything before the first '_' of STR into curr_setname */
#define SET_CURR_SETNAME(STR)                                           \
  do {                                                                  \
    printf("Lexer_lexX:: setting curr_setname of str(%s)\n", (STR));    \
    setname_end = u8_chr((STR), FNAME_MAX, '_');                        \
    setname_len = setname_end - (STR);                                  \
    u8_move(curr_setname, (STR), setname_len);                          \
    printf("Lexer_lexX:: curr_setname is now %s\n",curr_setname);       \
  } while (0)
/* Copy the token after the set name (and after an optional numeric
   reference) into curr_elename */
#define SET_CURR_ELENAME(STR)                                           \
  do {                                                                  \
    printf("Lexer_lexX:: setting curr_elename of str(%s)\n", (STR));    \
    setname_end = u8_chr((STR), FNAME_MAX, '_') + 1;                    \
    if(REF(setname_end))                                                \
      setname_end = u8_chr(setname_end, FNAME_MAX, '_') + 1;            \
    elename_end = u8_chr(setname_end, FNAME_MAX, '_');                  \
    elename_len = elename_end - setname_end;                            \
    u8_move(curr_elename, setname_end, elename_len);                    \
    printf("Lexer_lexX:: curr_elename is now %s\n", curr_elename);      \
  } while (0)

#define SETNAME_MATCHES() (u8_strcmp(curr_setname, prev_setname) == 0)
#define ELENAME_MATCHES() (u8_strcmp(curr_elename, prev_elename) == 0)
/* Replace prev_setname with curr_setname (STR is not used) */
#define UPDATE_PREV_SETNAME(STR)                                        \
  do {                                                                  \
    printf("Lexer_lexX:: updating prev_setname from (%s)", prev_setname); \
    u8_set(prev_setname , (ucs4_t) 0, MAX_SETNAME_LEN );                \
    u8_move(prev_setname, curr_setname, setname_len);                   \
    printf(" to %s\n", prev_setname);                                   \
  } while (0)
/* Replace prev_elename with curr_elename (STR is not used) */
#define UPDATE_PREV_ELENAME(STR)                                        \
  do {                                                                  \
    u8_set(prev_elename , (ucs4_t) 0, MAX_ELENAME_LEN );                \
    u8_move(prev_elename, curr_elename, elename_len);                   \
  } while (0)
/* True if the token 5 or 3 slots before the write cursor is MOPEN.  The
   whole expansion is parenthesized so it can be combined with other
   operators safely. */
#define PREV_MAPFILE() ((TK_STACKX - 5)->tok_t == MOPEN || (TK_STACKX - 3)->tok_t == MOPEN)
/* Move STR to the first occurrence of map_key (NULL if there is none) */
#define SET_MAPSTR(STR) ((STR) = u8_strstr((STR), map_key))
481
482
483 #endif
484
485
486 /* int lexer_lexmapfile */
487 /* #define INC_X() */
488 /* (int height, int width) */
489 /* { */
490 /* int x, y; */
491
492 /* /\* Give scanner_scanpixels a buffer and a len. Iterate through */
493 /* buf with buf[n]. If n == 0, do nothing. if n has a value, push x, */
494 /* push y, push (z = n << 24), push (ref_id = n >> 8) *\/ */
495 /* //scanner_scanpixels() */
496
497 /* for(i = 0; i < len; i++) */
498 /* if(buf[i] == 0) */
499 /* if(x == width) */
500 /* x = 0; */
501 /* else */
502
503
504
505
506 /* } */
507 /* fname_bytes = (uint8_t*)(DE_POP()->d_name); */
508 /* printf("d_name is %s\n", fname_bytes); */
509 /* for (fnp = filename, i = 0; i < FNAME_MAX; i += unit_size, fnp++) */
510 /* { unit_size = u8_mblen(fname_bytes + i, min(4, FNAME_MAX - i)); */
511 /* if (u8_mbtouc(fnp, fname_bytes + i, unit_size) == -1) //add ucs4 char to the filename */
512 /* FAIL("Lexer failed to convert ^%s to unicode\n", (fname_bytes + i)); */
513 /* if (*fnp == 0) //added a terminating char */
514 /* break; */
515 /* } */
516 /* if(u8_mbtouc(filename, DE_POP()->d_name, FNAME_MAXy) == -1) */
517 /* FAIL("Lexer failed to convert d_name into uint8_t\n"); */
518 /* ulc_fprintf(stdout, "filename is %11U\n c", filename); */