fixes
[henge/apc.git] / src / lexer.c
1 /*!@file
2 \brief lexical analyzer implementation for APC
3 \details The lexer manages two FIFO stacks. One for maintaining tokens, the
4 other for maintaining a list of files to be scanned. During
5 execution, the lexer will return a token from its token queue if any
6 are present. If not, the lexer will pop an element from its
7 file queue to 'scanner' to be tokenized. If the file queue is empty,
8 the lexer will instead call 'parsedir' to traverse the directory tree
9 and tokenize the results. If 'parsedir' does not generate any new
10 tokens, we are done.
11 \author Jordan Lavatai
12 \date Aug 2016
13 ----------------------------------------------------------------------------*/
14 /* Standard */
15 #include <stdio.h>
16 #include <string.h>
17 #include <stdint.h>
18 #include <errno.h>
19 /* Posix */
20 #include <unistd.h>
21 #include <unitypes.h>
22 #include <unistr.h>
23 #include <uniconv.h>
24 #include <uniname.h>
25 #include <unistdio.h>
26 #include <stdlib.h>
27 #include <limits.h> //realpath, NAME_MAX, FPATH_MAX
28 #include <dirent.h>
29
30 /* Local */
31 #include "apc.h"
32 #include "parser.tab.h"
33 #ifndef DE_STACKSIZE
34 #define DE_STACKSIZE 1024
35 #endif
36 #ifndef TK_STACKSIZE
37 #define TK_STACKSIZE 1024
38 #endif
39
40
41 /* Public */
42 int lexer_init(void);
43 int lexer(void);
44 int lexer_lexfile(const uint8_t*);
45 void lexer_pushtok(int, YYSTYPE);
46 uint8_t const* lexer_get_current_filepath(void);
47 int lexer_lexfilename(uint8_t*);
48 struct dirent* lexer_direntpa[DE_STACKSIZE],** lexer_direntpp,** lexer_direntpb;
49 /* Private */
50 extern //lexer_fsm.rl
51 int lexer_lexstring(uint8_t*, int);
52 extern //lexer_fsm.rl
53 int lexer_setstr(uint8_t*, int);
54 extern //scanner.c
55 int scanner_init(void);
56 extern //scanner.c
57 int scanner(void);
58 extern //bison
59 YYSTYPE yylval;
60 static
61 uint8_t const* current_filename;
62
63 static
64 struct tok
65 { YYSTYPE lval; //token val
66 int tok_t; //token type
67 } token_stack[TK_STACKSIZE], *tsp, *tsx;
68
69 /* Directory Entity Array/Stack
70 Simple array for keeping track of dirents yet to be processed by the scanner.
71 If this list is empty and there are no tokens, the lexer is done.
72 This array is populated by the scanner as an array, and popped locally by the
73 lexer as a stack, and is popped as a FIFO stack.
74 */
75 #define DE_STACK (lexer_direntpa)
76 #define DE_STACKP (lexer_direntpp)
77 #define DE_STACKB (lexer_direntpb)
78 #define DE_LEN() (DE_STACKP - DE_STACKB)
79 #define DE_INIT() (DE_STACKP = DE_STACKB = DE_STACK)
80 #define DE_POP() (*DE_STACKB++)
81
82 /* Token Stack
83 This is a FIFO stack whose pointers are a union of either a pointer to an
84 integer, or a pointer to two integers (a struct tok). This way, integers may
85 be added or removed from the stack either singularly (IPUSH/IPOP), or as a
86 full token of two integers (PUSH/POP).
87 An alignment error will occur if IPOP or IPUSH are used a non-even number of
88 times in a sequence!
89 */
/* Token queue accessors.
   TK_STACKP is the read end (next token to pop) and TK_STACKX is the write
   end (next free slot); both walk forward through TK_STACK. */
#define TK_STACK (token_stack)
#define TK_STACKP (tsp)
#define TK_STACKX (tsx)
/* Number of tokens pushed but not yet popped. */
#define TK_LEN() (TK_STACKX - TK_STACKP)
/* Empty the queue by rewinding both ends to the start of the array. */
#define TK_INIT() (TK_STACKP = TK_STACKX = TK_STACK)
/* Yield the oldest token and advance the read end. No emptiness check. */
#define TK_POP() (*TK_STACKP++)
/* Append a token of type T with value L. Arguments are parenthesized so that
   expressions containing commas or low-precedence operators stay intact.
   No capacity check is done here. */
#define TK_PUSH(T,L) (*TK_STACKX++ = (struct tok){(L),(T)})
97
98 /* Initializer
99 The initializer returns boolean true if an error occurs, which may be handled
100 with standard errno.
101 */
int lexer_init
(void)
{ /* Rewind the directory-entry queue and the token queue to empty, then
     let the scanner perform its own setup. The scanner's result is passed
     straight back to the caller. */
  DE_INIT();
  TK_INIT();
  return scanner_init();
}
108
109 /* Lexer
110 If the token buffer is empty, 'lexer' will initialize the token buffer and
111 call 'scanner'. If SCAN_ERROR is returned, an error is printed
112 before sending a null return to bison. If 0 tokens are generated, the error
113 printing is skipped. In all other cases, 'yylval' is set, and the token's
114 integer representation is returned.
115 */
#define $($)#$
/* Value compared against scanner()'s result to detect failure. Parenthesized
   so the minus sign cannot bind to a neighbouring operator. */
#define SCAN_ERROR (-1)
/* True when every pushed token has already been popped. */
#define TK_EMPTY (TK_STACKP == TK_STACKX)
/* Print a printf-style message to stderr and jump to the enclosing
   function's 'done' label. Only usable where such a label exists. */
#define FAIL(...) \
  do { \
    fprintf(stderr,__VA_ARGS__); \
    goto done; \
  } while (0)
124 int lexer
125 ()
126 { struct tok token;
127 start:
128 while (DE_LEN() > 0)//lex any directory entries in our stack
129 {
130 if (lexer_lexfile((uint8_t*)DE_POP()->d_name) == 0)
131 FAIL("Lexer failed to tokenize [%s]\n",(*DE_STACKB)->d_name);
132 }
133 if (TK_EMPTY) //if there are no tokens,
134 { TK_INIT(); //initialize the token stack back to 0
135 switch (scanner())
136 { case SCAN_ERROR: //if an error occurred,
137 FAIL("Scanner error\n");
138 case 0: //if the the scanner finds no dirents,
139 goto done; //then we are done
140 default: //if we found some elements to scan,
141 goto start; //start over and lex them
142 }
143 }
144 token = TK_POP();
145 yylval = token.lval;
146 return token.tok_t;
147 done:
148 yylval.val = 0;
149 return 0;
150 }
151
152
153 /* Token Receiver
154 This receiver takes a struct tok and pushes it to the FIFO stack.
155 */
156 #define $($)#$ //stringifier
157 #define ERR_TK "Fatal: Generated over " $(TK_STACKSIZE) " tokens in one pass."
158 void lexer_pushtok
159 ( int tok,
160 YYSTYPE lval
161 )
162 { if (TK_LEN() >= TK_STACKSIZE)
163 { fprintf(stderr, ERR_TK);
164 exit(EXIT_FAILURE);
165 }
166 TK_PUSH(tok, lval);
167 }
168
169 /* Lexical analysis of a file
170 Strips a filename of its suffix, then sends it to lexer_lexfilename
171 */
172 #define HIDDEN_WARNING "%s is hidden and will not be parsed!\n", filename
173 int lexer_lexfile
174 ( const uint8_t *filename
175 )
176 { static uint8_t fname[FNAME_MAX];
177 uint8_t *last_period = NULL, *iter;
178
179 if (*filename == '.')
180 { fprintf (stderr, HIDDEN_WARNING);
181 return 0;
182 }
183 /* Copy the filename and remove its suffix */
184 u8_strncpy(fname,filename,FNAME_MAX);
185 last_period = NULL;
186 for (iter = fname; *iter; iter++) //find the last '.' char
187 if (*iter == '.')
188 last_period = iter;
189 if (last_period) //if we found one,
190 *last_period = 0; //truncate the string there
191 /* Register the current_filename */
192 current_filename = filename;
193 printf("lexer_lexfilename(%s)\n",fname);
194 return lexer_lexfilename(fname);
195 }
196
197 uint8_t const* lexer_get_current_filepath
198 ()
199 { static uint8_t current_path[FPATH_MAX];
200 static uint8_t const* last_filename;
201 if ((!last_filename || last_filename != current_filename) &&
202 ((uint8_t*) realpath((char*)current_filename, (char*)current_path) != (uint8_t*) current_path))
203 { perror("realpath: ");
204 return NULL;
205 }
206 return (const uint8_t*)current_path;
207 }
208
209 /* Scan filename and push its tokens
210 onto the stack */
211 int lexer_lexfilename
212 ( uint8_t* str
213 )
214 { int ntok, len;
215 uint8_t *filepath;
216
217
218 printf("|---- Begin lexerfilename on %s ----|\n", str);
219
220 if(*str == 0)
221 perror("Lexfilename:: str is NULL so fail\n");
222
223 /* Determine the filetype of str */
224 len = u8_strlen(str);
225
226 ntok = lexer_lexstring(str, len);
227
228 /* Pass back filepath as end of statment operator */
229 filepath = u8_strdup(lexer_get_current_filepath());
230 yylval.str = filepath;
231 lexer_pushtok(NAME, yylval);
232 printf("Pushing filepath %s\n", filepath);
233 ntok++;
234
235 printf("|---- Ending lexer_lexfilename on %s, %d tokens were lexed ----|\n", str, ntok);
236 return ntok;
237 }
238
239 /**************************/
240 /****Abandon All Hope******/
241 /**************************/
242 /*** ***/
243 /*** ***/
244 /*** ***/
245 /*** ***/
246
247 #if 0
248 int
249 lexer_lexelemap
250 ( uint8_t* str)
251 { int setname_len, elename_len, strlen;
252 uint8_t* setname_end, *elename_end, *newstrt;
253 uint8_t curr_setname[MAX_SETNAME_LEN] = {0};
254 uint8_t curr_elename[MAX_ELENAME_LEN] = {0};
255
256 newstrt = str;
257
258 SET_CURR_SETNAME(newstrt);
259 SET_CURR_ELENAME(newstrt);
260 if(PREV_MAPFILE())
261 { printf("Lexer_lexelemap:: previous file was mapfile*\n");
262 SET_MAPSTR(newstrt);
263 }
264 else
265 {
266 if(SETNAME_MATCHES())
267 { DEL_FTOK(newstrt);
268 if(REF(newstrt))
269 DEL_FTOK(newstrt);
270 printf("Lexer_lexelemap:: setname matches\n");
271 if(ELENAME_MATCHES())
272 DEL_FTOK(newstrt);
273 if(REF(newstrt))
274 DEL_FTOK(newstrt);
275 }
276 }
277
278 UPDATE_PREV_ELENAME(newstrt);
279 UPDATE_PREV_SETNAME(newstrt);
280
281 return newstrt - str;
282
283
284 }
285
286 int
287 lexer_lexelemodel
288 (uint8_t* str)
289 { int setname_len, elename_len;
290 uint8_t* setname_end, *elename_end, *newstrt;
291 uint8_t curr_setname[MAX_SETNAME_LEN] = {0};
292 uint8_t curr_elename[MAX_ELENAME_LEN] = {0};
293
294 printf("Lexer_lexelemodel:: Begin str is %s\n", str);
295
296 newstrt = str;
297
298 SET_CURR_SETNAME(newstrt);
299 SET_CURR_ELENAME(newstrt);
300 if(SETNAME_MATCHES())
301 { printf("Lexer_lexelemodel:: curr_setname(%s) matches prev_setname (%s)\n", curr_setname, prev_setname);
302 DEL_FTOK(newstrt);
303 printf("Lexer_lexelemodel:: Deleted setname, newstrt is now %s\n", newstrt);
304 if(REF(newstrt))
305 DEL_FTOK(newstrt);
306 if(ELENAME_MATCHES())
307 { printf("Lexer_lexelemodel:: elename matches\n");
308 DEL_FTOK(newstrt);
309 if(REF(newstrt))
310 DEL_FTOK(newstrt);
311 }
312 }
313 UPDATE_PREV_ELENAME(newstrt);
314 UPDATE_PREV_SETNAME(newstrt);
315
316 return newstrt - str;
317 }
318
319 int
320 lexer_lexsetmap
321 (uint8_t* str)
322 { int setname_len, elename_len;
323 uint8_t* setname_end, *elename_end, *newstrt;
324 uint8_t curr_setname[MAX_SETNAME_LEN] = {0};
325 uint8_t curr_elename[MAX_ELENAME_LEN] = {0};
326
327 newstrt = str;
328
329 SET_CURR_SETNAME(newstrt);
330 if(PREV_MAPFILE())
331 SET_MAPSTR(newstrt);
332 else
333 if( SETNAME_MATCHES())
334 DEL_FTOK(newstrt);
335 if(REF(newstrt))
336 DEL_FTOK(newstrt);
337
338 UPDATE_PREV_SETNAME(newstrt);
339
340 return newstrt - str;
341 }
342
343 int
344 lexer_lexsetmodel
345 (uint8_t* str)
346 { int setname_len, elename_len;
347 uint8_t* setname_end, *elename_end, *newstrt;
348 uint8_t curr_setname[MAX_SETNAME_LEN] = {0};
349 uint8_t curr_elename[MAX_ELENAME_LEN] = {0};
350
351 newstrt = str;
352
353 SET_CURR_SETNAME(newstrt);
354 if( SETNAME_MATCHES())
355 DEL_FTOK(newstrt);
356 if(REF(newstrt))
357 DEL_FTOK(newstrt);
358 UPDATE_PREV_SETNAME(newstrt);
359
360 return newstrt - str;
361
362 }
363
364 int
365 lexer_lexsetvlink
366 (uint8_t* str)
367 { int setname_len, elename_len;
368 uint8_t* setname_end, *elename_end, *newstrt;
369 uint8_t curr_setname[MAX_SETNAME_LEN] = {0};
370 uint8_t curr_elename[MAX_ELENAME_LEN] = {0};
371
372 newstrt = str;
373
374 SET_CURR_SETNAME(newstrt);
375 if( SETNAME_MATCHES())
376 DEL_FTOK(newstrt);
377 if(REF((NEXT_TOK(newstrt)))) //if NAME REF REF
378 DEL_FTOK(newstrt);
379 UPDATE_PREV_SETNAME(newstrt);
380
381 return newstrt - str;
382
383 }
384
385 int
386 lexer_lexelevlink
387 (uint8_t* str)
388 { int setname_len, elename_len;
389 uint8_t* setname_end, *elename_end, *newstrt;
390 uint8_t curr_setname[MAX_SETNAME_LEN] = {0};
391 uint8_t curr_elename[MAX_ELENAME_LEN] = {0};
392
393 newstrt = str;
394
395 SET_CURR_SETNAME(newstrt);
396 SET_CURR_ELENAME(newstrt);
397 if(SETNAME_MATCHES())
398 { DEL_FTOK(newstrt);
399 if(REF(NEXT_TOK(newstrt))) //NAME REF REF, where is set_label
400 DEL_FTOK(newstrt);
401 }
402
403 return newstrt - str;
404 }
405
/* Set olink file: nothing is ever skipped, so the offset is always 0.
   The unused scratch variables of the sibling functions have been dropped. */
int
lexer_lexsetolink
(uint8_t* str)
{ (void) str;   //intentionally unused
  return 0;
}
418
419 int
420 lexer_lexeleolink
421 (uint8_t* str)
422 { int setname_len, elename_len;
423 uint8_t* setname_end, *elename_end, *newstrt;
424 uint8_t curr_setname[MAX_SETNAME_LEN] = {0};
425 uint8_t curr_elename[MAX_ELENAME_LEN] = {0};
426
427 newstrt = str;
428
429 SET_CURR_SETNAME(newstrt);
430 printf("prev_setname %s, curr_setname %s\n", prev_setname, curr_setname);
431 if(SETNAME_MATCHES())
432 { DEL_FTOK(newstrt);
433 if(REF(newstrt))
434 DEL_FTOK(newstrt);
435 }
436
437 return newstrt - str;
438
439
440 }
441
442
443
/* Helper macros for the lexer_lexX functions. Several of them read and write
   the caller's locals by name (setname_end, setname_len, elename_end,
   elename_len, curr_setname, curr_elename) and the file-level prev_setname,
   prev_elename and map_key. Macro arguments are parenthesized so any
   expression may be passed. */

/* True when STR starts with an ASCII digit (0x30..0x39). */
#define REF(STR) ((STR)[0] <= 0x39 && (STR)[0] >= 0x30)
/* Move STR to just past its first '_'.
   NOTE(review): no check is made for a name that contains no '_'. */
#define DEL_FTOK(STR) ((STR) = u8_strchr((STR), '_') + 1)
/* Address just past the first '_' in STR, without modifying STR. */
#define NEXT_TOK(STR) (u8_strchr((STR), '_') + 1)
/* Copy everything before the first '_' of STR into curr_setname and record
   its length. The length is measured from STR itself (previously from the
   caller's 'str', which every visible call site passes as STR anyway).
   NOTE(review): the copy is not bounded by the size of curr_setname. */
#define SET_CURR_SETNAME(STR) \
  do { \
    printf("Lexer_lexX:: setting curr_setname of str(%s)\n", (STR)); \
    setname_end = u8_chr((STR), FNAME_MAX, '_'); \
    setname_len = setname_end - (STR); \
    u8_move(curr_setname, (STR), setname_len); \
    printf("Lexer_lexX:: curr_setname is now %s\n",curr_setname); \
  } while (0)
/* Copy the element name into curr_elename: the token after the set name, or
   the one after that when a numeric reference follows the set name.
   Reuses setname_end as scratch, so it no longer marks the set name's end. */
#define SET_CURR_ELENAME(STR) \
  do { \
    printf("Lexer_lexX:: setting curr_elename of str(%s)\n", (STR)); \
    setname_end = u8_chr((STR), FNAME_MAX, '_') + 1; \
    if(REF(setname_end)) \
      setname_end = u8_chr(setname_end, FNAME_MAX, '_') + 1; \
    elename_end = u8_chr(setname_end, FNAME_MAX, '_'); \
    elename_len = elename_end - setname_end; \
    u8_move(curr_elename, setname_end, elename_len); \
    printf("Lexer_lexX:: curr_elename is now %s\n", curr_elename); \
  } while (0)

#define SETNAME_MATCHES() (u8_strcmp(curr_setname, prev_setname) == 0)
#define ELENAME_MATCHES() (u8_strcmp(curr_elename, prev_elename) == 0)
/* Overwrite prev_setname with curr_setname (STR is not used). */
#define UPDATE_PREV_SETNAME(STR) \
  do { \
    printf("Lexer_lexX:: updating prev_setname from (%s)", prev_setname); \
    u8_set(prev_setname , (ucs4_t) 0, MAX_SETNAME_LEN ); \
    u8_move(prev_setname, curr_setname, setname_len); \
    printf(" to %s\n", prev_setname); \
  } while (0)
/* Overwrite prev_elename with curr_elename (STR is not used). */
#define UPDATE_PREV_ELENAME(STR) \
  do { \
    u8_set(prev_elename , (ucs4_t) 0, MAX_ELENAME_LEN ); \
    u8_move(prev_elename, curr_elename, elename_len); \
  } while (0)
/* True when the token 5 or 3 slots behind the write end is an MOPEN.
   Wrapped in parentheses so the '||' cannot merge with surrounding operators.
   NOTE(review): assumes at least 5 tokens are on the stack. */
#define PREV_MAPFILE() ((TK_STACKX - 5)->tok_t == MOPEN || (TK_STACKX - 3)->tok_t == MOPEN)
/* Move STR to the first occurrence of map_key inside it. */
#define SET_MAPSTR(STR) ((STR) = u8_strstr((STR), map_key))
483
484
485 #endif
486
487
488 /* int lexer_lexmapfile */
489 /* #define INC_X() */
490 /* (int height, int width) */
491 /* { */
492 /* int x, y; */
493
494 /* /\* Give scanner_scanpixels a buffer and a len. Iterate through */
495 /* buf with buf[n]. If n == 0, do nothing. if n has a value, push x, */
496 /* push y, push (z = n << 24), push (ref_id = n >> 8) *\/ */
497 /* //scanner_scanpixels() */
498
499 /* for(i = 0; i < len; i++) */
500 /* if(buf[i] == 0) */
501 /* if(x == width) */
502 /* x = 0; */
503 /* else */
504
505
506
507
508 /* } */
509 /* fname_bytes = (uint8_t*)(DE_POP()->d_name); */
510 /* printf("d_name is %s\n", fname_bytes); */
511 /* for (fnp = filename, i = 0; i < FNAME_MAX; i += unit_size, fnp++) */
512 /* { unit_size = u8_mblen(fname_bytes + i, min(4, FNAME_MAX - i)); */
513 /* if (u8_mbtouc(fnp, fname_bytes + i, unit_size) == -1) //add ucs4 char to the filename */
514 /* FAIL("Lexer failed to convert ^%s to unicode\n", (fname_bytes + i)); */
515 /* if (*fnp == 0) //added a terminating char */
516 /* break; */
517 /* } */
518 /* if(u8_mbtouc(filename, DE_POP()->d_name, FNAME_MAXy) == -1) */
519 /* FAIL("Lexer failed to convert d_name into uint8_t\n"); */
520 /* ulc_fprintf(stdout, "filename is %11U\n c", filename); */