2 \brief lexical analyzer implementation for APC
3 \details The lexer manages two FIFO stacks. One for maintaining tokens, the
4 other for maintaining a list of files to be scanned. During
5 execution, the lexer will return a token from its token queue if any
6 are present. If not, the lexer will pop an element from its
7 file queue to 'scanner' to be tokenized. If the file queue is empty,
8 the lexer will instead call 'parsedir' to traverse the directory tree
9 and tokenize the results. If 'parsedir' does not generate any new
11 \author Jordan Lavatai
13 ----------------------------------------------------------------------------*/
27 #include <limits.h> //realpath, NAME_MAX, FPATH_MAX
31 #include "parser.tab.h"
33 #define DE_STACKSIZE 1024
36 #define TK_STACKSIZE 1024
43 int lexer_lexfile(const uint8_t*);
44 void lexer_pushtok(int, YYSTYPE
);
45 uint8_t const* lexer_get_current_filepath(void);
46 int lexer_lexfilename(uint8_t*);
47 struct dirent
* lexer_direntpa
[DE_STACKSIZE
],** lexer_direntpp
,** lexer_direntpb
;
50 int lexer_lexstring(uint8_t*, int);
52 int lexer_setstr(uint8_t*, int);
54 int scanner_init(void);
60 uint8_t const* current_filename
;
64 { YYSTYPE lval
; //token val
65 int tok_t
; //token type
66 } token_stack
[TK_STACKSIZE
], *tsp
, *tsx
;
68 /* Directory Entity Array/Stack
69 Simple array for keeping track of dirents yet to be processed by the scanner.
70 If this list is empty and there are no tokens, the lexer is done.
71 This array is populated by the scanner as an array, and popped locally by the
72 lexer in FIFO order.
74 #define DE_STACK (lexer_direntpa) //the array of dirent pointers itself
75 #define DE_STACKP (lexer_direntpp) //upper bound used by DE_LEN; presumably advanced by the scanner as it fills the array - confirm
76 #define DE_STACKB (lexer_direntpb) //front of the queue; advanced by DE_POP
77 #define DE_LEN() (DE_STACKP - DE_STACKB) //number of entries between the front pointer and the upper bound
78 #define DE_INIT() (DE_STACKP = DE_STACKB = DE_STACK) //reset both pointers to the start of the array (empty queue)
79 #define DE_POP() (*DE_STACKB++) //yield the front entry, then advance the front pointer; performs no emptiness check
82 This is a FIFO stack whose pointers are a union of either a pointer to an
83 integer, or a pointer to two integers (a struct tok). This way, integers may
84 be added or removed from the stack either singularly (IPUSH/IPOP), or as a
85 full token of two integers (PUSH/POP).
86 An alignment error will occur if IPOP or IPUSH are used a non-even number of
89 #define TK_STACK (token_stack) //the token array itself
90 #define TK_STACKP (tsp) //read pointer; advanced by TK_POP
91 #define TK_STACKX (tsx) //write pointer; advanced by TK_PUSH
92 #define TK_LEN() (TK_STACKX - TK_STACKP) //number of tokens pushed but not yet popped
93 #define TK_INIT() (TK_STACKP = TK_STACKX = TK_STACK) //reset both pointers to the start of the array (empty queue)
94 #define TK_POP() (*TK_STACKP++) //yield the token at the read pointer, then advance; performs no emptiness check
95 #define TK_PUSH(T,L) (*TK_STACKX++ = (struct tok){L,T}) //store {value L, type T} at the write pointer, then advance; performs no bounds check
98 The initializer returns boolean true if an error occurs, which may be handled
105 return scanner_init();
109 If the token buffer is empty, 'lexer' will initialize the token buffer and
110 call 'lexer_scandir'. If SCAN_ERROR is returned, an error is printed
111 before sending a null return to bison. If 0 tokens are generated, the error
112 printing is skipped. In all other cases, 'yylval' is set, and the token's
113 integer representation is returned.
117 #define SCAN_ERROR -1
118 #define TK_EMPTY (TK_STACKP == TK_STACKX)
121 fprintf(stderr,__VA_ARGS__); \
127 while (DE_LEN() > 0)//lex any directory entries in our stack
129 if (lexer_lexfile(DE_POP()->d_name
) == 0)
130 FAIL("Lexer failed to tokenize [%s]\n",(*DE_STACKB
)->d_name
);
132 if (TK_EMPTY
) //if there are no tokens,
133 { TK_INIT(); //initialize the token stack back to 0
135 { case SCAN_ERROR
: //if an error occurred,
136 FAIL("Scanner error\n");
137 case 0: //if the the scanner finds no dirents,
138 goto done
; //then we are done
139 default: //if we found some elements to scan,
140 goto start
; //start over and lex them
153 This receiver takes a struct tok and pushes it to the FIFO stack.
// '$' stringifies its argument verbatim: an operand of '#' is never
// macro-expanded, so $(TK_STACKSIZE) alone would yield "TK_STACKSIZE".
#define $($)#$ //stringifier
// TK_STRINGIFY expands its argument first and only then stringifies it, so the
// error message reports the numeric stack size rather than the macro's name.
#define TK_STRINGIFY(X) $(X)
#define ERR_TK "Fatal: Generated over " TK_STRINGIFY(TK_STACKSIZE) " tokens in one pass."
158 ( int tok
, YYSTYPE lval
)
159 { if (TK_LEN() >= TK_STACKSIZE
)
160 { fprintf(stderr
, ERR_TK
);
166 /* Lexical analysis of a file
167 Strips a filename to its base name, then sends it to lexer_lex
170 #define HIDDEN_WARNING "%s is hidden and will not be parsed!\n", filename
171 ( const uint8_t *filename
173 { static uint8_t fname
[FNAME_MAX
];
174 uint8_t *last_period
= NULL
, *iter
;
176 if (*filename
== '.')
177 { fprintf (stderr
, HIDDEN_WARNING
);
180 /* Copy the filename and remove its suffix */
181 u8_strncpy(fname
,filename
,FNAME_MAX
);
183 for (iter
= fname
; *iter
; iter
++) //find the last '.' char
186 if (last_period
) //if we found one,
187 *last_period
= 0; //truncate the string there
188 /* Register the current_filename */
189 current_filename
= filename
;
190 printf("lexer_lexfilename(%s)\n",fname
);
191 return lexer_lexfilename(fname
);
194 uint8_t const* lexer_get_current_filepath
196 { static uint8_t current_path
[FPATH_MAX
];
197 static uint8_t const* last_filename
;
198 if ((!last_filename
|| last_filename
!= current_filename
) &&
199 ((uint8_t*) realpath(current_filename
, current_path
) != (uint8_t*) current_path
))
200 { perror("realpath: ");
203 return (const uint8_t*)current_path
;
206 /* Scan filename and push its tokens
208 int lexer_lexfilename
214 printf("|---- Begin lexerfilename on %s ----|\n", str
);
217 perror("Lexfilename:: str is NULL so fail\n");
219 /* Determine the filetype of str */
220 len
= u8_strlen(str
);
222 ntok
= lexer_lexstring(str
, len
);
224 /* Pass back filepath as end of statement operator */
225 filepath
= u8_strdup(lexer_get_current_filepath());
226 yylval
.str
= filepath
;
227 lexer_pushtok(NAME
, yylval
);
228 printf("Pushing filepath %s\n", filepath
);
231 printf("|---- Ending lexer_lexfilename on %s, %d tokens were lexed ----|\n", str
, ntok
);
235 /**************************/
236 /****Abandon All Hope******/
237 /**************************/
247 { int setname_len
, elename_len
, strlen
;
248 uint8_t* setname_end
, *elename_end
, *newstrt
;
249 uint8_t curr_setname
[MAX_SETNAME_LEN
] = {0};
250 uint8_t curr_elename
[MAX_ELENAME_LEN
] = {0};
254 SET_CURR_SETNAME(newstrt
);
255 SET_CURR_ELENAME(newstrt
);
257 { printf("Lexer_lexelemap:: previous file was mapfile*\n");
262 if(SETNAME_MATCHES())
266 printf("Lexer_lexelemap:: setname matches\n");
267 if(ELENAME_MATCHES())
274 UPDATE_PREV_ELENAME(newstrt
);
275 UPDATE_PREV_SETNAME(newstrt
);
277 return newstrt
- str
;
285 { int setname_len
, elename_len
;
286 uint8_t* setname_end
, *elename_end
, *newstrt
;
287 uint8_t curr_setname
[MAX_SETNAME_LEN
] = {0};
288 uint8_t curr_elename
[MAX_ELENAME_LEN
] = {0};
290 printf("Lexer_lexelemodel:: Begin str is %s\n", str
);
294 SET_CURR_SETNAME(newstrt
);
295 SET_CURR_ELENAME(newstrt
);
296 if(SETNAME_MATCHES())
297 { printf("Lexer_lexelemodel:: curr_setname(%s) matches prev_setname (%s)\n", curr_setname
, prev_setname
);
299 printf("Lexer_lexelemodel:: Deleted setname, newstrt is now %s\n", newstrt
);
302 if(ELENAME_MATCHES())
303 { printf("Lexer_lexelemodel:: elename matches\n");
309 UPDATE_PREV_ELENAME(newstrt
);
310 UPDATE_PREV_SETNAME(newstrt
);
312 return newstrt
- str
;
318 { int setname_len
, elename_len
;
319 uint8_t* setname_end
, *elename_end
, *newstrt
;
320 uint8_t curr_setname
[MAX_SETNAME_LEN
] = {0};
321 uint8_t curr_elename
[MAX_ELENAME_LEN
] = {0};
325 SET_CURR_SETNAME(newstrt
);
329 if( SETNAME_MATCHES())
334 UPDATE_PREV_SETNAME(newstrt
);
336 return newstrt
- str
;
342 { int setname_len
, elename_len
;
343 uint8_t* setname_end
, *elename_end
, *newstrt
;
344 uint8_t curr_setname
[MAX_SETNAME_LEN
] = {0};
345 uint8_t curr_elename
[MAX_ELENAME_LEN
] = {0};
349 SET_CURR_SETNAME(newstrt
);
350 if( SETNAME_MATCHES())
354 UPDATE_PREV_SETNAME(newstrt
);
356 return newstrt
- str
;
363 { int setname_len
, elename_len
;
364 uint8_t* setname_end
, *elename_end
, *newstrt
;
365 uint8_t curr_setname
[MAX_SETNAME_LEN
] = {0};
366 uint8_t curr_elename
[MAX_ELENAME_LEN
] = {0};
370 SET_CURR_SETNAME(newstrt
);
371 if( SETNAME_MATCHES())
373 if(REF((NEXT_TOK(newstrt
)))) //if NAME REF REF
375 UPDATE_PREV_SETNAME(newstrt
);
377 return newstrt
- str
;
384 { int setname_len
, elename_len
;
385 uint8_t* setname_end
, *elename_end
, *newstrt
;
386 uint8_t curr_setname
[MAX_SETNAME_LEN
] = {0};
387 uint8_t curr_elename
[MAX_ELENAME_LEN
] = {0};
391 SET_CURR_SETNAME(newstrt
);
392 SET_CURR_ELENAME(newstrt
);
393 if(SETNAME_MATCHES())
395 if(REF(NEXT_TOK(newstrt
))) //NAME REF REF, where is set_label
399 return newstrt
- str
;
405 { int setname_len
, elename_len
;
406 uint8_t* setname_end
, *elename_end
;
407 uint8_t curr_setname
[MAX_SETNAME_LEN
] = {0};
408 uint8_t curr_elename
[MAX_ELENAME_LEN
] = {0};
418 { int setname_len
, elename_len
;
419 uint8_t* setname_end
, *elename_end
, *newstrt
;
420 uint8_t curr_setname
[MAX_SETNAME_LEN
] = {0};
421 uint8_t curr_elename
[MAX_ELENAME_LEN
] = {0};
425 SET_CURR_SETNAME(newstrt
);
426 printf("prev_setname %s, curr_setname %s\n", prev_setname
, curr_setname
);
427 if(SETNAME_MATCHES())
433 return newstrt
- str
;
// Helpers for walking '_'-separated filename tokens.
// REF(STR): nonzero when the first byte of STR lies in 0x30..0x39 (ASCII digit).
// STR is evaluated twice, so it must be free of side effects.
#define REF(STR) ((STR)[0] <= 0x39 && (STR)[0] >= 0x30)
// DEL_FTOK(STR): reassigns STR to the byte just past its first '_'.
// NEXT_TOK(STR): yields that same position without modifying STR.
// NOTE(review): neither macro checks the u8_strchr result; presumably it is
// NULL when STR holds no '_' - confirm that callers guarantee one is present.
#define DEL_FTOK(STR) ((STR) = u8_strchr((STR), '_') + 1)
#define NEXT_TOK(STR) (u8_strchr((STR), '_') + 1)
443 #define SET_CURR_SETNAME(STR) \
445 printf("Lexer_lexX:: setting curr_setname of str(%s)\n", STR); \
446 setname_end = u8_chr(STR, FNAME_MAX, '_'); \
447 setname_len = setname_end - str; \
448 u8_move(curr_setname, STR, setname_len); \
449 printf("Lexer_lexX:: curr_setname is now %s\n",curr_setname); \
451 #define SET_CURR_ELENAME(STR) \
453 printf("Lexer_lexX:: setting curr_elename of str(%s)\n", STR); \
454 setname_end = u8_chr(STR, FNAME_MAX, '_') + 1; \
455 if(REF(setname_end)) \
456 setname_end = u8_chr(setname_end, FNAME_MAX, '_') + 1; \
457 elename_end = u8_chr(setname_end, FNAME_MAX, '_'); \
458 elename_len = elename_end - setname_end; \
459 u8_move(curr_elename, setname_end, elename_len); \
460 printf("Lexer_lexX:: curr_elename is now %s\n", curr_elename); \
463 #define SETNAME_MATCHES() (u8_strcmp(curr_setname, prev_setname) == 0) //nonzero when u8_strcmp reports curr_setname equal to prev_setname
464 #define ELENAME_MATCHES() (u8_strcmp(curr_elename, prev_elename) == 0) //nonzero when u8_strcmp reports curr_elename equal to prev_elename
465 #define UPDATE_PREV_SETNAME(STR) \
467 printf("Lexer_lexX:: updating prev_setname from (%s)", prev_setname); \
468 u8_set(prev_setname , (ucs4_t) 0, MAX_SETNAME_LEN ); \
469 u8_move(prev_setname, curr_setname, setname_len); \
470 printf(" to %s\n", prev_setname); \
472 #define UPDATE_PREV_ELENAME(STR) \
474 u8_set(prev_elename , (ucs4_t) 0, MAX_ELENAME_LEN ); \
475 u8_move(prev_elename, curr_elename, elename_len); \
// PREV_MAPFILE(): nonzero when the token 5 slots or 3 slots below the token
// write pointer (TK_STACKX) has type MOPEN. The whole expansion is
// parenthesized so the macro can be negated or combined with other operators.
// NOTE(review): reads below TK_STACKX without checking that that many tokens
// have been pushed - confirm that callers guarantee it.
#define PREV_MAPFILE() ((TK_STACKX - 5)->tok_t == MOPEN || (TK_STACKX - 3)->tok_t == MOPEN)
// SET_MAPSTR(STR): reassigns STR to the result of u8_strstr(STR, map_key).
#define SET_MAPSTR(STR) ((STR) = u8_strstr((STR), map_key))
484 /* int lexer_lexmapfile */
485 /* #define INC_X() */
486 /* (int height, int width) */
490 /* /\* Give scanner_scanpixels a buffer and a len. Iterate through */
491 /* buf with buf[n]. If n == 0, do nothing. if n has a value, push x, */
492 /* push y, push (z = n << 24), push (ref_id = n >> 8) *\/ */
493 /* //scanner_scanpixels() */
495 /* for(i = 0; i < len; i++) */
496 /* if(buf[i] == 0) */
505 /* fname_bytes = (uint8_t*)(DE_POP()->d_name); */
506 /* printf("d_name is %s\n", fname_bytes); */
507 /* for (fnp = filename, i = 0; i < FNAME_MAX; i += unit_size, fnp++) */
508 /* { unit_size = u8_mblen(fname_bytes + i, min(4, FNAME_MAX - i)); */
509 /* if (u8_mbtouc(fnp, fname_bytes + i, unit_size) == -1) //add ucs4 char to the filename */
510 /* FAIL("Lexer failed to convert ^%s to unicode\n", (fname_bytes + i)); */
511 /* if (*fnp == 0) //added a terminating char */
514 /* if(u8_mbtouc(filename, DE_POP()->d_name, FNAME_MAXy) == -1) */
515 /* FAIL("Lexer failed to convert d_name into uint8_t\n"); */
516 /* ulc_fprintf(stdout, "filename is %11U\n c", filename); */