2 \brief lexical analyzer implementation for APC
3 \details The lexer manages two FIFO stacks. One for maintaining tokens, the
4 other for maintaining a list of files to be scanned. During
5 execution, the lexer will return a token from its token queue if any
6 are present. If not, the lexer will pop an element from its
7 file queue to 'scanner' to be tokenized. If the file queue is empty,
8 the lexer will instead call 'parsedir' to traverse the directory tree
9 and tokenize the results. If 'parsedir' does not generate any new
11 \author Jordan Lavatai
13 ----------------------------------------------------------------------------*/
27 #include <limits.h> //realpath, NAME_MAX, FPATH_MAX
32 #include "parser.tab.h"
34 #define DE_STACKSIZE 1024
37 #define TK_STACKSIZE 1024
44 int lexer_lexfile(const uint8_t*);
45 void lexer_pushtok(int, YYSTYPE
);
46 uint8_t const* lexer_get_current_filepath(void);
47 int lexer_lexfilename(uint8_t*);
48 struct dirent
* lexer_direntpa
[DE_STACKSIZE
],** lexer_direntpp
,** lexer_direntpb
;
51 int lexer_lexstring(uint8_t*, int);
53 int lexer_setstr(uint8_t*, int);
55 int scanner_init(void);
61 uint8_t const* current_filename
;
65 { YYSTYPE lval
; //token val
66 int tok_t
; //token type
67 } token_stack
[TK_STACKSIZE
], *tsp
, *tsx
;
69 /* Directory Entity Array/Stack
70 Simple array for keeping track of dirents yet to be processed by the scanner.
71 If this list is empty and there are no tokens, the lexer is done.
72 This array is populated by the scanner as a plain array, and is drained
73 locally by the lexer in FIFO order (oldest entry first).
/* Short aliases for the dirent array and its two cursors. */
75 #define DE_STACK (lexer_direntpa)
76 #define DE_STACKP (lexer_direntpp)
77 #define DE_STACKB (lexer_direntpb)
/* Number of entries lying between the base cursor and the top cursor. */
78 #define DE_LEN() (DE_STACKP - DE_STACKB)
/* Point both cursors at the start of the array, leaving DE_LEN() == 0. */
79 #define DE_INIT() (DE_STACKP = DE_STACKB = DE_STACK)
/* Yield the entry under the base cursor, then advance the base cursor, so
   entries come out oldest-first. No emptiness check is performed here. */
80 #define DE_POP() (*DE_STACKB++)
83 This is a FIFO stack whose pointers are a union of either a pointer to an
84 integer, or a pointer to two integers (a struct tok). This way, integers may
85 be added or removed from the stack either singularly (IPUSH/IPOP), or as a
86 full token of two integers (PUSH/POP).
87 An alignment error will occur if IPOP or IPUSH are used a non-even number of
/* Short aliases for the token array, its read cursor and its write cursor. */
90 #define TK_STACK (token_stack)
91 #define TK_STACKP (tsp)
92 #define TK_STACKX (tsx)
/* Number of tokens lying between the read cursor and the write cursor. */
93 #define TK_LEN() (TK_STACKX - TK_STACKP)
/* Point both cursors at the start of the array, leaving TK_LEN() == 0. */
94 #define TK_INIT() (TK_STACKP = TK_STACKX = TK_STACK)
/* Yield the token under the read cursor, then advance the read cursor.
   No emptiness check is performed here. */
95 #define TK_POP() (*TK_STACKP++)
/* Store a token under the write cursor, then advance the write cursor.
   Note the argument order: the macro takes (T, L) but the compound literal
   lists L first and T second (presumably value first, then type — confirm
   against the struct tok member order). No capacity check is performed here. */
96 #define TK_PUSH(T,L) (*TK_STACKX++ = (struct tok){L,T})
99 The initializer returns boolean true if an error occurs, which may be handled
106 return scanner_init();
110 If the token buffer is empty, 'lexer' will initialize the token buffer and
111 call 'lexer_scandir'. If SCAN_ERROR is returned, an error is printed
112 before sending a null return to bison. If 0 tokens are generated, the error
113 printing is skipped. In all other cases, 'yylval' is set, and the token's
114 integer representation is returned.
/* Sentinel value that the lexer's dispatch treats as a scanner failure. */
118 #define SCAN_ERROR -1
/* True when the token read cursor and write cursor coincide, i.e. there are
   no tokens waiting to be popped. */
119 #define TK_EMPTY (TK_STACKP == TK_STACKX)
122 fprintf(stderr,__VA_ARGS__); \
128 while (DE_LEN() > 0)//lex any directory entries in our stack
130 if (lexer_lexfile((uint8_t*)DE_POP()->d_name
) == 0)
131 FAIL("Lexer failed to tokenize [%s]\n",(*DE_STACKB
)->d_name
);
133 if (TK_EMPTY
) //if there are no tokens,
134 { TK_INIT(); //initialize the token stack back to 0
136 { case SCAN_ERROR
: //if an error occurred,
137 FAIL("Scanner error\n");
138 case 0: //if the the scanner finds no dirents,
139 goto done
; //then we are done
140 default: //if we found some elements to scan,
141 goto start
; //start over and lex them
154 This receiver takes a struct tok and pushes it to the FIFO stack.
/* Two-step stringifier: $(X) macro-expands X first and only then quotes it,
   so $(TK_STACKSIZE) yields the configured size ("1024") rather than the
   literal text "TK_STACKSIZE" that a single-level '#' would produce. */
#define LEXER_QUOTE_(X) #X
#define $(X) LEXER_QUOTE_(X) //stringifier
/* Message printed when the token queue is already at capacity. */
#define ERR_TK "Fatal: Generated over " $(TK_STACKSIZE) " tokens in one pass."
159 ( int tok
, YYSTYPE lval
)
160 { if (TK_LEN() >= TK_STACKSIZE
)
161 { fprintf(stderr
, ERR_TK
);
167 /* Lexical analysis of a file
168 Strips a filename to its base name, then sends it to lexer_lex
171 #define HIDDEN_WARNING "%s is hidden and will not be parsed!\n", filename
172 ( const uint8_t *filename
174 { static uint8_t fname
[FNAME_MAX
];
175 uint8_t *last_period
= NULL
, *iter
;
177 if (*filename
== '.')
178 { fprintf (stderr
, HIDDEN_WARNING
);
181 /* Copy the filename and remove its suffix */
182 u8_strncpy(fname
,filename
,FNAME_MAX
);
184 for (iter
= fname
; *iter
; iter
++) //find the last '.' char
187 if (last_period
) //if we found one,
188 *last_period
= 0; //truncate the string there
189 /* Register the current_filename */
190 current_filename
= filename
;
191 printf("lexer_lexfilename(%s)\n",fname
);
192 return lexer_lexfilename(fname
);
195 uint8_t const* lexer_get_current_filepath
197 { static uint8_t current_path
[FPATH_MAX
];
198 static uint8_t const* last_filename
;
199 if ((!last_filename
|| last_filename
!= current_filename
) &&
200 ((uint8_t*) realpath((char*)current_filename
, (char*)current_path
) != (uint8_t*) current_path
))
201 { perror("realpath: ");
204 return (const uint8_t*)current_path
;
207 /* Scan filename and push its tokens
209 int lexer_lexfilename
215 printf("|---- Begin lexerfilename on %s ----|\n", str
);
218 perror("Lexfilename:: str is NULL so fail\n");
220 /* Determine the filetype of str */
221 len
= u8_strlen(str
);
223 ntok
= lexer_lexstring(str
, len
);
225 /* Pass back filepath as end of statement operator */
226 filepath
= u8_strdup(lexer_get_current_filepath());
227 yylval
.str
= filepath
;
228 lexer_pushtok(NAME
, yylval
);
229 printf("Pushing filepath %s\n", filepath
);
232 printf("|---- Ending lexer_lexfilename on %s, %d tokens were lexed ----|\n", str
, ntok
);
236 /**************************/
237 /****Abandon All Hope******/
238 /**************************/
248 { int setname_len
, elename_len
, strlen
;
249 uint8_t* setname_end
, *elename_end
, *newstrt
;
250 uint8_t curr_setname
[MAX_SETNAME_LEN
] = {0};
251 uint8_t curr_elename
[MAX_ELENAME_LEN
] = {0};
255 SET_CURR_SETNAME(newstrt
);
256 SET_CURR_ELENAME(newstrt
);
258 { printf("Lexer_lexelemap:: previous file was mapfile*\n");
263 if(SETNAME_MATCHES())
267 printf("Lexer_lexelemap:: setname matches\n");
268 if(ELENAME_MATCHES())
275 UPDATE_PREV_ELENAME(newstrt
);
276 UPDATE_PREV_SETNAME(newstrt
);
278 return newstrt
- str
;
286 { int setname_len
, elename_len
;
287 uint8_t* setname_end
, *elename_end
, *newstrt
;
288 uint8_t curr_setname
[MAX_SETNAME_LEN
] = {0};
289 uint8_t curr_elename
[MAX_ELENAME_LEN
] = {0};
291 printf("Lexer_lexelemodel:: Begin str is %s\n", str
);
295 SET_CURR_SETNAME(newstrt
);
296 SET_CURR_ELENAME(newstrt
);
297 if(SETNAME_MATCHES())
298 { printf("Lexer_lexelemodel:: curr_setname(%s) matches prev_setname (%s)\n", curr_setname
, prev_setname
);
300 printf("Lexer_lexelemodel:: Deleted setname, newstrt is now %s\n", newstrt
);
303 if(ELENAME_MATCHES())
304 { printf("Lexer_lexelemodel:: elename matches\n");
310 UPDATE_PREV_ELENAME(newstrt
);
311 UPDATE_PREV_SETNAME(newstrt
);
313 return newstrt
- str
;
319 { int setname_len
, elename_len
;
320 uint8_t* setname_end
, *elename_end
, *newstrt
;
321 uint8_t curr_setname
[MAX_SETNAME_LEN
] = {0};
322 uint8_t curr_elename
[MAX_ELENAME_LEN
] = {0};
326 SET_CURR_SETNAME(newstrt
);
330 if( SETNAME_MATCHES())
335 UPDATE_PREV_SETNAME(newstrt
);
337 return newstrt
- str
;
343 { int setname_len
, elename_len
;
344 uint8_t* setname_end
, *elename_end
, *newstrt
;
345 uint8_t curr_setname
[MAX_SETNAME_LEN
] = {0};
346 uint8_t curr_elename
[MAX_ELENAME_LEN
] = {0};
350 SET_CURR_SETNAME(newstrt
);
351 if( SETNAME_MATCHES())
355 UPDATE_PREV_SETNAME(newstrt
);
357 return newstrt
- str
;
364 { int setname_len
, elename_len
;
365 uint8_t* setname_end
, *elename_end
, *newstrt
;
366 uint8_t curr_setname
[MAX_SETNAME_LEN
] = {0};
367 uint8_t curr_elename
[MAX_ELENAME_LEN
] = {0};
371 SET_CURR_SETNAME(newstrt
);
372 if( SETNAME_MATCHES())
374 if(REF((NEXT_TOK(newstrt
)))) //if NAME REF REF
376 UPDATE_PREV_SETNAME(newstrt
);
378 return newstrt
- str
;
385 { int setname_len
, elename_len
;
386 uint8_t* setname_end
, *elename_end
, *newstrt
;
387 uint8_t curr_setname
[MAX_SETNAME_LEN
] = {0};
388 uint8_t curr_elename
[MAX_ELENAME_LEN
] = {0};
392 SET_CURR_SETNAME(newstrt
);
393 SET_CURR_ELENAME(newstrt
);
394 if(SETNAME_MATCHES())
396 if(REF(NEXT_TOK(newstrt
))) //NAME REF REF, where is set_label
400 return newstrt
- str
;
406 { int setname_len
, elename_len
;
407 uint8_t* setname_end
, *elename_end
;
408 uint8_t curr_setname
[MAX_SETNAME_LEN
] = {0};
409 uint8_t curr_elename
[MAX_ELENAME_LEN
] = {0};
419 { int setname_len
, elename_len
;
420 uint8_t* setname_end
, *elename_end
, *newstrt
;
421 uint8_t curr_setname
[MAX_SETNAME_LEN
] = {0};
422 uint8_t curr_elename
[MAX_ELENAME_LEN
] = {0};
426 SET_CURR_SETNAME(newstrt
);
427 printf("prev_setname %s, curr_setname %s\n", prev_setname
, curr_setname
);
428 if(SETNAME_MATCHES())
434 return newstrt
- str
;
/* True (1) when the first code unit of STR is an ASCII decimal digit
   ('0'..'9'), false (0) otherwise.
   STR is parenthesized and evaluated exactly once, so pointer expressions
   such as REF(p + 1) and arguments with side effects are safe. The unsigned
   cast folds the two-sided range test into one comparison: anything below
   '0' wraps to a large value and fails the "<= 9" check. */
#define REF(STR) ((unsigned)((STR)[0] - '0') <= 9u)
/* Reassign STR (which must be an lvalue) to one past the '_' located by
   u8_strchr, i.e. drop the leading '_'-terminated token.
   NOTE(review): when u8_strchr finds no '_' the "+ 1" is applied to its
   failure result — confirm every caller guarantees a '_' is present. */
442 #define DEL_FTOK(STR) (STR = u8_strchr(STR, '_') + 1)
/* Same computation as DEL_FTOK, but only yields the pointer and leaves STR
   unmodified. The same missing-'_' caveat applies. */
443 #define NEXT_TOK(STR) (u8_strchr(STR, '_') + 1)
444 #define SET_CURR_SETNAME(STR) \
446 printf("Lexer_lexX:: setting curr_setname of str(%s)\n", STR); \
447 setname_end = u8_chr(STR, FNAME_MAX, '_'); \
448 setname_len = setname_end - str; \
449 u8_move(curr_setname, STR, setname_len); \
450 printf("Lexer_lexX:: curr_setname is now %s\n",curr_setname); \
452 #define SET_CURR_ELENAME(STR) \
454 printf("Lexer_lexX:: setting curr_elename of str(%s)\n", STR); \
455 setname_end = u8_chr(STR, FNAME_MAX, '_') + 1; \
456 if(REF(setname_end)) \
457 setname_end = u8_chr(setname_end, FNAME_MAX, '_') + 1; \
458 elename_end = u8_chr(setname_end, FNAME_MAX, '_'); \
459 elename_len = elename_end - setname_end; \
460 u8_move(curr_elename, setname_end, elename_len); \
461 printf("Lexer_lexX:: curr_elename is now %s\n", curr_elename); \
/* True when curr_setname and prev_setname compare equal under u8_strcmp.
   Both identifiers are resolved at the expansion site. */
464 #define SETNAME_MATCHES() (u8_strcmp(curr_setname, prev_setname) == 0)
/* True when curr_elename and prev_elename compare equal under u8_strcmp.
   Both identifiers are resolved at the expansion site. */
465 #define ELENAME_MATCHES() (u8_strcmp(curr_elename, prev_elename) == 0)
466 #define UPDATE_PREV_SETNAME(STR) \
468 printf("Lexer_lexX:: updating prev_setname from (%s)", prev_setname); \
469 u8_set(prev_setname , (ucs4_t) 0, MAX_SETNAME_LEN ); \
470 u8_move(prev_setname, curr_setname, setname_len); \
471 printf(" to %s\n", prev_setname); \
473 #define UPDATE_PREV_ELENAME(STR) \
475 u8_set(prev_elename , (ucs4_t) 0, MAX_ELENAME_LEN ); \
476 u8_move(prev_elename, curr_elename, elename_len); \
/* True when the token five slots or three slots below the token write cursor
   has type MOPEN.
   The whole expansion is parenthesized so the macro composes correctly with
   surrounding operators (!PREV_MAPFILE(), PREV_MAPFILE() && x, ...).
   NOTE(review): reads TK_STACKX[-5] and TK_STACKX[-3] unconditionally; this
   assumes at least five tokens have been pushed — confirm at call sites. */
#define PREV_MAPFILE() ((TK_STACKX - 5)->tok_t == MOPEN || (TK_STACKX - 3)->tok_t == MOPEN)
/* Reassign STR (which must be an lvalue) to whatever u8_strstr(STR, map_key)
   returns; map_key is resolved at the expansion site.
   NOTE(review): STR presumably becomes NULL when map_key does not occur —
   confirm callers check the result before dereferencing. */
479 #define SET_MAPSTR(STR) (STR = u8_strstr(STR, map_key))
485 /* int lexer_lexmapfile */
486 /* #define INC_X() */
487 /* (int height, int width) */
491 /* /\* Give scanner_scanpixels a buffer and a len. Iterate through */
492 /* buf with buf[n]. If n == 0, do nothing. if n has a value, push x, */
493 /* push y, push (z = n << 24), push (ref_id = n >> 8) *\/ */
494 /* //scanner_scanpixels() */
496 /* for(i = 0; i < len; i++) */
497 /* if(buf[i] == 0) */
506 /* fname_bytes = (uint8_t*)(DE_POP()->d_name); */
507 /* printf("d_name is %s\n", fname_bytes); */
508 /* for (fnp = filename, i = 0; i < FNAME_MAX; i += unit_size, fnp++) */
509 /* { unit_size = u8_mblen(fname_bytes + i, min(4, FNAME_MAX - i)); */
510 /* if (u8_mbtouc(fnp, fname_bytes + i, unit_size) == -1) //add ucs4 char to the filename */
511 /* FAIL("Lexer failed to convert ^%s to unicode\n", (fname_bytes + i)); */
512 /* if (*fnp == 0) //added a terminating char */
515 /* if(u8_mbtouc(filename, DE_POP()->d_name, FNAME_MAXy) == -1) */
516 /* FAIL("Lexer failed to convert d_name into uint8_t\n"); */
517 /* ulc_fprintf(stdout, "filename is %11U\n c", filename); */