2 \brief lexical analyzer implementation for APC
3 \details The lexer manages two FIFO stacks. One for maintaining tokens, the
4 other for maintaining a list of files to be scanned. During
5 execution, the lexer will return a token from its token queue if any
6 are present. If not, the lexer will pop an element from its
7 file queue to 'scanner' to be tokenized. If the file queue is empty,
8 the lexer will instead call 'parsedir' to traverse the directory tree
9 and tokenize the results. If 'parsedir' does not generate any new
11 \author Jordan Lavatai
13 ----------------------------------------------------------------------------*/
27 #include <limits.h> //realpath, NAME_MAX, FPATH_MAX
31 #include "parser.tab.h"
33 #define DE_STACKSIZE 1024
36 #define TK_STACKSIZE 1024
43 int lexer_lexfile(const uint8_t*);
44 void lexer_pushtok(int, YYSTYPE
);
45 uint8_t const* lexer_get_current_filepath(void);
46 int lexer_lexfilename(uint8_t*);
47 struct dirent
* lexer_direntpa
[DE_STACKSIZE
],** lexer_direntpp
,** lexer_direntpb
;
50 int lexer_lexstring(uint8_t*, int);
52 int lexer_setstr(uint8_t*, int);
54 int scanner_init(void);
58 int dredge_current_depth(void);
62 uint8_t const* current_filename
;
66 { YYSTYPE lval
; //token val
67 int tok_t
; //token type
68 } token_stack
[TK_STACKSIZE
], *tsp
, *tsx
;
70 /* Directory Entity Array/Stack
71 Simple array for keeping track of dirents yet to be processed by the scanner.
72 If this list is empty and there are no tokens, the lexer is done.
73 This array is populated by the scanner as an array, and consumed locally by the
74 lexer in FIFO order (entries are popped from the front).
/* Directory-entry queue accessors.
   DE_STACK  - the backing array (lexer_direntpa).
   DE_STACKP - end cursor (lexer_direntpp); presumably advanced by the scanner
               as it appends entries - nothing in this file section writes
               through it.
   DE_STACKB - front cursor (lexer_direntpb); entries are read from here.
   DE_LEN()  - number of entries between the two cursors, i.e. not yet popped.
   DE_INIT() - points both cursors back at the start of the array.
   DE_POP()  - yields the entry at the front cursor, then advances the cursor.
               It performs no emptiness check; the visible caller loops on
               DE_LEN() > 0 before popping. */
76 #define DE_STACK (lexer_direntpa)
77 #define DE_STACKP (lexer_direntpp)
78 #define DE_STACKB (lexer_direntpb)
79 #define DE_LEN() (DE_STACKP - DE_STACKB)
80 #define DE_INIT() (DE_STACKP = DE_STACKB = DE_STACK)
81 #define DE_POP() (*DE_STACKB++)
84 This is a FIFO stack whose pointers are a union of either a pointer to an
85 integer, or a pointer to two integers (a struct tok). This way, integers may
86 be added or removed from the stack either singularly (IPUSH/IPOP), or as a
87 full token of two integers (PUSH/POP).
88 An alignment error will occur if IPOP or IPUSH are used a non-even number of
/* Token queue accessors over token_stack.
   TK_STACKP   - read cursor (tsp): the next token to hand out.
   TK_STACKX   - write cursor (tsx): the next free slot.
   TK_LEN()    - tokens pushed but not yet popped (write minus read cursor).
   TK_INIT()   - rewinds both cursors to the start of token_stack.
   TK_POP()    - yields the token at the read cursor, then advances it; there
                 is no emptiness check.
   TK_PUSH(T,L)- stores a token at the write cursor, then advances it; there
                 is no capacity check inside the macro.  Note the argument
                 order (type T, value L) is the reverse of the initializer
                 order {L,T}; this assumes struct tok is laid out as
                 { lval; tok_t; } as the token_stack declaration suggests. */
91 #define TK_STACK (token_stack)
92 #define TK_STACKP (tsp)
93 #define TK_STACKX (tsx)
94 #define TK_LEN() (TK_STACKX - TK_STACKP)
95 #define TK_INIT() (TK_STACKP = TK_STACKX = TK_STACK)
96 #define TK_POP() (*TK_STACKP++)
97 #define TK_PUSH(T,L) (*TK_STACKX++ = (struct tok){L,T})
100 The initializer returns boolean true if an error occurs, which may be handled
107 return scanner_init();
111 If the token buffer is empty, 'lexer' will initialize the token buffer and
112 call 'lexer_scandir'. If SCAN_ERROR is returned, an error is printed
113 before sending a null return to bison. If 0 tokens are generated, the error
114 printing is skipped. In all other cases, 'yylval' is set, and the token's
115 integer representation is returned.
119 #define SCAN_ERROR -1
120 #define TK_EMPTY (TK_STACKP == TK_STACKX)
123 fprintf(stderr,__VA_ARGS__); \
129 while (DE_LEN() > 0)//lex any directory entries in our stack
131 if (lexer_lexfile(DE_POP()->d_name
) == 0)
132 FAIL("Lexer failed to tokenize [%s]\n",(*DE_STACKB
)->d_name
);
134 if (TK_EMPTY
) //if there are no tokens,
135 { TK_INIT(); //initialize the token stack back to 0
137 { case SCAN_ERROR
: //if an error occurred,
138 FAIL("Scanner error\n");
139 case 0: //if the the scanner finds no dirents,
140 goto done
; //then we are done
141 default: //if we found some elements to scan,
142 goto start
; //start over and lex them
155 This receiver takes a struct tok and pushes it to the FIFO stack.
/* Two-step stringification: the outer macro lets its argument be
   macro-expanded before the inner one applies '#'.  A one-step '#' pastes
   the argument's spelling, so the message used to read
   "Generated over TK_STACKSIZE tokens" instead of showing the limit. */
#define TK_STRINGIFY_(X) #X
#define TK_STRINGIFY(X) TK_STRINGIFY_(X)
/* Original stringifier name, kept for any remaining users; note that '$' in
   identifiers is a compiler extension, so new code should prefer
   TK_STRINGIFY. */
#define $($) TK_STRINGIFY($)
/* Message printed when more than TK_STACKSIZE tokens are generated. */
#define ERR_TK "Fatal: Generated over " TK_STRINGIFY(TK_STACKSIZE) " tokens in one pass."
160 ( int tok
, YYSTYPE lval
)
161 { if (TK_LEN() >= TK_STACKSIZE
)
162 { fprintf(stderr
, ERR_TK
);
168 /* Lexical analysis of a file
169 Strips a filename to its base name, then sends it to lexer_lex
172 #define HIDDEN_WARNING "%s is hidden and will not be parsed!\n", filename
173 ( const uint8_t *filename
175 { static uint8_t fname
[FNAME_MAX
];
176 uint8_t *last_period
= NULL
, *iter
;
178 if (*filename
== '.')
179 { fprintf (stderr
, HIDDEN_WARNING
);
182 /* Copy the filename and remove its suffix */
183 u8_strncpy(fname
,filename
,FNAME_MAX
);
185 for (iter
= fname
; *iter
; iter
++) //find the last '.' char
188 if (last_period
) //if we found one,
189 *last_period
= 0; //truncate the string there
190 /* Register the current_filename */
191 current_filename
= filename
;
192 printf("lexer_lexfilename(%s)\n",fname
);
193 return lexer_lexfilename(fname
);
196 uint8_t const* lexer_get_current_filepath
198 { static uint8_t current_path
[FPATH_MAX
];
199 static uint8_t const* last_filename
;
200 if ((!last_filename
|| last_filename
!= current_filename
) &&
201 ((uint8_t*) realpath(current_filename
, current_path
) != (uint8_t*) current_path
))
202 { perror("realpath: ");
205 return (const uint8_t*)current_path
;
208 /* Scan filename and push its tokens
210 int lexer_lexfilename
216 printf("|---- Begin lexerfilename on %s ----|\n", str
);
219 perror("Lexfilename:: str is NULL so fail\n");
221 /* Determine the filetype of str */
222 len
= u8_strlen(str
);
224 ntok
+= lexer_lexstring(str
, len
);
226 /* Pass back filepath as end of statement operator */
227 filepath
= u8_strdup(lexer_get_current_filepath());
228 yylval
.str
= filepath
;
229 lexer_pushtok(NAME
, yylval
);
230 printf("Pushing filepath %s\n", filepath
);
233 printf("|---- Ending lexer_lexfilename on %s, %d tokens were lexed ----|\n", str
, ntok
);
237 /**************************/
238 /****Abandon All Hope******/
239 /**************************/
249 { int setname_len
, elename_len
, strlen
;
250 uint8_t* setname_end
, *elename_end
, *newstrt
;
251 uint8_t curr_setname
[MAX_SETNAME_LEN
] = {0};
252 uint8_t curr_elename
[MAX_ELENAME_LEN
] = {0};
256 SET_CURR_SETNAME(newstrt
);
257 SET_CURR_ELENAME(newstrt
);
259 { printf("Lexer_lexelemap:: previous file was mapfile*\n");
264 if(SETNAME_MATCHES())
268 printf("Lexer_lexelemap:: setname matches\n");
269 if(ELENAME_MATCHES())
276 UPDATE_PREV_ELENAME(newstrt
);
277 UPDATE_PREV_SETNAME(newstrt
);
279 return newstrt
- str
;
287 { int setname_len
, elename_len
;
288 uint8_t* setname_end
, *elename_end
, *newstrt
;
289 uint8_t curr_setname
[MAX_SETNAME_LEN
] = {0};
290 uint8_t curr_elename
[MAX_ELENAME_LEN
] = {0};
292 printf("Lexer_lexelemodel:: Begin str is %s\n", str
);
296 SET_CURR_SETNAME(newstrt
);
297 SET_CURR_ELENAME(newstrt
);
298 if(SETNAME_MATCHES())
299 { printf("Lexer_lexelemodel:: curr_setname(%s) matches prev_setname (%s)\n", curr_setname
, prev_setname
);
301 printf("Lexer_lexelemodel:: Deleted setname, newstrt is now %s\n", newstrt
);
304 if(ELENAME_MATCHES())
305 { printf("Lexer_lexelemodel:: elename matches\n");
311 UPDATE_PREV_ELENAME(newstrt
);
312 UPDATE_PREV_SETNAME(newstrt
);
314 return newstrt
- str
;
320 { int setname_len
, elename_len
;
321 uint8_t* setname_end
, *elename_end
, *newstrt
;
322 uint8_t curr_setname
[MAX_SETNAME_LEN
] = {0};
323 uint8_t curr_elename
[MAX_ELENAME_LEN
] = {0};
327 SET_CURR_SETNAME(newstrt
);
331 if( SETNAME_MATCHES())
336 UPDATE_PREV_SETNAME(newstrt
);
338 return newstrt
- str
;
344 { int setname_len
, elename_len
;
345 uint8_t* setname_end
, *elename_end
, *newstrt
;
346 uint8_t curr_setname
[MAX_SETNAME_LEN
] = {0};
347 uint8_t curr_elename
[MAX_ELENAME_LEN
] = {0};
351 SET_CURR_SETNAME(newstrt
);
352 if( SETNAME_MATCHES())
356 UPDATE_PREV_SETNAME(newstrt
);
358 return newstrt
- str
;
365 { int setname_len
, elename_len
;
366 uint8_t* setname_end
, *elename_end
, *newstrt
;
367 uint8_t curr_setname
[MAX_SETNAME_LEN
] = {0};
368 uint8_t curr_elename
[MAX_ELENAME_LEN
] = {0};
372 SET_CURR_SETNAME(newstrt
);
373 if( SETNAME_MATCHES())
375 if(REF((NEXT_TOK(newstrt
)))) //if NAME REF REF
377 UPDATE_PREV_SETNAME(newstrt
);
379 return newstrt
- str
;
386 { int setname_len
, elename_len
;
387 uint8_t* setname_end
, *elename_end
, *newstrt
;
388 uint8_t curr_setname
[MAX_SETNAME_LEN
] = {0};
389 uint8_t curr_elename
[MAX_ELENAME_LEN
] = {0};
393 SET_CURR_SETNAME(newstrt
);
394 SET_CURR_ELENAME(newstrt
);
395 if(SETNAME_MATCHES())
397 if(REF(NEXT_TOK(newstrt
))) //NAME REF REF, where is set_label
401 return newstrt
- str
;
407 { int setname_len
, elename_len
;
408 uint8_t* setname_end
, *elename_end
;
409 uint8_t curr_setname
[MAX_SETNAME_LEN
] = {0};
410 uint8_t curr_elename
[MAX_ELENAME_LEN
] = {0};
420 { int setname_len
, elename_len
;
421 uint8_t* setname_end
, *elename_end
, *newstrt
;
422 uint8_t curr_setname
[MAX_SETNAME_LEN
] = {0};
423 uint8_t curr_elename
[MAX_ELENAME_LEN
] = {0};
427 SET_CURR_SETNAME(newstrt
);
428 printf("prev_setname %s, curr_setname %s\n", prev_setname
, curr_setname
);
429 if(SETNAME_MATCHES())
435 return newstrt
- str
;
/* Helpers for walking '_'-separated filename tokens.
   REF(STR)      - true when the first byte of STR is an ASCII digit
                   (0x30..0x39).
   DEL_FTOK(STR) - reassigns STR to the byte just past its first '_'.
   NEXT_TOK(STR) - the same position as an expression, leaving STR unchanged.
   Arguments are parenthesized so that pointer-arithmetic expressions such as
   REF(p + 1) index the intended pointer.  REF and DEL_FTOK expand their
   argument twice, so pass expressions without side effects.
   NOTE(review): u8_strchr presumably yields NULL when STR holds no '_'
   (strchr-like); the "+ 1" then produces an invalid pointer, so use
   DEL_FTOK/NEXT_TOK only on names known to contain another '_'. */
#define REF(STR) ((STR)[0] >= 0x30 && (STR)[0] <= 0x39)
#define DEL_FTOK(STR) ((STR) = u8_strchr((STR), '_') + 1)
#define NEXT_TOK(STR) (u8_strchr((STR), '_') + 1)
445 #define SET_CURR_SETNAME(STR) \
447 printf("Lexer_lexX:: setting curr_setname of str(%s)\n", STR); \
448 setname_end = u8_chr(STR, FNAME_MAX, '_'); \
449 setname_len = setname_end - str; \
450 u8_move(curr_setname, STR, setname_len); \
451 printf("Lexer_lexX:: curr_setname is now %s\n",curr_setname); \
453 #define SET_CURR_ELENAME(STR) \
455 printf("Lexer_lexX:: setting curr_elename of str(%s)\n", STR); \
456 setname_end = u8_chr(STR, FNAME_MAX, '_') + 1; \
457 if(REF(setname_end)) \
458 setname_end = u8_chr(setname_end, FNAME_MAX, '_') + 1; \
459 elename_end = u8_chr(setname_end, FNAME_MAX, '_'); \
460 elename_len = elename_end - setname_end; \
461 u8_move(curr_elename, setname_end, elename_len); \
462 printf("Lexer_lexX:: curr_elename is now %s\n", curr_elename); \
/* Name comparisons between the current and previous name buffers.
   Each macro is true when u8_strcmp reports the two buffers equal.
   They take no arguments and read curr_setname/prev_setname (respectively
   curr_elename/prev_elename) directly, so those identifiers must be in scope
   wherever the macros are expanded. */
465 #define SETNAME_MATCHES() (u8_strcmp(curr_setname, prev_setname) == 0)
466 #define ELENAME_MATCHES() (u8_strcmp(curr_elename, prev_elename) == 0)
467 #define UPDATE_PREV_SETNAME(STR) \
469 printf("Lexer_lexX:: updating prev_setname from (%s)", prev_setname); \
470 u8_set(prev_setname , (ucs4_t) 0, MAX_SETNAME_LEN ); \
471 u8_move(prev_setname, curr_setname, setname_len); \
472 printf(" to %s\n", prev_setname); \
474 #define UPDATE_PREV_ELENAME(STR) \
476 u8_set(prev_elename , (ucs4_t) 0, MAX_ELENAME_LEN ); \
477 u8_move(prev_elename, curr_elename, elename_len); \
/* PREV_MAPFILE() - true when the token 5 slots or 3 slots before the write
   cursor (TK_STACKX) has type MOPEN.
   The whole expansion is parenthesized so the macro can be negated or
   combined with other operators; without the outer parentheses
   "!PREV_MAPFILE()" negated only the first operand and
   "PREV_MAPFILE() && x" bound x to the second comparison only.
   NOTE(review): this reads behind TK_STACKX without checking that at least
   five tokens are queued - confirm callers guarantee that. */
#define PREV_MAPFILE() ((TK_STACKX - 5)->tok_t == MOPEN || (TK_STACKX - 3)->tok_t == MOPEN)
/* SET_MAPSTR(STR) reassigns STR to the result of u8_strstr(STR, map_key) and
   evaluates to that new value.  map_key is read directly and must be in scope
   at the expansion site.
   NOTE(review): u8_strstr presumably returns NULL when map_key does not occur
   (strstr-like) - check the result before dereferencing STR. */
480 #define SET_MAPSTR(STR) (STR = u8_strstr(STR, map_key))
486 /* int lexer_lexmapfile */
487 /* #define INC_X() */
488 /* (int height, int width) */
492 /* /\* Give scanner_scanpixels a buffer and a len. Iterate through */
493 /* buf with buf[n]. If n == 0, do nothing. if n has a value, push x, */
494 /* push y, push (z = n << 24), push (ref_id = n >> 8) *\/ */
495 /* //scanner_scanpixels() */
497 /* for(i = 0; i < len; i++) */
498 /* if(buf[i] == 0) */
507 /* fname_bytes = (uint8_t*)(DE_POP()->d_name); */
508 /* printf("d_name is %s\n", fname_bytes); */
509 /* for (fnp = filename, i = 0; i < FNAME_MAX; i += unit_size, fnp++) */
510 /* { unit_size = u8_mblen(fname_bytes + i, min(4, FNAME_MAX - i)); */
511 /* if (u8_mbtouc(fnp, fname_bytes + i, unit_size) == -1) //add ucs4 char to the filename */
512 /* FAIL("Lexer failed to convert ^%s to unicode\n", (fname_bytes + i)); */
513 /* if (*fnp == 0) //added a terminating char */
516 /* if(u8_mbtouc(filename, DE_POP()->d_name, FNAME_MAXy) == -1) */
517 /* FAIL("Lexer failed to convert d_name into uint8_t\n"); */
518 /* ulc_fprintf(stdout, "filename is %11U\n c", filename); */