2 \brief lexical analyzer implementation for APC
3 \details The lexer manages two FIFO stacks. One for maintaining tokens, the
4 other for maintaining a list of files to be scanned. During
5 execution, the lexer will return a token from its token queue if any
6 are present. If not, the lexer will pop an element from its
7 file queue to 'scanner' to be tokenized. If the file queue is empty,
8 the lexer will instead call 'parsedir' to traverse the directory tree
9 and tokenize the results. If 'parsedir' does not generate any new
11 \author Jordan Lavatai
13 ----------------------------------------------------------------------------*/
27 #include <limits.h> //realpath, NAME_MAX, FPATH_MAX
31 #include "parser.tab.h"
33 #define DE_STACKSIZE 1024
36 #define TK_STACKSIZE 1024
38 #ifndef MAX_SETNAME_LEN //max setname length
39 #define MAX_SETNAME_LEN 32
41 #ifndef MAX_ELENAME_LEN //max elename length
42 #define MAX_ELENAME_LEN 32
44 #define FNAME_MAX 1024
45 #define FPATH_MAX 8192
50 int lexer_lexfile(const uint8_t*);
51 void lexer_pushtok(int, YYSTYPE
);
52 uint8_t const* lexer_get_current_filepath(void);
53 int lexer_lexfilename(uint8_t*);
54 struct dirent
* lexer_direntpa
[DE_STACKSIZE
],** lexer_direntpp
,** lexer_direntpb
;
57 int lexer_lexstring(uint8_t*, int);
59 int lexer_setstr(uint8_t*, int);
61 int scanner_init(void);
65 int dredge_current_depth(void);
69 uint8_t const* current_filename
;
71 uint8_t prev_setname
[MAX_SETNAME_LEN
];
73 uint8_t prev_elename
[MAX_ELENAME_LEN
];
75 uint8_t map_key
[] = "~";
78 { YYSTYPE lval
; //token val
79 int tok_t
; //token type
80 } token_stack
[TK_STACKSIZE
], *tsp
, *tsx
;
82 /* Directory Entity Array/Stack
83 Simple array for keeping track of dirents yet to be processed by the scanner.
84 If this list is empty and there are no tokens, the lexer is done.
85 This array is populated by the scanner as an array, and is consumed locally by
86 the lexer in FIFO order (entries are popped from the bottom pointer).
88 #define DE_STACK (lexer_direntpa) //the dirent pointer array itself
89 #define DE_STACKP (lexer_direntpp) //upper pointer; DE_LEN() is measured up to here
90 #define DE_STACKB (lexer_direntpb) //lower pointer; DE_POP() reads from here
91 #define DE_LEN() (DE_STACKP - DE_STACKB) //number of entries between the two pointers
92 #define DE_INIT() (DE_STACKP = DE_STACKB = DE_STACK) //reset both pointers to the start of the array
93 #define DE_POP() (*DE_STACKB++) //yield the entry at DE_STACKB, then advance DE_STACKB
96 This is a FIFO stack whose pointers are a union of either a pointer to an
97 integer, or a pointer to two integers (a struct tok). This way, integers may
98 be added or removed from the stack either singularly (IPUSH/IPOP), or as a
99 full token of two integers (PUSH/POP).
100 An alignment error will occur if IPOP or IPUSH are used a non-even number of
103 #define TK_STACK (token_stack) //the token array itself
104 #define TK_STACKP (tsp) //read end; TK_POP() takes tokens from here
105 #define TK_STACKX (tsx) //write end; TK_PUSH() stores tokens here
106 #define TK_LEN() (TK_STACKX - TK_STACKP) //tokens pushed but not yet popped
107 #define TK_INIT() (TK_STACKP = TK_STACKX = TK_STACK) //reset both ends to the start of the array
108 #define TK_POP() (*TK_STACKP++) //yield the token at TK_STACKP, then advance TK_STACKP
109 #define TK_PUSH(T,L) (*TK_STACKX++ = (struct tok){L,T}) //store {lval = L, tok_t = T} at TK_STACKX, then advance it
112 The initializer returns boolean true if an error occurs, which may be handled
119 return scanner_init();
123 If the token buffer is empty, 'lexer' will initialize the token buffer and
124 call 'lexer_scandir'. If SCAN_ERROR is returned, an error is printed
125 before sending a null return to bison. If 0 tokens are generated, the error
126 printing is skipped. In all other cases, 'yylval' is set, and the token's
127 integer representation is returned.
131 #define SCAN_ERROR -1
132 #define TK_EMPTY (TK_STACKP == TK_STACKX)
135 fprintf(stderr,__VA_ARGS__); \
141 while (DE_LEN() > 0)//lex any directory entries in our stack
143 if (lexer_lexfile(DE_POP()->d_name
) == 0)
144 FAIL("Lexer failed to tokenize [%s]\n",(*DE_STACKB
)->d_name
); //NOTE(review): DE_POP() has already advanced DE_STACKB, so this names the entry after the one that failed - confirm
146 if (TK_EMPTY
) //if there are no tokens,
147 { TK_INIT(); //initialize the token stack back to 0
149 { case SCAN_ERROR
: //if an error occurred,
150 FAIL("Scanner error\n");
151 case 0: //if the scanner finds no dirents,
152 goto done
; //then we are done
153 default: //if we found some elements to scan,
154 goto start
; //start over and lex them
167 This receiver takes a token type and its value (together, one struct tok) and pushes them to the FIFO stack.
#define $($)#$ //stringifier: turns its argument into a string literal WITHOUT macro-expanding it
/* Expanding stringifier: X is macro-expanded during argument substitution,
   and only the expanded result is handed to $ for stringification. */
#define TK_XSTR(X) $(X)
/* Overflow message for the token stack. TK_XSTR (not $) is used so that the
   message shows the numeric value of TK_STACKSIZE rather than the literal
   text "TK_STACKSIZE". */
#define ERR_TK "Fatal: Generated over " TK_XSTR(TK_STACKSIZE) " tokens in one pass."
172 ( int tok
, YYSTYPE lval
)
173 { if (TK_LEN() >= TK_STACKSIZE
)
174 { fprintf(stderr
, ERR_TK
);
180 /* Lexical analysis of a file
181 Strips a filename to its base name, then sends it to lexer_lexfilename
184 #define HIDDEN_WARNING "%s is hidden and will not be parsed!\n", filename
185 ( const uint8_t *filename
187 { static uint8_t fname
[FNAME_MAX
];
188 uint8_t *last_period
= NULL
, *iter
;
190 if (*filename
== '.')
191 { fprintf (stderr
, HIDDEN_WARNING
);
194 /* Copy the filename and remove its suffix */
195 u8_strncpy(fname
,filename
,FNAME_MAX
);
197 for (iter
= fname
; *iter
; iter
++) //find the last '.' char
200 if (last_period
) //if we found one,
201 *last_period
= 0; //truncate the string there
202 /* Register the current_filename */
203 current_filename
= filename
;
204 printf("lexer_lexfilename(%s)\n",fname
);
205 return lexer_lexfilename(fname
);
208 uint8_t const* lexer_get_current_filepath
210 { static uint8_t current_path
[FPATH_MAX
];
211 static uint8_t const* last_filename
;
212 if ((!last_filename
|| last_filename
!= current_filename
) &&
213 ((uint8_t*) realpath(current_filename
, current_path
) != (uint8_t*) current_path
))
214 { perror("realpath: ");
217 return (const uint8_t*)current_path
;
220 /* Returns 1 on success, 0 on failure */
222 lexer_ismapfile(uint8_t* str
)
226 len
= u8_strlen(str
);
227 for(i
= 0; i
< len
; i
++)
233 /* Scan filename and push its tokens
235 int lexer_lexfilename
237 #define REF(STR) (STR[0] <= 0x39 && STR[0] >= 0x30)
238 #define DEL_FTOK(STR) (STR = u8_strchr(STR, '_') + 1)
239 #define NEXT_TOK(STR) (u8_strchr(STR, '_') + 1)
240 #define SET_CURR_SETNAME(STR) \
242 printf("setting curr_setname of str(%s)\n", STR); \
243 setname_end = u8_chr(STR, FNAME_MAX, '_'); \
244 setname_len = setname_end - str; \
245 u8_move(curr_setname, STR, setname_len); \
246 printf("curr_setname is now %s\n",curr_setname); \
248 #define SET_CURR_ELENAME(STR) \
250 printf("setting curr_elename of str(%s)\n", STR); \
251 setname_end = u8_chr(STR, FNAME_MAX, '_') + 1; \
252 if(REF(setname_end)) \
253 setname_end = u8_chr(setname_end, FNAME_MAX, '_') + 1; \
254 elename_end = u8_chr(setname_end, FNAME_MAX, '_'); \
255 elename_len = elename_end - setname_end; \
256 u8_move(curr_elename, setname_end, elename_len); \
257 printf("curr_elename is now %s\n", curr_elename); \
260 #define SETNAME_MATCHES() (u8_strcmp(curr_setname, prev_setname) == 0)
261 #define ELENAME_MATCHES() (u8_strcmp(curr_elename, prev_elename) == 0)
262 #define UPDATE_PREV_SETNAME(STR) \
264 printf("updating prev_setname from (%s)", prev_setname); \
265 u8_set(prev_setname , (ucs4_t) 0, MAX_SETNAME_LEN ); \
266 u8_move(prev_setname, curr_setname, setname_len); \
267 printf(" to %s\n", prev_setname); \
269 #define UPDATE_PREV_ELENAME(STR) \
271 u8_set(prev_elename , (ucs4_t) 0, MAX_ELENAME_LEN ); \
272 u8_move(prev_elename, curr_elename, elename_len); \
274 #define PREV_MAPFILE() (TK_STACKX - 5)->tok_t == MOPEN || (TK_STACKX-3)->tok_t == MOPEN
275 #define SET_MAPSTR(STR) (STR = u8_strstr(STR, map_key))
277 { int ntok
, len
, newstrt
;
279 typedef enum filetypes
{
292 printf("|---- Begin lexerfilename on %s ----|\n", str
);
295 perror("Lexfilename:: str is NULL so fail\n");
297 /* Determine the filetype of str */
298 len
= u8_strlen(str
);
299 newstrt
= lexer_setstr(str
,len
);
303 len
= u8_strlen(str
);
305 ntok
+= lexer_lexstring(str
, len
);
307 /* Need to add map variant name 'default' if user did not specify a
309 /* if(filetype == ele_map) */
310 /* { if(!u8_strchr(str, '_')) //map variant name not provided */
311 /* { yylval.str = "default"; */
312 /* lexer_pushtok(NAME, yylval); */
314 /* printf("Pushing default ele_map name\n"); */
318 /* Pass back filepath as end of statement operator */
319 filepath
= u8_strdup(lexer_get_current_filepath());
320 yylval
.str
= filepath
;
321 lexer_pushtok(NAME
, yylval
);
322 printf("Pushing filepath %s\n", filepath
);
325 printf("|---- Ending lexer_lexfilename on %s, %d tokens were lexed ----|\n", str
, ntok
);
332 { int setname_len
, elename_len
, strlen
;
333 uint8_t* setname_end
, *elename_end
, *newstrt
;
334 uint8_t curr_setname
[MAX_SETNAME_LEN
] = {0};
335 uint8_t curr_elename
[MAX_ELENAME_LEN
] = {0};
339 SET_CURR_SETNAME(newstrt
);
340 SET_CURR_ELENAME(newstrt
);
342 { printf("*previous file was mapfile*\n");
347 if(SETNAME_MATCHES())
351 printf("setname matches\n");
352 if(ELENAME_MATCHES())
358 UPDATE_PREV_ELENAME(newstrt
);
359 UPDATE_PREV_SETNAME(newstrt
);
361 return newstrt
- str
;
369 { int setname_len
, elename_len
;
370 uint8_t* setname_end
, *elename_end
, *newstrt
;
371 uint8_t curr_setname
[MAX_SETNAME_LEN
] = {0};
372 uint8_t curr_elename
[MAX_ELENAME_LEN
] = {0};
374 printf("In lexelemodel, str is %s\n", str
);
378 SET_CURR_SETNAME(newstrt
);
379 SET_CURR_ELENAME(newstrt
);
380 if(SETNAME_MATCHES())
381 { printf("in ele_model: setname matches\n");
383 printf("newstrt is now %s\n", newstrt
);
386 if(ELENAME_MATCHES())
387 { printf("in ele_model: elename matches\n");
393 UPDATE_PREV_ELENAME(newstrt
);
394 UPDATE_PREV_SETNAME(newstrt
);
396 return newstrt
- str
;
402 { int setname_len
, elename_len
;
403 uint8_t* setname_end
, *elename_end
, *newstrt
;
404 uint8_t curr_setname
[MAX_SETNAME_LEN
] = {0};
405 uint8_t curr_elename
[MAX_ELENAME_LEN
] = {0};
409 SET_CURR_SETNAME(newstrt
);
413 if( SETNAME_MATCHES())
418 UPDATE_PREV_SETNAME(newstrt
);
420 return newstrt
- str
;
426 { int setname_len
, elename_len
;
427 uint8_t* setname_end
, *elename_end
, *newstrt
;
428 uint8_t curr_setname
[MAX_SETNAME_LEN
] = {0};
429 uint8_t curr_elename
[MAX_ELENAME_LEN
] = {0};
433 SET_CURR_SETNAME(newstrt
);
434 if( SETNAME_MATCHES())
438 UPDATE_PREV_SETNAME(newstrt
);
440 return newstrt
- str
;
447 { int setname_len
, elename_len
;
448 uint8_t* setname_end
, *elename_end
, *newstrt
;
449 uint8_t curr_setname
[MAX_SETNAME_LEN
] = {0};
450 uint8_t curr_elename
[MAX_ELENAME_LEN
] = {0};
454 SET_CURR_SETNAME(newstrt
);
455 if( SETNAME_MATCHES())
457 if(REF((NEXT_TOK(newstrt
)))) //if NAME REF REF
459 UPDATE_PREV_SETNAME(newstrt
);
461 return newstrt
- str
;
468 { int setname_len
, elename_len
;
469 uint8_t* setname_end
, *elename_end
, *newstrt
;
470 uint8_t curr_setname
[MAX_SETNAME_LEN
] = {0};
471 uint8_t curr_elename
[MAX_ELENAME_LEN
] = {0};
475 SET_CURR_SETNAME(newstrt
);
476 SET_CURR_ELENAME(newstrt
);
477 if(SETNAME_MATCHES())
479 if(REF(NEXT_TOK(newstrt
))) //NAME REF REF, where is set_label
483 return newstrt
- str
;
489 { int setname_len
, elename_len
;
490 uint8_t* setname_end
, *elename_end
;
491 uint8_t curr_setname
[MAX_SETNAME_LEN
] = {0};
492 uint8_t curr_elename
[MAX_ELENAME_LEN
] = {0};
502 { int setname_len
, elename_len
;
503 uint8_t* setname_end
, *elename_end
, *newstrt
;
504 uint8_t curr_setname
[MAX_SETNAME_LEN
] = {0};
505 uint8_t curr_elename
[MAX_ELENAME_LEN
] = {0};
509 SET_CURR_SETNAME(newstrt
);
510 printf("prev_setname %s, curr_setname %s\n", prev_setname
, curr_setname
);
511 if(SETNAME_MATCHES())
517 return newstrt
- str
;
523 /**************************/
524 /****Abandon All Hope******/
525 /**************************/
533 /* int lexer_lexmapfile */
534 /* #define INC_X() */
535 /* (int height, int width) */
539 /* /\* Give scanner_scanpixels a buffer and a len. Iterate through */
540 /* buf with buf[n]. If n == 0, do nothing. if n has a value, push x, */
541 /* push y, push (z = n << 24), push (ref_id = n >> 8) *\/ */
542 /* //scanner_scanpixels() */
544 /* for(i = 0; i < len; i++) */
545 /* if(buf[i] == 0) */
554 /* fname_bytes = (uint8_t*)(DE_POP()->d_name); */
555 /* printf("d_name is %s\n", fname_bytes); */
556 /* for (fnp = filename, i = 0; i < FNAME_MAX; i += unit_size, fnp++) */
557 /* { unit_size = u8_mblen(fname_bytes + i, min(4, FNAME_MAX - i)); */
558 /* if (u8_mbtouc(fnp, fname_bytes + i, unit_size) == -1) //add ucs4 char to the filename */
559 /* FAIL("Lexer failed to convert ^%s to unicode\n", (fname_bytes + i)); */
560 /* if (*fnp == 0) //added a terminating char */
563 /* if(u8_mbtouc(filename, DE_POP()->d_name, FNAME_MAXy) == -1) */
564 /* FAIL("Lexer failed to convert d_name into uint8_t\n"); */
565 /* ulc_fprintf(stdout, "filename is %11U\n c", filename); */