/*!@file \brief lexical analyzer implementation for APC \details The lexer manages two FIFO stacks. One for maintaining tokens, the other for maintaining a list of files to be scanned. During execution, the lexer will return a token from its token queue if any are present. If not, the lexer will pop an element from its file queue to 'scanner' to be tokenized. If the file queue is empty, the lexer will instead call 'parsedir' to traverse the directory tree and tokenize the results. If 'parsedir' does not generate any new tokens, we are done. \author Jordan Lavatai \date Aug 2016 ----------------------------------------------------------------------------*/ /* Standard */ #include #include #include #include /* Posix */ #include #include #include #include #include #include #include #include //realpath, NAME_MAX, FPATH_MAX #include /* Local */ #include "apc.h" #include "parser.tab.h" #ifndef DE_STACKSIZE #define DE_STACKSIZE 1024 #endif #ifndef TK_STACKSIZE #define TK_STACKSIZE 1024 #endif /* Public */ int lexer_init(void); int lexer(void); int lexer_lexfile(const uint8_t*); void lexer_pushtok(int, YYSTYPE); uint8_t const* lexer_get_current_filepath(void); int lexer_lexfilename(uint8_t*); struct dirent* lexer_direntpa[DE_STACKSIZE],** lexer_direntpp,** lexer_direntpb; /* Private */ extern //lexer_fsm.rl int lexer_lexstring(uint8_t*, int); extern //lexer_fsm.rl int lexer_setstr(uint8_t*, int); extern //scanner.c int scanner_init(void); extern //scanner.c int scanner(void); extern //bison YYSTYPE yylval; static uint8_t const* current_filename; static struct tok { YYSTYPE lval; //token val int tok_t; //token type } token_stack[TK_STACKSIZE], *tsp, *tsx; /* Directory Entity Array/Stack Simple array for keeping track of dirents yet to be processed by the scanner. If this list is empty and there are no tokens, the lexer is done. This array is populated by the scanner as an array, and popped locally by the lexer in FIFO order. 
*/ #define DE_STACK (lexer_direntpa) #define DE_STACKP (lexer_direntpp) #define DE_STACKB (lexer_direntpb) #define DE_LEN() (DE_STACKP - DE_STACKB) #define DE_INIT() (DE_STACKP = DE_STACKB = DE_STACK) #define DE_POP() (*DE_STACKB++) /* Token Stack This is a FIFO stack whose pointers are a union of either a pointer to an integer, or a pointer to two integers (a struct tok). This way, integers may be added or removed from the stack either singularly (IPUSH/IPOP), or as a full token of two integers (PUSH/POP). An alignment error will occur if IPOP or IPUSH are used a non-even number of times in a sequence! */ #define TK_STACK (token_stack) #define TK_STACKP (tsp) #define TK_STACKX (tsx) #define TK_LEN() (TK_STACKX - TK_STACKP) #define TK_INIT() (TK_STACKP = TK_STACKX = TK_STACK) #define TK_POP() (*TK_STACKP++) #define TK_PUSH(T,L) (*TK_STACKX++ = (struct tok){L,T}) /* Initializer The initializer returns boolean true if an error occurs, which may be handled with standard errno. */ int lexer_init () { TK_INIT(); DE_INIT(); return scanner_init(); } /* Lexer If the token buffer is empty, 'lexer' will initialize the token buffer and call 'lexer_scandir'. If SCAN_ERROR is returned, an error is printed before sending a null return to bison. If 0 tokens are generated, the error printing is skipped. In all other cases, 'yylval' is set, and the token's integer representation is returned. */ #define $($)#$ #define SCAN_ERROR -1 #define TK_EMPTY (TK_STACKP == TK_STACKX) #define FAIL(...) 
\ do { \ fprintf(stderr,__VA_ARGS__); \ goto done; \ } while (0) int lexer () { struct tok token; start: while (DE_LEN() > 0)//lex any directory entries in our stack { if (lexer_lexfile((uint8_t*)DE_POP()->d_name) == 0) FAIL("Lexer failed to tokenize [%s]\n",(*DE_STACKB)->d_name); } if (TK_EMPTY) //if there are no tokens, { TK_INIT(); //initialize the token stack back to 0 switch (scanner()) { case SCAN_ERROR: //if an error occurred, FAIL("Scanner error\n"); case 0: //if the the scanner finds no dirents, goto done; //then we are done default: //if we found some elements to scan, goto start; //start over and lex them } } token = TK_POP(); yylval = token.lval; return token.tok_t; done: yylval.val = 0; return 0; } /* Token Receiver This receiver takes a struct tok and pushes it to the FIFO stack. */ #define $($)#$ //stringifier #define ERR_TK "Fatal: Generated over " $(TK_STACKSIZE) " tokens in one pass." void lexer_pushtok ( int tok, YYSTYPE lval ) { if (TK_LEN() >= TK_STACKSIZE) { fprintf(stderr, ERR_TK); exit(EXIT_FAILURE); } TK_PUSH(tok, lval); } /* Lexical analysis of a file Strips a filename to its base name, then sends it to lexer_lex */ #define HIDDEN_WARNING "%s is hidden and will not be parsed!\n", filename int lexer_lexfile ( const uint8_t *filename ) { static uint8_t fname[FNAME_MAX]; uint8_t *last_period = NULL, *iter; if (*filename == '.') { fprintf (stderr, HIDDEN_WARNING); return 0; } /* Copy the filename and remove its suffix */ u8_strncpy(fname,filename,FNAME_MAX); last_period = NULL; for (iter = fname; *iter; iter++) //find the last '.' 
char if (*iter == '.') last_period = iter; if (last_period) //if we found one, *last_period = 0; //truncate the string there /* Register the current_filename */ current_filename = filename; printf("lexer_lexfilename(%s)\n",fname); return lexer_lexfilename(fname); } uint8_t const* lexer_get_current_filepath () { static uint8_t current_path[FPATH_MAX]; static uint8_t const* last_filename; if ((!last_filename || last_filename != current_filename) && ((uint8_t*) realpath((char*)current_filename, (char*)current_path) != (uint8_t*) current_path)) { perror("realpath: "); return NULL; } return (const uint8_t*)current_path; } /* Scan filename and push the its tokens onto the stack */ int lexer_lexfilename ( uint8_t* str ) { int ntok, len; uint8_t *filepath; printf("|---- Begin lexerfilename on %s ----|\n", str); if(*str == 0) perror("Lexfilename:: str is NULL so fail\n"); /* Determine the filetype of str */ len = u8_strlen(str); ntok = lexer_lexstring(str, len); /* Pass back filepath as end of statment operator */ filepath = u8_strdup(lexer_get_current_filepath()); yylval.str = filepath; lexer_pushtok(NAME, yylval); printf("Pushing filepath %s\n", filepath); ntok++; printf("|---- Ending lexer_lexfilename on %s, %d tokens were lexed ----|\n", str, ntok); return ntok; } /**************************/ /****Abandon All Hope******/ /**************************/ /*** ***/ /*** ***/ /*** ***/ /*** ***/ #if 0 int lexer_lexelemap ( uint8_t* str) { int setname_len, elename_len, strlen; uint8_t* setname_end, *elename_end, *newstrt; uint8_t curr_setname[MAX_SETNAME_LEN] = {0}; uint8_t curr_elename[MAX_ELENAME_LEN] = {0}; newstrt = str; SET_CURR_SETNAME(newstrt); SET_CURR_ELENAME(newstrt); if(PREV_MAPFILE()) { printf("Lexer_lexelemap:: previous file was mapfile*\n"); SET_MAPSTR(newstrt); } else { if(SETNAME_MATCHES()) { DEL_FTOK(newstrt); if(REF(newstrt)) DEL_FTOK(newstrt); printf("Lexer_lexelemap:: setname matches\n"); if(ELENAME_MATCHES()) DEL_FTOK(newstrt); if(REF(newstrt)) 
DEL_FTOK(newstrt); } } UPDATE_PREV_ELENAME(newstrt); UPDATE_PREV_SETNAME(newstrt); return newstrt - str; } int lexer_lexelemodel (uint8_t* str) { int setname_len, elename_len; uint8_t* setname_end, *elename_end, *newstrt; uint8_t curr_setname[MAX_SETNAME_LEN] = {0}; uint8_t curr_elename[MAX_ELENAME_LEN] = {0}; printf("Lexer_lexelemodel:: Begin str is %s\n", str); newstrt = str; SET_CURR_SETNAME(newstrt); SET_CURR_ELENAME(newstrt); if(SETNAME_MATCHES()) { printf("Lexer_lexelemodel:: curr_setname(%s) matches prev_setname (%s)\n", curr_setname, prev_setname); DEL_FTOK(newstrt); printf("Lexer_lexelemodel:: Deleted setname, newstrt is now %s\n", newstrt); if(REF(newstrt)) DEL_FTOK(newstrt); if(ELENAME_MATCHES()) { printf("Lexer_lexelemodel:: elename matches\n"); DEL_FTOK(newstrt); if(REF(newstrt)) DEL_FTOK(newstrt); } } UPDATE_PREV_ELENAME(newstrt); UPDATE_PREV_SETNAME(newstrt); return newstrt - str; } int lexer_lexsetmap (uint8_t* str) { int setname_len, elename_len; uint8_t* setname_end, *elename_end, *newstrt; uint8_t curr_setname[MAX_SETNAME_LEN] = {0}; uint8_t curr_elename[MAX_ELENAME_LEN] = {0}; newstrt = str; SET_CURR_SETNAME(newstrt); if(PREV_MAPFILE()) SET_MAPSTR(newstrt); else if( SETNAME_MATCHES()) DEL_FTOK(newstrt); if(REF(newstrt)) DEL_FTOK(newstrt); UPDATE_PREV_SETNAME(newstrt); return newstrt - str; } int lexer_lexsetmodel (uint8_t* str) { int setname_len, elename_len; uint8_t* setname_end, *elename_end, *newstrt; uint8_t curr_setname[MAX_SETNAME_LEN] = {0}; uint8_t curr_elename[MAX_ELENAME_LEN] = {0}; newstrt = str; SET_CURR_SETNAME(newstrt); if( SETNAME_MATCHES()) DEL_FTOK(newstrt); if(REF(newstrt)) DEL_FTOK(newstrt); UPDATE_PREV_SETNAME(newstrt); return newstrt - str; } int lexer_lexsetvlink (uint8_t* str) { int setname_len, elename_len; uint8_t* setname_end, *elename_end, *newstrt; uint8_t curr_setname[MAX_SETNAME_LEN] = {0}; uint8_t curr_elename[MAX_ELENAME_LEN] = {0}; newstrt = str; SET_CURR_SETNAME(newstrt); if( SETNAME_MATCHES()) 
DEL_FTOK(newstrt); if(REF((NEXT_TOK(newstrt)))) //if NAME REF REF DEL_FTOK(newstrt); UPDATE_PREV_SETNAME(newstrt); return newstrt - str; } int lexer_lexelevlink (uint8_t* str) { int setname_len, elename_len; uint8_t* setname_end, *elename_end, *newstrt; uint8_t curr_setname[MAX_SETNAME_LEN] = {0}; uint8_t curr_elename[MAX_ELENAME_LEN] = {0}; newstrt = str; SET_CURR_SETNAME(newstrt); SET_CURR_ELENAME(newstrt); if(SETNAME_MATCHES()) { DEL_FTOK(newstrt); if(REF(NEXT_TOK(newstrt))) //NAME REF REF, where is set_label DEL_FTOK(newstrt); } return newstrt - str; } int lexer_lexsetolink (uint8_t* str) { int setname_len, elename_len; uint8_t* setname_end, *elename_end; uint8_t curr_setname[MAX_SETNAME_LEN] = {0}; uint8_t curr_elename[MAX_ELENAME_LEN] = {0}; return 0; //do nothing } int lexer_lexeleolink (uint8_t* str) { int setname_len, elename_len; uint8_t* setname_end, *elename_end, *newstrt; uint8_t curr_setname[MAX_SETNAME_LEN] = {0}; uint8_t curr_elename[MAX_ELENAME_LEN] = {0}; newstrt = str; SET_CURR_SETNAME(newstrt); printf("prev_setname %s, curr_setname %s\n", prev_setname, curr_setname); if(SETNAME_MATCHES()) { DEL_FTOK(newstrt); if(REF(newstrt)) DEL_FTOK(newstrt); } return newstrt - str; } #define REF(STR) (STR[0] <= 0x39 && STR[0] >= 0x30) #define DEL_FTOK(STR) (STR = u8_strchr(STR, '_') + 1) #define NEXT_TOK(STR) (u8_strchr(STR, '_') + 1) #define SET_CURR_SETNAME(STR) \ do { \ printf("Lexer_lexX:: setting curr_setname of str(%s)\n", STR); \ setname_end = u8_chr(STR, FNAME_MAX, '_'); \ setname_len = setname_end - str; \ u8_move(curr_setname, STR, setname_len); \ printf("Lexer_lexX:: curr_setname is now %s\n",curr_setname); \ } while (0) #define SET_CURR_ELENAME(STR) \ do { \ printf("Lexer_lexX:: setting curr_elename of str(%s)\n", STR); \ setname_end = u8_chr(STR, FNAME_MAX, '_') + 1; \ if(REF(setname_end)) \ setname_end = u8_chr(setname_end, FNAME_MAX, '_') + 1; \ elename_end = u8_chr(setname_end, FNAME_MAX, '_'); \ elename_len = elename_end - setname_end; \ 
u8_move(curr_elename, setname_end, elename_len); \ printf("Lexer_lexX:: curr_elename is now %s\n", curr_elename); \ } while (0) #define SETNAME_MATCHES() (u8_strcmp(curr_setname, prev_setname) == 0) #define ELENAME_MATCHES() (u8_strcmp(curr_elename, prev_elename) == 0) #define UPDATE_PREV_SETNAME(STR) \ do { \ printf("Lexer_lexX:: updating prev_setname from (%s)", prev_setname); \ u8_set(prev_setname , (ucs4_t) 0, MAX_SETNAME_LEN ); \ u8_move(prev_setname, curr_setname, setname_len); \ printf(" to %s\n", prev_setname); \ } while (0) #define UPDATE_PREV_ELENAME(STR) \ do { \ u8_set(prev_elename , (ucs4_t) 0, MAX_ELENAME_LEN ); \ u8_move(prev_elename, curr_elename, elename_len); \ } while (0) #define PREV_MAPFILE() (TK_STACKX - 5)->tok_t == MOPEN || (TK_STACKX-3)->tok_t == MOPEN #define SET_MAPSTR(STR) (STR = u8_strstr(STR, map_key)) #endif /* int lexer_lexmapfile */ /* #define INC_X() */ /* (int height, int width) */ /* { */ /* int x, y; */ /* /\* Give scanner_scanpixels a buffer and a len. Iterate through */ /* buf with buf[n]. If n == 0, do nothing. if n has a value, push x, */ /* push y, push (z = n << 24), push (ref_id = n >> 8) *\/ */ /* //scanner_scanpixels() */ /* for(i = 0; i < len; i++) */ /* if(buf[i] == 0) */ /* if(x == width) */ /* x = 0; */ /* else */ /* } */ /* fname_bytes = (uint8_t*)(DE_POP()->d_name); */ /* printf("d_name is %s\n", fname_bytes); */ /* for (fnp = filename, i = 0; i < FNAME_MAX; i += unit_size, fnp++) */ /* { unit_size = u8_mblen(fname_bytes + i, min(4, FNAME_MAX - i)); */ /* if (u8_mbtouc(fnp, fname_bytes + i, unit_size) == -1) //add ucs4 char to the filename */ /* FAIL("Lexer failed to convert ^%s to unicode\n", (fname_bytes + i)); */ /* if (*fnp == 0) //added a terminating char */ /* break; */ /* } */ /* if(u8_mbtouc(filename, DE_POP()->d_name, FNAME_MAXy) == -1) */ /* FAIL("Lexer failed to convert d_name into uint8_t\n"); */ /* ulc_fprintf(stdout, "filename is %11U\n c", filename); */