From: ken Date: Sun, 8 Jan 2017 03:42:17 +0000 (-0800) Subject: lex revision X-Git-Url: https://www.kengrimes.com/gitweb/?p=henge%2Fapc.git;a=commitdiff_plain;h=b936c857622b4ab8e260a1d1500646cae4324935 lex revision --- diff --git a/src/lexer.c b/src/lexer.c index 362d62b..9e08e86 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -24,497 +24,38 @@ #include #include #include -#include //realpath, NAME_MAX, FPATH_MAX #include - /* Local */ #include "apc.h" #include "parser.tab.h" -#ifndef DE_STACKSIZE -#define DE_STACKSIZE 1024 -#endif -#ifndef TK_STACKSIZE -#define TK_STACKSIZE 1024 -#endif - - /* Public */ -int lexer_init(void); -int lexer(void); -int lexer_lexfile(const uint8_t*); -void lexer_pushtok(int, YYSTYPE); -uint8_t const* lexer_get_current_filepath(void); -int lexer_lexfilename(uint8_t*); -struct dirent* lexer_direntpa[DE_STACKSIZE],** lexer_direntpp,** lexer_direntpb; +int lexer_lexfile(uint8_t const*); /* Private */ -extern //lexer_fsm.rl -int lexer_lexstring(uint8_t*, int); -extern //lexer_fsm.rl -int lexer_setstr(uint8_t*, int); +extern //lexer.rl +int lexer_lexstring(uint8_t const*, int); extern //scanner.c -int scanner_init(void); +yypstate* apc_pstate; extern //scanner.c -int scanner(void); -extern //bison -YYSTYPE yylval; -static -uint8_t const* current_filename; - -static -struct tok -{ YYSTYPE lval; //token val - int tok_t; //token type -} token_stack[TK_STACKSIZE], *tsp, *tsx; - -/* Directory Entity Array/Stack - Simple array for keeping track of dirents yet to be processed by the scanner. - If this list is empty and there are no tokens, the lexer is done. - This array is populated by the scanner as an array, and popped locally by the - lexer as a stack, and is popped as a FIFO stack. -*/ -#define DE_STACK (lexer_direntpa) -#define DE_STACKP (lexer_direntpp) -#define DE_STACKB (lexer_direntpb) -#define DE_LEN() (DE_STACKP - DE_STACKB) -#define DE_INIT() (DE_STACKP = DE_STACKB = DE_STACK) -#define DE_POP() (*DE_STACKB++) - -/* Token Stack - This is a FIFO stack whose pointers are a union of either a pointer to an - integer, or a pointer to two integers (a struct tok). This way, integers may - be added or removed from the stack either singularly (IPUSH/IPOP), or as a - full token of two integers (PUSH/POP). - An alignment error will occur if IPOP or IPUSH are used a non-even number of - times in a sequence! -*/ -#define TK_STACK (token_stack) -#define TK_STACKP (tsp) -#define TK_STACKX (tsx) -#define TK_LEN() (TK_STACKX - TK_STACKP) -#define TK_INIT() (TK_STACKP = TK_STACKX = TK_STACK) -#define TK_POP() (*TK_STACKP++) -#define TK_PUSH(T,L) (*TK_STACKX++ = (struct tok){L,T}) - -/* Initializer - The initializer returns boolean true if an error occurs, which may be handled - with standard errno. -*/ -int lexer_init -() -{ TK_INIT(); - DE_INIT(); - return scanner_init(); -} - -/* Lexer - If the token buffer is empty, 'lexer' will initialize the token buffer and - call 'lexer_scandir'. If SCAN_ERROR is returned, an error is printed - before sending a null return to bison. If 0 tokens are generated, the error - printing is skipped. In all other cases, 'yylval' is set, and the token's - integer representation is returned. -*/ -#define $($)#$ -#define SCAN_ERROR -1 -#define TK_EMPTY (TK_STACKP == TK_STACKX) -#define FAIL(...) \ - do { \ - fprintf(stderr,__VA_ARGS__); \ - goto done; \ - } while (0) -int lexer -() -{ struct tok token; - start: - while (DE_LEN() > 0)//lex any directory entries in our stack - { - if (lexer_lexfile((uint8_t*)DE_POP()->d_name) == 0) - FAIL("Lexer failed to tokenize [%s]\n",(*DE_STACKB)->d_name); - } - if (TK_EMPTY) //if there are no tokens, - { TK_INIT(); //initialize the token stack back to 0 - switch (scanner()) - { case SCAN_ERROR: //if an error occurred, - FAIL("Scanner error\n"); - case 0: //if the the scanner finds no dirents, - goto done; //then we are done - default: //if we found some elements to scan, - goto start; //start over and lex them - } - } - token = TK_POP(); - yylval = token.lval; - return token.tok_t; - done: - yylval.val = 0; - return 0; -} - - -/* Token Receiver - This receiver takes a struct tok and pushes it to the FIFO stack. -*/ -#define $($)#$ //stringifier -#define ERR_TK "Fatal: Generated over " $(TK_STACKSIZE) " tokens in one pass." -void lexer_pushtok -( int tok, - YYSTYPE lval -) -{ if (TK_LEN() >= TK_STACKSIZE) - { fprintf(stderr, ERR_TK); - exit(EXIT_FAILURE); - } - TK_PUSH(tok, lval); -} +yycstate* apc_cstate; +#define PUSHTOK(T,L) yypush_parse(apc_pstate, T, L, apc_cstate) /* Lexical analysis of a file - Strips a filename to its base name, then sends it to lexer_lex + Strips a filename to its base name, then sends it to lexer_lexstring before + pushing a PATH token with the filename + Returns the number of tokens pushed to the parser. */ -#define HIDDEN_WARNING "%s is hidden and will not be parsed!\n", filename int lexer_lexfile -( const uint8_t *filename -) -{ static uint8_t fname[FNAME_MAX]; - uint8_t *last_period = NULL, *iter; - - if (*filename == '.') - { fprintf (stderr, HIDDEN_WARNING); - return 0; - } - /* Copy the filename and remove its suffix */ - u8_strncpy(fname,filename,FNAME_MAX); +( uint8_t const* filename ) +{ uint8_t const* last_period,* iter; + int ntok; last_period = NULL; - for (iter = fname; *iter; iter++) //find the last '.' char + for (iter = filename; *iter; iter++) if (*iter == '.') last_period = iter; - if (last_period) //if we found one, - *last_period = 0; //truncate the string there - /* Register the current_filename */ - current_filename = filename; - printf("lexer_lexfilename(%s)\n",fname); - return lexer_lexfilename(fname); -} - -uint8_t const* lexer_get_current_filepath -() -{ static uint8_t current_path[FPATH_MAX]; - static uint8_t const* last_filename; - if ((!last_filename || last_filename != current_filename) && - ((uint8_t*) realpath((char*)current_filename, (char*)current_path) != (uint8_t*) current_path)) - { perror("realpath: "); - return NULL; - } - return (const uint8_t*)current_path; -} - -/* Scan filename and push the its tokens - onto the stack */ -int lexer_lexfilename -( uint8_t* str -) -{ int ntok, len; - uint8_t *filepath; - - - printf("|---- Begin lexerfilename on %s ----|\n", str); - - if(*str == 0) - perror("Lexfilename:: str is NULL so fail\n"); - - /* Determine the filetype of str */ - len = u8_strlen(str); - - ntok = lexer_lexstring(str, len); - - /* Pass back filepath as end of statment operator */ - filepath = u8_strdup(lexer_get_current_filepath()); - yylval.str = filepath; - lexer_pushtok(NAME, yylval); - printf("Pushing filepath %s\n", filepath); - ntok++; - - printf("|---- Ending lexer_lexfilename on %s, %d tokens were lexed ----|\n", str, ntok); - return ntok; -} - -/**************************/ -/****Abandon All Hope******/ -/**************************/ -/*** ***/ -/*** ***/ -/*** ***/ -/*** ***/ - -#if 0 -int -lexer_lexelemap -( uint8_t* str) -{ int setname_len, elename_len, strlen; - uint8_t* setname_end, *elename_end, *newstrt; - uint8_t curr_setname[MAX_SETNAME_LEN] = {0}; - uint8_t curr_elename[MAX_ELENAME_LEN] = {0}; - - newstrt = str; - - SET_CURR_SETNAME(newstrt); - SET_CURR_ELENAME(newstrt); - if(PREV_MAPFILE()) - { printf("Lexer_lexelemap:: previous file was mapfile*\n"); - SET_MAPSTR(newstrt); - } - else - { - if(SETNAME_MATCHES()) - { DEL_FTOK(newstrt); - if(REF(newstrt)) - DEL_FTOK(newstrt); - printf("Lexer_lexelemap:: setname matches\n"); - if(ELENAME_MATCHES()) - DEL_FTOK(newstrt); - if(REF(newstrt)) - DEL_FTOK(newstrt); - } - } - - UPDATE_PREV_ELENAME(newstrt); - UPDATE_PREV_SETNAME(newstrt); - - return newstrt - str; - - -} - -int -lexer_lexelemodel -(uint8_t* str) -{ int setname_len, elename_len; - uint8_t* setname_end, *elename_end, *newstrt; - uint8_t curr_setname[MAX_SETNAME_LEN] = {0}; - uint8_t curr_elename[MAX_ELENAME_LEN] = {0}; - - printf("Lexer_lexelemodel:: Begin str is %s\n", str); - - newstrt = str; - - SET_CURR_SETNAME(newstrt); - SET_CURR_ELENAME(newstrt); - if(SETNAME_MATCHES()) - { printf("Lexer_lexelemodel:: curr_setname(%s) matches prev_setname (%s)\n", curr_setname, prev_setname); - DEL_FTOK(newstrt); - printf("Lexer_lexelemodel:: Deleted setname, newstrt is now %s\n", newstrt); - if(REF(newstrt)) - DEL_FTOK(newstrt); - if(ELENAME_MATCHES()) - { printf("Lexer_lexelemodel:: elename matches\n"); - DEL_FTOK(newstrt); - if(REF(newstrt)) - DEL_FTOK(newstrt); - } - } - UPDATE_PREV_ELENAME(newstrt); - UPDATE_PREV_SETNAME(newstrt); - - return newstrt - str; -} - -int -lexer_lexsetmap -(uint8_t* str) -{ int setname_len, elename_len; - uint8_t* setname_end, *elename_end, *newstrt; - uint8_t curr_setname[MAX_SETNAME_LEN] = {0}; - uint8_t curr_elename[MAX_ELENAME_LEN] = {0}; - - newstrt = str; - - SET_CURR_SETNAME(newstrt); - if(PREV_MAPFILE()) - SET_MAPSTR(newstrt); - else - if( SETNAME_MATCHES()) - DEL_FTOK(newstrt); - if(REF(newstrt)) - DEL_FTOK(newstrt); - - UPDATE_PREV_SETNAME(newstrt); - - return newstrt - str; -} - -int -lexer_lexsetmodel -(uint8_t* str) -{ int setname_len, elename_len; - uint8_t* setname_end, *elename_end, *newstrt; - uint8_t curr_setname[MAX_SETNAME_LEN] = {0}; - uint8_t curr_elename[MAX_ELENAME_LEN] = {0}; - - newstrt = str; - - SET_CURR_SETNAME(newstrt); - if( SETNAME_MATCHES()) - DEL_FTOK(newstrt); - if(REF(newstrt)) - DEL_FTOK(newstrt); - UPDATE_PREV_SETNAME(newstrt); - - return newstrt - str; - -} - -int -lexer_lexsetvlink -(uint8_t* str) -{ int setname_len, elename_len; - uint8_t* setname_end, *elename_end, *newstrt; - uint8_t curr_setname[MAX_SETNAME_LEN] = {0}; - uint8_t curr_elename[MAX_ELENAME_LEN] = {0}; - - newstrt = str; - - SET_CURR_SETNAME(newstrt); - if( SETNAME_MATCHES()) - DEL_FTOK(newstrt); - if(REF((NEXT_TOK(newstrt)))) //if NAME REF REF - DEL_FTOK(newstrt); - UPDATE_PREV_SETNAME(newstrt); - - return newstrt - str; - -} - -int -lexer_lexelevlink -(uint8_t* str) -{ int setname_len, elename_len; - uint8_t* setname_end, *elename_end, *newstrt; - uint8_t curr_setname[MAX_SETNAME_LEN] = {0}; - uint8_t curr_elename[MAX_ELENAME_LEN] = {0}; - - newstrt = str; - - SET_CURR_SETNAME(newstrt); - SET_CURR_ELENAME(newstrt); - if(SETNAME_MATCHES()) - { DEL_FTOK(newstrt); - if(REF(NEXT_TOK(newstrt))) //NAME REF REF, where is set_label - DEL_FTOK(newstrt); + if (last_period) + { ntok = lexer_lexstring(filename, (int)(last_period - filename)); + PUSHTOK(PATH,filename); + return ntok + 1; } - - return newstrt - str; -} - -int -lexer_lexsetolink -(uint8_t* str) -{ int setname_len, elename_len; - uint8_t* setname_end, *elename_end; - uint8_t curr_setname[MAX_SETNAME_LEN] = {0}; - uint8_t curr_elename[MAX_ELENAME_LEN] = {0}; - - return 0; - - //do nothing -} - -int -lexer_lexeleolink -(uint8_t* str) -{ int setname_len, elename_len; - uint8_t* setname_end, *elename_end, *newstrt; - uint8_t curr_setname[MAX_SETNAME_LEN] = {0}; - uint8_t curr_elename[MAX_ELENAME_LEN] = {0}; - - newstrt = str; - - SET_CURR_SETNAME(newstrt); - printf("prev_setname %s, curr_setname %s\n", prev_setname, curr_setname); - if(SETNAME_MATCHES()) - { DEL_FTOK(newstrt); - if(REF(newstrt)) - DEL_FTOK(newstrt); - } - - return newstrt - str; - - + return lexer_lexstring(filename, (int)(iter - filename)); } - - - -#define REF(STR) (STR[0] <= 0x39 && STR[0] >= 0x30) -#define DEL_FTOK(STR) (STR = u8_strchr(STR, '_') + 1) -#define NEXT_TOK(STR) (u8_strchr(STR, '_') + 1) -#define SET_CURR_SETNAME(STR) \ - do { \ - printf("Lexer_lexX:: setting curr_setname of str(%s)\n", STR); \ - setname_end = u8_chr(STR, FNAME_MAX, '_'); \ - setname_len = setname_end - str; \ - u8_move(curr_setname, STR, setname_len); \ - printf("Lexer_lexX:: curr_setname is now %s\n",curr_setname); \ - } while (0) -#define SET_CURR_ELENAME(STR) \ - do { \ - printf("Lexer_lexX:: setting curr_elename of str(%s)\n", STR); \ - setname_end = u8_chr(STR, FNAME_MAX, '_') + 1; \ - if(REF(setname_end)) \ - setname_end = u8_chr(setname_end, FNAME_MAX, '_') + 1; \ - elename_end = u8_chr(setname_end, FNAME_MAX, '_'); \ - elename_len = elename_end - setname_end; \ - u8_move(curr_elename, setname_end, elename_len); \ - printf("Lexer_lexX:: curr_elename is now %s\n", curr_elename); \ - } while (0) - -#define SETNAME_MATCHES() (u8_strcmp(curr_setname, prev_setname) == 0) -#define ELENAME_MATCHES() (u8_strcmp(curr_elename, prev_elename) == 0) -#define UPDATE_PREV_SETNAME(STR) \ - do { \ - printf("Lexer_lexX:: updating prev_setname from (%s)", prev_setname); \ - u8_set(prev_setname , (ucs4_t) 0, MAX_SETNAME_LEN ); \ - u8_move(prev_setname, curr_setname, setname_len); \ - printf(" to %s\n", prev_setname); \ - } while (0) -#define UPDATE_PREV_ELENAME(STR) \ - do { \ - u8_set(prev_elename , (ucs4_t) 0, MAX_ELENAME_LEN ); \ - u8_move(prev_elename, curr_elename, elename_len); \ - } while (0) -#define PREV_MAPFILE() (TK_STACKX - 5)->tok_t == MOPEN || (TK_STACKX-3)->tok_t == MOPEN -#define SET_MAPSTR(STR) (STR = u8_strstr(STR, map_key)) - - -#endif - - -/* int lexer_lexmapfile */ -/* #define INC_X() */ -/* (int height, int width) */ -/* { */ -/* int x, y; */ - -/* /\* Give scanner_scanpixels a buffer and a len. Iterate through */ -/* buf with buf[n]. If n == 0, do nothing. if n has a value, push x, */ -/* push y, push (z = n << 24), push (ref_id = n >> 8) *\/ */ -/* //scanner_scanpixels() */ - -/* for(i = 0; i < len; i++) */ -/* if(buf[i] == 0) */ -/* if(x == width) */ -/* x = 0; */ -/* else */ - - - - -/* } */ -/* fname_bytes = (uint8_t*)(DE_POP()->d_name); */ - /* printf("d_name is %s\n", fname_bytes); */ - /* for (fnp = filename, i = 0; i < FNAME_MAX; i += unit_size, fnp++) */ - /* { unit_size = u8_mblen(fname_bytes + i, min(4, FNAME_MAX - i)); */ - /* if (u8_mbtouc(fnp, fname_bytes + i, unit_size) == -1) //add ucs4 char to the filename */ - /* FAIL("Lexer failed to convert ^%s to unicode\n", (fname_bytes + i)); */ - /* if (*fnp == 0) //added a terminating char */ - /* break; */ - /* } */ - /* if(u8_mbtouc(filename, DE_POP()->d_name, FNAME_MAXy) == -1) */ - /* FAIL("Lexer failed to convert d_name into uint8_t\n"); */ - /* ulc_fprintf(stdout, "filename is %11U\n c", filename); */