X-Git-Url: https://www.kengrimes.com/gitweb/?p=henge%2Fwebcc.git;a=blobdiff_plain;f=src%2Fapc%2Flexer.c;h=b6c6ca361edafeb51a2b247eb11eaba121784238;hp=4e0a611faa06bf16bd4501dd0bed73fa850f34ce;hb=301cac5f6e2edcecf2e1bd89aee5182130a213fc;hpb=882513ad6569a640cb5c90b83e27e742ace71b63 diff --git a/src/apc/lexer.c b/src/apc/lexer.c index 4e0a611..b6c6ca3 100644 --- a/src/apc/lexer.c +++ b/src/apc/lexer.c @@ -14,53 +14,83 @@ /* Standard */ #include #include +#include #include /* Posix */ #include +#include +#include +#include +#include +#include #include +#include //realpath, NAME_MAX, FPATH_MAX #include + /* Local */ -//#include "parser.tab.h" -#ifndef DP_STACKSIZE -#define DP_STACKSIZE 1024 +#include "parser.tab.h" +#ifndef DE_STACKSIZE +#define DE_STACKSIZE 1024 #endif #ifndef TK_STACKSIZE #define TK_STACKSIZE 1024 #endif +#ifndef MAX_SETNAME_LEN //max setname length +#define MAX_SETNAME_LEN 32 +#endif +#ifndef MAX_ELENAME_LEN //max setname length +#define MAX_ELENAME_LEN 32 +#endif +#define FNAME_MAX 1024 +#define FPATH_MAX 8192 + /* Public */ -struct tok -{ int lval; - int tok; -}; -int lexer_init(void); -int lexer(void); -inline -void lexer_pushtok(int int); -struct dirent* lexer_direntpa[DP_STACKSIZE]; +int lexer_init(void); +int lexer(void); +int lexer_lexfile(const uint8_t*); +void lexer_pushtok(int, YYSTYPE); +uint8_t const* lexer_get_current_filepath(void); +int lexer_lexfilename(uint8_t*); +struct dirent* lexer_direntpa[DE_STACKSIZE],** lexer_direntpp,** lexer_direntpb; /* Private */ +extern //lexer_fsm.rl +int lexer_lexstring(uint8_t*, int); +extern //lexer_fsm.rl +int lexer_setstr(uint8_t*, int); +extern //scanner.c +int scanner_init(void); +extern //scanner.c +int scanner(void); static inline -int scan(void); -static inline -int dredge_current_depth(void); +int dredge_current_depth(void); +extern //bison +YYSTYPE yylval; +static +uint8_t const* current_filename; static -struct dirent** dps; +uint8_t prev_setname[MAX_SETNAME_LEN]; static -struct tok token_stack[TK_STACKSIZE]; +uint8_t prev_elename[MAX_ELENAME_LEN]; static -union tokp -{ int* i; - struct tok* t; -} tks, tkx; +uint8_t map_key[] = "~"; +static +struct tok +{ YYSTYPE lval; //token val + int tok_t; //token type +} token_stack[TK_STACKSIZE], *tsp, *tsx; /* Directory Entity Array/Stack Simple array for keeping track of dirents yet to be processed by the scanner. If this list is empty and there are no tokens, the lexer is done. + This array is populated by the scanner as an array, and popped locally by the + lexer as a stack, and is popped as a FIFO stack. */ -#define DP_STACK (lexer_direntpa) -#define DP_STACKP (dps) -#define DP_LEN() (DP_STACKP - DP_STACK) -#define DP_INIT() (DP_STACKP = DP_STACK) -#define DP_POP() (*--DP_STACKP) +#define DE_STACK (lexer_direntpa) +#define DE_STACKP (lexer_direntpp) +#define DE_STACKB (lexer_direntpb) +#define DE_LEN() (DE_STACKP - DE_STACKB) +#define DE_INIT() (DE_STACKP = DE_STACKB = DE_STACK) +#define DE_POP() (*DE_STACKB++) /* Token Stack This is a FIFO stack whose pointers are a union of either a pointer to an @@ -70,100 +100,466 @@ union tokp An alignment error will occur if IPOP or IPUSH are used a non-even number of times in a sequence! */ -#define TK_STACK (token_stack) -#define TK_STACKP (tks.t) -#define TK_STACKPI (tks.i) -#define TK_STACKX (tkx.t) -#define TK_STACKXI (tkx.i) -#define TK_LEN() (TK_STACKP - TK_STACKX) -#define TK_INIT() (TK_STACKP = TK_STACKX = TK_STACK) -#define TK_POP() (*TK_STACKP++) -#define TK_POPI() (*TK_STACKPI++); -#define TK_PUSH(T) (*TKSTACKX++ = T) -#define TK_PUSHI(I) (*TKSTACKXI++ = (I)) - -extern //main.c -const char* cargs['Z']; - -extern //scanner.c -int scanner_init(void); -extern //scanner.c -int scanner(struct dirent**); +#define TK_STACK (token_stack) +#define TK_STACKP (tsp) +#define TK_STACKX (tsx) +#define TK_LEN() (TK_STACKX - TK_STACKP) +#define TK_INIT() (TK_STACKP = TK_STACKX = TK_STACK) +#define TK_POP() (*TK_STACKP++) +#define TK_PUSH(T,L) (*TK_STACKX++ = (struct tok){L,T}) /* Initializer - The initializer returns boolean true if an error occurs, which may be handled with standard errno. + The initializer returns boolean true if an error occurs, which may be handled + with standard errno. */ int lexer_init () { TK_INIT(); - DP_INIT(); + DE_INIT(); return scanner_init(); } /* Lexer If the token buffer is empty, 'lexer' will initialize the token buffer and - call 'lexer_scandir'. If #SCANDIR_ERROR is returned, an error is printed + call 'lexer_scandir'. If SCAN_ERROR is returned, an error is printed before sending a null return to bison. If 0 tokens are generated, the error printing is skipped. In all other cases, 'yylval' is set, and the token's integer representation is returned. */ int lexer +#define $($)#$ #define SCAN_ERROR -1 #define TK_EMPTY (TK_STACKP == TK_STACKX) +#define FAIL(...) \ + do { \ + fprintf(stderr,__VA_ARGS__); \ + goto done; \ + } while (0) () -{ if (TK_EMPTY) - { switch (parsedir()) - { case SCAN_ERROR: - perror("lexer_scan"); - case 0: - yylval = 0; - return 0; - default: - break; +{ struct tok token; + start: + while (DE_LEN() > 0)//lex any directory entries in our stack + { + if (lexer_lexfile(DE_POP()->d_name) == 0) + FAIL("Lexer failed to tokenize [%s]\n",(*DE_STACKB)->d_name); + } + if (TK_EMPTY) //if there are no tokens, + { TK_INIT(); //initialize the token stack back to 0 + switch (scanner()) + { case SCAN_ERROR: //if an error occurred, + FAIL("Scanner error\n"); + case 0: //if the the scanner finds no dirents, + goto done; //then we are done + default: //if we found some elements to scan, + goto start; //start over and lex them } } - yylval = TK_IPOP(); - return TK_IPOP(); + token = TK_POP(); + yylval = token.lval; + return token.tok_t; + done: + yylval.val = 0; + return 0; } + /* Token Receiver This receiver takes a struct tok and pushes it to the FIFO stack. */ -inline void lexer_pushtok -#define ERR_TK "Fatal: Generated over " S(TK_STACKSIZE) " tokens in one pass." -( struct tok token ) -{ if (TK_LEN >= TK_STACKSIZE) +#define $($)#$ //stringifier +#define ERR_TK "Fatal: Generated over " $(TK_STACKSIZE) " tokens in one pass." +( int tok, YYSTYPE lval ) +{ if (TK_LEN() >= TK_STACKSIZE) { fprintf(stderr, ERR_TK); exit(EXIT_FAILURE); } - TK_PUSH(token); + TK_PUSH(tok, lval); } -/* Lexical Analysis - Ragel state machine for tokenizing text. - */ -void lexer_lex -(const char* str) -{ struct tok token; - token.TOK = 1; - token.LVAL = 2; - lexer_pushtok(token); - printf (str); +/* Lexical analysis of a file + Strips a filename to its base name, then sends it to lexer_lex +*/ +int lexer_lexfile +#define HIDDEN_WARNING "%s is hidden and will not be parsed!\n", filename +( const uint8_t *filename +) +{ static uint8_t fname[FNAME_MAX]; + uint8_t *last_period = NULL, *iter; + + if (*filename == '.') + { fprintf (stderr, HIDDEN_WARNING); + return 0; + } + /* Copy the filename and remove its suffix */ + u8_strncpy(fname,filename,FNAME_MAX); + last_period = NULL; + for (iter = fname; *iter; iter++) //find the last '.' char + if (*iter == '.') + last_period = iter; + if (last_period) //if we found one, + *last_period = 0; //truncate the string there + /* Register the current_filename */ + current_filename = filename; + printf("lexer_lexfilename(%s)\n",fname); + return lexer_lexfilename(fname); } -/* init_file: - if (lsp != NULL) - while ((c = *lsp++) == *csp) - { switch (c) - { case DELIM: - delimeters_skipped++; - default: - csp++; //delayed to ensure csp is the start of scannable text - break; +uint8_t const* lexer_get_current_filepath +() +{ static uint8_t current_path[FPATH_MAX]; + static uint8_t const* last_filename; + if ((!last_filename || last_filename != current_filename) && + ((uint8_t*) realpath(current_filename, current_path) != (uint8_t*) current_path)) + { perror("realpath: "); + return NULL; } + return (const uint8_t*)current_path; +} + +/* Returns 1 on success, 0 on failure */ +int +lexer_ismapfile(uint8_t* str) +{ + int i, len; + + len = u8_strlen(str); + for(i = 0; i < len; i++) + if(str[i] == '~') + return 1; +} + + +/* Scan filename and push the its tokens + onto the stack */ +int lexer_lexfilename +(uint8_t* str) +#define REF(STR) (STR[0] <= 0x39 && STR[0] >= 0x30) +#define DEL_FTOK(STR) (STR = u8_strchr(STR, '_') + 1) +#define NEXT_TOK(STR) (u8_strchr(STR, '_') + 1) +#define SET_CURR_SETNAME(STR) \ + do { \ + printf("setting curr_setname of str(%s)\n", STR); \ + setname_end = u8_chr(STR, FNAME_MAX, '_'); \ + setname_len = setname_end - str; \ + u8_move(curr_setname, STR, setname_len); \ + printf("curr_setname is now %s\n",curr_setname); \ + } while (0) +#define SET_CURR_ELENAME(STR) \ + do { \ + printf("setting curr_elename of str(%s)\n", STR); \ + setname_end = u8_chr(STR, FNAME_MAX, '_') + 1; \ + if(REF(setname_end)) \ + setname_end = u8_chr(setname_end, FNAME_MAX, '_') + 1; \ + elename_end = u8_chr(setname_end, FNAME_MAX, '_'); \ + elename_len = elename_end - setname_end; \ + u8_move(curr_elename, setname_end, elename_len); \ + printf("curr_elename is now %s\n", curr_elename); \ + } while (0) + +#define SETNAME_MATCHES() (u8_strcmp(curr_setname, prev_setname) == 0) +#define ELENAME_MATCHES() (u8_strcmp(curr_elename, prev_elename) == 0) +#define UPDATE_PREV_SETNAME(STR) \ + do { \ + printf("updating prev_setname from (%s)", prev_setname); \ + u8_set(prev_setname , (ucs4_t) 0, MAX_SETNAME_LEN ); \ + u8_move(prev_setname, curr_setname, setname_len); \ + printf(" to %s\n", prev_setname); \ + } while (0) +#define UPDATE_PREV_ELENAME(STR) \ + do { \ + u8_set(prev_elename , (ucs4_t) 0, MAX_ELENAME_LEN ); \ + u8_move(prev_elename, curr_elename, elename_len); \ + } while (0) +#define PREV_MAPFILE() (TK_STACKX - 5)->tok_t == MOPEN || (TK_STACKX-3)->tok_t == MOPEN +#define SET_MAPSTR(STR) (STR = u8_strstr(STR, map_key)) + +{ int ntok, len, newstrt; + uint8_t *filepath; + typedef enum filetypes { + error = 0, + set_model, + set_map, + ele_model, + ele_map, + ele_vlink, + set_olink, + set_vlink + } filetypes; + + ntok = 0; + + printf("|---- Begin lexerfilename on %s ----|\n", str); + + if(*str == 0) + perror("Lexfilename:: str is NULL so fail\n"); + + /* Determine the filetype of str */ + len = u8_strlen(str); + newstrt = lexer_setstr(str,len); + + str = str + newstrt; + + len = u8_strlen(str); + + ntok += lexer_lexstring(str, len); + + /* Need to add map variant name 'default' if user did not specify a + map variant name */ + /* if(filetype == ele_map) */ + /* { if(!u8_strchr(str, '_')) //map variant name not provided */ + /* { yylval.str = "default"; */ + /* lexer_pushtok(NAME, yylval); */ + /* ntok++; */ + /* printf("Pushing default ele_map name\n"); */ + /* } */ + /* } */ + + /* Pass back filepath as end of statment operator */ + filepath = u8_strdup(lexer_get_current_filepath()); + yylval.str = filepath; + lexer_pushtok(NAME, yylval); + printf("Pushing filepath %s\n", filepath); + ntok++; + + printf("|---- Ending lexer_lexfilename on %s, %d tokens were lexed ----|\n", str, ntok); + return ntok; +} + +int +lexer_lexelemap +( uint8_t* str) +{ int setname_len, elename_len, strlen; + uint8_t* setname_end, *elename_end, *newstrt; + uint8_t curr_setname[MAX_SETNAME_LEN] = {0}; + uint8_t curr_elename[MAX_ELENAME_LEN] = {0}; + + newstrt = str; + + SET_CURR_SETNAME(newstrt); + SET_CURR_ELENAME(newstrt); + if(PREV_MAPFILE()) + { printf("*previous file was mapfile*\n"); + SET_MAPSTR(newstrt); } - last_string = string; - scan_text: - return scanner_tokenize(csp); -*/ + else + { + if(SETNAME_MATCHES()) + { DEL_FTOK(newstrt); + if(REF(newstrt)) + DEL_FTOK(newstrt); + printf("setname matches\n"); + if(ELENAME_MATCHES()) + DEL_FTOK(newstrt); + if(REF(str)) + DEL_FTOK(newstrt); + } + } + UPDATE_PREV_ELENAME(newstrt); + UPDATE_PREV_SETNAME(newstrt); + + return newstrt - str; + + +} + +int +lexer_lexelemodel +(uint8_t* str) +{ int setname_len, elename_len; + uint8_t* setname_end, *elename_end, *newstrt; + uint8_t curr_setname[MAX_SETNAME_LEN] = {0}; + uint8_t curr_elename[MAX_ELENAME_LEN] = {0}; + + printf("In lexelemodel, str is %s\n", str); + + newstrt = str; + + SET_CURR_SETNAME(newstrt); + SET_CURR_ELENAME(newstrt); + if(SETNAME_MATCHES()) + { printf("in ele_model: setname matches\n"); + DEL_FTOK(newstrt); + printf("newstrt is now %s\n", newstrt); + if(REF(newstrt)) + DEL_FTOK(newstrt); + if(ELENAME_MATCHES()) + { printf("in ele_model: elename matches\n"); + DEL_FTOK(newstrt); + if(REF(newstrt)) + DEL_FTOK(newstrt); + } + } + UPDATE_PREV_ELENAME(newstrt); + UPDATE_PREV_SETNAME(newstrt); + + return newstrt - str; +} + +int +lexer_lexsetmap +(uint8_t* str) +{ int setname_len, elename_len; + uint8_t* setname_end, *elename_end, *newstrt; + uint8_t curr_setname[MAX_SETNAME_LEN] = {0}; + uint8_t curr_elename[MAX_ELENAME_LEN] = {0}; + + newstrt = str; + + SET_CURR_SETNAME(newstrt); + if(PREV_MAPFILE()) + SET_MAPSTR(newstrt); + else + if( SETNAME_MATCHES()) + DEL_FTOK(newstrt); + if(REF(newstrt)) + DEL_FTOK(newstrt); + + UPDATE_PREV_SETNAME(newstrt); + + return newstrt - str; +} + +int +lexer_lexsetmodel +(uint8_t* str) +{ int setname_len, elename_len; + uint8_t* setname_end, *elename_end, *newstrt; + uint8_t curr_setname[MAX_SETNAME_LEN] = {0}; + uint8_t curr_elename[MAX_ELENAME_LEN] = {0}; + + newstrt = str; + + SET_CURR_SETNAME(newstrt); + if( SETNAME_MATCHES()) + DEL_FTOK(newstrt); + if(REF(newstrt)) + DEL_FTOK(newstrt); + UPDATE_PREV_SETNAME(newstrt); + + return newstrt - str; + +} + +int +lexer_lexsetvlink +(uint8_t* str) +{ int setname_len, elename_len; + uint8_t* setname_end, *elename_end, *newstrt; + uint8_t curr_setname[MAX_SETNAME_LEN] = {0}; + uint8_t curr_elename[MAX_ELENAME_LEN] = {0}; + + newstrt = str; + + SET_CURR_SETNAME(newstrt); + if( SETNAME_MATCHES()) + DEL_FTOK(newstrt); + if(REF((NEXT_TOK(newstrt)))) //if NAME REF REF + DEL_FTOK(newstrt); + UPDATE_PREV_SETNAME(newstrt); + + return newstrt - str; + +} + +int +lexer_lexelevlink +(uint8_t* str) +{ int setname_len, elename_len; + uint8_t* setname_end, *elename_end, *newstrt; + uint8_t curr_setname[MAX_SETNAME_LEN] = {0}; + uint8_t curr_elename[MAX_ELENAME_LEN] = {0}; + + newstrt = str; + + SET_CURR_SETNAME(newstrt); + SET_CURR_ELENAME(newstrt); + if(SETNAME_MATCHES()) + { DEL_FTOK(newstrt); + if(REF(NEXT_TOK(newstrt))) //NAME REF REF, where is set_label + DEL_FTOK(newstrt); + } + + return newstrt - str; +} + +int +lexer_lexsetolink +(uint8_t* str) +{ int setname_len, elename_len; + uint8_t* setname_end, *elename_end; + uint8_t curr_setname[MAX_SETNAME_LEN] = {0}; + uint8_t curr_elename[MAX_ELENAME_LEN] = {0}; + + return 0; + + //do nothing +} + +int +lexer_lexeleolink +(uint8_t* str) +{ int setname_len, elename_len; + uint8_t* setname_end, *elename_end, *newstrt; + uint8_t curr_setname[MAX_SETNAME_LEN] = {0}; + uint8_t curr_elename[MAX_ELENAME_LEN] = {0}; + + newstrt = str; + + SET_CURR_SETNAME(newstrt); + printf("prev_setname %s, curr_setname %s\n", prev_setname, curr_setname); + if(SETNAME_MATCHES()) + { DEL_FTOK(newstrt); + if(REF(newstrt)) + DEL_FTOK(newstrt); + } + + return newstrt - str; + + +} + + +/**************************/ +/****Abandon All Hope******/ +/**************************/ +/*** ***/ +/*** ***/ +/*** ***/ +/*** ***/ + + + +/* int lexer_lexmapfile */ +/* #define INC_X() */ +/* (int height, int width) */ +/* { */ +/* int x, y; */ + +/* /\* Give scanner_scanpixels a buffer and a len. Iterate through */ +/* buf with buf[n]. If n == 0, do nothing. if n has a value, push x, */ +/* push y, push (z = n << 24), push (ref_id = n >> 8) *\/ */ +/* //scanner_scanpixels() */ + +/* for(i = 0; i < len; i++) */ +/* if(buf[i] == 0) */ +/* if(x == width) */ +/* x = 0; */ +/* else */ + + + + +/* } */ +/* fname_bytes = (uint8_t*)(DE_POP()->d_name); */ + /* printf("d_name is %s\n", fname_bytes); */ + /* for (fnp = filename, i = 0; i < FNAME_MAX; i += unit_size, fnp++) */ + /* { unit_size = u8_mblen(fname_bytes + i, min(4, FNAME_MAX - i)); */ + /* if (u8_mbtouc(fnp, fname_bytes + i, unit_size) == -1) //add ucs4 char to the filename */ + /* FAIL("Lexer failed to convert ^%s to unicode\n", (fname_bytes + i)); */ + /* if (*fnp == 0) //added a terminating char */ + /* break; */ + /* } */ + /* if(u8_mbtouc(filename, DE_POP()->d_name, FNAME_MAXy) == -1) */ + /* FAIL("Lexer failed to convert d_name into uint8_t\n"); */ + /* ulc_fprintf(stdout, "filename is %11U\n c", filename); */