X-Git-Url: https://www.kengrimes.com/gitweb/?p=henge%2Fwebcc.git;a=blobdiff_plain;f=src%2Fapc%2Flexer.c;h=b6c6ca361edafeb51a2b247eb11eaba121784238;hp=0d408b9daff8ee3af471912436fbb34d6ffb3d22;hb=301cac5f6e2edcecf2e1bd89aee5182130a213fc;hpb=c6658233045b0039b328394f38b3297501df0c30 diff --git a/src/apc/lexer.c b/src/apc/lexer.c index 0d408b9..b6c6ca3 100644 --- a/src/apc/lexer.c +++ b/src/apc/lexer.c @@ -1,92 +1,565 @@ /*!@file \brief lexical analyzer implementation for APC - \details this lexer scans a root directory given from the command line - for subdirectories and files structured for the APC grammar. + \details The lexer manages two FIFO stacks. One for maintaining tokens, the + other for maintaining a list of files to be scanned. During + execution, the lexer will return a token from its token queue if any + are present. If not, the lexer will pop an element from its + file queue to 'scanner' to be tokenized. If the file queue is empty, + the lexer will instead call 'parsedir' to traverse the directory tree + and tokenize the results. If 'parsedir' does not generate any new + tokens, we are done. 
\author Jordan Lavatai \date Aug 2016 ----------------------------------------------------------------------------*/ -//stdc +/* Standard */ #include #include +#include #include -//posix +/* Posix */ #include +#include +#include +#include +#include +#include #include -//bison -#include "fileparser.tab.h" -#define TOKEN_BUF_SIZE 1024 -#define DIRP_STACK_SIZE 512 +#include //realpath, NAME_MAX, FPATH_MAX +#include -int lexer_init(void); -int lexer(void); +/* Local */ +#include "parser.tab.h" +#ifndef DE_STACKSIZE +#define DE_STACKSIZE 1024 +#endif +#ifndef TK_STACKSIZE +#define TK_STACKSIZE 1024 +#endif +#ifndef MAX_SETNAME_LEN //max setname length +#define MAX_SETNAME_LEN 32 +#endif +#ifndef MAX_ELENAME_LEN //max elename length +#define MAX_ELENAME_LEN 32 +#endif +#define FNAME_MAX 1024 +#define FPATH_MAX 8192 +/* Public */ +int lexer_init(void); +int lexer(void); +int lexer_lexfile(const uint8_t*); +void lexer_pushtok(int, YYSTYPE); +uint8_t const* lexer_get_current_filepath(void); +int lexer_lexfilename(uint8_t*); +struct dirent* lexer_direntpa[DE_STACKSIZE],** lexer_direntpp,** lexer_direntpb; +/* Private */ +extern //lexer_fsm.rl +int lexer_lexstring(uint8_t*, int); +extern //lexer_fsm.rl +int lexer_setstr(uint8_t*, int); +extern //scanner.c +int scanner_init(void); +extern //scanner.c +int scanner(void); +static inline +int dredge_current_depth(void); +extern //bison +YYSTYPE yylval; static -int lexer_scan(void); - +uint8_t const* current_filename; +static +uint8_t prev_setname[MAX_SETNAME_LEN]; +static +uint8_t prev_elename[MAX_ELENAME_LEN]; static -int token_buf[TOKEN_BUF_SIZE], *tbp, *tbx; +uint8_t map_key[] = "~"; static -DIR* dirp_stack[DIRP_STACK_SIZE], *dsp; +struct tok +{ YYSTYPE lval; //token val + int tok_t; //token type +} token_stack[TK_STACKSIZE], *tsp, *tsx; -/* Initialize pointers */ -int -lexer_init() -{ tbp = tbx = token_buf; - dsp = dirp_stack; +/* Directory Entity Array/Stack + Simple array for keeping track of dirents yet to be processed by 
the scanner. + If this list is empty and there are no tokens, the lexer is done. + This array is populated by the scanner as an array, and popped locally by the + lexer as a stack, and is popped as a FIFO stack. +*/ +#define DE_STACK (lexer_direntpa) +#define DE_STACKP (lexer_direntpp) +#define DE_STACKB (lexer_direntpb) +#define DE_LEN() (DE_STACKP - DE_STACKB) +#define DE_INIT() (DE_STACKP = DE_STACKB = DE_STACK) +#define DE_POP() (*DE_STACKB++) + +/* Token Stack + This is a FIFO stack whose pointers are a union of either a pointer to an + integer, or a pointer to two integers (a struct tok). This way, integers may + be added or removed from the stack either singularly (IPUSH/IPOP), or as a + full token of two integers (PUSH/POP). + An alignment error will occur if IPOP or IPUSH are used a non-even number of + times in a sequence! +*/ +#define TK_STACK (token_stack) +#define TK_STACKP (tsp) +#define TK_STACKX (tsx) +#define TK_LEN() (TK_STACKX - TK_STACKP) +#define TK_INIT() (TK_STACKP = TK_STACKX = TK_STACK) +#define TK_POP() (*TK_STACKP++) +#define TK_PUSH(T,L) (*TK_STACKX++ = (struct tok){L,T}) + +/* Initializer + The initializer returns boolean true if an error occurs, which may be handled + with standard errno. +*/ +int lexer_init +() +{ TK_INIT(); + DE_INIT(); + return scanner_init(); +} + +/* Lexer + If the token buffer is empty, 'lexer' will initialize the token buffer and + call 'lexer_scandir'. If SCAN_ERROR is returned, an error is printed + before sending a null return to bison. If 0 tokens are generated, the error + printing is skipped. In all other cases, 'yylval' is set, and the token's + integer representation is returned. +*/ +int lexer +#define $($)#$ +#define SCAN_ERROR -1 +#define TK_EMPTY (TK_STACKP == TK_STACKX) +#define FAIL(...) 
\ + do { \ + fprintf(stderr,__VA_ARGS__); \ + goto done; \ + } while (0) +() +{ struct tok token; + start: + while (DE_LEN() > 0)//lex any directory entries in our stack + { + if (lexer_lexfile(DE_POP()->d_name) == 0) + FAIL("Lexer failed to tokenize [%s]\n",(*DE_STACKB)->d_name); + } + if (TK_EMPTY) //if there are no tokens, + { TK_INIT(); //initialize the token stack back to 0 + switch (scanner()) + { case SCAN_ERROR: //if an error occurred, + FAIL("Scanner error\n"); + case 0: //if the the scanner finds no dirents, + goto done; //then we are done + default: //if we found some elements to scan, + goto start; //start over and lex them + } + } + token = TK_POP(); + yylval = token.lval; + return token.tok_t; + done: + yylval.val = 0; return 0; } -/* Returns a token identifier and sets yylval */ + +/* Token Receiver + This receiver takes a struct tok and pushes it to the FIFO stack. +*/ +void lexer_pushtok +#define $($)#$ //stringifier +#define ERR_TK "Fatal: Generated over " $(TK_STACKSIZE) " tokens in one pass." +( int tok, YYSTYPE lval ) +{ if (TK_LEN() >= TK_STACKSIZE) + { fprintf(stderr, ERR_TK); + exit(EXIT_FAILURE); + } + TK_PUSH(tok, lval); +} + +/* Lexical analysis of a file + Strips a filename to its base name, then sends it to lexer_lex +*/ +int lexer_lexfile +#define HIDDEN_WARNING "%s is hidden and will not be parsed!\n", filename +( const uint8_t *filename +) +{ static uint8_t fname[FNAME_MAX]; + uint8_t *last_period = NULL, *iter; + + if (*filename == '.') + { fprintf (stderr, HIDDEN_WARNING); + return 0; + } + /* Copy the filename and remove its suffix */ + u8_strncpy(fname,filename,FNAME_MAX); + last_period = NULL; + for (iter = fname; *iter; iter++) //find the last '.' 
char + if (*iter == '.') + last_period = iter; + if (last_period) //if we found one, + *last_period = 0; //truncate the string there + /* Register the current_filename */ + current_filename = filename; + printf("lexer_lexfilename(%s)\n",fname); + return lexer_lexfilename(fname); +} + +uint8_t const* lexer_get_current_filepath +() +{ static uint8_t current_path[FPATH_MAX]; + static uint8_t const* last_filename; + if ((!last_filename || last_filename != current_filename) && + ((uint8_t*) realpath(current_filename, current_path) != (uint8_t*) current_path)) + { perror("realpath: "); + return NULL; + } + return (const uint8_t*)current_path; +} + +/* Returns 1 on success, 0 on failure */ int -lexer() -{ if (lexer_scan() == 0) - return 0; - yylval = *tbp++; - return *tbp++; +lexer_ismapfile(uint8_t* str) +{ + int i, len; + + len = u8_strlen(str); + for(i = 0; i < len; i++) + if(str[i] == '~') + return 1; } -/* Scanner - Scans a filename from its alphabetically ordered list of file elements - and tokenizes the result. If the file list is empty, then the stack of - directory elements will be popped and processed as they are encountered. - Returns the number of tokens generated. 
-*/ -#define MAX_ENTITIES 256 -static -int lexer_scan() -{ static struct dirent* entity; - static struct dirent* files[MAX_ENTITIES]; - static struct dirent* dirs = files + MAX_ENTITIES - 1; - static int num_files = 0; - static int num_dirs = 0; - - //sort out files and directories, grow directories from bottom up - while ((entity = readdir(dirp)) != NULL) - { switch (entity->d_type) - { case DT_LNK: - case DT_REG: - files[num_files++] = entity; - break; - case DT_DIR: - *(dirs - num_dirs++) = entity; - break; - case DT_UNKNOWN: - default: - printf("Ignoring unknown file: %s\n", entity->d_name); - break; - } +/* Scan filename and push its tokens + onto the stack. + Returns the token count reported by lexer_lexstring plus one for the + trailing filepath NAME token. */ +int lexer_lexfilename +(uint8_t* str) +#define REF(STR) (STR[0] <= 0x39 && STR[0] >= 0x30) +#define DEL_FTOK(STR) (STR = u8_strchr(STR, '_') + 1) +#define NEXT_TOK(STR) (u8_strchr(STR, '_') + 1) +#define SET_CURR_SETNAME(STR) \ + do { \ + printf("setting curr_setname of str(%s)\n", STR); \ + setname_end = u8_chr(STR, FNAME_MAX, '_'); \ + setname_len = setname_end - str; \ + u8_move(curr_setname, STR, setname_len); \ + printf("curr_setname is now %s\n",curr_setname); \ + } while (0) +#define SET_CURR_ELENAME(STR) \ + do { \ + printf("setting curr_elename of str(%s)\n", STR); \ + setname_end = u8_chr(STR, FNAME_MAX, '_') + 1; \ + if(REF(setname_end)) \ + setname_end = u8_chr(setname_end, FNAME_MAX, '_') + 1; \ + elename_end = u8_chr(setname_end, FNAME_MAX, '_'); \ + elename_len = elename_end - setname_end; \ + u8_move(curr_elename, setname_end, elename_len); \ + printf("curr_elename is now %s\n", curr_elename); \ + } while (0) + +#define SETNAME_MATCHES() (u8_strcmp(curr_setname, prev_setname) == 0) +#define ELENAME_MATCHES() (u8_strcmp(curr_elename, prev_elename) == 0) +#define UPDATE_PREV_SETNAME(STR) \ + do { \ + printf("updating prev_setname from (%s)", prev_setname); \ + u8_set(prev_setname , (ucs4_t) 0, MAX_SETNAME_LEN ); \ + u8_move(prev_setname, curr_setname, setname_len); \ + printf(" to 
%s\n", prev_setname); \ + } while (0) +#define UPDATE_PREV_ELENAME(STR) \ + do { \ + u8_set(prev_elename , (ucs4_t) 0, MAX_ELENAME_LEN ); \ + u8_move(prev_elename, curr_elename, elename_len); \ + } while (0) +#define PREV_MAPFILE() (TK_STACKX - 5)->tok_t == MOPEN || (TK_STACKX-3)->tok_t == MOPEN +#define SET_MAPSTR(STR) (STR = u8_strstr(STR, map_key)) + +{ int ntok, len, newstrt; + uint8_t *filepath; + typedef enum filetypes { + error = 0, + set_model, + set_map, + ele_model, + ele_map, + ele_vlink, + set_olink, + set_vlink + } filetypes; + + ntok = 0; + + printf("|---- Begin lexerfilename on %s ----|\n", str); + + if(*str == 0) + perror("Lexfilename:: str is NULL so fail\n"); + + /* Advance str by the offset lexer_setstr reports (lexer_setstr is defined in lexer_fsm.rl; presumably it skips the part already covered by the previous file - confirm there) */ + len = u8_strlen(str); + newstrt = lexer_setstr(str,len); + + str = str + newstrt; + + len = u8_strlen(str); + + ntok += lexer_lexstring(str, len); + + /* Need to add map variant name 'default' if user did not specify a + map variant name */ + /* if(filetype == ele_map) */ + /* { if(!u8_strchr(str, '_')) //map variant name not provided */ + /* { yylval.str = "default"; */ + /* lexer_pushtok(NAME, yylval); */ + /* ntok++; */ + /* printf("Pushing default ele_map name\n"); */ + /* } */ + /* } */ + + /* Pass back filepath as end of statement operator */ + /* NOTE(review): lexer_get_current_filepath() returns NULL when realpath fails, and that NULL is handed straight to u8_strdup below - confirm this is handled */ + filepath = u8_strdup(lexer_get_current_filepath()); + yylval.str = filepath; + lexer_pushtok(NAME, yylval); + printf("Pushing filepath %s\n", filepath); + ntok++; + + printf("|---- Ending lexer_lexfilename on %s, %d tokens were lexed ----|\n", str, ntok); + return ntok; +} + +/* The lexer_lex<type> helpers below each return an offset into str (newstrt - str), except lexer_lexsetolink which returns 0. NOTE(review): no caller of these helpers is visible in this file. */ +int +lexer_lexelemap +( uint8_t* str) +{ int setname_len, elename_len, strlen; + uint8_t* setname_end, *elename_end, *newstrt; + uint8_t curr_setname[MAX_SETNAME_LEN] = {0}; + uint8_t curr_elename[MAX_ELENAME_LEN] = {0}; + + newstrt = str; + + SET_CURR_SETNAME(newstrt); + SET_CURR_ELENAME(newstrt); + if(PREV_MAPFILE()) + { printf("*previous file was mapfile*\n"); + SET_MAPSTR(newstrt); + } + else + { + if(SETNAME_MATCHES()) + { 
DEL_FTOK(newstrt); + if(REF(newstrt)) + DEL_FTOK(newstrt); + printf("setname matches\n"); + if(ELENAME_MATCHES()) + DEL_FTOK(newstrt); + /* NOTE(review): this tests str, whereas the neighbouring checks test newstrt - confirm which is intended */ + if(REF(str)) + DEL_FTOK(newstrt); + } + } + UPDATE_PREV_ELENAME(newstrt); + UPDATE_PREV_SETNAME(newstrt); + + return newstrt - str; + + +} + +int +lexer_lexelemodel +(uint8_t* str) +{ int setname_len, elename_len; + uint8_t* setname_end, *elename_end, *newstrt; + uint8_t curr_setname[MAX_SETNAME_LEN] = {0}; + uint8_t curr_elename[MAX_ELENAME_LEN] = {0}; + + printf("In lexelemodel, str is %s\n", str); + + newstrt = str; + + SET_CURR_SETNAME(newstrt); + SET_CURR_ELENAME(newstrt); + if(SETNAME_MATCHES()) + { printf("in ele_model: setname matches\n"); + DEL_FTOK(newstrt); + printf("newstrt is now %s\n", newstrt); + if(REF(newstrt)) + DEL_FTOK(newstrt); + if(ELENAME_MATCHES()) + { printf("in ele_model: elename matches\n"); + DEL_FTOK(newstrt); + if(REF(newstrt)) + DEL_FTOK(newstrt); + } + } + UPDATE_PREV_ELENAME(newstrt); + UPDATE_PREV_SETNAME(newstrt); + + return newstrt - str; +} + +int +lexer_lexsetmap +(uint8_t* str) +{ int setname_len, elename_len; + uint8_t* setname_end, *elename_end, *newstrt; + uint8_t curr_setname[MAX_SETNAME_LEN] = {0}; + uint8_t curr_elename[MAX_ELENAME_LEN] = {0}; + + newstrt = str; + + SET_CURR_SETNAME(newstrt); + if(PREV_MAPFILE()) + SET_MAPSTR(newstrt); + else + if( SETNAME_MATCHES()) + DEL_FTOK(newstrt); + /* NOTE(review): the else above has no braces, so the REF check below is a separate statement that runs on both paths - confirm this is intended */ + if(REF(newstrt)) + DEL_FTOK(newstrt); + + UPDATE_PREV_SETNAME(newstrt); + + return newstrt - str; +} + +int +lexer_lexsetmodel +(uint8_t* str) +{ int setname_len, elename_len; + uint8_t* setname_end, *elename_end, *newstrt; + uint8_t curr_setname[MAX_SETNAME_LEN] = {0}; + uint8_t curr_elename[MAX_ELENAME_LEN] = {0}; + + newstrt = str; + + SET_CURR_SETNAME(newstrt); + if( SETNAME_MATCHES()) + DEL_FTOK(newstrt); + if(REF(newstrt)) + DEL_FTOK(newstrt); + UPDATE_PREV_SETNAME(newstrt); + + return newstrt - str; + +} + +int +lexer_lexsetvlink +(uint8_t* str) +{ int setname_len, elename_len; + uint8_t* 
setname_end, *elename_end, *newstrt; + uint8_t curr_setname[MAX_SETNAME_LEN] = {0}; + uint8_t curr_elename[MAX_ELENAME_LEN] = {0}; + + newstrt = str; + + SET_CURR_SETNAME(newstrt); + if( SETNAME_MATCHES()) + DEL_FTOK(newstrt); + if(REF((NEXT_TOK(newstrt)))) //if NAME REF REF + DEL_FTOK(newstrt); + UPDATE_PREV_SETNAME(newstrt); + + return newstrt - str; + +} + +int +lexer_lexelevlink +(uint8_t* str) +{ int setname_len, elename_len; + uint8_t* setname_end, *elename_end, *newstrt; + uint8_t curr_setname[MAX_SETNAME_LEN] = {0}; + uint8_t curr_elename[MAX_ELENAME_LEN] = {0}; + + newstrt = str; + + SET_CURR_SETNAME(newstrt); + SET_CURR_ELENAME(newstrt); + if(SETNAME_MATCHES()) + { DEL_FTOK(newstrt); + if(REF(NEXT_TOK(newstrt))) //NAME REF REF, where is set_label + DEL_FTOK(newstrt); } - if (errno) - perror("readdir"); - qsort(&files[0], num_files, sizeof struct dirent*, qalpha); - num_ents = scandirat(dirfd, ".", &namelist, scanfilter, scancompar); - if (num_ents < 0) - { perror("scandirat"); - return -1; + + return newstrt - str; +} + +int +lexer_lexsetolink +(uint8_t* str) +{ int setname_len, elename_len; + uint8_t* setname_end, *elename_end; + uint8_t curr_setname[MAX_SETNAME_LEN] = {0}; + uint8_t curr_elename[MAX_ELENAME_LEN] = {0}; + + return 0; + + //do nothing +} + +int +lexer_lexeleolink +(uint8_t* str) +{ int setname_len, elename_len; + uint8_t* setname_end, *elename_end, *newstrt; + uint8_t curr_setname[MAX_SETNAME_LEN] = {0}; + uint8_t curr_elename[MAX_ELENAME_LEN] = {0}; + + newstrt = str; + + SET_CURR_SETNAME(newstrt); + printf("prev_setname %s, curr_setname %s\n", prev_setname, curr_setname); + if(SETNAME_MATCHES()) + { DEL_FTOK(newstrt); + if(REF(newstrt)) + DEL_FTOK(newstrt); } - //process files - //recurse into directories + return newstrt - str; + } + + +/**************************/ +/****Abandon All Hope******/ +/**************************/ +/*** ***/ +/*** ***/ +/*** ***/ +/*** ***/ + + + +/* int lexer_lexmapfile */ +/* #define INC_X() */ +/* (int 
height, int width) */ +/* { */ +/* int x, y; */ + +/* /\* Give scanner_scanpixels a buffer and a len. Iterate through */ +/* buf with buf[n]. If n == 0, do nothing. if n has a value, push x, */ +/* push y, push (z = n << 24), push (ref_id = n >> 8) *\/ */ +/* //scanner_scanpixels() */ + +/* for(i = 0; i < len; i++) */ +/* if(buf[i] == 0) */ +/* if(x == width) */ +/* x = 0; */ +/* else */ + + + + +/* } */ +/* fname_bytes = (uint8_t*)(DE_POP()->d_name); */ + /* printf("d_name is %s\n", fname_bytes); */ + /* for (fnp = filename, i = 0; i < FNAME_MAX; i += unit_size, fnp++) */ + /* { unit_size = u8_mblen(fname_bytes + i, min(4, FNAME_MAX - i)); */ + /* if (u8_mbtouc(fnp, fname_bytes + i, unit_size) == -1) //add ucs4 char to the filename */ + /* FAIL("Lexer failed to convert ^%s to unicode\n", (fname_bytes + i)); */ + /* if (*fnp == 0) //added a terminating char */ + /* break; */ + /* } */ + /* if(u8_mbtouc(filename, DE_POP()->d_name, FNAME_MAXy) == -1) */ + /* FAIL("Lexer failed to convert d_name into uint8_t\n"); */ + /* ulc_fprintf(stdout, "filename is %11U\n c", filename); */