X-Git-Url: https://www.kengrimes.com/gitweb/?p=henge%2Fwebcc.git;a=blobdiff_plain;f=src%2Fapc%2Flexer.c;h=940ee8163968151c90e9ec508e81ba722265ad3b;hp=47d2c35d9bcd43d303c3b90555eb3bf0462d6d51;hb=957d4c384203bdc325c81a594943a5790dcb32aa;hpb=f8d924b76a3dfbaaa9e92dacbc4992a7ab6dc8f2 diff --git a/src/apc/lexer.c b/src/apc/lexer.c index 47d2c35..940ee81 100644 --- a/src/apc/lexer.c +++ b/src/apc/lexer.c @@ -1,7 +1,13 @@ /*!@file \brief lexical analyzer implementation for APC - \details this lexer scans a root directory given from the command line - for subdirectories and files structured for the APC grammar. + \details The lexer manages two FIFO stacks. One for maintaining tokens, the + other for maintaining a list of files to be scanned. During + execution, the lexer will return a token from its token queue if any + are present. If not, the lexer will will pop an element from its + file queue to 'scanner' to be tokenized. If the file queue is empty, + the lexer will instead call 'parsedir' to traverse the directory tree + and tokenize the results. If 'parsedir' does not generate any new + tokens, we are done. \author Jordan Lavatai \date Aug 2016 ----------------------------------------------------------------------------*/ @@ -12,111 +18,54 @@ /* Posix */ #include #include +#include //realpath, NAME_MAX, PATH_MAX #include /* Local */ #include "parser.tab.h" -#define TOKEN_BUF_SIZE 1024 -#define ENTITY_BUF_SIZE 512 -#define SCANDIR_ERROR -1 - -int lexer_init(void); -int lexer(void); - -extern //main.c -const char* cargs['Z']; - -static inline -int scan(void); -static inline -int qcomp(const void*, const void*); - -/* Directory Listing Stack - FILO Stack for keeping an open DIR* at each directory depth for treewalk. - This stack is depth-safe, checking its depth during push operations, but not - during pop operations, to ensure the thread doesn't open too many files at - once (512 in c runtime), or traverse too far through symbolic links. - A directory listing includes a DIR* and all DIR-typed entity in the directory - as recognized by dirent, populated externally (and optionally). - This stack behaves abnormally by incrementing its PUSH operation prior to - evaluation, and the POP operations after evaluation. This behavior allows - the 'DL_CURDEPTH' operation to map to the current element in the 'dl_stack' - array, and it is always treated as the "current depth". This also allows us - to init the root directory to 'directory_list_stack'[0] and pop it in a safe - and explicit manner. -*/ -#define S(S)#S -#ifndef DL_DEPTHMAX -#define DL_DEPTHMAX 64 +#ifndef DE_STACKSIZE +#define DE_STACKSIZE 1024 #endif -#ifndef DL_CHILDMAX -#define DL_CHILDMAX DL_DEPTHMAX //square tree +#ifndef TK_STACKSIZE +#define TK_STACKSIZE 1024 #endif -#define DL_STACK (directory_list_stack) -#define DL_STACKP (dls) -#define DL_STACKSIZE (DL_DEPTHMAX + 1) //+1 because push increments early -#define DL_LEN (DL_STACKP - DL_STACK) -#define DL_CURDEPTH (DL_LEN) -#define DL_CD_STACK ((*DL_STACKP).child_entity_stack) -#define DL_CD_STACKP ((*DL_STACKP).cds) -#define DL_CD_STACKSIZE (DL_DEPTHMAX) //square tree -#define DL_CD_LEN (DL_CD_STACKP - DL_CD_STACK) -#define ERR_CHILD "Fatal: Maximum of " S(MAX_CHILD) \ - " child directories exceeded for directory %s at depth %i\n" \ - ,DL_STACK[DL_DEPTH()].label,DL_DEPTH() -#define ERR_DEPTH "Fatal: Maximum directory depth of " S(DL_DEPTHMAX) \ - " exceeded during directory scan\n" -#define DL_INIT() (DL_STACKP = DL_STACK) -#define DL_CD_INIT() (DL_CD_STACKP = DL_CD_STACK) -#define DL_POP() ((DIR*)(*DL_STACKP--)) -#define DL_CD_POP() (*--DL_CD_STACKP) -#define DL_PUSH(D) (*(DIR**)(++DL_STACKP) = DIRP) -#define DL_CD_PUSH(E) (*DL_CD_STACKP++ = E) -#define DL_SAFEPUSH(D) \ - do { \ - if (DL_DEPTH() >= DL_DEPTHMAX) \ - { fprintf(stderr, ERR_DEPTH); \ - exit(EXIT_FAILURE); \ - } \ - if ((*(DIR**)(++DL_STACKP) = D) == NULL) \ - { perror("DL_SAFEPUSH"); \ - exit(EXIT_FAILURE); \ - } \ - } while(0) -#define DL_CD_POPULATE() \ - do { \ - DL_CD_INIT(); \ - while ((*DL_CD_STACKP = readdir(*DL_STACKP)) != NULL) \ - { switch((*DL_CD_STACKP)->d_type) \ - { case DT_REG: \ - case DT_DIR: \ - DL_CD_STACKP++; \ - break; \ - default: \ - } \ - } \ - qsort(DL_CD_STACKP, DL_CD_LEN, sizeof *DL_CD_STACKP, qcomp); \ - } while (0) - +/* Public */ +int lexer_init(void); +int lexer(void); +int lexer_lexfile(const char*); +void lexer_pushtok(int, YYSTYPE); +char const* lexer_get_current_filepath(void); +struct dirent* lexer_direntpa[DE_STACKSIZE],** lexer_direntpp,** lexer_direntpb; +/* Private */ +extern //lexer_lex.rl +int lexer_lex(const char*); +extern //scanner.c +int scanner_init(void); +extern //scanner.c +int scanner(void); +static inline +int dredge_current_depth(void); +extern //bison +YYSTYPE yylval; static -struct dirlist -{ DIR* dirp; - struct dirent** child_directory_stack[DL_CD_STACKSIZE],*** cds; - const char* label; -} directory_list_stack[DL_STACKSIZE],* dls; +char const* current_filename; static - -/* Directory Entity Stack - Simple stack for keeping track of dirents still being processed at the - current depth. +struct tok +{ YYSTYPE lval; //token val + int tok_t; //token type +} token_stack[TK_STACKSIZE], *tsp, *tsx; + +/* Directory Entity Array/Stack + Simple array for keeping track of dirents yet to be processed by the scanner. + If this list is empty and there are no tokens, the lexer is done. + This array is populated by the scanner as an array, and popped locally by the + lexer as a stack, and is popped as a FIFO stack. */ -#define CE_STACK (child_entity_stack) -#define CE_STACKP (ces) -#define CE_STACKSIZE 1024 -static -struct dirent** child_entity_stack[CE_STACKSIZE],*** ces; -#define CE_INIT() (CE_STACKP = CE_STACK) -#define CE_POP() (*--CE_STACK) -#define CE_PUSH(E) (*CE_STACK++ = E) +#define DE_STACK (lexer_direntpa) +#define DE_STACKP (lexer_direntpp) +#define DE_STACKB (lexer_direntpb) +#define DE_LEN() (DE_STACKP - DE_STACKB) +#define DE_INIT() (DE_STACKP = DE_STACKB = DE_STACK) +#define DE_POP() (*DE_STACKB++) /* Token Stack This is a FIFO stack whose pointers are a union of either a pointer to an @@ -126,140 +75,118 @@ struct dirent** child_entity_stack[CE_STACKSIZE],*** ces; An alignment error will occur if IPOP or IPUSH are used a non-even number of times in a sequence! */ -#define TK_STACK (token_stack) -#define TK_STACKP (tks.t) -#define TK_STACKPI (tks.i) -#define TK_STACKX (tkx.t) -#define TK_STACKXI (tkx.i) -#ifndef TK_STACKSIZE -#define TK_STACKSIZE 1024 -#endif -#define TK_EMPTY (TK_STACKP == TK_STACKX) -#define TK_INIT() (TK_STACKP = TK_STACKX = TK_STACK) -#define TK_POP() (*TK_STACKP++) -#define TK_IPOP() (*TK_STACKPI++); -#define TK_PUSH(TOK,LVAL) (*TKSTACKX++ = (struct tok){(LVAL),(TOK)}) -#define TK_IPUSH(I) (*TKSTACKXI++ = (I)) -static -struct tok -{ int lval; - int tok; -} token_stack[TK_STACKSIZE]; -static -union tokp -{ int* i; - struct tok* t; -} tks, tkx; - +#define TK_STACK (token_stack) +#define TK_STACKP (tsp) +#define TK_STACKX (tsx) +#define TK_LEN() (TK_STACKX - TK_STACKP) +#define TK_INIT() (TK_STACKP = TK_STACKX = TK_STACK) +#define TK_POP() (*TK_STACKP++) +#define TK_PUSH(T,L) (*TK_STACKX++ = (struct tok){L,T}) /* Initializer - The initializer returns boolean true if opendir contains an error. + The initializer returns boolean true if an error occurs, which may be handled + with standard errno. */ -int lexer_init() -#define ERR_UF "Fatal: Unknown file [%s] encountered in directory %s", \ - DL_CD_STACK[i]->d_name, DL_STACK[DL_DEPTH] -{ int i = 0; - TK_INIT(); - DL_INIT(); - DL_STACK[0].label = cargs['r'] ? cargs['r'] : "./"; - if ((DL_STACK[0].dirp = opendir(DL_STACK[0].label)) == NULL) - return -1; - errno = 0; - while ((DL_CD_STACK[i] = readdir(DL_STACK[0].dirp)) != NULL) - switch(DL_CD_STACK[i]->d_type) - { - default: - fprintf(stderr, "Fatal: Unkown File %s\n", DL_CD_STACK[i]->d_name); - } - return (errno); +int lexer_init +() +{ TK_INIT(); + DE_INIT(); + current_filename = NULL; + return scanner_init(); } /* Lexer If the token buffer is empty, 'lexer' will initialize the token buffer and - call 'lexer_scandir'. If #SCANDIR_ERROR is returned, an error is printed + call 'lexer_scandir'. If SCAN_ERROR is returned, an error is printed before sending a null return to bison. If 0 tokens are generated, the error printing is skipped. In all other cases, 'yylval' is set, and the token's integer representation is returned. */ -int lexer() -{ if (TOK_BUF_EMPTY) - { TOK_BUF_INIT(); - switch (lexer_scan) - { case SCANDIR_ERROR: - perror("lexer_scan"); - case 0: - yylval = 0; - return 0; - default: +int lexer +#define $($)#$ +#define SCAN_ERROR -1 +#define TK_EMPTY (TK_STACKP == TK_STACKX) +#define FAIL(...) \ + do { \ + fprintf(stderr,__VA_ARGS__); \ + goto done; \ + } while (0) +() +{ struct tok token; + start: + while (DE_LEN() > 0) //lex any directory entries in our stack + if (lexer_lexfile(DE_POP()->d_name) == 0) + FAIL("Lexer failed to tokenize [%s]\n",(*DE_STACKB)->d_name); + if (TK_EMPTY) //if there are no tokens, + { TK_INIT(); //initialize the token stack back to 0 + switch (scanner()) + { case SCAN_ERROR: //if an error occurred, + FAIL("Scanner error\n"); + case 0: //if the the scanner finds no dirents, + goto done; //then we are done + default: //if we found some elements to scan, + goto start; //start over and lex them } } - yylval = TOK_BUF_IPOP(); - return TOK_BUF_IPOP(); + token = TK_POP(); + yylval = token.lval; + return token.tok_t; + done: + yylval.val = 0; + return 0; } -static inline -int lexer_scan() -{ static DIR* -} - -/* Scanner - Scans a filename from its alphabetically ordered list of file elements - and tokenizes the result. If the file list is empty, then the stack of - directory elements will be popped and processed as they are encountered. - - Returns the number of tokens generated. +/* Token Receiver + This receiver takes a struct tok and pushes it to the FIFO stack. */ -#define MAX_ENTITIES 256 -static -int lexer_scan() -{ static struct dirent* entity; - static struct dirent* files[MAX_ENTITIES] = {0}; - static int num_files = 0; -#define NO_FILES (num_files == 0) - - if (NO_FILES) - - - //sort out files and directories, grow directories from bottom up - while ((entity = readdir(dirp)) != NULL) - { switch (entity->d_type) - { case DT_LNK: - case DT_REG: - files[num_files++] = entity; - break; - case DT_DIR: - *(dirs - num_dirs++) = entity; - break; - case DT_UNKNOWN: - default: - printf("Ignoring unknown file: %s\n", entity->d_name); - break; - } +void lexer_pushtok +#define $($)#$ //stringifier +#define ERR_TK "Fatal: Generated over " $(TK_STACKSIZE) " tokens in one pass." +( int tok, YYSTYPE lval ) +{ if (TK_LEN() >= TK_STACKSIZE) + { fprintf(stderr, ERR_TK); + exit(EXIT_FAILURE); } - if (errno) - perror("readdir"); - qsort(&files[0], num_files, sizeof struct dirent*, qalpha); - num_ents = scandirat(dirfd, ".", &namelist, scanfilter, scancompar); - if (num_ents < 0) - { perror("scandirat"); - return -1; - } - //process files - - //recurse into directories - return tbx - tbp; + TK_PUSH(tok, lval); } -/* Quicksort comparison function - sort each dirent encountered first by its file type (regular files first) - and then by their alphabetical ordering. +/* Lexical analysis of a file + Strips a filename to its base name, then sends it to lexer_lex */ -static -int qcompar -( const void* a, - const void* b - ) -{ +int lexer_lexfile +#define HIDDEN_WARNING "%s is hidden and will not be parsed!\n", filename +( const char *filename +) +{ static char fname[NAME_MAX]; + char *last_period = NULL, *iter; + + if (*filename == '.') + { fprintf (stderr, HIDDEN_WARNING); + return 0; + } + /* Copy the filename and remove its suffix */ + strncpy(fname,filename,NAME_MAX); + last_period = NULL; + for (iter = fname; *iter; iter++) //find the last '.' char + if (*iter == '.') + last_period = iter; + if (last_period) //if we found one, + *last_period = '\0'; //truncate the string there + /* Register the current_filename */ + current_filename = filename; + + return lexer_lex(fname); } +char const* lexer_get_current_filepath +() +{ static char current_path[PATH_MAX]; + static char const* last_filename; + if ((!last_filename || last_filename != current_filename) && + (realpath(current_filename, current_path) != current_path)) + { perror("realpath: "); + return NULL; + } + return (const char*)current_path; +}