X-Git-Url: https://www.kengrimes.com/gitweb/?p=henge%2Fwebcc.git;a=blobdiff_plain;f=src%2Fapc%2Flexer.c;h=940ee8163968151c90e9ec508e81ba722265ad3b;hp=0d408b9daff8ee3af471912436fbb34d6ffb3d22;hb=957d4c384203bdc325c81a594943a5790dcb32aa;hpb=c6658233045b0039b328394f38b3297501df0c30 diff --git a/src/apc/lexer.c b/src/apc/lexer.c index 0d408b9..940ee81 100644 --- a/src/apc/lexer.c +++ b/src/apc/lexer.c @@ -1,92 +1,192 @@ /*!@file \brief lexical analyzer implementation for APC - \details this lexer scans a root directory given from the command line - for subdirectories and files structured for the APC grammar. + \details The lexer manages two FIFO stacks. One for maintaining tokens, the + other for maintaining a list of files to be scanned. During + execution, the lexer will return a token from its token queue if any + are present. If not, the lexer will will pop an element from its + file queue to 'scanner' to be tokenized. If the file queue is empty, + the lexer will instead call 'parsedir' to traverse the directory tree + and tokenize the results. If 'parsedir' does not generate any new + tokens, we are done. \author Jordan Lavatai \date Aug 2016 ----------------------------------------------------------------------------*/ -//stdc +/* Standard */ #include #include #include -//posix +/* Posix */ #include #include -//bison -#include "fileparser.tab.h" -#define TOKEN_BUF_SIZE 1024 -#define DIRP_STACK_SIZE 512 +#include //realpath, NAME_MAX, PATH_MAX +#include +/* Local */ +#include "parser.tab.h" +#ifndef DE_STACKSIZE +#define DE_STACKSIZE 1024 +#endif +#ifndef TK_STACKSIZE +#define TK_STACKSIZE 1024 +#endif +/* Public */ +int lexer_init(void); +int lexer(void); +int lexer_lexfile(const char*); +void lexer_pushtok(int, YYSTYPE); +char const* lexer_get_current_filepath(void); +struct dirent* lexer_direntpa[DE_STACKSIZE],** lexer_direntpp,** lexer_direntpb; +/* Private */ +extern //lexer_lex.rl +int lexer_lex(const char*); +extern //scanner.c +int scanner_init(void); +extern //scanner.c +int scanner(void); +static inline +int dredge_current_depth(void); +extern //bison +YYSTYPE yylval; +static +char const* current_filename; +static +struct tok +{ YYSTYPE lval; //token val + int tok_t; //token type +} token_stack[TK_STACKSIZE], *tsp, *tsx; -int lexer_init(void); -int lexer(void); +/* Directory Entity Array/Stack + Simple array for keeping track of dirents yet to be processed by the scanner. + If this list is empty and there are no tokens, the lexer is done. + This array is populated by the scanner as an array, and popped locally by the + lexer as a stack, and is popped as a FIFO stack. +*/ +#define DE_STACK (lexer_direntpa) +#define DE_STACKP (lexer_direntpp) +#define DE_STACKB (lexer_direntpb) +#define DE_LEN() (DE_STACKP - DE_STACKB) +#define DE_INIT() (DE_STACKP = DE_STACKB = DE_STACK) +#define DE_POP() (*DE_STACKB++) -static -int lexer_scan(void); +/* Token Stack + This is a FIFO stack whose pointers are a union of either a pointer to an + integer, or a pointer to two integers (a struct tok). This way, integers may + be added or removed from the stack either singularly (IPUSH/IPOP), or as a + full token of two integers (PUSH/POP). + An alignment error will occur if IPOP or IPUSH are used a non-even number of + times in a sequence! +*/ +#define TK_STACK (token_stack) +#define TK_STACKP (tsp) +#define TK_STACKX (tsx) +#define TK_LEN() (TK_STACKX - TK_STACKP) +#define TK_INIT() (TK_STACKP = TK_STACKX = TK_STACK) +#define TK_POP() (*TK_STACKP++) +#define TK_PUSH(T,L) (*TK_STACKX++ = (struct tok){L,T}) -static -int token_buf[TOKEN_BUF_SIZE], *tbp, *tbx; -static -DIR* dirp_stack[DIRP_STACK_SIZE], *dsp; +/* Initializer + The initializer returns boolean true if an error occurs, which may be handled + with standard errno. +*/ +int lexer_init +() +{ TK_INIT(); + DE_INIT(); + current_filename = NULL; + return scanner_init(); +} -/* Initialize pointers */ -int -lexer_init() -{ tbp = tbx = token_buf; - dsp = dirp_stack; +/* Lexer + If the token buffer is empty, 'lexer' will initialize the token buffer and + call 'lexer_scandir'. If SCAN_ERROR is returned, an error is printed + before sending a null return to bison. If 0 tokens are generated, the error + printing is skipped. In all other cases, 'yylval' is set, and the token's + integer representation is returned. +*/ +int lexer +#define $($)#$ +#define SCAN_ERROR -1 +#define TK_EMPTY (TK_STACKP == TK_STACKX) +#define FAIL(...) \ + do { \ + fprintf(stderr,__VA_ARGS__); \ + goto done; \ + } while (0) +() +{ struct tok token; + start: + while (DE_LEN() > 0) //lex any directory entries in our stack + if (lexer_lexfile(DE_POP()->d_name) == 0) + FAIL("Lexer failed to tokenize [%s]\n",(*DE_STACKB)->d_name); + if (TK_EMPTY) //if there are no tokens, + { TK_INIT(); //initialize the token stack back to 0 + switch (scanner()) + { case SCAN_ERROR: //if an error occurred, + FAIL("Scanner error\n"); + case 0: //if the the scanner finds no dirents, + goto done; //then we are done + default: //if we found some elements to scan, + goto start; //start over and lex them + } + } + token = TK_POP(); + yylval = token.lval; + return token.tok_t; + done: + yylval.val = 0; return 0; } -/* Returns a token identifier and sets yylval */ -int -lexer() -{ if (lexer_scan() == 0) - return 0; - yylval = *tbp++; - return *tbp++; -} -/* Scanner - Scans a filename from its alphabetically ordered list of file elements - and tokenizes the result. If the file list is empty, then the stack of - directory elements will be popped and processed as they are encountered. +/* Token Receiver + This receiver takes a struct tok and pushes it to the FIFO stack. +*/ +void lexer_pushtok +#define $($)#$ //stringifier +#define ERR_TK "Fatal: Generated over " $(TK_STACKSIZE) " tokens in one pass." +( int tok, YYSTYPE lval ) +{ if (TK_LEN() >= TK_STACKSIZE) + { fprintf(stderr, ERR_TK); + exit(EXIT_FAILURE); + } + TK_PUSH(tok, lval); +} - Returns the number of tokens generated. +/* Lexical analysis of a file + Strips a filename to its base name, then sends it to lexer_lex */ -#define MAX_ENTITIES 256 -static -int lexer_scan() -{ static struct dirent* entity; - static struct dirent* files[MAX_ENTITIES]; - static struct dirent* dirs = files + MAX_ENTITIES - 1; - static int num_files = 0; - static int num_dirs = 0; +int lexer_lexfile +#define HIDDEN_WARNING "%s is hidden and will not be parsed!\n", filename +( const char *filename +) +{ static char fname[NAME_MAX]; + char *last_period = NULL, *iter; - //sort out files and directories, grow directories from bottom up - while ((entity = readdir(dirp)) != NULL) - { switch (entity->d_type) - { case DT_LNK: - case DT_REG: - files[num_files++] = entity; - break; - case DT_DIR: - *(dirs - num_dirs++) = entity; - break; - case DT_UNKNOWN: - default: - printf("Ignoring unknown file: %s\n", entity->d_name); - break; - } - } - if (errno) - perror("readdir"); - qsort(&files[0], num_files, sizeof struct dirent*, qalpha); - num_ents = scandirat(dirfd, ".", &namelist, scanfilter, scancompar); - if (num_ents < 0) - { perror("scandirat"); - return -1; + if (*filename == '.') + { fprintf (stderr, HIDDEN_WARNING); + return 0; } - //process files - - //recurse into directories + /* Copy the filename and remove its suffix */ + strncpy(fname,filename,NAME_MAX); + last_period = NULL; + for (iter = fname; *iter; iter++) //find the last '.' char + if (*iter == '.') + last_period = iter; + if (last_period) //if we found one, + *last_period = '\0'; //truncate the string there + /* Register the current_filename */ + current_filename = filename; + + return lexer_lex(fname); +} +char const* lexer_get_current_filepath +() +{ static char current_path[PATH_MAX]; + static char const* last_filename; + if ((!last_filename || last_filename != current_filename) && + (realpath(current_filename, current_path) != current_path)) + { perror("realpath: "); + return NULL; + } + return (const char*)current_path; }