/*!@file
\brief lexical analyzer implementation for APC
- \details this lexer scans a root directory given from the command line
- for subdirectories and files structured for the APC grammar.
+ \details The lexer manages two FIFO stacks. One for maintaining tokens, the
+ other for maintaining a list of files to be scanned. During
+ execution, the lexer will return a token from its token queue if any
+               are present. If not, the lexer will pop an element from its
+ file queue to 'scanner' to be tokenized. If the file queue is empty,
+ the lexer will instead call 'parsedir' to traverse the directory tree
+ and tokenize the results. If 'parsedir' does not generate any new
+ tokens, we are done.
\author Jordan Lavatai
\date Aug 2016
----------------------------------------------------------------------------*/
-//stdc
+/* Standard */
#include <stdio.h>
#include <string.h>
#include <errno.h>
-//posix
+/* Posix */
#include <unistd.h>
#include <stdlib.h>
-//bison
-#include "fileparser.tab.h"
-#define TOKEN_BUF_SIZE 1024
-#define DIRP_STACK_SIZE 512
-
-int lexer_init(void);
-int lexer(void);
-
+#include <dirent.h>
+/* Local */
+#include "parser.tab.h"
+#ifndef DE_STACKSIZE
+#define DE_STACKSIZE 1024
+#endif
+#ifndef TK_STACKSIZE
+#define TK_STACKSIZE 1024
+#endif
+/* Public */
+int lexer_init(void);
+int lexer(void);
+int lexer_lex(const char*);
+void lexer_pushtok(int, int);
+struct dirent* lexer_direntpa[DE_STACKSIZE];
+/* Private */
+extern //scanner.c
+int scanner_init(void);
+extern //scanner.c
+int scanner(void);
+static inline
+int dredge_current_depth(void);
+extern //bison
+YYSTYPE yylval;
static
-int lexer_scan(void);
-
+struct tok
+{ int lval;
+ int tok;
+} token_stack[TK_STACKSIZE];
static
-int token_buf[TOKEN_BUF_SIZE], *tbp, *tbx;
+union tokp
+{ int* i;
+ struct tok* t;
+} tks, tkx;
static
-DIR* dirp_stack[DIRP_STACK_SIZE], *dsp;
+struct dirent** dps;
-/* Initialize pointers */
-int
-lexer_init()
-{ tbp = tbx = token_buf;
- dsp = dirp_stack;
- return 0;
-}
-
-/* Returns a token identifier and sets yylval */
-int
-lexer()
-{ if (lexer_scan() == 0)
- return 0;
- yylval = *tbp++;
- return *tbp++;
-}
+/* Directory Entity Array/Stack
+ Simple array for keeping track of dirents yet to be processed by the scanner.
+ If this list is empty and there are no tokens, the lexer is done.
+ This array is populated by the scanner as an array, and popped locally by the
+ lexer as a stack.
+*/
+#define DE_STACK (lexer_direntpa)
+#define DE_STACKP (dps)
+#define DE_LEN() (DE_STACKP - DE_STACK)
+#define DE_INIT() (DE_STACKP = DE_STACK)
+#define DE_POP() (*--DE_STACKP)
-/* Scanner
- Scans a filename from its alphabetically ordered list of file elements
- and tokenizes the result. If the file list is empty, then the stack of
- directory elements will be popped and processed as they are encountered.
+/* Token Stack
+ This is a FIFO stack whose pointers are a union of either a pointer to an
+ integer, or a pointer to two integers (a struct tok). This way, integers may
+   be added or removed from the stack either singularly (TK_POPI), or as a
+   full token of two integers (TK_PUSH/TK_POP).
+   An alignment error will occur if TK_POPI is used an odd number of
+   times in a sequence!
+*/
+#define TK_STACK (token_stack)
+#define TK_STACKP (tks.t)
+#define TK_STACKPI (tks.i)
+#define TK_STACKX (tkx.t)
+#define TK_STACKXI (tkx.i)
+#define TK_LEN() (TK_STACKP - TK_STACKX)
+#define TK_INIT() (TK_STACKP = TK_STACKX = TK_STACK)
+#define TK_POP() (*TK_STACKP++)
+#define TK_POPI() (*TK_STACKPI++);
+#define TK_PUSH(T,L) (*TK_STACKX++ = (struct tok){L,T})
- Returns the number of tokens generated.
+/* Initializer
+ The initializer returns boolean true if an error occurs, which may be handled with standard errno.
*/
-#define MAX_ENTITIES 256
-static
-int lexer_scan()
-{ static struct dirent* entity;
- static struct dirent* files[MAX_ENTITIES];
- static struct dirent* dirs = files + MAX_ENTITIES - 1;
- static int num_files = 0;
- static int num_dirs = 0;
+int lexer_init
+()
+{ TK_INIT();
+ DE_INIT();
+ return scanner_init();
+}
- //sort out files and directories, grow directories from bottom up
- while ((entity = readdir(dirp)) != NULL)
- { switch (entity->d_type)
- { case DT_LNK:
- case DT_REG:
- files[num_files++] = entity;
- break;
- case DT_DIR:
- *(dirs - num_dirs++) = entity;
- break;
- case DT_UNKNOWN:
- default:
- printf("Ignoring unknown file: %s\n", entity->d_name);
- break;
+/* Lexer
+   If the token buffer is empty, 'lexer' will reinitialize the token buffer and
+   call 'scanner'. If 'scanner' generates 0 tokens, a null token is sent to
+   bison to signal completion. In all other cases, 'yylval' is set, and the
+   token's integer representation is returned.
+*/
+int lexer
+#define SCAN_ERROR -1
+#define TK_EMPTY (TK_STACKP == TK_STACKX)
+()
+{ if (TK_EMPTY)
+ { TK_INIT();
+ if (scanner() == 0)
+ { yylval.val = 0;
+ return 0;
}
}
- if (errno)
- perror("readdir");
- qsort(&files[0], num_files, sizeof struct dirent*, qalpha);
- num_ents = scandirat(dirfd, ".", &namelist, scanfilter, scancompar);
- if (num_ents < 0)
- { perror("scandirat");
- return -1;
- }
- //process files
+ yylval.val = TK_POPI();
+ return TK_POPI();
+}
+
+/* Lexical Analysis
+ Ragel state machine for tokenizing text.
+*/
+int lexer_lex
+(const char* str)
+{ lexer_pushtok(1, 2);
+ printf (str);
+ return 1;
+}
- //recurse into directories
+/* Token Receiver
+ This receiver takes a struct tok and pushes it to the FIFO stack.
+*/
+void lexer_pushtok
+#define S(S)#S //stringifier
+#define ERR_TK "Fatal: Generated over " S(TK_STACKSIZE) " tokens in one pass."
+( int tok, int lval )
+{ if (TK_LEN() >= TK_STACKSIZE)
+ { fprintf(stderr, ERR_TK);
+ exit(EXIT_FAILURE);
+ }
+ TK_PUSH(tok, lval);
}
+/* init_file:
+ if (lsp != NULL)
+ while ((c = *lsp++) == *csp)
+ { switch (c)
+ { case DELIM:
+ delimeters_skipped++;
+ default:
+ csp++; //delayed to ensure csp is the start of scannable text
+ break;
+ }
+ }
+ last_string = string;
+ scan_text:
+ return scanner_tokenize(csp);
+*/