lexer testing complete
[henge/webcc.git] / src / apc / lexer.c
1 /*!@file
2 \brief lexical analyzer implementation for APC
3 \details The lexer manages two FIFO stacks. One for maintaining tokens, the
4 other for maintaining a list of files to be scanned. During
5 execution, the lexer will return a token from its token queue if any
6 are present. If not, the lexer will pop an element from its
7 file queue to 'scanner' to be tokenized. If the file queue is empty,
8 the lexer will instead call 'parsedir' to traverse the directory tree
9 and tokenize the results. If 'parsedir' does not generate any new
10 tokens, we are done.
11 \author Jordan Lavatai
12 \date Aug 2016
13 ----------------------------------------------------------------------------*/
14 /* Standard */
15 #include <stdio.h>
16 #include <string.h>
17 #include <errno.h>
18 /* Posix */
19 #include <unistd.h>
20 #include <stdlib.h>
21 #include <dirent.h>
22 /* Local */
23 #include "parser.tab.h"
24 #ifndef DE_STACKSIZE
25 #define DE_STACKSIZE 1024
26 #endif
27 #ifndef TK_STACKSIZE
28 #define TK_STACKSIZE 1024
29 #endif
30 /* Public */
31 int lexer_init(void);
32 int lexer(void);
33 int lexer_lexfile(const char*);
34 void lexer_pushtok(int, YYSTYPE);
35 extern //lexer_lex.rl
36 int lexer_lex(const char*);
37 struct dirent* lexer_direntpa[DE_STACKSIZE], **lexer_direntpp;
38 /* Private */
39 extern //scanner.c
40 int scanner_init(void);
41 extern //scanner.c
42 int scanner(void);
43 static inline
44 int dredge_current_depth(void);
45 extern //bison
46 YYSTYPE yylval;
47 static
48 struct tok
49 { YYSTYPE lval; //token val
50 int tok_t; //token type
51 } token_stack[TK_STACKSIZE];
52 static
53 union tokp
54 { int* tpt; //token pointer type
55 struct tok* tok;
56 YYSTYPE* tvp; //token value pointer
57 } tks, tkx;
58
/* Directory Entity Array/Stack
   Holds the dirents that still have to be lexed.  The scanner fills the
   array; the lexer consumes it from the end, like a stack.  When this stack
   is empty and no tokens are queued either, the lexer has nothing left to do.

   DE_STACK   base of the array
   DE_STACKP  cursor, one past the most recently stored entry
   DE_INIT()  empty the stack by moving the cursor back to the base
   DE_LEN()   number of entries between the base and the cursor
   DE_POP()   step the cursor back one entry and yield that entry
*/
#define DE_STACK   (lexer_direntpa)
#define DE_STACKP  (lexer_direntpp)
#define DE_INIT()  (DE_STACKP = DE_STACK)
#define DE_LEN()   (DE_STACKP - DE_STACK)
#define DE_POP()   (*--DE_STACKP)
70
/* Token Stack
   Tokens are consumed in the order they were pushed (first in, first out).
   Two cursors walk the token array: the pop cursor (tks) and the push cursor
   (tkx).  Each cursor is a union, so it can be advanced by a whole token
   (TK_POP / TK_PUSH), by a single int (TK_POPI), or by a single YYSTYPE
   (TK_POPL).

   TK_POPL followed by TK_POPI consumes exactly one token only when
   struct tok has no padding, i.e. when sizeof(struct tok) equals
   sizeof(YYSTYPE) + sizeof(int).  Unbalanced or mis-sized partial pops leave
   the pop cursor misaligned; prefer TK_POP, which always steps one full token.

   Every macro expands to a plain expression (no trailing semicolon), so each
   can be used inside a larger expression or statement.
*/
#define TK_STACK     (token_stack)
#define TK_STACKP    (tks.tok)
#define TK_STACKPI   (tks.tpt)
#define TK_STACKPL   (tks.tvp)
#define TK_STACKX    (tkx.tok)
#define TK_STACKXI   (tkx.tpt)
#define TK_LEN()     (TK_STACKX - TK_STACKP)
#define TK_INIT()    (TK_STACKP = TK_STACKX = TK_STACK)
#define TK_POP()     (*TK_STACKP++)
#define TK_POPI()    (*TK_STACKPI++)
#define TK_POPL()    (*TK_STACKPL++)
#define TK_PUSH(T,L) (*TK_STACKX++ = (struct tok){(L),(T)})
91
/* Initializer
   Empties the directory-entry stack and the token stack, then initializes the
   scanner.  The value returned is scanner_init()'s own result: boolean true
   if an error occurs, which may be handled with standard errno.
*/
int lexer_init(void)
{ int scanner_status;

  DE_INIT();
  TK_INIT();
  scanner_status = scanner_init();
  return scanner_status;
}
101
102 /* Lexer
103 If the token buffer is empty, 'lexer' will initialize the token buffer and
104 call 'lexer_scandir'. If SCAN_ERROR is returned, an error is printed
105 before sending a null return to bison. If 0 tokens are generated, the error
106 printing is skipped. In all other cases, 'yylval' is set, and the token's
107 integer representation is returned.
108 */
109 int lexer
110 #define $($)#$
111 #define SCAN_ERROR -1
112 #define TK_EMPTY (TK_STACKP == TK_STACKX)
113 #define FAIL(...) \
114 do { \
115 fprintf(stderr,__VA_ARGS__); \
116 goto done; \
117 } while (0)
118 ()
119 {start:
120 while (DE_LEN() > 0) //lex any directory entries in our stack
121 if (lexer_lexfile(DE_POP()->d_name) == 0)
122 FAIL("Lexer failed to tokenize [%s]\n",(*DE_STACKP)->d_name);
123 if (TK_EMPTY) //if there are no tokens,
124 { TK_INIT(); //initialize the token stack back to 0
125 switch (scanner())
126 { case SCAN_ERROR: //if an error occurred,
127 FAIL("Scanner error\n");
128 case 0: //if the the scanner finds no dirents,
129 goto done; //then we are done
130 default: //if we found some elements to scan,
131 goto start; //start over and lex them
132 }
133 }
134 yylval = TK_POPL();
135 return TK_POPI();
136 done:
137 yylval.val = 0;
138 return 0;
139 }
140
141
142 /* Token Receiver
143 This receiver takes a struct tok and pushes it to the FIFO stack.
144 */
145 void lexer_pushtok
146 #define $($)#$ //stringifier
147 #define ERR_TK "Fatal: Generated over " $(TK_STACKSIZE) " tokens in one pass."
148 ( int tok, YYSTYPE lval )
149 { if (TK_LEN() >= TK_STACKSIZE)
150 { fprintf(stderr, ERR_TK);
151 exit(EXIT_FAILURE);
152 }
153 TK_PUSH(tok, lval);
154 printf("Pushed Token %i | %i\n", TK_STACK[TK_LEN() - 1].tok_t, TK_STACK[TK_LEN() - 1].lval.val);
155 }
156
/* Lexical analysis of a file
   Strips a filename to its base name (everything before the last '.'), then
   sends it to lexer_lex and returns lexer_lex's result.

   Names beginning with '.' are treated as hidden: a warning is written to
   stderr and 0 is returned without calling lexer_lex.

   The name is copied into a static buffer of MAX_FNAME bytes.  The copy is
   always NUL-terminated; a name longer than MAX_FNAME - 1 characters is
   truncated.
*/
#define MAX_FNAME 2048
int lexer_lexfile(const char *filename)
{ static char fname[MAX_FNAME];
  char *last_period;

  if (*filename == '.')
    { fprintf(stderr, "%s is hidden and will not be parsed!\n", filename);
      return 0;
    }
  //snprintf terminates the copy even when the source does not fit
  snprintf(fname, sizeof fname, "%s", filename);
  last_period = strrchr(fname, '.');
  if (last_period)
    *last_period = '\0';
  return lexer_lex(fname);
}