Lexer 1.0
[henge/webcc.git] / src / apc / lexer.c
1 /*!@file
2 \brief lexical analyzer implementation for APC
3 \details The lexer manages two FIFO stacks. One for maintaining tokens, the
4 other for maintaining a list of files to be scanned. During
5 execution, the lexer will return a token from its token queue if any
6 are present. If not, the lexer will pop an element from its
7 file queue to 'scanner' to be tokenized. If the file queue is empty,
8 the lexer will instead call 'parsedir' to traverse the directory tree
9 and tokenize the results. If 'parsedir' does not generate any new
10 tokens, we are done.
11 \author Jordan Lavatai
12 \date Aug 2016
13 ----------------------------------------------------------------------------*/
14 /* Standard */
15 #include <stdio.h>
16 #include <string.h>
17 #include <errno.h>
18 /* Posix */
19 #include <unistd.h>
20 #include <stdlib.h>
21 #include <dirent.h>
22 /* Local */
23 //#include "parser.tab.h"
/* Number of entries in the directory-entry stack; may be overridden at build time. */
#if !defined(DP_STACKSIZE)
#  define DP_STACKSIZE 1024
#endif
/* Number of entries in the token stack; may be overridden at build time. */
#if !defined(TK_STACKSIZE)
#  define TK_STACKSIZE 1024
#endif
30 /* Public */
31 struct tok
32 { int lval;
33 int tok;
34 };
35 int lexer_init(void);
36 int lexer(void);
37 inline
38 void lexer_pushtok(int int);
39 struct dirent* lexer_direntpa[DP_STACKSIZE];
40 /* Private */
41 static inline
42 int scan(void);
43 static inline
44 int dredge_current_depth(void);
45 static
46 struct dirent** dps;
47 static
48 struct tok token_stack[TK_STACKSIZE];
49 static
50 union tokp
51 { int* i;
52 struct tok* t;
53 } tks, tkx;
54
/* Directory Entity Array/Stack
   A plain array holding the dirents the scanner has not processed yet.
   Once this list is empty and no tokens remain, the lexer has finished.

   DP_LEN  - number of entries currently on the stack
   DP_INIT - rewind the stack pointer to the bottom of the array
   DP_POP  - step the stack pointer back one slot and yield that entry
*/
#define DP_STACK    (lexer_direntpa)
#define DP_STACKP   (dps)
#define DP_LEN()    ((DP_STACKP) - (DP_STACK))
#define DP_INIT()   ((DP_STACKP) = (DP_STACK))
#define DP_POP()    (*--(DP_STACKP))
64
/* Token Stack
   A FIFO queue laid over 'token_stack'.  Its two cursors are unions of a
   pointer to int and a pointer to struct tok (two ints), so values can be
   moved either one int at a time (TK_PUSHI/TK_POPI, also spelled
   TK_IPUSH/TK_IPOP) or as a whole token (TK_PUSH/TK_POP).  Pops read at the
   'tks' cursor; pushes write at the 'tkx' cursor.
   The int-sized operations must be used an even number of times in a row,
   otherwise the cursors are no longer aligned on token boundaries.
*/
#define TK_STACK     (token_stack)
#define TK_STACKP    (tks.t)
#define TK_STACKPI   (tks.i)
#define TK_STACKX    (tkx.t)
#define TK_STACKXI   (tkx.i)
/* Tokens pushed but not yet popped: write cursor minus read cursor. */
#define TK_LEN()     (TK_STACKX - TK_STACKP)
#define TK_INIT()    (TK_STACKP = TK_STACKX = TK_STACK)
#define TK_POP()     (*TK_STACKP++)
#define TK_POPI()    (*TK_STACKPI++)
#define TK_PUSH(T)   (*TK_STACKX++ = (T))
#define TK_PUSHI(I)  (*TK_STACKXI++ = (I))
/* Alternate spellings, as used by 'lexer' and by the description above. */
#define TK_IPOP()    TK_POPI()
#define TK_IPUSH(I)  TK_PUSHI(I)
84
/* From main.c */
extern const char* cargs['Z'];

/* From scanner.c */
extern int scanner_init(void);
extern int scanner(struct dirent** dirents);
92
/* Initializer
   Rewinds the token stack and the directory-entry stack, then initializes
   the scanner.  The scanner's result is passed straight back to the caller:
   a true (nonzero) value means an error occurred, which may be handled with
   standard errno.
*/
int lexer_init(void)
{
  int scanner_status;

  TK_INIT();
  DP_INIT();
  scanner_status = scanner_init();
  return scanner_status;
}
102
103 /* Lexer
104 If the token buffer is empty, 'lexer' will initialize the token buffer and
105 call 'lexer_scandir'. If #SCANDIR_ERROR is returned, an error is printed
106 before sending a null return to bison. If 0 tokens are generated, the error
107 printing is skipped. In all other cases, 'yylval' is set, and the token's
108 integer representation is returned.
109 */
110 int lexer
111 #define SCAN_ERROR -1
112 #define TK_EMPTY (TK_STACKP == TK_STACKX)
113 ()
114 { if (TK_EMPTY)
115 { switch (parsedir())
116 { case SCAN_ERROR:
117 perror("lexer_scan");
118 case 0:
119 yylval = 0;
120 return 0;
121 default:
122 break;
123 }
124 }
125 yylval = TK_IPOP();
126 return TK_IPOP();
127 }
128
129 /* Token Receiver
130 This receiver takes a struct tok and pushes it to the FIFO stack.
131 */
132 inline
133 void lexer_pushtok
134 #define ERR_TK "Fatal: Generated over " S(TK_STACKSIZE) " tokens in one pass."
135 ( struct tok token )
136 { if (TK_LEN >= TK_STACKSIZE)
137 { fprintf(stderr, ERR_TK);
138 exit(EXIT_FAILURE);
139 }
140 TK_PUSH(token);
141 }
142
143 /* Lexical Analysis
144 Ragel state machine for tokenizing text.
145 */
146 void lexer_lex
147 (const char* str)
148 { struct tok token;
149 token.TOK = 1;
150 token.LVAL = 2;
151 lexer_pushtok(token);
152 printf (str);
153 }
154
155 /* init_file:
156 if (lsp != NULL)
157 while ((c = *lsp++) == *csp)
158 { switch (c)
159 { case DELIM:
160 delimeters_skipped++;
161 default:
162 csp++; //delayed to ensure csp is the start of scannable text
163 break;
164 }
165 }
166 last_string = string;
167 scan_text:
168 return scanner_tokenize(csp);
169 */