Lexer WIP
[henge/webcc.git] / src / apc / lexer.c
1 /*!@file
2 \brief lexical analyzer implementation for APC
3 \details this lexer scans a root directory given from the command line
4 for subdirectories and files structured for the APC grammar.
5 \author Jordan Lavatai
6 \date Aug 2016
7 ----------------------------------------------------------------------------*/
8 /* Standard */
9 #include <stdio.h>
10 #include <string.h>
11 #include <errno.h>
12 /* Posix */
13 #include <unistd.h>
14 #include <stdlib.h>
15 #include <dirent.h>
16 /* Local */
17 #include "parser.tab.h"
18 #define TOKEN_BUF_SIZE 1024
19 #define ENTITY_BUF_SIZE 512
20 #define SCANDIR_ERROR -1
21
22 int lexer_init(void);
23 int lexer(void);
24
25 extern //main.c
26 const char* cargs['Z'];
27
28 static inline
29 int scan(void);
30 static inline
31 int qcomp(const void*, const void*);
32
33 /* Directory Listing Stack
34 FILO Stack for keeping an open DIR* at each directory depth for treewalk.
35 This stack is depth-safe, checking its depth during push operations, but not
36 during pop operations, to ensure the thread doesn't open too many files at
37 once (512 in c runtime), or traverse too far through symbolic links.
38 A directory listing includes a DIR* and all DIR-typed entity in the directory
39 as recognized by dirent, populated externally (and optionally).
40 This stack behaves abnormally by incrementing its PUSH operation prior to
41 evaluation, and the POP operations after evaluation. This behavior allows
42 the 'DL_CURDEPTH' operation to map to the current element in the 'dl_stack'
43 array, and it is always treated as the "current depth". This also allows us
44 to init the root directory to 'directory_list_stack'[0] and pop it in a safe
45 and explicit manner.
46 */
47 #define S(S)#S
48 #ifndef DL_DEPTHMAX
49 #define DL_DEPTHMAX 64
50 #endif
51 #ifndef DL_CHILDMAX
52 #define DL_CHILDMAX DL_DEPTHMAX //square tree
53 #endif
54 #define DL_STACK (directory_list_stack)
55 #define DL_STACKP (dls)
56 #define DL_STACKSIZE (DL_DEPTHMAX + 1) //+1 because push increments early
57 #define DL_LEN (DL_STACKP - DL_STACK)
58 #define DL_CURDEPTH (DL_LEN)
59 #define DL_CD_STACK ((*DL_STACKP).child_entity_stack)
60 #define DL_CD_STACKP ((*DL_STACKP).cds)
61 #define DL_CD_STACKSIZE (DL_DEPTHMAX) //square tree
62 #define DL_CD_LEN (DL_CD_STACKP - DL_CD_STACK)
63 #define ERR_CHILD "Fatal: Maximum of " S(MAX_CHILD) \
64 " child directories exceeded for directory %s at depth %i\n" \
65 ,DL_STACK[DL_DEPTH()].label,DL_DEPTH()
66 #define ERR_DEPTH "Fatal: Maximum directory depth of " S(DL_DEPTHMAX) \
67 " exceeded during directory scan\n"
68 #define DL_INIT() (DL_STACKP = DL_STACK)
69 #define DL_CD_INIT() (DL_CD_STACKP = DL_CD_STACK)
70 #define DL_POP() ((DIR*)(*DL_STACKP--))
71 #define DL_CD_POP() (*--DL_CD_STACKP)
72 #define DL_PUSH(D) (*(DIR**)(++DL_STACKP) = DIRP)
73 #define DL_CD_PUSH(E) (*DL_CD_STACKP++ = E)
74 #define DL_SAFEPUSH(D) \
75 do { \
76 if (DL_DEPTH() >= DL_DEPTHMAX) \
77 { fprintf(stderr, ERR_DEPTH); \
78 exit(EXIT_FAILURE); \
79 } \
80 if ((*(DIR**)(++DL_STACKP) = D) == NULL) \
81 { perror("DL_SAFEPUSH"); \
82 exit(EXIT_FAILURE); \
83 } \
84 } while(0)
85 #define DL_CD_POPULATE() \
86 do { \
87 DL_CD_INIT(); \
88 while ((*DL_CD_STACKP = readdir(*DL_STACKP)) != NULL) \
89 { switch((*DL_CD_STACKP)->d_type) \
90 { case DT_REG: \
91 case DT_DIR: \
92 DL_CD_STACKP++; \
93 break; \
94 default: \
95 } \
96 } \
97 qsort(DL_CD_STACKP, DL_CD_LEN, sizeof *DL_CD_STACKP, qcomp); \
98 } while (0)
99
100 static
101 struct dirlist
102 { DIR* dirp;
103 struct dirent** child_directory_stack[DL_CD_STACKSIZE],*** cds;
104 const char* label;
105 } directory_list_stack[DL_STACKSIZE],* dls;
106 static
107
108 /* Directory Entity Stack
109 Simple stack for keeping track of dirents still being processed at the
110 current depth.
111 */
112 #define CE_STACK (child_entity_stack)
113 #define CE_STACKP (ces)
114 #define CE_STACKSIZE 1024
115 static
116 struct dirent** child_entity_stack[CE_STACKSIZE],*** ces;
117 #define CE_INIT() (CE_STACKP = CE_STACK)
118 #define CE_POP() (*--CE_STACK)
119 #define CE_PUSH(E) (*CE_STACK++ = E)
120
121 /* Token Stack
122 This is a FIFO stack whose pointers are a union of either a pointer to an
123 integer, or a pointer to two integers (a struct tok). This way, integers may
124 be added or removed from the stack either singularly (IPUSH/IPOP), or as a
125 full token of two integers (PUSH/POP).
126 An alignment error will occur if IPOP or IPUSH are used a non-even number of
127 times in a sequence!
128 */
129 #define TK_STACK (token_stack)
130 #define TK_STACKP (tks.t)
131 #define TK_STACKPI (tks.i)
132 #define TK_STACKX (tkx.t)
133 #define TK_STACKXI (tkx.i)
134 #ifndef TK_STACKSIZE
135 #define TK_STACKSIZE 1024
136 #endif
137 #define TK_EMPTY (TK_STACKP == TK_STACKX)
138 #define TK_INIT() (TK_STACKP = TK_STACKX = TK_STACK)
139 #define TK_POP() (*TK_STACKP++)
140 #define TK_IPOP() (*TK_STACKPI++);
141 #define TK_PUSH(TOK,LVAL) (*TKSTACKX++ = (struct tok){(LVAL),(TOK)})
142 #define TK_IPUSH(I) (*TKSTACKXI++ = (I))
143 static
144 struct tok
145 { int lval;
146 int tok;
147 } token_stack[TK_STACKSIZE];
148 static
149 union tokp
150 { int* i;
151 struct tok* t;
152 } tks, tkx;
153
154
155 /* Initializer
156 The initializer returns boolean true if opendir contains an error.
157 */
158 int lexer_init()
159 #define ERR_UF "Fatal: Unknown file [%s] encountered in directory %s", \
160 DL_CD_STACK[i]->d_name, DL_STACK[DL_DEPTH]
161 { int i = 0;
162 TK_INIT();
163 DL_INIT();
164 DL_STACK[0].label = cargs['r'] ? cargs['r'] : "./";
165 if ((DL_STACK[0].dirp = opendir(DL_STACK[0].label)) == NULL)
166 return -1;
167 errno = 0;
168 while ((DL_CD_STACK[i] = readdir(DL_STACK[0].dirp)) != NULL)
169 switch(DL_CD_STACK[i]->d_type)
170 {
171 default:
172 fprintf(stderr, "Fatal: Unkown File %s\n", DL_CD_STACK[i]->d_name);
173 }
174 return (errno);
175 }
176
177 /* Lexer
178 If the token buffer is empty, 'lexer' will initialize the token buffer and
179 call 'lexer_scandir'. If #SCANDIR_ERROR is returned, an error is printed
180 before sending a null return to bison. If 0 tokens are generated, the error
181 printing is skipped. In all other cases, 'yylval' is set, and the token's
182 integer representation is returned.
183 */
184 int lexer()
185 { if (TOK_BUF_EMPTY)
186 { TOK_BUF_INIT();
187 switch (lexer_scan)
188 { case SCANDIR_ERROR:
189 perror("lexer_scan");
190 case 0:
191 yylval = 0;
192 return 0;
193 default:
194 }
195 }
196 yylval = TOK_BUF_IPOP();
197 return TOK_BUF_IPOP();
198 }
199
200
201 static inline
202 int lexer_scan()
203 { static DIR*
204 }
205
206 /* Scanner
207 Scans a filename from its alphabetically ordered list of file elements
208 and tokenizes the result. If the file list is empty, then the stack of
209 directory elements will be popped and processed as they are encountered.
210
211 Returns the number of tokens generated.
212 */
213 #define MAX_ENTITIES 256
214 static
215 int lexer_scan()
216 { static struct dirent* entity;
217 static struct dirent* files[MAX_ENTITIES] = {0};
218 static int num_files = 0;
219 #define NO_FILES (num_files == 0)
220
221 if (NO_FILES)
222
223
224 //sort out files and directories, grow directories from bottom up
225 while ((entity = readdir(dirp)) != NULL)
226 { switch (entity->d_type)
227 { case DT_LNK:
228 case DT_REG:
229 files[num_files++] = entity;
230 break;
231 case DT_DIR:
232 *(dirs - num_dirs++) = entity;
233 break;
234 case DT_UNKNOWN:
235 default:
236 printf("Ignoring unknown file: %s\n", entity->d_name);
237 break;
238 }
239 }
240 if (errno)
241 perror("readdir");
242 qsort(&files[0], num_files, sizeof struct dirent*, qalpha);
243 num_ents = scandirat(dirfd, ".", &namelist, scanfilter, scancompar);
244 if (num_ents < 0)
245 { perror("scandirat");
246 return -1;
247 }
248 //process files
249
250 //recurse into directories
251 return tbx - tbp;
252 }
253
254 /* Quicksort comparison function
255 sort each dirent encountered first by its file type (regular files first)
256 and then by their alphabetical ordering.
257 */
258 static
259 int qcompar
260 ( const void* a,
261 const void* b
262 )
263 {
264 }
265