53b0c44b3044f34d44797d2d3ee2fd981d6b989c
[henge/webcc.git] / src / apc / lexer.c
/*!@file
  \brief   lexical analyzer implementation for APC
  \details The lexer manages two FIFO queues: one holding tokens, the other
           holding a list of files to be scanned. During execution, the lexer
           returns a token from its token queue if any are present. If not,
           the lexer pops an element from its file queue and passes it to
           'scanner' to be tokenized. If the file queue is empty, the lexer
           instead calls 'parsedir' to traverse the directory tree and
           tokenize the results. If 'parsedir' does not generate any new
           tokens, we are done.
  \author  Jordan Lavatai
  \date    Aug 2016
----------------------------------------------------------------------------*/
14 /* Standard */
15 #include <stdio.h>
16 #include <string.h>
17 #include <errno.h>
18 /* Posix */
19 #include <unistd.h>
20 #include <stdlib.h>
21 #include <limits.h> //realpath, NAME_MAX, PATH_MAX
22 #include <dirent.h>
23 /* Local */
24 #include "parser.tab.h"
25 #ifndef DE_STACKSIZE
26 #define DE_STACKSIZE 1024
27 #endif
28 #ifndef TK_STACKSIZE
29 #define TK_STACKSIZE 1024
30 #endif
31 /* Public */
32 int lexer_init(void);
33 int lexer(void);
34 int lexer_lexfile(const char*);
35 void lexer_pushtok(int, YYSTYPE);
36 char const* lexer_get_current_filepath(void);
37 struct dirent* lexer_direntpa[DE_STACKSIZE],** lexer_direntpp,** lexer_direntpb;
38 /* Private */
39 extern //lexer_lex.rl
40 int lexer_lex(const char*);
41 extern //scanner.c
42 int scanner_init(void);
43 extern //scanner.c
44 int scanner(void);
45 static inline
46 int dredge_current_depth(void);
47 extern //bison
48 YYSTYPE yylval;
49 static
50 char const* current_filename;
51 static
52 struct tok
53 { YYSTYPE lval; //token val
54 int tok_t; //token type
55 } token_stack[TK_STACKSIZE], *tsp, *tsx;
56
/* Directory Entity Queue
   Array of dirents that have not yet been handed to the lexer.  Per the
   original notes the scanner fills the array; the lexer consumes it from the
   front with DE_POP, i.e. in first-in, first-out order.  If this list is empty
   and there are no tokens, the lexer is done.

   DE_STACK   the backing array
   DE_STACKB  read cursor: next entry DE_POP will return
   DE_STACKP  end cursor: DE_LEN counts the entries from DE_STACKB up to here
   DE_INIT    empties the queue by resetting both cursors to the array start
   DE_POP     yields the entry at the read cursor and advances it; it does no
              emptiness check, so test DE_LEN() first
*/
#define DE_STACK   (lexer_direntpa)
#define DE_STACKP  (lexer_direntpp)
#define DE_STACKB  (lexer_direntpb)
#define DE_LEN()   ((DE_STACKP) - (DE_STACKB))
#define DE_INIT()  (DE_STACKB = DE_STACK, DE_STACKP = DE_STACKB)
#define DE_POP()   (*(DE_STACKB)++)
69
/* Token Queue
   First-in, first-out buffer of 'struct tok' values (a semantic value plus an
   integer token type) stored in 'token_stack'.

   TK_STACK   the backing array
   TK_STACKP  read cursor: next token TK_POP will return
   TK_STACKX  write cursor: slot the next TK_PUSH will fill
   TK_LEN     number of tokens pushed but not yet popped
   TK_INIT    empties the queue by resetting both cursors to the array start
   TK_POP     yields the token at the read cursor and advances it
   TK_PUSH    stores token type T with value L at the write cursor and
              advances it

   None of these macros check bounds; callers must do so themselves.
*/
#define TK_STACK      (token_stack)
#define TK_STACKP     (tsp)
#define TK_STACKX     (tsx)
#define TK_LEN()      ((TK_STACKX) - (TK_STACKP))
#define TK_INIT()     (TK_STACKX = TK_STACK, TK_STACKP = TK_STACKX)
#define TK_POP()      (*(TK_STACKP)++)
#define TK_PUSH(T,L)  (*(TK_STACKX)++ = (struct tok){ .lval = (L), .tok_t = (T) })
85
/* Initializer
   Empties the directory-entry queue and the token queue, then initializes the
   scanner.  The return value is scanner_init()'s result, passed through
   unchanged; it is documented as boolean true when an error occurs, which may
   be handled with standard errno.
*/
int lexer_init(void)
{
  DE_INIT();
  TK_INIT();
  return scanner_init();
}
96
97 /* Lexer
98 If the token buffer is empty, 'lexer' will initialize the token buffer and
99 call 'lexer_scandir'. If SCAN_ERROR is returned, an error is printed
100 before sending a null return to bison. If 0 tokens are generated, the error
101 printing is skipped. In all other cases, 'yylval' is set, and the token's
102 integer representation is returned.
103 */
104 int lexer
105 #define $($)#$
106 #define SCAN_ERROR -1
107 #define TK_EMPTY (TK_STACKP == TK_STACKX)
108 #define FAIL(...) \
109 do { \
110 fprintf(stderr,__VA_ARGS__); \
111 goto done; \
112 } while (0)
113 ()
114 { struct tok token;
115 start:
116 while (DE_LEN() > 0) //lex any directory entries in our stack
117 if (lexer_lexfile(DE_POP()->d_name) == 0)
118 FAIL("Lexer failed to tokenize [%s]\n",(*DE_STACKB)->d_name);
119 if (TK_EMPTY) //if there are no tokens,
120 { TK_INIT(); //initialize the token stack back to 0
121 switch (scanner())
122 { case SCAN_ERROR: //if an error occurred,
123 FAIL("Scanner error\n");
124 case 0: //if the the scanner finds no dirents,
125 goto done; //then we are done
126 default: //if we found some elements to scan,
127 goto start; //start over and lex them
128 }
129 }
130 token = TK_POP();
131 yylval = token.lval;
132 return token.tok_t;
133 done:
134 yylval.val = 0;
135 return 0;
136 }
137
138
139 /* Token Receiver
140 This receiver takes a struct tok and pushes it to the FIFO stack.
141 */
142 void lexer_pushtok
143 #define $($)#$ //stringifier
144 #define ERR_TK "Fatal: Generated over " $(TK_STACKSIZE) " tokens in one pass."
145 ( int tok, YYSTYPE lval )
146 { if (TK_LEN() >= TK_STACKSIZE)
147 { fprintf(stderr, ERR_TK);
148 exit(EXIT_FAILURE);
149 }
150 TK_PUSH(tok, lval);
151 }
152
153 /* Lexical analysis of a file
154 Strips a filename to its base name, then sends it to lexer_lex
155 */
156 int lexer_lexfile
157 #define HIDDEN_WARNING "%s is hidden and will not be parsed!\n", filename
158 ( const char *filename
159 )
160 { static char fname[NAME_MAX];
161 char *last_period = NULL, *iter;
162
163 if (*filename == '.')
164 { fprintf (stderr, HIDDEN_WARNING);
165 return 0;
166 }
167 /* Copy the filename and remove its suffix */
168 strncpy(fname,filename,NAME_MAX);
169 last_period = NULL;
170 for (iter = fname; *iter; iter++) //find the last '.' char
171 if (*iter == '.')
172 last_period = iter;
173 if (last_period) //if we found one,
174 *last_period = '\0'; //truncate the string there
175 /* Register the current_filename */
176 current_filename = filename;
177
178 return lexer_lex(fname);
179 }
180
181 char const* lexer_get_current_filepath
182 ()
183 { static char current_path[PATH_MAX];
184 static char const* last_filename;
185 if ((!last_filename || last_filename != current_filename) &&
186 (realpath(current_filename, current_path) != current_path))
187 { perror("realpath: ");
188 return NULL;
189 }
190 return (const char*)current_path;
191 }