comments updated
[henge/apc.git] / src / lexer.rl
1 #include <stdio.h>
2 #include <stdint.h>
3 #include <stdlib.h>
4 #include <errno.h>
5 #include <unistr.h>
6 #include "parser.tab.h"
7 #include "apc.h"
8 #include "print.h"
/* Public entry points defined in this file.  lexer_closedir is listed with
   the others so that every externally visible function has a prototype in
   scope before its definition. */
int  lexer_init(void);
void lexer_quit(void);
int  lexer_lexfile(uint8_t*);
int  lexer_lexdir(uint8_t*);
int  lexer_lexstring(uint8_t*, int);
int  lexer_closedir(void);
15 //apc.c
16 static
17 yypstate* pstate;
18 static
19 yycstate* cstate;
20 /* Ring buffer for keeping lexical tokens valid for up to 255 tokens */
21 static
22 YYSTYPE lval_stack[0xFF + 1];
23 static
24 uint8_t lval_offs;
/* Stringify the argument.  ('$' in an identifier is a compiler extension.) */
#define $($)#$
/* Push token T, whose semantic value lives at address L, to the parser. */
#define PUSHTOK(T,L) yypush_parse(pstate, (T), (L), cstate)
/* Store L in member Y of the current ring slot, optionally trace the token on
   stdout when DEBUG is set, push token T with the address of that slot, then
   advance the ring index and the enclosing function's local `ntok` counter.
   L is evaluated exactly once: the trace prints the stored copy rather than
   expanding L a second time. */
#define LEXTOK(T,Y,L) do {                                      \
    lval_stack[lval_offs].Y = (L);                              \
    if (DEBUG) {                                                \
      ulc_fprintf(stdout, "["$(T));                             \
      switch (T) {                                              \
      case NAME: case PATH:                                     \
        ulc_fprintf(stdout, "->%U", lval_stack[lval_offs].Y);   \
        break;                                                  \
      case REF:                                                 \
        ulc_fprintf(stdout, "->%X", lval_stack[lval_offs].Y);   \
        break;                                                  \
      default: break;                                           \
      }                                                         \
      ulc_fprintf(stdout, "]");                                 \
    }                                                           \
    PUSHTOK(T, lval_stack + lval_offs);                         \
    lval_offs++;                                                \
    ntok++;                                                     \
  } while (0)
/* Convenience wrappers: each selects the token type and the YYSTYPE member
   that carries its value.  Operators and links carry a dummy value of 0. */
#define PUSHFACE(F) LEXTOK(FACING, face, F)
#define PUSHREF(R)  LEXTOK(REF, ref, R)
#define PUSHLINK()  LEXTOK(LINK, val, 0)
#define PUSHNUM(N)  LEXTOK(NUM, val, N)
#define PUSHNAME(N) LEXTOK(NAME, str, N)
#define PUSHOP(O)   LEXTOK(O, val, 0)
#define PUSHPATH(P) LEXTOK(PATH, str, P)
51
/* Lexstring is the main lexer for APC and is generated by Ragel. It lexes the names of
   files that have been scanned and pushes each token's type and value straight to the
   push parser through yypush_parse (see LEXTOK). */
55
%%{
  machine lexstring;

  # Actions.  `ts` marks the first byte of the token being lexed, `p` is the
  # machine's cursor, and `str`, `lval` and `ntok` are locals of
  # lexer_lexstring.  Numbers are converted in place starting at `ts`.
  action push_ref  { errno = 0;
                     lval.ref = strtoll((char*)ts,NULL,16);
                     if (errno)
                       { ulc_fprintf(stderr, "Invalid hex number in file %U\n",str);
                         exit(1);
                       }
                     PUSHREF(lval.ref);
                   }
  action push_link { PUSHLINK(); }
  action push_val  { errno = 0;
                     lval.val = strtoll((char*)ts,NULL,10);
                     if (errno)
                       { ulc_fprintf(stderr, "strtoll could not parse %U\n",str);
                         exit(1);
                       }
                     PUSHNUM(lval.val);
                   }
  action push_name { PUSHNAME(ts); }
  action push_map  { PUSHOP(MAP); }
  action set_ts    { ts = p; }
  action push_SS   { PUSHOP(SS); }
  action push_S    { PUSHFACE(SFACE); }
  action push_SW   { PUSHFACE(SWFACE); }
  action push_W    { PUSHFACE(WFACE); }
  action push_NW   { PUSHFACE(NWFACE); }
  action push_N    { PUSHFACE(NFACE); }
  action push_NE   { PUSHFACE(NEFACE); }
  action push_E    { PUSHFACE(EFACE); }
  action push_SE   { PUSHFACE(SEFACE); }
  action ref_error { ulc_fprintf(stderr, "ref from %U to %U has an inappropriate amount of hex digits, it must have eight.\n", ts, p);
                     exit(1);
                   }
  action p         { dprintf("Lexer_lexstring:: p = %U\n", p); }

  # Compass facings.  Each pushes its own FACING value on leaving the token;
  # NE pushes NEFACE (it previously reused the NW action).
  N  = 'N'  %push_N;
  W  = 'W'  %push_W;
  S  = 'S'  %push_S;
  E  = 'E'  %push_E;
  NW = 'NW' %push_NW;
  NE = 'NE' %push_NE;
  SW = 'SW' %push_SW;
  SE = 'SE' %push_SE;

  # Tokens are separated by '_' or by NUL.
  tok_delimiter = [_\0];

  direction  = (N | W | S | E | NW | NE | SW | SE) ;
  # WIDTHxHEIGHT; the lone string "0" is excluded on either side.
  dimensions = (digit+ - '0') >set_ts %push_val 'x' (digit+ - '0') >set_ts %push_val;
  link       = '#' %push_link;
  # NOTE(review): both alternatives embed push_SS on the same '+SS' prefix;
  # confirm the action fires only once per '+SS'.
  SS         = ('+SS' %to(push_SS)) | ('+SS' %to(push_SS) link ) ;
  # NOTE(review): alnum admits non-hex letters although ref_error's message
  # speaks of hex digits -- consider xdigit{8}; confirm against real file names.
  ref        = '0x' >set_ts alnum{8} $err(ref_error) %push_ref ;
  val        = digit+ >set_ts %push_val ;
  name       = lower >set_ts (lower | digit)* %push_name ;
  map        = '+MAP' %to(push_map);
  tok        = (name | val | ref | dimensions | map | link | SS | direction);

  # One or more tokens with a delimiter between each pair.
  main := (tok tok_delimiter)* tok ;

  write data nofinal noerror noprefix;

}%%
121
122 int lexer_init
123 ( void )
124 { pstate = yypstate_new();
125 cstate = yycstate_new();
126 lval_offs = 0;
127 return !pstate || !cstate;
128 return en_main == 1;
129 }
130
131 void lexer_quit
132 ( void )
133 { if (pstate) yypstate_delete(pstate);
134 if (cstate) yycstate_delete(cstate);
135 }
136
137 int lexer_lexstring
138 ( uint8_t* str,
139 int size
140 )
141 { uint8_t* p, * ts, * pe, * eof;
142 int cs, ntok;
143 YYSTYPE lval;
144 ntok = 0;
145 p = ts = str;
146 pe = eof = p + size;
147 %%write init;
148 %%write exec;
149 return ntok;
150 }
151
/* Lexical analysis of a file name.
   Every '_' in filename is temporarily overwritten with '\0', the part of the
   name before its last '.' (or the whole name when it has none) is handed to
   lexer_lexstring, the underscores are put back, and finally a PATH token
   carrying the complete filename is pushed.
   Returns the number of tokens pushed to the parser, the PATH token included.
   NOTE(review): NAME tokens store pointers into filename; once the
   underscores are restored those strings are no longer terminated at the
   token boundary.  Confirm the parser has consumed them by then. */
int lexer_lexfile
( uint8_t* filename )
{ uint8_t* last_period,* iter,* filename_end,* lex_end;
  int ntok;
  last_period = NULL;
  for (iter = filename; *iter; iter++)
    { if (*iter == '.')
        last_period = iter;     /* remember the right-most dot */
      else if (*iter == '_')
        *iter = '\0';           /* split tokens for the lexer */
    }
  // Mark the end of the filename
  filename_end = iter;
  dprintf("%U\n\t",filename);
  // Lex up to the last period if there is one, otherwise the whole name
  lex_end = last_period ? last_period : filename_end;
  ntok = lexer_lexstring(filename, (int)(lex_end - filename));

  // Replace nulls with their original '_'
  for (iter = filename; iter < filename_end; iter++)
    if (*iter == '\0')
      *iter = '_';
  // PUSHPATH increments ntok itself, so the PATH token is already counted
  PUSHPATH(filename);
  dprintf("\n\t[%i Token%s]\n", ntok, (ntok > 1) ? "s" : "");
  return ntok;
}
186
187 int lexer_lexdir
188 ( uint8_t* dirname )
189 { int ntok;
190 ntok = 0;
191 if (DEBUG) putchar('\t');
192 PUSHNAME(dirname);
193 PUSHOP(CLOPEN);
194 if (DEBUG) putchar('\n');
195 return ntok;
196 }
197
198 int lexer_closedir
199 ( void )
200 { int ntok = 0;
201 if (DEBUG) putchar('\t');
202 PUSHOP(CLCLOSE);
203 if (DEBUG) putchar('\n');
204 return ntok;
205 }