fixes
[henge/apc.git] / src / lexer.rl
1 #include <stdio.h>
2 #include <stdint.h>
3 #include <stdlib.h>
4 #include <errno.h>
5 #include "parser.tab.h"
6 #include "apc.h"
7 #include <unistdio.h>
8 #include <unistr.h>
9 extern //lexer.c
10 void lexer_pushtok(int, YYSTYPE);
11 /* Public */
12 int lexer_setdirection(uint8_t*, int);
13 int lexer_lexfile(uint8_t*);
14 int lexer_lexdir(uint8_t*);
15 int lexer_lexstring(uint8_t*, int);
16 int lexer_setstr(uint8_t*, int);
17 //apc.c
18 extern
19 yypstate* apc_pstate;
20 extern
21 yycstate* apc_cstate;
22 static
23 YYSTYPE lval_stack[1024], * lvalsp;
24 #define PUSHTOK(T,L) yypush_parse(apc_pstate, T, (L), apc_cstate)
25 #define LEXTOK(T,Y,L) do { \
26 lvalsp->Y = L; \
27 PUSHTOK(T,lvalsp); \
28 lvalsp++; \
29 ntok++; \
30 } while (0);
31 #define PUSHFACE(F) LEXTOK(FACING, face, F)
32 #define PUSHREF(R) LEXTOK(REF, ref, R)
33 #define PUSHLINK() LEXTOK(LINK, val, 0)
34 #define PUSHNUM(N) LEXTOK(NUM, val, N)
35 #define PUSHNAME(N) LEXTOK(NAME, str, N)
36 #define PUSHOP(O) LEXTOK(O, val, 0)
37 #define PUSHPATH(P) LEXTOK(PATH, str, P)
38
/* Lexstring is the main lexer for APC and is generated by ragel. It lexes the names of
   files that have been scanned and pushes each token's type and value to the parser
   through yypush_parse. */
42
%%{
  machine lexstring;

  # Actions.  Each one stores a semantic value (where the token has one) and
  # pushes the matching token to the parser through the PUSH* macros.
  # `ts` marks the start of the current token, `p` is the scan position.
  action push_ref { errno = 0;
    lval.ref = strtoll((char*)ts,NULL,16);
    if (errno)
      { fprintf(stderr, "Invalid hex number in file %s\n",(char*)str);
        exit(1);
      }
    PUSHREF(lval.ref);
  }
  action push_link { PUSHLINK(); }
  action push_val { errno = 0;
    lval.val = strtoll((char*)ts,NULL,10);
    if (errno)
      { fprintf(stderr, "strtoll could not parse %s\n", (char*)str);
        exit(1);
      }
    PUSHNUM(lval.val);
  }
  action push_name { printf("Lexer_lexstring:: action:push_name: from %s to %s\n", ts, p);
    PUSHNAME(ts);
  }
  action push_map { printf("Lexer_lexstring:: action:push_map: pushing map token\n");
    PUSHOP(MAP);
  }
  action set_ts { printf("Lexer_lexstring:: action:set_ts. ts = %s\n", p); ts = p; }
  action push_SS { printf("Lexer_lexstring:: action:push_SS. p = %s\n",p);
    PUSHOP(SS);
  }
  action push_S { printf("Lexer_lexstring:: action:push_S. p = %s\n", p);
    PUSHFACE(SFACE);
  }
  action push_SW { printf("Lexer_lexstring:: action:push_SW. p = %s\n", p);
    PUSHFACE(SWFACE);
  }
  action push_W { printf("Lexer_lexstring:: action:push_W. p = %s\n", p);
    PUSHFACE(WFACE);
  }
  action push_NW { printf("Lexer_lexstring:: action:push_NW. p = %s\n", p);
    PUSHFACE(NWFACE);
  }
  action push_N { printf("Lexer_lexstring:: action:push_N. p = %s\n", p);
    PUSHFACE(NFACE);
  }
  action push_NE { printf("Lexer_lexstring:: action:push_NE. p = %s\n", p);
    PUSHFACE(NEFACE);
  }
  action push_E { printf("Lexer_lexstring:: action:push_E. p = %s\n", p);
    PUSHFACE(EFACE);
  }
  action push_SE { printf("Lexer_lexstring:: action:push_SE. p = %s\n", p);
    PUSHFACE(SEFACE);
  }
  action ref_error { printf("ref from %s to %s has an inappropriate amount of hex digits, it must have eight.\n", ts, p);
    exit(1);
  }
  action p { printf("Lexer_lexstring:: p = %s\n", p);}

  # Facing tokens: each compass string pushes its own FACING value on leaving.
  N = 'N' %push_N;
  W = 'W' %push_W;
  S = 'S' %push_S;
  E = 'E' %push_E;
  NW = 'NW' %push_NW;
  NE = 'NE' %push_NE;
  SW = 'SW' %push_SW;
  SE = 'SE' %push_SE;

  tok_delimiter = [_];

  direction = (N | W | S | E | NW | NE | SW | SE) ;
  # Two digit runs separated by 'x'; neither run may be the lone string "0".
  dimensions = (digit+ - '0') >set_ts %push_val 'x' (digit+ - '0') >set_ts %push_val;
  link = '#' %push_link;
  SS = ('+SS' %to(push_SS)) | ('+SS' %to(push_SS) link ) ;
  # '0x' followed by exactly eight alnum characters; ref_error runs if the
  # machine fails while matching them.
  ref = '0x' >set_ts alnum{8} $err(ref_error) %push_ref ;
  val = digit+ >set_ts %push_val ;
  name = lower >set_ts (lower | digit)* %push_name ;
  map = '+MAP' %to(push_map);
  tok = (name | val | ref | dimensions | map | link | SS | direction);

  # Input is a '_'-separated list of tokens terminated by a NUL byte.
  main := (tok tok_delimiter)* tok [\0];

  write data nofinal noerror noprefix;

}%%
130
131 int lexer_lexstring
132 ( uint8_t* str,
133 int size
134 )
135 { uint8_t *p;
136 uint8_t *ts, *pe, *eof;
137 int cs, ntok;
138 YYSTYPE lval;
139
140 lvalsp = lval_stack;
141 ntok = 0;
142 p = ts = str;
143 pe = eof = p + size + 1;
144
145 printf("|---Begin lexstring on p = %s, pe = %s.\n",p, pe);
146
147 %%write init;
148 %%write exec;
149
150 printf("Ending lexstring of file %s, pushed %d tokens.\n",str, ntok);
151
152 return ntok;
153 }
154
155 /* Lexical analysis of a file
156 Strips a filename to its base name, then sends it to lexer_lexstring before
157 pushing a PATH token with the filename
158 Returns the number of tokens pushed to the parser.
159 */
160 int lexer_lexfile
161 ( uint8_t* filename )
162 { uint8_t* last_period,* iter,* filename_end;
163 int ntok;
164 last_period = NULL;
165 for (iter = filename; *iter; iter++)
166 switch (*iter)
167 { // Keep track of the last 'dot' in the name
168 case '.' : last_period = iter; continue;
169 // replace '_' with '\0' so bison can use strlen on them as tokens.
170 case '_' : *iter = '\0';
171 default: continue;
172 }
173 // Mark the end of the filename
174 filename_end = iter;
175 // Lex from either the last period, if present, or filename end
176 ntok = (last_period) ?
177 lexer_lexstring(filename, (int)(last_period - filename))
178 : lexer_lexstring(filename, (int)(iter - filename));
179 // Replace nulls with their original '_'
180 for (iter = filename; iter < filename_end; iter++)
181 if (*iter == '\0')
182 *iter = '_';
183 PUSHPATH(filename);
184 return ntok + 1;
185 return en_main == 1;
186 }
187
188 int lexer_lexdir
189 ( uint8_t* dirname )
190 { uint8_t* de = dirname;
191 int ntok;
192 ntok = 0;
193 de = dirname;
194 if (*de) while (*++de);
195 ntok = lexer_lexstring(dirname, (int)(de - dirname));
196 PUSHOP(CLOPEN);
197 return ntok;
198 }
199
200 int lexer_closedir
201 ( void )
202 { int ntok = 0;
203 PUSHOP(CLCLOSE);
204 return ntok;
205 }
206
207 /**************************/
208 /****Abandon All Hope******/
209 /**************************/
210 /*** ***/
211 /*** ***/
212 /*** ***/
213 /*** ***/
214
215
216 #if 0
217
218 %%{
219 machine setdirection;
220
221 action ret_north {printf("Lexer_setdirection:: direction is north, returning 4\n"); return 4;; }
222 action ret_west { printf("Lexer_setdirection:: direction is west, returning 2\n");return 2;}
223 action ret_east { printf("Lexer_setdirection:: direction is east, returning 6\n");return 6;}
224 action ret_south { printf("Lexer_setdirection:: direction is south, returning 0\n");return 0;}
225 action ret_northeast { printf("Lexer_setdirection:: direction is northeast, returning 5\n");return 5 ;}
226 action ret_northwest { printf("Lexer_setdirection:: direction is northwest, returning 3\n");return 3;}
227 action ret_southeast { printf("Lexer_setdirection:: direction is southeast, returning 7\n");return 7;}
228 action ret_southwest { printf("Lexer_setdirection:: direction is southwest, returning 1\n");return 1;}
229
230 def = [_\0] %to(ret_south);
231 N = 'N'[_\0] %to(ret_north);
232 W = 'W' [_\0] %to(ret_west);
233 S = 'S' [_\0] %to(ret_south);
234 E = 'E' [_\0] %to(ret_east);
235 NW = 'NW' [_\0] %to(ret_northwest);
236 NE = 'NE' [_\0] %to(ret_northeast);
237 SW = 'SW' [_\0] %to(ret_southwest);
238 SE = 'SE' [_\0] %to(ret_southeast);
239
240 direction = (N | W | S | E | NW | NE | SW | SE | def);
241
242 main := direction;
243
244 write data nofinal noprefix noerror;
245
246
247 }%%
248
249
250 int
251 lexer_setdirection
252 (uint8_t* str, int size)
253 { uint8_t *p, *pe, *eof;
254 int cs;
255
256
257 p = str;
258 pe = str + size + 1;
259
260 printf("|--- Begin lexer_setdirection str = %s, p = %s, pe = %s ---|\n", str,p, pe);
261
262 %%write init;
263 %%write exec noend;
264
265 printf("|--- Error in: lexer_setdirection ---|\n");
266
267 return -1;
268 }
269
270
271
272 %%{
273 machine setstr;
274
275
276 action lex_setvlink {printf("Lexer_setstr:: Returning setvlink filetype for %s\n", str); type = 5; newstrt = lexer_lexsetvlink(str); fbreak;}
277 action lex_elevlink {printf("Lexer_setstr:: Returning elevlink filetype for %s\n", str); type = 6; newstrt = lexer_lexelevlink(str); fbreak;}
278 action lex_setmodel {printf("Lexer_setstr:: Returning setmodel filetype\n"); newstrt = lexer_lexsetmodel(str); type = 1; fbreak;}
279 action lex_setmap {printf("Lexer_setstr:: Returning setmap filetype\n"); newstrt = lexer_lexsetmap(str); type = 2; fbreak;}
280 action lex_elemodel {printf("Lexer_setstr:: Returning elemodel filetype for %s\n", str); newstrt = lexer_lexelemodel(str); type = 3; fbreak;}
281 action lex_elemap {printf("Lexer_setstr:: Returning elemap filetype for %s\n", str); newstrt = lexer_lexelemap(str); type = 4; fbreak;}
282 action lex_setolink { printf("Lexer_setstr:: Returning setolink filetype\n"); type = 8; newstrt = lexer_lexsetolink(str); fbreak;}
283 action lex_eleolink { printf("Lexer_setstr:: Returning eleolink filetype\n"); type = 7; newstrt = lexer_lexeleolink(str); fbreak;}
284 action p {printf("p = %s \n",p);}
285 action name_error {printf("In %s, there is a syntactic error. Make sure your set/element names dont conflict with the reserved keywords.\n", str);}
286
287
288 N = 'N';
289 W = 'W';
290 S = 'S';
291 E = 'E';
292 NW = 'NW';
293 NE = 'NE';
294 SW = 'SW';
295 SE = 'SE';
296
297 SS = 'SS';
298 direction = (N | W | S | E | NW | NE | SW | SE) $p;
299
300 SSD = SS direction;
301
302
303
304 name = alpha+ $p - SSD $p;
305 num = digit+ $p;
306 ref = '0x' $p alnum+ $p;
307
308
309 set_label = name | (name '_' ref);
310 ele_label = name | (name '_' ref);
311
312 model_types = (name) | (name '_' num '_' num) | (name '_' num);
313
314
315 set_model = set_label '_' SS %to(lex_setmodel);
316 set_map = set_label '_' '~' %to(lex_setmap);
317 ele_model = set_label '_' ele_label '_' SS %to(lex_elemodel);
318 ele_map = set_label '_' ele_label '_' '~' %to(lex_elemap);
319 set_olink = ref %to(lex_setolink) [\0] ;
320 ele_olink = set_label '_' '~' '_' ref [\0] %to(lex_eleolink);
321 set_vlink = set_label '_' '#' '_' (ref | ref '_' name) [\0] %to(lex_setvlink);
322 ele_vlink = set_label '_' ele_label '_' '#' '_' (ref | ref '_' name) [\0] %to(lex_elevlink);
323
324 main := (ele_map | set_model | set_map |ele_model | ele_vlink | set_vlink | set_olink | ele_olink);
325
326 write data;
327
328
329 }%%
330
331 int
332 lexer_setstr
333 (uint8_t* str, int size)
334 { uint8_t *p, *pe, *eof;
335 int cs, type, newstrt;
336
337 type = newstrt = 0;
338
339 p = str;
340 pe = str + size + 1;
341
342 printf("|--- Begin lexer_setstr with str = %s, p = %s, pe = %s ---|\n", str,p, pe);
343
344 %%write init;
345 %%write exec noend;
346
347 printf("|--- End lexer_setstr. Incrementing str by %d, type is %d ---|\n", newstrt, type);
348
349 return newstrt;
350 }
351
352 #endif
353
354
355 /* %%{ */
356 /* machine file_matcher; */
357
358 /* action call_ml { ts = p; fgoto set_hw ;} */
359 /* action call_tl { return 0;} */
360 /* action set_height {height = ttov(p, p-ts+1); ts = p;} */
361 /* action set_width { width = ttov(p, p-ts+1);} */
362 /* action call_lmf {lexer_lexmapfile(height, width); } */
363 /* action lex_error {printf("input error: character %c in filename %s is invalid\n = %s\n", fc, str, p);} */
364
365 /* #This machine determines the type of file we are lexing */
366 /* #and calls the appropriate machine to handle it. */
367
368 /* #TODO add mapping name */
369 /* width = digit+ %set_width; */
370 /* height = digit+ %set_height; */
371
372 /* set_hw := height . '_' . width [\0] %to(call_lmf); */
373
374 /* tok_segment = alnum; */
375 /* map_end = 'm' . '_' %to(call_ml); */
376 /* tok_end = alnum+ . [\0] %to(call_tl); */
377
378 /* file_matcher := (tok_segment+ . '_' )+ ( map_end | tok_end ); */
379
380 /* write data; */
381 /* }%% */
382
383 /* int */
384 /* lexer_matchfile */
385 /* (char* str, int size) */
386 /* { *p, *pe; */
387 /* char* ts; */
388 /* int cs, ntok, height, width; */
389
390 /* p = str; */
391 /* pe = p + size; */
392 /* height = width = 0; */
393
394 /* printf("Checking if filename is a map file:: filename = %s, p = %c, pe = %c\n", str, *p, *pe); */
395
396 /* %%write init; */
397 /* %%write exec noend; */
398
399 /* printf("Ending lexer_ismapfile on %s\n", str); */
400
401 /* return ntok; */
402 /* } */
403
404 /* %%{ */
405 /* machine vartype; */
406
407 /* action isele {return 0;} */
408 /* action ismodel {return 1;} */
409
410 /* set_name = alpha+; */
411 /* ele_name = alpha+; */
412 /* model_name = alpha+; */
413
414 /* ele = set_name '_' model_name '_' ele_name %isele; */
415 /* model = set_name '_' model_name [\0] %ismodel; */
416
417
418 /* ismodel := (ele | model); */
419
420 /* write data; */
421
422 /* }%% */
423
424 /* int */
425 /* lexer_ismodel */
426 /* (uint8_t* str, int size) */
427 /* { uint8_t *p, *pe, *eof; */
428 /* int cs; */
429
430 /* p = str; */
431 /* pe = p + size + 1; */
432
433 /* %%write init; */
434 /* %%write exec; */
435
436
437 /* } */