c7600aecdeb93be1d9603eda873b9b9dfcb0d545
[henge/apc.git] / src / lexer.rl
1 #include <stdio.h>
2 #include <stdint.h>
3 #include <stdlib.h>
4 #include <errno.h>
5 #include <unistr.h>
6 #include "parser.tab.h"
7 #include "apc.h"
8 #include "print.h"
/* Public */
/* Create the parser state; returns non-zero on failure */
int lexer_init(void);
/* Release whatever lexer_init created */
void lexer_quit(void);
/* Lex a file name, then push it as a PATH token */
int lexer_lexfile(uint8_t*);
/* Push a NAME token for a directory followed by CLOPEN */
int lexer_lexdir(uint8_t*);
/* Push CLCLOSE */
int lexer_closedir(void);
/* Run the ragel machine over a buffer of the given size */
int lexer_lexstring(uint8_t*, int);
15 //apc.c
16 static
17 yypstate* pstate;
18 static
19 yycstate* cstate;
20 /* Ring buffer for keeping lexical tokens valid for up to 255 tokens */
21 static
22 YYSTYPE lval_stack[0xFF + 1];
23 static
24 uint8_t lval_offs;
/* Stringify the argument */
#define $(X) #X
/* Hand token TOK and the value pointer LVALP to the push parser */
#define PUSHTOK(TOK,LVALP) yypush_parse(pstate, TOK, (LVALP), cstate)
/* Store VALUE in member FIELD of the current ring slot, push token TOK with
   that slot, then advance the ring offset and the ntok counter of the
   expanding scope (an int ntok must be visible there).
   When DEBUG is set the token is echoed to stdout first as "[TOK]", with
   "->value" added for NAME/PATH (%U) and REF (%X). */
#define LEXTOK(TOK,FIELD,VALUE) do {                      \
    if (DEBUG) {                                          \
      ulc_fprintf(stdout, "["$(TOK));                     \
      switch (TOK) {                                      \
      case NAME: case PATH:                               \
        ulc_fprintf(stdout, "->%U", VALUE); break;        \
      case REF:                                           \
        ulc_fprintf(stdout, "->%X", VALUE); break;        \
      default: break;                                     \
      }                                                   \
      ulc_fprintf(stdout, "]");                           \
    }                                                     \
    lval_stack[lval_offs].FIELD = VALUE;                  \
    PUSHTOK(TOK,lval_stack + lval_offs);                  \
    lval_offs++;                                          \
    ntok++;                                               \
  } while (0)
/* One wrapper per token kind, each selecting the matching value member */
#define PUSHFACE(DIR)    LEXTOK(FACING, face, DIR)
#define PUSHREF(HEX)     LEXTOK(REF, ref, HEX)
#define PUSHLINK()       LEXTOK(LINK, val, 0)
#define PUSHNUM(NUMBER)  LEXTOK(NUM, val, NUMBER)
#define PUSHNAME(STR)    LEXTOK(NAME, str, STR)
#define PUSHOP(OP)       LEXTOK(OP, val, 0)
#define PUSHPATH(FNAME)  LEXTOK(PATH, str, FNAME)
51
/* The lexstring machine is the main lexer for APC; ragel generates its C code.
   It lexes the names of scanned files and hands every token, together with
   its value, to the push parser through the PUSH* macros above. */

%%{
  machine lexstring;

  # Actions run inside lexer_lexstring and use its locals (ts, p, str, lval, ntok).
  # ts marks the first byte of the token being read; set_ts records it.
  action push_ref  { errno = 0;
                     lval.ref = strtoll((char*)ts,NULL,16);
                     if (errno)
                       { ulc_fprintf(stderr, "Invalid hex number in file %U\n",str);
                         exit(1);
                       }
                     PUSHREF(lval.ref);
                   }
  action push_link { PUSHLINK(); }
  action push_val  { errno = 0;
                     lval.val = strtoll((char*)ts,NULL,10);
                     if (errno)
                       { ulc_fprintf(stderr, "strtoll could not parse %U\n",str);
                         exit(1);
                       }
                     PUSHNUM(lval.val);
                   }
  action push_name { PUSHNAME(ts); }
  action push_map  { PUSHOP(MAP); }
  action set_ts    { ts = p; }
  action push_SS   { PUSHOP(SS); }
  action push_S    { PUSHFACE(SFACE); }
  action push_SW   { PUSHFACE(SWFACE); }
  action push_W    { PUSHFACE(WFACE); }
  action push_NW   { PUSHFACE(NWFACE); }
  action push_N    { PUSHFACE(NFACE); }
  action push_NE   { PUSHFACE(NEFACE); }
  action push_E    { PUSHFACE(EFACE); }
  action push_SE   { PUSHFACE(SEFACE); }
  action ref_error { ulc_fprintf(stderr, "ref from %U to %U has an inappropriate amount of hex digits, it must have eight.\n", ts, p);
                     exit(1);
                   }
  action p         { dprintf("Lexer_lexstring:: p = %U\n", p); }

  # Facing tokens: each one pushes its own FACING value when the token is left.
  N  = 'N'  %push_N;
  W  = 'W'  %push_W;
  S  = 'S'  %push_S;
  E  = 'E'  %push_E;
  NW = 'NW' %push_NW;
  NE = 'NE' %push_NE;
  SW = 'SW' %push_SW;
  SE = 'SE' %push_SE;

  # Tokens are separated by '_' or by the '\0' that lexer_lexfile writes over '_'.
  tok_delimiter = [_\0];

  direction  = (N | W | S | E | NW | NE | SW | SE) ;
  dimensions = (digit+ - '0') >set_ts %push_val 'x' (digit+ - '0') >set_ts %push_val;
  link       = '#' %push_link;
  SS         = ('+SS' %to(push_SS)) | ('+SS' %to(push_SS) link ) ;
  # NOTE(review): alnum{8} also accepts non-hex letters, which strtoll stops at
  # without setting errno -- confirm whether xdigit{8} was intended.
  ref        = '0x' >set_ts alnum{8} $err(ref_error) %push_ref ;
  val        = digit+ >set_ts %push_val ;
  name       = lower >set_ts (lower | digit)* %push_name ;
  map        = '+MAP' %to(push_map);
  tok        = (name | val | ref | dimensions | map | link | SS | direction);

  # A string is one or more delimited tokens closed by a '\0'.
  main := (tok tok_delimiter)* tok [\0];

  write data nofinal noerror noprefix;

}%%
121
122 int lexer_init
123 ( void )
124 { pstate = yypstate_new();
125 cstate = yycstate_new();
126 lval_offs = 0;
127 return !pstate || !cstate;
128 return en_main == 1;
129 }
130
131 void lexer_quit
132 ( void )
133 { if (pstate) yypstate_delete(pstate);
134 if (cstate) yycstate_delete(cstate);
135 }
136
137 int lexer_lexstring
138 ( uint8_t* str,
139 int size
140 )
141 { uint8_t* p, * ts, * pe, * eof;
142 int cs, ntok;
143 YYSTYPE lval;
144
145 ntok = 0;
146 p = ts = str;
147 pe = eof = p + size + 1;
148
149 dprintf("\n|---Begin lexstring on p = %U, pe = %p.---|\n",p, pe);
150
151 %%write init;
152 %%write exec;
153
154 dprintf("\n|---Ending lexstring of file %U, pushed %d tokens.---|\n",str, ntok);
155
156 return ntok;
157 }
158
/* Lexical analysis of a file name
   Lexes the part of filename in front of its last '.' (the whole name when
   there is no '.') with lexer_lexstring, then pushes a PATH token holding the
   complete filename.
   The buffer is edited in place while lexing: every '_' becomes '\0' so each
   token is a terminated string, and the last '.' becomes '\0' because
   lexer_lexstring also scans the byte at str[size] and the grammar requires
   a '\0' there. Both edits are undone before the PATH token is pushed.
   Returns the number of tokens pushed to the parser, PATH included.
*/
int lexer_lexfile
( uint8_t* filename )
{ uint8_t* last_period = NULL;
  uint8_t* filename_end;
  uint8_t* iter;
  int      ntok;

  for (iter = filename; *iter; iter++)
    { if (*iter == '.')
        last_period = iter;   // remember the last 'dot' in the name
      else if (*iter == '_')
        *iter = '\0';         // split the name into terminated tokens
    }
  // Mark the end of the filename
  filename_end = iter;
  if (last_period)
    { // Terminate the base name for the lexer, then put the period back so
      // the restore loop below does not turn it into '_'
      *last_period = '\0';
      ntok = lexer_lexstring(filename, (int)(last_period - filename));
      *last_period = '.';
    }
  else
    ntok = lexer_lexstring(filename, (int)(filename_end - filename));
  // Replace nulls with their original '_'
  for (iter = filename; iter < filename_end; iter++)
    if (*iter == '\0')
      *iter = '_';
  // PUSHPATH counts the PATH token in ntok itself, so nothing is added here
  PUSHPATH(filename);
  return ntok;
}
190
191 int lexer_lexdir
192 ( uint8_t* dirname )
193 { int ntok;
194 ntok = 0;
195 PUSHNAME(dirname);
196 PUSHOP(CLOPEN);
197 return ntok;
198 }
199
200 int lexer_closedir
201 ( void )
202 { int ntok = 0;
203 PUSHOP(CLCLOSE);
204 return ntok;
205 }