filename terminator is now a .
[henge/apc.git] / src / lexer.rl
1 #include <stdio.h>
2 #include <stdint.h>
3 #include <stdlib.h>
4 #include <errno.h>
5 #include <unistr.h>
6 #include "parser.tab.h"
7 #include "apc.h"
8 #include "print.h"
/* Public interface of the lexer.  Every function with external linkage
   defined in this file is declared here, including lexer_closedir. */
int  lexer_init(void);
void lexer_quit(void);
int  lexer_lexfile(uint8_t*);
int  lexer_lexdir(uint8_t*);
int  lexer_closedir(void);
int  lexer_lexstring(uint8_t*, int);
15 //apc.c
16 static
17 yypstate* pstate;
18 static
19 yycstate* cstate;
20 /* Ring buffer for keeping lexical tokens valid for up to 255 tokens */
21 static
22 YYSTYPE lval_stack[0xFF + 1];
23 static
24 uint8_t lval_offs;
/* Stringify the macro argument. */
#define $($)#$

/* Hand one token of type T, whose value lives at address L, to the parser. */
#define PUSHTOK(T,L) yypush_parse(pstate, T, (L), cstate)

/* Store value L in member Y of the next lval_stack slot, push a token of type
   T that points at that slot, then advance the ring-buffer index and the
   caller's token counter.
   Requires a local `int ntok` in the calling scope.
   When DEBUG is set the token (and, for NAME/PATH/REF, its value) is echoed
   to stdout first. */
#define LEXTOK(T,Y,L) do {                         \
    if (DEBUG) {                                   \
      ulc_fprintf(stdout, "["$(T));                \
      if ((T) == NAME || (T) == PATH)              \
        ulc_fprintf(stdout, "->%U", L);            \
      else if ((T) == REF)                         \
        ulc_fprintf(stdout, "->%X", L);            \
      ulc_fprintf(stdout, "]");                    \
    }                                              \
    lval_stack[lval_offs].Y = L;                   \
    PUSHTOK(T, lval_stack + lval_offs);            \
    lval_offs++;                                   \
    ntok++;                                        \
  } while (0)

/* Convenience wrappers, one per token kind. */
#define PUSHFACE(F) LEXTOK(FACING, face, F)
#define PUSHREF(R)  LEXTOK(REF, ref, R)
#define PUSHLINK()  LEXTOK(LINK, val, 0)
#define PUSHNUM(N)  LEXTOK(NUM, val, N)
#define PUSHNAME(N) LEXTOK(NAME, str, N)
#define PUSHOP(O)   LEXTOK(O, val, 0)
#define PUSHPATH(P) LEXTOK(PATH, str, P)
51
/* lexer_lexstring is the main lexer for APC; its state machine is generated
   by ragel. It lexes the names of files that have been scanned and pushes each
   token's type and value to the parser through yypush_parse, keeping the
   values alive in lval_stack while the parser uses them. */
55
56 %%{
57 machine lexstring;
58
59 # set up yylval and tok_t to be pushed to stack
60 action push_ref { errno = 0;
61 lval.ref = strtoll((char*)ts,NULL,16);
62 if (errno)
63 { ulc_fprintf(stderr, "Invalid hex number in file %U\n",str);
64 exit(1);
65 }
66 PUSHREF(lval.ref);
67 }
68 action push_link { PUSHLINK(); }
69 action push_val { errno = 0;
70 lval.val = strtoll((char*)ts,NULL,10);
71 if (errno)
72 { ulc_fprintf(stderr, "strtoll could not parse %U\n",str);
73 exit(1);
74 }
75 PUSHNUM(lval.val);
76 }
77 action push_name { PUSHNAME(ts); }
78 action push_map { PUSHOP(MAP); }
79 action set_ts { ts = p; }
80 action push_SS { PUSHOP(SS); }
81 action push_S { PUSHFACE(SFACE); }
82 action push_SW { PUSHFACE(SWFACE); }
83 action push_W { PUSHFACE(WFACE); }
84 action push_NW { PUSHFACE(NWFACE); }
85 action push_N { PUSHFACE(NFACE); }
86 action push_NE { PUSHFACE(NEFACE); }
87 action push_E { PUSHFACE(EFACE); }
88 action push_SE { PUSHFACE(SEFACE); }
89 action ref_error { ulc_fprintf(stderr, "ref from %U to %U has an inappropriate amount of hex digits, it must have eight.\n", ts, p);
90 exit(1);
91 }
92 action p { dprintf("Lexer_lexstring:: p = %U\n", p); }
93
94 N = 'N' %push_N;
95 W = 'W' %push_W;
96 S = 'S' %push_S;
97 E = 'E' %push_E;
98 NW = 'NW' %push_NW;
99 NE = 'NE' %push_NW;
100 SW = 'SW' %push_SW;
101 SE = 'SE' %push_SE;
102
103 tok_delimiter = [_\0];
104
105 direction = (N | W | S | E | NW | NE | SW | SE) ;
106 dimensions = (digit+ - '0') >set_ts %push_val 'x' (digit+ - '0') >set_ts %push_val;
107 link = '#' %push_link;
108 SS = ('+SS' %to(push_SS)) | ('+SS' %to(push_SS) link ) ;
109 ref = '0x' >set_ts alnum{8} $err(ref_error) %push_ref ;
110 val = digit+ >set_ts %push_val ;
111 name = lower >set_ts (lower | digit)* %push_name ;
112 map = '+MAP' %to(push_map);
113 tok = (name | val | ref | dimensions | map | link | SS | direction);
114
115
116 main := (tok tok_delimiter)* tok [.];
117
118 write data nofinal noerror noprefix;
119
120 }%%
121
122 int lexer_init
123 ( void )
124 { pstate = yypstate_new();
125 cstate = yycstate_new();
126 lval_offs = 0;
127 return !pstate || !cstate;
128 return en_main == 1;
129 }
130
131 void lexer_quit
132 ( void )
133 { if (pstate) yypstate_delete(pstate);
134 if (cstate) yycstate_delete(cstate);
135 }
136
137 int lexer_lexstring
138 ( uint8_t* str,
139 int size
140 )
141 { uint8_t* p, * ts, * pe, * eof;
142 int cs, ntok;
143 YYSTYPE lval;
144 ntok = 0;
145 p = ts = str;
146 pe = eof = p + size + 1;
147 %%write init;
148 %%write exec;
149 return ntok;
150 }
151
/* Lexical analysis of a file
   Temporarily replaces every '_' in the name with '\0', sends the part of the
   name up to and including the last '.' (or the whole name if there is none)
   to lexer_lexstring, restores the '_' characters, and finally pushes a PATH
   token carrying the complete filename.
   Returns the number of tokens pushed to the parser, PATH token included.
*/
int lexer_lexfile
( uint8_t* filename )
{ uint8_t* last_period,* iter,* filename_end;
  int      ntok;
  last_period = NULL;
  /* u8_strlen yields a size_t, so it has to be printed with %zu. */
  printf("size of file = %zu\n", u8_strlen(filename));
  for (iter = filename; *iter; iter++)
    switch (*iter)
      { // Keep track of the last 'dot' in the name
        case '.' : last_period = iter; break;
        // replace '_' with '\0' so bison can use strlen on them as tokens.
        case '_' : *iter = '\0'; break;
        default  : break;
      }
  // Mark the end of the filename
  filename_end = iter;
  // Lex up to the last period, if present, or else up to the filename end
  dprintf("%U\n\t",filename);
  ntok = (last_period) ?
    lexer_lexstring(filename, (int)(last_period - filename))
    : lexer_lexstring(filename, (int)(filename_end - filename));

  // Replace nulls with their original '_'
  for (iter = filename; iter < filename_end; iter++)
    if (*iter == '\0')
      *iter = '_';
  // LEXTOK increments ntok, so after this push ntok already counts PATH.
  PUSHPATH(filename);
  dprintf("\n\t[%i Token%s]\n", ntok, (ntok > 1) ? "s" : "");
  return ntok;
}
187
188 int lexer_lexdir
189 ( uint8_t* dirname )
190 { int ntok;
191 ntok = 0;
192 if (DEBUG) putchar('\t');
193 PUSHNAME(dirname);
194 PUSHOP(CLOPEN);
195 if (DEBUG) putchar('\n');
196 return ntok;
197 }
198
199 int lexer_closedir
200 ( void )
201 { int ntok = 0;
202 if (DEBUG) putchar('\t');
203 PUSHOP(CLCLOSE);
204 if (DEBUG) putchar('\n');
205 return ntok;
206 }