edb9a6abe880cebd351ccbf92f27f529211e5577
[henge/webcc.git] / src / apc / lexer_lex.rl
1 /* Ragel State Machine for tokenizing text */
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <apc/parser.tab.h>
5
6 extern void lexer_pushtok(int, YYSTYPE);
7
8 int lexer_lex(const char*);
9 int ipow(int, int);
10 int ttov(const char* str, int);
11 uint64_t ttor(const char* str, int);
12 char* ttos(const char* str, int);
13
14
#define MAX_TOK_LEN 64
#define MAX_TOKENS 16
#define MAX_STR_SIZE (MAX_TOK_LEN * MAX_TOKENS)
/* Stringify helper used for PUSHTOK's trace output. */
#define $($)#$
/* Convert the current token text with LFUNC, store the result in the
   UTYPE member of yylval, push it with token type TOK and count it.

   Expects `ts`, `p`, `tok_t` and `ntok` to be in scope at the point of
   expansion.  The set_* actions that expand this macro are embedded as
   Ragel leaving actions, so when they run `p` already points at the
   character that follows the token; the token text is therefore the
   half-open range [ts, p) and its length is p - ts (not p - ts + 1,
   which would hand the delimiter to the conversion function). */
#define PUSHTOK(TOK,LFUNC,UTYPE)                          \
  do {                                                    \
    printf("PUSHTOK(" $(TOK) $(LFUNC) $(UTYPE) ")\n");    \
    tok_t = TOK;                                          \
    yylval.UTYPE = LFUNC(ts, (int)(p - ts));              \
    lexer_pushtok(tok_t, yylval);                         \
    ++ntok;                                               \
  } while (0)
27
/* Ragel specification of the `token_matcher` machine.
 *
 * The accepted input is one or more segments.  A segment is a single token
 * followed by a delimiter, which is either '_' or a NUL byte.  A token is
 * one of:
 *   ref  - the literal "0x" followed by one or more hex digits
 *   val  - one or more decimal digits
 *   name - one or more alphabetic characters
 *
 * Each token carries a leaving action (set_ref / set_val / set_name) that
 * expands PUSHTOK with the matching token type, conversion function and
 * yylval member.  Each segment carries the leaving action set_ts, which
 * stores the current position `p` in `ts`.
 *
 * NOTE(review): the lex_error action is defined but is not embedded in any
 * of the machine definitions visible here - confirm whether it was meant to
 * be attached as an error action.
 *
 * The `write data` directive after the specification is where Ragel emits
 * the generated static data for the machine.
 */
28 %%{
29 machine token_matcher;
30
31 # set up yylval and tok_t to be pushed to stack
32 action set_ref { PUSHTOK(REF, ttor, ref); }
33 action set_val { PUSHTOK(NUM, ttov, val); }
34 action set_name { PUSHTOK(NAME, ttos, str); }
35 action set_ts { ts = p; }
36 action lex_error {printf("input error: character %c in filename %s is invalid\n", fc, str);}
37
38 # instantiate machines for each possible token
39 ref = '0x'. xdigit+ %set_ref;
40 val = digit+ %set_val;
41 name = alpha+ %set_name;
42 tok = (ref | val | name);
43 segment = tok . [_\0] %set_ts;
44
45 main := segment+ ;
46
47 }%%
48
49
50 %%write data;
51 /* Scan filename and push the its tokens
52 onto the stack */
53 int lexer_lex (const char* str)
54 {
55 const char *p, *pe, *ts, *eof;
56 int cs, tok_t, ntok = 0;
57 printf ("Lexing: %s\n",str);
58 p = ts = str;
59 pe = p + strlen(str) + 1;
60 %%write init;
61 %%write exec;
62 printf ("Lexed %i tokens\n",ntok);
63 return ntok;
64 }
65
/* Integer power by repeated squaring.

   base: value to raise.
   exp:  exponent; negative exponents are not supported and yield 0.

   Returns base raised to exp (1 when exp is 0).  The caller is
   responsible for choosing arguments whose result fits in an int. */
int ipow(int base, int exp)
{
  int result = 1;

  /* A negative exponent would never reach zero under an arithmetic
     right shift, so reject it up front. */
  if (exp < 0)
    {
      return 0;
    }

  while (exp > 0)
    {
      if (exp & 1)
        {
          result *= base;
        }
      exp >>= 1;
      /* Square only while more bits remain: the final squaring is never
         used and could overflow even when the result itself fits. */
      if (exp > 0)
        {
          base *= base;
        }
    }

  return result;
}
79
/* Token to Value: convert a decimal token to an int.

   str: start of the token text (need not be NUL-terminated).
   len: maximum number of characters to read from str.

   Reads decimal digits from left to right and stops at the first
   character that is not a digit or after len characters, whichever
   comes first, so a trailing delimiter inside the range is ignored.
   Returns 0 when no digit is read, and saturates at INT_MAX instead of
   overflowing. */
int ttov(const char* str, int len)
{
  int i, val = 0;

  for (i = 0; i < len; i++)
    {
      int digit = str[i] - '0';

      if (digit < 0 || digit > 9)
        {
          break;
        }
      /* val * 10 + digit would exceed INT_MAX: clamp rather than
         invoke signed overflow. */
      if (val > (INT_MAX - digit) / 10)
        {
          return INT_MAX;
        }
      val = val * 10 + digit;
    }

  return val;
}
92
/* Token to Reference: convert a hexadecimal reference token to a
   uint64_t.

   str: start of the token text (need not be NUL-terminated).
   len: maximum number of characters to read from str.

   The machine's reference token is "0x" followed by hex digits, so an
   optional leading "0x"/"0X" is skipped and the remaining characters
   are read as hexadecimal.  Reading stops at the first character that
   is not a hex digit or after len characters.  Returns 0 when no hex
   digit is read; values wider than 64 bits keep only their low 64
   bits. */
uint64_t ttor(const char* str, int len)
{
  int i = 0;
  uint64_t num = 0;

  if (len >= 2 && str[0] == '0' && (str[1] == 'x' || str[1] == 'X'))
    {
      i = 2;
    }

  for (; i < len; i++)
    {
      char c = str[i];
      unsigned digit;

      if (c >= '0' && c <= '9')
        {
          digit = (unsigned)(c - '0');
        }
      else if (c >= 'a' && c <= 'f')
        {
          digit = (unsigned)(c - 'a') + 10u;
        }
      else if (c >= 'A' && c <= 'F')
        {
          digit = (unsigned)(c - 'A') + 10u;
        }
      else
        {
          break;
        }
      num = (num << 4) | digit;
    }

  return num;
}
105
/* Token to String: copy a token into a freshly allocated C string.

   str: start of the token text (need not be NUL-terminated).
   len: number of characters to copy from str.

   Returns a heap-allocated, NUL-terminated copy of the first len
   characters of str, which the caller must release with free().
   Returns NULL when str is NULL, len is negative, or the allocation
   fails. */
char* ttos(const char* str, int len)
{
  char *copy;

  if (str == NULL || len < 0)
    {
      return NULL;
    }

  /* Allocate exactly what is needed so the copy cannot overrun a
     fixed-size buffer. */
  copy = malloc((size_t)len + 1);
  if (copy == NULL)
    {
      return NULL;
    }

  memcpy(copy, str, (size_t)len);
  /* The terminator belongs directly after the last copied character. */
  copy[len] = '\0';

  return copy;
}