d27c40b13ae225f5cb0403e12375b581c548c407
[henge/webcc.git] / src / apc / lexer_lex.rl
1 /* Ragel State Machine for tokenizing text */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <apc/parser.tab.h>
5
6 extern void lexer_pushtok(int, YYSTYPE);
7
8 int lexer_lex(const char*);
9 int ipow(int, int);
10 int ttov(const char* str, int);
11 uint64_t ttor(const char* str, int);
12 char* ttos(const char* str, int);
13
14
15 #define MAX_TOK_LEN 64
16 #define MAX_TOKENS 16
17 #define MAX_STR_SIZE (MAX_TOK_LEN * MAX_TOKENS)
18
19
%%{
  machine token_matcher;

  # Token actions: each records the token type in tok_t, converts the
  # matched text (ts up to, but not including, p) into the matching
  # yylval member, and passes both to lexer_pushtok().
  action set_ref {
    tok_t = REF;
    yylval.ref = ttor(ts, p - ts);
    lexer_pushtok(tok_t, yylval);
  }

  action set_val {
    tok_t = NUM;
    yylval.val = ttov(ts, p - ts);
    lexer_pushtok(tok_t, yylval);
  }

  action set_name {
    tok_t = NAME;
    yylval.str = ttos(ts, p - ts);
    lexer_pushtok(tok_t, yylval);
  }

  # Remember where the next token starts.
  action set_ts { ts = p; }

  # One machine per token kind; '%' attaches the action as a leaving action.
  ref     = '0x' xdigit+ %set_ref;
  val     = digit+       %set_val;
  name    = alpha+       %set_name;
  tok     = ref | val | name;

  # A segment is a token followed by the '_' separator.
  segment = (tok . '_') %set_ts;

  # Input is any number of segments followed by one final token.
  main := segment* . tok;
}%%


%%write data;
51
/* Scan a filename and push its tokens onto the token stack.

   str is a NUL-terminated string of tokens separated by '_'. Every token
   recognised by the token_matcher machine is pushed through
   lexer_pushtok() by the machine's own actions, including the last one.

   Always returns 1, as before.
   NOTE(review): a scan that stops in the error state is not reported to
   the caller; confirm whether callers need to know. */
int lexer_lex (const char* str)
{
  const char *p, *pe, *ts, *eof;
  int cs, tok_t = 0; /* tok_t == token type, set by the set_* actions */

  p = ts = str;
  /* Scan only the characters of the string. The terminating NUL is not
     part of the grammar, so feeding it to the machine would put it in
     the error state before the final token is emitted. */
  pe = p + strlen(str);
  /* The whole input is in one buffer, so its end is also end-of-file;
     this lets the final token's leaving action run as an EOF action. */
  eof = pe;
  %%write init;
  %%write exec;

  /* Echo the input; never use it as the format string itself. */
  printf ("%s", str);
  return 1;
}
69
/* Integer power by repeated squaring.

   Returns base raised to exp. Any base raised to 0 yields 1. A negative
   exp has no general integer result, so 0 is returned; this also keeps
   the loop from running on a negative shift operand.

   The result itself is not range-checked: the caller must make sure
   base^exp fits in an int. */
int ipow(int base, int exp)
{
  int result = 1;

  if (exp < 0)
    return 0;

  while (exp)
    {
      if (exp & 1)
        result *= base;
      exp >>= 1;
      /* Square only while exponent bits remain. Squaring after the last
         bit is unused work and can overflow even when the result fits
         (e.g. 10^9). */
      if (exp)
        base *= base;
    }

  return result;
}
83
/* Token to Value: convert the first len characters of str, which must
   all be decimal digits, to an int.

   Digits are accumulated left to right, so no power of ten is ever
   computed on its own; leading zeros therefore cannot overflow, however
   many there are. A len of 0 (or less) yields 0. The value itself is not
   range-checked: the caller must make sure it fits in an int. */
int ttov(const char* str, int len)
{
  int i, val = 0;

  for (i = 0; i < len; i++)
    val = val * 10 + (str[i] - '0');

  return val;
}
96
/* Token to Reference: convert a hexadecimal token to a 64-bit value.

   str points at the token and len is its length in characters. A leading
   "0x" (or "0X") prefix, as required by the scanner's ref rule, is
   skipped; the remaining characters are read as hexadecimal digits in
   either case. Conversion stops at the first character that is not a hex
   digit. More than 16 significant digits do not fit in 64 bits; the
   excess high-order bits are discarded. */
uint64_t ttor(const char* str, int len)
{
  uint64_t num = 0;
  int i = 0;

  if (len >= 2 && str[0] == '0' && (str[1] == 'x' || str[1] == 'X'))
    i = 2;

  for (; i < len; i++)
    {
      char c = str[i];
      unsigned digit;

      if (c >= '0' && c <= '9')
        digit = (unsigned)(c - '0');
      else if (c >= 'a' && c <= 'f')
        digit = (unsigned)(c - 'a') + 10u;
      else if (c >= 'A' && c <= 'F')
        digit = (unsigned)(c - 'A') + 10u;
      else
        break;

      num = (num << 4) | digit;
    }

  return num;
}
109
/* Token to String: return a newly allocated, NUL-terminated copy of the
   first len characters of str.

   The copy is sized from len, so tokens of any length are handled. The
   caller owns the returned buffer and releases it with free(). Returns
   NULL if str is NULL, len is negative, or the allocation fails. */
char* ttos(const char* str, int len)
{
  char *copy;

  if (str == NULL || len < 0)
    return NULL;

  copy = malloc((size_t)len + 1);
  if (copy == NULL)
    return NULL;

  memcpy(copy, str, (size_t)len);
  copy[len] = '\0'; /* terminator goes directly after the last copied char */

  return copy;
}