/* Ragel State Machine for tokenizing text
   (src/apc/lexer_lex.rl, blob cb068d6; reconstructed from a gitweb blobdiff). */
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* NOTE(review): the targets of the three original #include lines were lost.
   The standard headers above are the ones this file visibly needs.  The
   project header that declares YYSTYPE, yylval and the REF / NUM / NAME
   token codes must be included here as well -- TODO confirm its name. */

extern void lexer_pushtok(int, YYSTYPE);

int      lexer_lex(const char*);
int      ipow(int, int);
int      ttov(const char* str, int);
uint64_t ttor(const char* str, int);
char*    ttos(const char* str, int);


#define MAX_TOK_LEN 64
#define MAX_TOKENS 16
#define MAX_STR_SIZE (MAX_TOK_LEN * MAX_TOKENS)


%%{
  machine token_matcher;

  # Remember where the token that is about to be scanned begins.  This runs
  # on the first character of every token, so the '_' separator is never
  # part of the token text.
  action mark { ts = p; }

  # Leaving actions: convert the token text [ts, p) into its yylval member
  # and push the (type, value) pair onto the token stack.  They run on the
  # '_' that follows a token, or as EOF actions for the final token.
  action set_ref {
    tok_t = REF;
    yylval.ref = ttor(ts, (int) (p - ts));
    lexer_pushtok(tok_t, yylval);
  }

  action set_val {
    tok_t = NUM;
    yylval.val = ttov(ts, (int) (p - ts));
    lexer_pushtok(tok_t, yylval);
  }

  action set_name {
    tok_t = NAME;
    yylval.str = ttos(ts, (int) (p - ts));
    lexer_pushtok(tok_t, yylval);
  }

  # instantiate machines for each possible token
  ref  = '0x' xdigit+ %set_ref;
  val  = digit+ %set_val;
  name = alpha+ %set_name;
  tok  = (ref | val | name) >mark;

  # One or more tokens separated by single underscores.
  main := (tok . '_')* . tok;
}%%


%%write data;

/* '0x' xdigit+ => tok_t REF,  yylval.ref = uint64_t
   [0-9]+       => tok_t NUM,  yylval.val = int
   [a-zA-Z]+    => tok_t NAME, yylval.str = char* (heap copy made by ttos) */

/* Scan the file name STR and push its tokens onto the token stack via
   lexer_pushtok(), in the order they appear.

   STR must be a non-NULL, NUL-terminated string.  Scanning stops at the
   first character that does not fit the grammar; tokens completed before
   that point have already been pushed.  STR is echoed to stdout.

   Always returns 1. */
int lexer_lex (const char* str)
{
  const char *p, *pe, *ts, *eof;
  int cs, tok_t; /* tok_t == token type */

  p = ts = str;
  /* Do not feed the terminating NUL to the machine: it matches no
     transition and would force the error state before the last token's
     leaving action could run. */
  pe = p + strlen(str);
  /* The whole input is available, so the end of the buffer is also the end
     of input.  The generated code compares p against eof to decide whether
     to run the final token's leaving action. */
  eof = pe;

  %%write init;
  %%write exec;

  /* Print as data, never as a format string. */
  printf ("%s", str);
  return 1;
}

/* Integer power: BASE raised to EXP by repeated squaring.
   A negative EXP yields 0.  The caller must ensure the result fits in an
   int. */
int ipow(int base, int exp)
{
  int result = 1;

  if (exp < 0)
    return 0;

  while (exp > 0)
    {
      if (exp & 1)
        result *= base;
      exp >>= 1;
      /* Square only if another bit remains, so the final (unused) square
         cannot overflow. */
      if (exp > 0)
        base *= base;
    }

  return result;
}

/* Token to Value: convert the LEN decimal digits starting at STR to an int.
   STR need not be NUL-terminated.  Conversion stops at the first character
   that is not a decimal digit, and the result saturates at INT_MAX instead
   of overflowing. */
int ttov(const char* str, int len)
{
  int i, digit, val = 0;

  for (i = 0; i < len; i++)
    {
      digit = str[i] - '0';
      if (digit < 0 || digit > 9)
        break;
      if (val > (INT_MAX - digit) / 10)
        return INT_MAX;
      val = val * 10 + digit;
    }

  return val;
}

/* Value of the hexadecimal digit C, or -1 if C is not a hex digit. */
static int xdigit_value(char c)
{
  if (c >= '0' && c <= '9')
    return c - '0';
  if (c >= 'a' && c <= 'f')
    return c - 'a' + 10;
  if (c >= 'A' && c <= 'F')
    return c - 'A' + 10;
  return -1;
}

/* Token to Reference: convert the LEN characters starting at STR, a
   hexadecimal number with an optional leading "0x", to a uint64_t.
   STR need not be NUL-terminated.  Conversion stops at the first character
   that is not a hex digit.  Only the low 64 bits are kept when more than
   16 significant hex digits are given. */
uint64_t ttor(const char* str, int len)
{
  int i = 0, digit;
  uint64_t num = 0;

  if (len >= 2 && str[0] == '0' && (str[1] == 'x' || str[1] == 'X'))
    i = 2;

  for (; i < len; i++)
    {
      digit = xdigit_value(str[i]);
      if (digit < 0)
        break;
      num = (num << 4) | (uint64_t) digit;
    }

  return num;
}

/* Token to String: return a newly allocated, NUL-terminated copy of the LEN
   characters starting at STR.  The caller owns the result and must free()
   it.  Returns NULL if STR is NULL, LEN is negative, or allocation fails. */
char* ttos(const char* str, int len)
{
  char *copy;

  if (str == NULL || len < 0)
    return NULL;

  copy = malloc((size_t) len + 1);
  if (copy == NULL)
    return NULL;

  memcpy(copy, str, (size_t) len);
  copy[len] = '\0';

  return copy;
}