merge
[henge/webcc.git] /
1 /* Ragel State Machine for tokenizing text */
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <apc/parser.tab.h>
5
6 extern void lexer_pushtok(int, YYSTYPE);
7
8 int lexer_lex(const char*);
9 int ipow(int, int);
10 int ttov(const char* str, int);
11 uint64_t ttor(const char* str, int);
12 char* ttos(const char* str, int);
13
14
/* Token size limits.  MAX_TOK_LEN bounds the text of a single token;
   MAX_STR_SIZE is MAX_TOKENS tokens of that length. */
#define MAX_TOK_LEN 64
#define MAX_TOKENS 16
#define MAX_STR_SIZE (MAX_TOK_LEN * MAX_TOKENS)

/* Turn a macro argument into a string literal.  (A '$' in an identifier
   is only accepted as a compiler extension, so a plain name is used.) */
#define TOKSTR(X) #X

/* Push one token to the parser.
     TOK   - token type passed to lexer_pushtok()
     LFUNC - conversion function called as LFUNC(start, length)
     UTYPE - member of yylval that receives the converted value
   Relies on the caller's locals: `ts` (start of the current token),
   `p` (current scan position), `tok_t` and `ntok` (token counter,
   incremented once per push).  A trace line is printed first. */
#define PUSHTOK(TOK,LFUNC,UTYPE) \
  do { \
    printf("PUSHTOK(" TOKSTR(TOK) TOKSTR(LFUNC) TOKSTR(UTYPE) ")\n"); \
    tok_t = TOK; \
    yylval.UTYPE = LFUNC(ts, p-ts); \
    lexer_pushtok(tok_t, yylval); \
    ++ntok; \
  } while (0)
27
%%{
  machine token_matcher;

  # Leaving actions: each runs when its token has just ended (on the
  # following '_' or at end of input) and pushes the text between ts
  # and p through PUSHTOK.
  action push_ref   { PUSHTOK(REF, ttor, ref); }
  action push_num   { PUSHTOK(NUM, ttov, val); }
  action push_name  { PUSHTOK(NAME, ttos, str); }
  # Runs on the first character after a '_' separator: remember where
  # the next token starts.
  action mark_start { ts = p; }

  # One machine per token kind.
  ref_tok  = '0x' xdigit+ %push_ref;
  num_tok  = digit+ %push_num;
  name_tok = alpha+ %push_name;
  any_tok  = ref_tok | num_tok | name_tok;

  # A token followed by its '_' separator.
  seg = (any_tok . '_') %mark_start;

  # Input is any number of separated tokens, then one final token.
  main := seg* . any_tok;
}%%
46
47
48 %%write data;
49
50 /* Scan filename and push the its tokens
51 onto the stack */
52 int lexer_lex (const char* str)
53 {
54 const char *p, *pe, *ts, *eof;
55 int cs, tok_t, ntok = 0;
56 printf ("Lexing: %s\n",str);
57 p = ts = str;
58 pe = p + strlen(str) + 1;
59 %%write init;
60 %%write exec;
61 printf ("Lexed %i tokens\n",ntok);
62 return ntok;
63 }
64
/* Integer power by repeated squaring.

   Returns base raised to exp.  Any base to the power 0 yields 1.
   Negative exponents are not supported and yield 0.  The result is not
   checked against the range of int. */
int ipow(int base, int exp)
{
  int result = 1;

  if (exp < 0)
    return 0;

  while (exp > 0)
    {
      if (exp & 1)
        result *= base;
      exp >>= 1;
      /* Square only while bits remain: the last squaring is never used
         and could overflow even though the result itself fits. */
      if (exp > 0)
        base *= base;
    }

  return result;
}
78
/* Token to Value.

   Converts the first len characters of str, read as decimal digits,
   to an int.  str does not need to be NUL-terminated.

   Conversion stops at the first character that is not a decimal digit.
   Values too large for an int saturate at INT_MAX.  A NULL str or a
   non-positive len yields 0. */
int ttov(const char* str, int len)
{
  int i, val = 0;

  if (str == NULL)
    return 0;

  for (i = 0; i < len; i++)
    {
      int digit;

      if (str[i] < '0' || str[i] > '9')
        break;
      digit = str[i] - '0';

      /* Would val * 10 + digit exceed INT_MAX? */
      if (val > (INT_MAX - digit) / 10)
        return INT_MAX;

      val = val * 10 + digit;
    }

  return val;
}
91
/* Token to Reference.

   Converts the first len characters of str, read as a hexadecimal
   number, to a uint64_t.  A leading "0x" (or "0X"), which the `ref`
   token always carries, is skipped.  str does not need to be
   NUL-terminated.

   Conversion stops at the first character that is not a hex digit.
   Only the low 64 bits are kept if more than 16 hex digits are given.
   A NULL str or a non-positive len yields 0. */
uint64_t ttor(const char* str, int len)
{
  int i = 0;
  uint64_t num = 0;

  if (str == NULL || len <= 0)
    return 0;

  if (len >= 2 && str[0] == '0' && (str[1] == 'x' || str[1] == 'X'))
    i = 2;

  for (; i < len; i++)
    {
      char c = str[i];
      unsigned digit;

      if (c >= '0' && c <= '9')
        digit = (unsigned)(c - '0');
      else if (c >= 'a' && c <= 'f')
        digit = (unsigned)(c - 'a') + 10u;
      else if (c >= 'A' && c <= 'F')
        digit = (unsigned)(c - 'A') + 10u;
      else
        break;

      num = (num << 4) | digit;
    }

  return num;
}
104
/* Token to String.

   Returns a newly allocated, NUL-terminated copy of the first len
   characters of str.  str does not need to be NUL-terminated.

   The caller owns the returned string and releases it with free().
   Returns NULL if str is NULL, len is negative, or allocation fails. */
char* ttos(const char* str, int len)
{
  char *copy;

  if (str == NULL || len < 0)
    return NULL;

  /* Allocate exactly what is needed, so the token length is not
     limited by a fixed-size buffer. */
  copy = malloc((size_t)len + 1);
  if (copy == NULL)
    return NULL;

  memcpy(copy, str, (size_t)len);
  copy[len] = '\0';

  return copy;
}