lexer_lex now returns the number of tokens it pushed
[henge/webcc.git] / src / apc / lexer_lex.rl
1 /* Ragel State Machine for tokenizing text */
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <apc/parser.tab.h>
5
6 extern void lexer_pushtok(int, YYSTYPE);
7
8 int lexer_lex(const char*);
9 int ipow(int, int);
10 int ttov(const char* str, int);
11 uint64_t ttor(const char* str, int);
12 char* ttos(const char* str, int);
13
14
/* Sizing limits: bytes per token, token count, and their product. */
#define MAX_TOK_LEN   64
#define MAX_TOKENS    16
#define MAX_STR_SIZE  (MAX_TOK_LEN * MAX_TOKENS)
18
19
%%{
  machine token_matcher;

  # Token actions: each one converts the text in [ts, p) into the matching
  # yylval member, pushes it with lexer_pushtok() and bumps num_tokens.
  action set_ref {
    tok_t = REF;
    yylval.ref = ttor(ts, p - ts);
    lexer_pushtok(tok_t, yylval);
    num_tokens++;
  }

  action set_val {
    tok_t = NUM;
    yylval.val = ttov(ts, p - ts);
    lexer_pushtok(tok_t, yylval);
    num_tokens++;
  }

  action set_name {
    tok_t = NAME;
    yylval.str = ttos(ts, p - ts);
    lexer_pushtok(tok_t, yylval);
    num_tokens++;
  }

  # Record where the next token begins.
  action set_ts { ts = p; }

  # One sub-machine per token kind; '%' attaches the action on leaving it.
  ref  = '0x' xdigit+ %set_ref;
  val  = digit+       %set_val;
  name = alpha+       %set_name;
  tok  = ref | val | name;

  # A segment is a token followed by its '_' separator.
  segment = (tok . '_') %set_ts;

  # Input: any number of segments, then one final token.
  main := segment* . tok;
}%%


%%write data;
54
55 /* Scan filename and push the its tokens
56 onto the stack */
57 int lexer_lex (const char* str)
58 {
59 const char *p, *pe, *ts, *eof;
60 int cs, tok_t, num_tokens; //tok_t == token type
61
62 num_tokens = 0;
63
64 p = ts = str;
65 pe = p + strlen(str) + 1;
66 %%write init;
67 %%write exec;
68
69
70 printf (str);
71 return num_tokens;
72 }
73
/* Integer power by repeated squaring.

   Returns BASE raised to EXP.  A negative EXP returns 0 instead of
   looping.  No overflow detection is performed on the result itself. */
int ipow(int base, int exp)
{
  int result = 1;

  if (exp < 0)
    return 0;

  while (exp > 0)
    {
      if (exp & 1)
        result *= base;
      exp >>= 1;
      /* Square only if another bit remains; squaring after the last bit
         can overflow int even though the result fits. */
      if (exp)
        base *= base;
    }

  return result;
}
87
/* Token to Value.

   Converts the first LEN characters of STR, taken as decimal digits, to
   an int.  STR need not be NUL-terminated.  Conversion stops at the first
   non-digit character.  Values too large for an int saturate at INT_MAX.
   Returns 0 when LEN is zero or negative. */
int ttov(const char* str, int len)
{
  int i, digit, val = 0;

  for (i = 0; i < len; i++)
    {
      digit = str[i] - '0';
      if (digit < 0 || digit > 9)
        break;

      /* Would val * 10 + digit exceed INT_MAX?  Check before computing. */
      if (val > (INT_MAX - digit) / 10)
        return INT_MAX;

      val = val * 10 + digit;
    }

  return val;
}
100
/* Token to Reference.

   Converts the first LEN characters of STR to a uint64_t.  REF tokens
   match '0x' xdigit+ in the token_matcher grammar, so a leading "0x" is
   skipped and the remaining characters are read as hexadecimal digits of
   either case.  STR need not be NUL-terminated.  Conversion stops at the
   first character that is not a hex digit.  Digits beyond 64 bits shift
   out of the top of the result.  Returns 0 when no digits are present. */
uint64_t ttor(const char* str, int len)
{
  int i = 0;
  uint64_t num = 0;

  /* Skip the "0x" prefix rather than treating it as digits. */
  if (len >= 2 && str[0] == '0' && (str[1] == 'x' || str[1] == 'X'))
    i = 2;

  for (; i < len; i++)
    {
      char c = str[i];
      unsigned digit;

      if (c >= '0' && c <= '9')
        digit = (unsigned)(c - '0');
      else if (c >= 'a' && c <= 'f')
        digit = (unsigned)(c - 'a') + 10u;
      else if (c >= 'A' && c <= 'F')
        digit = (unsigned)(c - 'A') + 10u;
      else
        break;

      num = (num << 4) | digit;
    }

  return num;
}
113
/* Token to String.

   Returns a freshly allocated, NUL-terminated copy of the first LEN
   characters of STR.  STR need not be NUL-terminated.  The caller owns
   the result and releases it with free().  Returns NULL when STR is NULL,
   LEN is negative, or the allocation fails. */
char* ttos(const char* str, int len)
{
  char *token;

  if (str == NULL || len < 0)
    return NULL;

  /* Allocate exactly what is needed, so no token length can overrun a
     fixed-size buffer. */
  token = malloc((size_t)len + 1);
  if (token == NULL)
    return NULL;

  memcpy(token, str, (size_t)len);
  token[len] = '\0';   /* terminator goes directly after the last copied byte */

  return token;
}