/* Ragel state machine for tokenizing text */
/*
 * NOTE(review): the targets of the original #include directives were lost.
 * The standard headers below are the ones this file's code requires.
 * YYSTYPE, yylval and the token ids REF, NUM and NAME are not declared in
 * this file -- presumably they come from the parser's generated header;
 * confirm and add that include here.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

extern void lexer_pushtok(int, YYSTYPE);

int lexer_lex(const char*);
int ipow(int, int);
int ttov(const char* str, int);
uint64_t ttor(const char* str, int);
char* ttos(const char* str, int);

#define MAX_TOK_LEN 64
#define MAX_TOKENS 16
#define MAX_STR_SIZE (MAX_TOK_LEN * MAX_TOKENS)

%%{
  machine token_matcher;

  # Each set_* action converts the text [ts, p) into a value, stores it in
  # yylval and hands it to lexer_pushtok together with its token type.
  action set_ref {
    tok_t = REF;
    yylval.ref = ttor(ts, p - ts);
    lexer_pushtok(tok_t, yylval);
    num_tokens++;
  }

  action set_val {
    tok_t = NUM;
    yylval.val = ttov(ts, p - ts);
    lexer_pushtok(tok_t, yylval);
    num_tokens++;
  }

  action set_name {
    tok_t = NAME;
    yylval.str = ttos(ts, p - ts);
    lexer_pushtok(tok_t, yylval);
    num_tokens++;
  }

  # Remember where the next token starts (runs when leaving a "tok_" segment).
  action set_ts { ts = p; }

  # One machine per token kind; the action fires when the token is left.
  ref  = '0x' xdigit+ %set_ref;
  val  = digit+       %set_val;
  name = alpha+       %set_name;
  tok  = ref | val | name;

  # Input is a list of tokens separated by '_'.
  segment = (tok . '_') %set_ts;

  main := segment* . tok;
}%%

%%write data;

/*
 * Scan a filename and push its tokens onto the stack.
 *
 * str: NUL-terminated text made of '_'-separated tokens (hex references
 *      written as 0x..., decimal numbers, alphabetic names).
 *
 * Every recognized token is passed to lexer_pushtok(). The input string is
 * then echoed to stdout.
 *
 * Returns the number of tokens that were pushed.
 */
int lexer_lex(const char* str)
{
  const char *p, *pe, *ts, *eof;
  int cs, tok_t, num_tokens; /* tok_t == token type */

  num_tokens = 0;
  p = ts = str;
  /* The terminating NUL is not part of the input. eof must equal pe so that
     the action attached to the last token runs at end of input. */
  pe = eof = p + strlen(str);

  %%write init;
  %%write exec;

  /* Never use caller-supplied text as the format string. */
  printf("%s", str);
  return num_tokens;
}

/*
 * Integer power by repeated squaring.
 *
 * base: value to raise.
 * exp:  exponent; must be non-negative.
 *
 * Returns base raised to exp (1 when exp is 0). No overflow detection.
 */
int ipow(int base, int exp)
{
  int result = 1;

  while (exp)
  {
    if (exp & 1)
      result = result * base;
    exp = exp >> 1;
    /* Square only while bits remain, so the last, unused square cannot
       overflow. */
    if (exp)
      base *= base;
  }
  return result;
}

/*
 * Token to Value: convert a run of decimal digits to an int.
 *
 * str: first digit of the token (need not be NUL-terminated).
 * len: number of digits.
 *
 * Returns the decimal value. No overflow detection.
 */
int ttov(const char* str, int len)
{
  int i, val = 0;

  for (i = 0; i < len; i++)
    val = val * 10 + (str[i] - '0');
  return val;
}

/*
 * Token to Reference: convert a hexadecimal token to a 64-bit value.
 *
 * str: start of the token, including its "0x" prefix when present
 *      (need not be NUL-terminated).
 * len: token length in characters, prefix included.
 *
 * Returns the value of the hex digits. Conversion stops at the first
 * character that is not a hex digit; digits beyond 64 bits are shifted out.
 */
uint64_t ttor(const char* str, int len)
{
  int i = 0;
  uint64_t num = 0;

  /* Skip the "0x" prefix that the ref machine requires. */
  if (len >= 2 && str[0] == '0' && (str[1] == 'x' || str[1] == 'X'))
    i = 2;

  for (; i < len; i++)
  {
    unsigned char c = (unsigned char) str[i];
    unsigned int digit;

    if (c >= '0' && c <= '9')
      digit = c - '0';
    else if (c >= 'a' && c <= 'f')
      digit = c - 'a' + 10;
    else if (c >= 'A' && c <= 'F')
      digit = c - 'A' + 10;
    else
      break;
    num = (num << 4) | digit;
  }
  return num;
}

/*
 * Token to String: copy a token into a freshly allocated C string.
 *
 * str: start of the token (need not be NUL-terminated).
 * len: number of characters to copy.
 *
 * Returns a NUL-terminated heap copy that the caller must free(), or NULL
 * if len is negative or the allocation fails.
 */
char* ttos(const char* str, int len)
{
  char* copy;

  if (len < 0)
    return NULL;

  copy = malloc((size_t) len + 1);
  if (copy == NULL)
    return NULL;

  memcpy(copy, str, (size_t) len);
  copy[len] = '\0';
  return copy;
}