/* Ragel State Machine for tokenizing text */

/* NOTE(review): the header names on the #include lines were missing from the
   source as received.  The standard headers below are the ones the visible
   code needs (uint64_t; printf; malloc; strlen and memcpy).  YYSTYPE, yylval
   and the REF, NUM and NAME token ids are not declared in this file and
   presumably come from the generated parser header: confirm and include it
   here as well. */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

extern void lexer_pushtok(int, YYSTYPE);

int lexer_lex(const char*);
int ipow(int, int);
int ttov(const char* str, int);
uint64_t ttor(const char* str, int);
char* ttos(const char* str, int);

#define MAX_TOK_LEN 64
#define MAX_TOKENS 16
#define MAX_STR_SIZE (MAX_TOK_LEN * MAX_TOKENS)

/* Stringify helper used by the PUSHTOK trace output. */
#define $($)#$

/* Convert the token text [ts, p) with LFUNC, store the result in the UTYPE
   member of yylval, hand (TOK, yylval) to lexer_pushtok() and count it.
   Expands inside lexer_lex() and uses its locals ts, p, tok_t and ntok. */
#define PUSHTOK(TOK,LFUNC,UTYPE)                          \
  do {                                                    \
    printf("PUSHTOK(" $(TOK) $(LFUNC) $(UTYPE) ")\n");    \
    tok_t = TOK;                                          \
    yylval.UTYPE = LFUNC(ts, p-ts);                       \
    lexer_pushtok(tok_t, yylval);                         \
    ++ntok;                                               \
  } while (0)

%%{
  machine token_matcher;

  # set up yylval and tok_t to be pushed to stack
  action set_ref  { PUSHTOK(REF, ttor, ref); }
  action set_val  { PUSHTOK(NUM, ttov, val); }
  action set_name { PUSHTOK(NAME, ttos, str); }
  # mark the start of the next token
  action set_ts   { ts = p; }

  # instantiate machines for each possible token
  ref  = '0x' xdigit+ %set_ref;
  val  = digit+ %set_val;
  name = alpha+ %set_name;
  tok  = ref | val | name;

  # tokens are separated by underscores; the last token has no separator
  segment = (tok . '_') %set_ts;

  main := segment* . tok;
}%%

%%write data;

/* Scan filename and push its tokens onto the stack.

   str is a NUL-terminated string of tokens separated by '_'.  Each token is
   a hexadecimal reference ("0x" followed by hex digits), a decimal number,
   or an alphabetic name.  Every recognised token is passed to
   lexer_pushtok().

   Returns the number of tokens pushed (0 when str is NULL). */
int lexer_lex (const char* str)
{
  const char *p, *pe, *ts, *eof;
  int cs, tok_t, ntok = 0;

  if (str == NULL)
    { return 0; }

  printf ("Lexing: %s\n",str);
  p = ts = str;
  /* The input ends at the NUL terminator, which is not part of it.  The
     leaving action of the final token only runs as an EOF action, so eof
     has to be set and has to coincide with pe. */
  pe = p + strlen(str);
  eof = pe;
  %%write init;
  %%write exec;
  printf ("Lexed %i tokens\n",ntok);
  return ntok;
}

/* Integer power by repeated squaring.

   Returns base raised to exp.  Negative exponents are not supported and
   yield 0.  Overflow of the result itself is not detected. */
int ipow(int base, int exp)
{
  int result = 1;

  if (exp < 0)
    { return 0; }

  while (exp)
    { if (exp & 1)
        { result = result * base; }
      exp = exp >> 1;
      /* Square only while bits remain: the final squaring is never used and
         could overflow even though the result fits in an int. */
      if (exp)
        { base *= base; }
    }

  return result;
}

/* Token to Value

   Converts the first len characters of str, expected to be decimal digits,
   to an int.  str does not need to be NUL-terminated.  A len of zero or less
   yields 0.  Values that do not fit wrap in unsigned arithmetic before the
   final conversion to int. */
int ttov(const char* str, int len)
{
  int i;
  unsigned int val = 0;

  for (i = 0; i < len; i++)
    { val = val * 10u + (unsigned int)(str[i] - '0'); }

  return (int)val;
}

/* Token to Reference

   Converts the first len characters of str, a hexadecimal number with an
   optional "0x" or "0X" prefix, to a uint64_t.  str does not need to be
   NUL-terminated.  Conversion stops at the first character that is not a
   hex digit.  More than 16 significant digits keep only the low 64 bits. */
uint64_t ttor(const char* str, int len)
{
  int i = 0;
  uint64_t num = 0;

  /* The lexer passes the whole token text, prefix included. */
  if (len >= 2 && str[0] == '0' && (str[1] == 'x' || str[1] == 'X'))
    { i = 2; }

  for (; i < len; i++)
    { char c = str[i];
      unsigned int digit;

      if (c >= '0' && c <= '9')
        { digit = (unsigned int)(c - '0'); }
      else if (c >= 'a' && c <= 'f')
        { digit = (unsigned int)(c - 'a') + 10u; }
      else if (c >= 'A' && c <= 'F')
        { digit = (unsigned int)(c - 'A') + 10u; }
      else
        { break; }

      num = (num << 4) | digit;
    }

  return num;
}

/* Token to String

   Returns a newly allocated, NUL-terminated copy of the first len characters
   of str, or NULL if allocation fails.  A len of zero or less yields an
   empty string.  The caller owns the result and releases it with free(). */
char* ttos(const char* str, int len)
{
  size_t n = (len > 0) ? (size_t)len : 0;
  char *token = malloc(n + 1);

  if (token == NULL)
    { return NULL; }

  memcpy(token, str, n);
  token[n] = '\0';
  return token;
}