/* Public */
int lexer_init(void);
int lexer(void);
-int lexer_lex(const char*);
-void lexer_pushtok(int, int);
+void lexer_pushtok(int, YYSTYPE);
+extern //ragel
+int lexer_lex(const char*);
struct dirent* lexer_direntpa[DE_STACKSIZE];
/* Private */
extern //scanner.c
YYSTYPE yylval;
static
struct tok
-{ int lval;
- int tok;
+{ union YYSTYPE val; //token val
+ int tt; //token type
} token_stack[TK_STACKSIZE];
static
union tokp
-{ int* i;
- struct tok* t;
+{ int* tpt; //token pointer type
+ struct tok* tok;
+ union YYSTYPE* tvp; //token value pointer
} tks, tkx;
static
struct dirent** dps;
times in a sequence!
*/
#define TK_STACK (token_stack)
-#define TK_STACKP (tks.t)
-#define TK_STACKPI (tks.i)
-#define TK_STACKX (tkx.t)
-#define TK_STACKXI (tkx.i)
+#define TK_STACKP (tks.tok)
+#define TK_STACKPI (tks.tpt)
+#define TK_STACKPL (tks.tvp)
+#define TK_STACKX (tkx.tok)
+#define TK_STACKXI (tkx.tpt)
#define TK_LEN() (TK_STACKP - TK_STACKX)
#define TK_INIT() (TK_STACKP = TK_STACKX = TK_STACK)
#define TK_POP() (*TK_STACKP++)
#define TK_POPI() (*TK_STACKPI++);
+#define TK_POPL() (*TK_STACKPL++);
#define TK_PUSH(T,L) (*TK_STACKX++ = (struct tok){L,T})
/* Initializer
return 0;
}
}
- yylval.val = TK_POPI();
+ yylval = TK_POPL();
return TK_POPI();
}
-/* Lexical Analysis
- Ragel state machine for tokenizing text.
-*/
-int lexer_lex
-(const char* str)
-{ lexer_pushtok(1, 2);
- printf (str);
- return 1;
-}
-
/* Token Receiver
This receiver takes a struct tok and pushes it to the FIFO stack.
void lexer_pushtok
#define S(S)#S //stringifier
#define ERR_TK "Fatal: Generated over " S(TK_STACKSIZE) " tokens in one pass."
-( int tok, int lval )
+( int tok, YYSTYPE lval )
{ if (TK_LEN() >= TK_STACKSIZE)
{ fprintf(stderr, ERR_TK);
exit(EXIT_FAILURE);
--- /dev/null
+
+#line 1 "lexer_lex.rl"
+/* Ragel State Machine for tokenizing text */
+#include <stdio.h>
+#include <string.h>
+#include <apc/parser.tab.h>
+
+extern void lexer_pushtok(int, YYSTYPE);
+
+int lexer_lex(const char*);
+int ipow(int, int);
+int ttov(const char* str, int);
+uint64_t ttor(const char* str, int);
+char* ttos(const char* str, int);
+
+
+#define MAX_TOK_LEN 64
+#define MAX_TOKENS 16
+#define MAX_STR_SIZE (MAX_TOK_LEN * MAX_TOKENS)
+
+
+
+#line 47 "lexer_lex.rl"
+
+
+
+
/* NOTE(review): machine-generated by Ragel from lexer_lex.rl
   (machine "token_matcher") -- do not hand-edit; regenerate from the
   .rl source instead.  The tables encode the token DFA:
   key_offsets/trans_keys drive the per-state binary search in the
   exec loop, trans_targs/trans_actions map each matched transition to
   the next state and an action list, and eof_actions emits the
   pending token when input ends while the machine is in a final
   state. */
#line 29 "lexer_lex.c"
static const char _token_matcher_actions[] = {
	0, 1, 0, 1, 1, 1, 2
};

static const char _token_matcher_key_offsets[] = {
	0, 0, 7, 13, 17, 20, 27
};

static const char _token_matcher_trans_keys[] = {
	48, 49, 57, 65, 90, 97, 122, 48,
	57, 65, 70, 97, 102, 95, 120, 48,
	57, 95, 48, 57, 95, 48, 57, 65,
	70, 97, 102, 95, 65, 90, 97, 122,
	0
};

static const char _token_matcher_single_lengths[] = {
	0, 1, 0, 2, 1, 1, 1
};

static const char _token_matcher_range_lengths[] = {
	0, 3, 3, 1, 1, 3, 2
};

static const char _token_matcher_index_offsets[] = {
	0, 0, 5, 9, 13, 16, 21
};

static const char _token_matcher_indicies[] = {
	0, 2, 3, 3, 1, 4, 4, 4,
	1, 5, 6, 2, 1, 5, 2, 1,
	7, 4, 4, 4, 1, 8, 3, 3,
	1, 0
};

static const char _token_matcher_trans_targs[] = {
	3, 0, 4, 6, 5, 1, 2, 1,
	1
};

static const char _token_matcher_trans_actions[] = {
	0, 0, 0, 0, 0, 3, 0, 1,
	5
};

static const char _token_matcher_eof_actions[] = {
	0, 0, 0, 3, 3, 1, 5
};

static const int token_matcher_start = 1;
static const int token_matcher_first_final = 3;
static const int token_matcher_error = 0;

static const int token_matcher_en_main = 1;
+
+
+#line 51 "lexer_lex.rl"
+
+/* 0xxdigit+ => tok_t REF, yylval.ref = uint64_t
+ [0-9]+ => tok_t NUM, yylval.val = int
+ [a-zA-Z]+ => tok_t NAME, yylval.str = char* */
+
+/* Scan filename and push the its tokens
+ onto the stack */
+int lexer_lex (const char* str)
+{
+ const char *p, *pe, *ts, *eof;
+ int cs, tok_t ; //tok_t == token type
+
+ p = ts = str;
+ pe = p + strlen(str) + 1;
+
+#line 102 "lexer_lex.c"
+ {
+ cs = token_matcher_start;
+ }
+
+#line 66 "lexer_lex.rl"
+
+#line 109 "lexer_lex.c"
+ {
+ int _klen;
+ unsigned int _trans;
+ const char *_acts;
+ unsigned int _nacts;
+ const char *_keys;
+
+ if ( p == pe )
+ goto _test_eof;
+ if ( cs == 0 )
+ goto _out;
+_resume:
+ _keys = _token_matcher_trans_keys + _token_matcher_key_offsets[cs];
+ _trans = _token_matcher_index_offsets[cs];
+
+ _klen = _token_matcher_single_lengths[cs];
+ if ( _klen > 0 ) {
+ const char *_lower = _keys;
+ const char *_mid;
+ const char *_upper = _keys + _klen - 1;
+ while (1) {
+ if ( _upper < _lower )
+ break;
+
+ _mid = _lower + ((_upper-_lower) >> 1);
+ if ( (*p) < *_mid )
+ _upper = _mid - 1;
+ else if ( (*p) > *_mid )
+ _lower = _mid + 1;
+ else {
+ _trans += (unsigned int)(_mid - _keys);
+ goto _match;
+ }
+ }
+ _keys += _klen;
+ _trans += _klen;
+ }
+
+ _klen = _token_matcher_range_lengths[cs];
+ if ( _klen > 0 ) {
+ const char *_lower = _keys;
+ const char *_mid;
+ const char *_upper = _keys + (_klen<<1) - 2;
+ while (1) {
+ if ( _upper < _lower )
+ break;
+
+ _mid = _lower + (((_upper-_lower) >> 1) & ~1);
+ if ( (*p) < _mid[0] )
+ _upper = _mid - 2;
+ else if ( (*p) > _mid[1] )
+ _lower = _mid + 2;
+ else {
+ _trans += (unsigned int)((_mid - _keys)>>1);
+ goto _match;
+ }
+ }
+ _trans += _klen;
+ }
+
+_match:
+ _trans = _token_matcher_indicies[_trans];
+ cs = _token_matcher_trans_targs[_trans];
+
+ if ( _token_matcher_trans_actions[_trans] == 0 )
+ goto _again;
+
+ _acts = _token_matcher_actions + _token_matcher_trans_actions[_trans];
+ _nacts = (unsigned int) *_acts++;
+ while ( _nacts-- > 0 )
+ {
+ switch ( *_acts++ )
+ {
+ case 0:
+#line 24 "lexer_lex.rl"
+ {
+ tok_t = REF; \
+ yylval.ref = ttor(ts, p-ts); \
+ lexer_pushtok(tok_t, yylval); \
+ ts = p; }
+ break;
+ case 1:
+#line 30 "lexer_lex.rl"
+ { tok_t = NUM; \
+ yylval.val = ttov(ts, p-ts); \
+ lexer_pushtok(tok_t, yylval); \
+ ts = p; }
+ break;
+ case 2:
+#line 35 "lexer_lex.rl"
+ { tok_t = NAME; \
+ yylval.str = ttos(ts, p-ts); \
+ lexer_pushtok(tok_t, yylval); \
+ ts = p; }
+ break;
+#line 205 "lexer_lex.c"
+ }
+ }
+
+_again:
+ if ( cs == 0 )
+ goto _out;
+ if ( ++p != pe )
+ goto _resume;
+ _test_eof: {}
+ if ( p == eof )
+ {
+ const char *__acts = _token_matcher_actions + _token_matcher_eof_actions[cs];
+ unsigned int __nacts = (unsigned int) *__acts++;
+ while ( __nacts-- > 0 ) {
+ switch ( *__acts++ ) {
+ case 0:
+#line 24 "lexer_lex.rl"
+ {
+ tok_t = REF; \
+ yylval.ref = ttor(ts, p-ts); \
+ lexer_pushtok(tok_t, yylval); \
+ ts = p; }
+ break;
+ case 1:
+#line 30 "lexer_lex.rl"
+ { tok_t = NUM; \
+ yylval.val = ttov(ts, p-ts); \
+ lexer_pushtok(tok_t, yylval); \
+ ts = p; }
+ break;
+ case 2:
+#line 35 "lexer_lex.rl"
+ { tok_t = NAME; \
+ yylval.str = ttos(ts, p-ts); \
+ lexer_pushtok(tok_t, yylval); \
+ ts = p; }
+ break;
+#line 243 "lexer_lex.c"
+ }
+ }
+ }
+
+ _out: {}
+ }
+
+#line 67 "lexer_lex.rl"
+
+ lexer_pushtok(tok_t, yylval);
+
+ printf (str);
+ return 1;
+}
+
/* Integer exponentiation by squaring: returns base**exp.
   Returns 0 for exp < 0 (the true result would be fractional).
   Fix: the original looped forever on negative exponents, because
   `exp >> 1` on a negative int is an arithmetic shift toward -1,
   so `while (exp)` never terminated. */
int ipow(int base, int exp)
{
  int result = 1;

  if (exp < 0)            /* guard: negative exponent previously hung */
    return 0;

  while (exp)
  {
    if (exp & 1)
      result = result * base;
    exp = exp >> 1;
    base *= base;
  }

  return result;
}
+
/* Token to Value: convert the first `len` decimal-digit characters
   of `str` to an int.  Rewritten as a single left-to-right Horner
   pass, replacing the per-digit ipow(10,i) calls, which made the
   conversion accidentally O(n^2) and coupled it to ipow(). */
int ttov(const char* str, int len)
{
  int i, val = 0;

  for (i = 0; i < len; i++)
  {
    val = val * 10 + (str[i] - '0');
  }

  return val;
}
+
/* Token to Ref: convert the first `len` decimal-digit characters of
   `str` to a uint64_t.  Fixes: the original computed each
   (digit * ipow(10,i)) product in plain int, overflowing for values
   above INT_MAX despite the 64-bit return type, and was O(n^2); the
   Horner pass accumulates entirely in uint64_t.
   NOTE(review): the machine's 'ref' pattern is '0x' xdigit+, but
   this routine (like the original) converts decimal digits only --
   hex letters and the "0x" prefix are not handled; confirm the
   intended base with the grammar author. */
uint64_t ttor(const char* str, int len)
{
  int i;
  uint64_t num = 0;

  for (i = 0; i < len; i++)
  {
    num = num * 10 + (uint64_t)(str[i] - '0');
  }

  return num;
}
+
+char* ttos(const char* str, int len)
+{
+ int i;
+ char token_buf[MAX_TOK_LEN];
+
+ memmove(token_buf, str, len);
+ token_buf[len+1] = '\0';
+
+ return strdup(token_buf);
+}
--- /dev/null
+/* Ragel State Machine for tokenizing text */
+#include <stdio.h>
+#include <string.h>
+#include <apc/parser.tab.h>
+
+extern void lexer_pushtok(int, YYSTYPE);
+
+int lexer_lex(const char*);
+int ipow(int, int);
+int ttov(const char* str, int);
+uint64_t ttor(const char* str, int);
+char* ttos(const char* str, int);
+
+
+#define MAX_TOK_LEN 64
+#define MAX_TOKENS 16
+#define MAX_STR_SIZE (MAX_TOK_LEN * MAX_TOKENS)
+
+
%%{
  machine token_matcher;

  # set up yylval and tok_t to be pushed to stack
  # (these are Ragel leaving actions: each fires when the scanner
  #  leaves its token sub-machine, converts the span ts..p, pushes
  #  the (type, value) pair via lexer_pushtok, then restarts ts at p)
  action set_ref {
    tok_t = REF; \
    yylval.ref = ttor(ts, p-ts); \
    lexer_pushtok(tok_t, yylval); \
    ts = p; }

  action set_val { tok_t = NUM; \
    yylval.val = ttov(ts, p-ts); \
    lexer_pushtok(tok_t, yylval); \
    ts = p; }

  action set_name { tok_t = NAME; \
    yylval.str = ttos(ts, p-ts); \
    lexer_pushtok(tok_t, yylval); \
    ts = p; }

  # instantiate machines for each possible token
  # NOTE(review): 'ref' matches '0x' followed by hex digits, but
  # ttor() converts decimal digits only -- confirm the intended base.
  ref = '0x' xdigit+ %set_ref;
  val = digit+ %set_val;
  name = alpha+ %set_name;
  tok = ref | val | name;

  # main: one or more tokens separated by single underscores
  main := (tok . '_')* . tok;
}%%
+
+
+%%write data;
+
+/* 0xxdigit+ => tok_t REF, yylval.ref = uint64_t
+ [0-9]+ => tok_t NUM, yylval.val = int
+ [a-zA-Z]+ => tok_t NAME, yylval.str = char* */
+
+/* Scan filename and push the its tokens
+ onto the stack */
+int lexer_lex (const char* str)
+{
+ const char *p, *pe, *ts, *eof;
+ int cs, tok_t ; //tok_t == token type
+
+ p = ts = str;
+ pe = p + strlen(str) + 1;
+ %%write init;
+ %%write exec;
+
+ lexer_pushtok(tok_t, yylval);
+
+ printf (str);
+ return 1;
+}
+
/* Integer exponentiation by squaring: returns base**exp.
   Returns 0 for exp < 0 (the true result would be fractional).
   Fix: the original looped forever on negative exponents, because
   `exp >> 1` on a negative int is an arithmetic shift toward -1,
   so `while (exp)` never terminated. */
int ipow(int base, int exp)
{
  int result = 1;

  if (exp < 0)            /* guard: negative exponent previously hung */
    return 0;

  while (exp)
  {
    if (exp & 1)
      result = result * base;
    exp = exp >> 1;
    base *= base;
  }

  return result;
}
+
/* Token to Value: convert the first `len` decimal-digit characters
   of `str` to an int.  Rewritten as a single left-to-right Horner
   pass, replacing the per-digit ipow(10,i) calls, which made the
   conversion accidentally O(n^2) and coupled it to ipow(). */
int ttov(const char* str, int len)
{
  int i, val = 0;

  for (i = 0; i < len; i++)
  {
    val = val * 10 + (str[i] - '0');
  }

  return val;
}
+
/* Token to Ref: convert the first `len` decimal-digit characters of
   `str` to a uint64_t.  Fixes: the original computed each
   (digit * ipow(10,i)) product in plain int, overflowing for values
   above INT_MAX despite the 64-bit return type, and was O(n^2); the
   Horner pass accumulates entirely in uint64_t.
   NOTE(review): the machine's 'ref' pattern is '0x' xdigit+, but
   this routine (like the original) converts decimal digits only --
   hex letters and the "0x" prefix are not handled; confirm the
   intended base with the grammar author. */
uint64_t ttor(const char* str, int len)
{
  int i;
  uint64_t num = 0;

  for (i = 0; i < len; i++)
  {
    num = num * 10 + (uint64_t)(str[i] - '0');
  }

  return num;
}
+
+char* ttos(const char* str, int len)
+{
+ int i;
+ char token_buf[MAX_TOK_LEN];
+
+ memmove(token_buf, str, len);
+ token_buf[len+1] = '\0';
+
+ return strdup(token_buf);
+}