src/apc/lexer_lex.c

   1
   2 #line 1 "lexer_lex.rl"
   3 /* Ragel State Machine for tokenizing text */
   #include <limits.h>
   #include <stdint.h>
   #include <stdlib.h>
   4 #include <stdio.h>
   5 #include <string.h>
   6 #include <apc/parser.tab.h>
   7
   /* Token sink defined outside this file; lexer_lex() calls it with a token
      type and the matching semantic value. */
   8 extern void lexer_pushtok(int, YYSTYPE);
   9
   /* Functions defined later in this file. */
  10 int lexer_lex(const char*);
  /* Integer power: ipow(base, exp). */
  11 int ipow(int, int);
  /* Token converters: each takes a pointer to the token text and its
     length in characters (the text is not NUL-terminated at that length). */
  12 int ttov(const char* str, int);
  13 uint64_t ttor(const char* str, int);
  14 char* ttos(const char* str, int);
  15
  16
  /* MAX_TOK_LEN is the size of the stack buffer used by ttos().
     MAX_TOKENS and MAX_STR_SIZE are not referenced in the visible part of
     this file. */
  17 #define MAX_TOK_LEN 64
  18 #define MAX_TOKENS 16
  19 #define MAX_STR_SIZE (MAX_TOK_LEN * MAX_TOKENS)
  20
  21
  22
  23 #line 47 "lexer_lex.rl"
  24
  25
  26
  27
  28 #line 29 "lexer_lex.c"
  /* Generated state-machine tables for token_matcher.  The comments below
     describe how the driver loop in lexer_lex() indexes each table; the
     values themselves are generator output and are not meant to be edited
     by hand. */

  /* Action lists.  Each list is a count followed by that many action ids
     (the ids are the case labels in lexer_lex()).  The *_actions tables
     below store offsets into this array; offset 0 is the empty list. */
  29 static const char _token_matcher_actions[] = {
  30         0, 1, 0, 1, 1, 1, 2
  31 };
  32
  /* Per state: offset of that state's first key in _token_matcher_trans_keys. */
  33 static const char _token_matcher_key_offsets[] = {
  34         0, 0, 7, 13, 17, 20, 27
  35 };
  36
  /* Per state: first its single keys, then its (low, high) range pairs.
     Values are character codes compared against *p. */
  37 static const char _token_matcher_trans_keys[] = {
  38         48, 49, 57, 65, 90, 97, 122, 48,
  39         57, 65, 70, 97, 102, 95, 120, 48,
  40         57, 95, 48, 57, 95, 48, 57, 65,
  41         70, 97, 102, 95, 65, 90, 97, 122,
  42         0
  43 };
  44
  /* Per state: number of single keys. */
  45 static const char _token_matcher_single_lengths[] = {
  46         0, 1, 0, 2, 1, 1, 1
  47 };
  48
  /* Per state: number of (low, high) range pairs. */
  49 static const char _token_matcher_range_lengths[] = {
  50         0, 3, 3, 1, 1, 3, 2
  51 };
  52
  /* Per state: base index into _token_matcher_indicies.  The driver adds the
     position of the matching key/range, or the total key count when nothing
     matched. */
  53 static const char _token_matcher_index_offsets[] = {
  54         0, 0, 5, 9, 13, 16, 21
  55 };
  56
  /* Maps the index computed above to a transition id. */
  57 static const char _token_matcher_indicies[] = {
  58         0, 2, 3, 3, 1, 4, 4, 4,
  59         1, 5, 6, 2, 1, 5, 2, 1,
  60         7, 4, 4, 4, 1, 8, 3, 3,
  61         1, 0
  62 };
  63
  /* Per transition id: the state entered. */
  64 static const char _token_matcher_trans_targs[] = {
  65         3, 0, 4, 6, 5, 1, 2, 1,
  66         1
  67 };
  68
  /* Per transition id: offset into _token_matcher_actions (0 = no action). */
  69 static const char _token_matcher_trans_actions[] = {
  70         0, 0, 0, 0, 0, 3, 0, 1,
  71         5
  72 };
  73
  /* Per state: offset into _token_matcher_actions of the list run when the
     driver finds p == eof. */
  74 static const char _token_matcher_eof_actions[] = {
  75         0, 0, 0, 3, 3, 1, 5
  76 };
  77
  /* token_matcher_start is the initial value of cs.  The driver stops as
     soon as cs becomes 0, the value of token_matcher_error.
     token_matcher_first_final and token_matcher_en_main are not referenced
     in the visible part of this file. */
  78 static const int token_matcher_start = 1;
  79 static const int token_matcher_first_final = 3;
  80 static const int token_matcher_error = 0;
  81
  82 static const int token_matcher_en_main = 1;
  83
  84
  85 #line 51 "lexer_lex.rl"
  86
  87 /*  0xxdigit+  => tok_t REF, yylval.ref = uint64_t
  88     [0-9]+  => tok_t NUM, yylval.val = int
  89     [a-zA-Z]+ => tok_t NAME, yylval.str = char*       */
  90
  91 /* Scan filename and push its tokens
  92    onto the stack */
  93 int lexer_lex (const char* str)
  94 {
  95   const char *p, *pe, *ts, *eof;
  96   int  cs, tok_t ; //tok_t == token type
  97
  98   p = ts = str;
  99   pe = p + strlen(str) + 1;
 100
 101 #line 102 "lexer_lex.c"
 102         {
 103         cs = token_matcher_start;
 104         }
 105
 106 #line 66 "lexer_lex.rl"
 107
 108 #line 109 "lexer_lex.c"
 109         {
 110         int _klen;
 111         unsigned int _trans;
 112         const char *_acts;
 113         unsigned int _nacts;
 114         const char *_keys;
 115
 116         if ( p == pe )
 117                 goto _test_eof;
 118         if ( cs == 0 )
 119                 goto _out;
 120 _resume:
 121         _keys = _token_matcher_trans_keys + _token_matcher_key_offsets[cs];
 122         _trans = _token_matcher_index_offsets[cs];
 123
 124         _klen = _token_matcher_single_lengths[cs];
 125         if ( _klen > 0 ) {
 126                 const char *_lower = _keys;
 127                 const char *_mid;
 128                 const char *_upper = _keys + _klen - 1;
 129                 while (1) {
 130                         if ( _upper < _lower )
 131                                 break;
 132
 133                         _mid = _lower + ((_upper-_lower) >> 1);
 134                         if ( (*p) < *_mid )
 135                                 _upper = _mid - 1;
 136                         else if ( (*p) > *_mid )
 137                                 _lower = _mid + 1;
 138                         else {
 139                                 _trans += (unsigned int)(_mid - _keys);
 140                                 goto _match;
 141                         }
 142                 }
 143                 _keys += _klen;
 144                 _trans += _klen;
 145         }
 146
 147         _klen = _token_matcher_range_lengths[cs];
 148         if ( _klen > 0 ) {
 149                 const char *_lower = _keys;
 150                 const char *_mid;
 151                 const char *_upper = _keys + (_klen<<1) - 2;
 152                 while (1) {
 153                         if ( _upper < _lower )
 154                                 break;
 155
 156                         _mid = _lower + (((_upper-_lower) >> 1) & ~1);
 157                         if ( (*p) < _mid[0] )
 158                                 _upper = _mid - 2;
 159                         else if ( (*p) > _mid[1] )
 160                                 _lower = _mid + 2;
 161                         else {
 162                                 _trans += (unsigned int)((_mid - _keys)>>1);
 163                                 goto _match;
 164                         }
 165                 }
 166                 _trans += _klen;
 167         }
 168
 169 _match:
 170         _trans = _token_matcher_indicies[_trans];
 171         cs = _token_matcher_trans_targs[_trans];
 172
 173         if ( _token_matcher_trans_actions[_trans] == 0 )
 174                 goto _again;
 175
 176         _acts = _token_matcher_actions + _token_matcher_trans_actions[_trans];
 177         _nacts = (unsigned int) *_acts++;
 178         while ( _nacts-- > 0 )
 179         {
 180                 switch ( *_acts++ )
 181                 {
 182         case 0:
 183 #line 24 "lexer_lex.rl"
 184         {
 185                    tok_t = REF;                      \
 186                    yylval.ref = ttor(ts, p-ts);      \
 187                    lexer_pushtok(tok_t, yylval);     \
 188                    ts = p;   }
 189         break;
 190         case 1:
 191 #line 30 "lexer_lex.rl"
 192         { tok_t = NUM;                      \
 193                    yylval.val = ttov(ts, p-ts);      \
 194                    lexer_pushtok(tok_t, yylval);     \
 195                    ts = p;   }
 196         break;
 197         case 2:
 198 #line 35 "lexer_lex.rl"
 199         { tok_t = NAME;                    \
 200                     yylval.str = ttos(ts, p-ts);     \
 201                     lexer_pushtok(tok_t, yylval);    \
 202                     ts = p;   }
 203         break;
 204 #line 205 "lexer_lex.c"
 205                 }
 206         }
 207
 208 _again:
 209         if ( cs == 0 )
 210                 goto _out;
 211         if ( ++p != pe )
 212                 goto _resume;
 213         _test_eof: {}
 214         if ( p == eof )
 215         {
 216         const char *__acts = _token_matcher_actions + _token_matcher_eof_actions[cs];
 217         unsigned int __nacts = (unsigned int) *__acts++;
 218         while ( __nacts-- > 0 ) {
 219                 switch ( *__acts++ ) {
 220         case 0:
 221 #line 24 "lexer_lex.rl"
 222         {
 223                    tok_t = REF;                      \
 224                    yylval.ref = ttor(ts, p-ts);      \
 225                    lexer_pushtok(tok_t, yylval);     \
 226                    ts = p;   }
 227         break;
 228         case 1:
 229 #line 30 "lexer_lex.rl"
 230         { tok_t = NUM;                      \
 231                    yylval.val = ttov(ts, p-ts);      \
 232                    lexer_pushtok(tok_t, yylval);     \
 233                    ts = p;   }
 234         break;
 235         case 2:
 236 #line 35 "lexer_lex.rl"
 237         { tok_t = NAME;                    \
 238                     yylval.str = ttos(ts, p-ts);     \
 239                     lexer_pushtok(tok_t, yylval);    \
 240                     ts = p;   }
 241         break;
 242 #line 243 "lexer_lex.c"
 243                 }
 244         }
 245         }
 246
 247         _out: {}
 248         }
 249
 250 #line 67 "lexer_lex.rl"
 251
 252   lexer_pushtok(tok_t, yylval);
 253
 254   printf (str);
 255   return 1;
 256 }
 257
 /* Integer power by repeated squaring.
  *
  * Returns base raised to the power exp.  exp == 0 yields 1 for every base.
  * A negative exp has no integer result and yields 0; the previous loop
  * relied on right-shifting a negative int and might never terminate.
  * The result is not range-checked: the caller must make sure it fits in
  * an int.
  */
 int ipow(int base, int exp)
 {
   int result = 1;

   if (exp < 0)
     return 0;

   while (exp > 0)
     {
       if (exp & 1)
         result *= base;
       exp >>= 1;
       /* Square only while exponent bits remain.  Squaring once more after
          the last bit is wasted work and can overflow int even though the
          result itself fits (e.g. 10^9). */
       if (exp > 0)
         base *= base;
     }

   return result;
 }
 271
 /* Token to Value: convert a run of decimal digits to an int.
  *
  * str points at the token text and len is its length in characters; the
  * text does not have to be NUL-terminated.
  *
  * Conversion stops at the first character that is not '0'..'9' and the
  * value accumulated so far is returned.  A value that does not fit in an
  * int is clamped to INT_MAX.  A NULL str or a len <= 0 yields 0.
  */
 int ttov(const char* str, int len)
 {
   int val = 0;
   int i;

   if (str == NULL)
     return 0;

   /* Accumulate left to right; no per-digit power computation is needed. */
   for (i = 0; i < len; i++)
     {
       int digit = str[i] - '0';

       if (digit < 0 || digit > 9)
         break;
       /* Would val * 10 + digit exceed INT_MAX? */
       if (val > (INT_MAX - digit) / 10)
         return INT_MAX;
       val = val * 10 + digit;
     }

   return val;
 }
 284
 /* Token to Reference: convert a REF token to a uint64_t.
  *
  * str points at the token text and len is its length in characters; the
  * text does not have to be NUL-terminated.
  *
  * REF tokens have the form 0x<hex digits>, so text starting with "0x" or
  * "0X" is read as hexadecimal (upper- and lower-case digits accepted).
  * Text without that prefix is read as decimal, which is what this function
  * computed before for all-digit input.
  *
  * Conversion stops at the first character that is not a digit of the
  * selected radix.  A value that does not fit in 64 bits is clamped to
  * UINT64_MAX.  A NULL str or a len <= 0 yields 0.
  */
 uint64_t ttor(const char* str, int len)
 {
   uint64_t num = 0;
   unsigned radix = 10;
   int i = 0;

   if (str == NULL)
     return 0;

   if (len >= 2 && str[0] == '0' && (str[1] == 'x' || str[1] == 'X'))
     {
       radix = 16;
       i = 2;   /* skip the prefix */
     }

   for (; i < len; i++)
     {
       char c = str[i];
       unsigned digit;

       if (c >= '0' && c <= '9')
         digit = (unsigned)(c - '0');
       else if (c >= 'a' && c <= 'f')
         digit = (unsigned)(c - 'a') + 10;
       else if (c >= 'A' && c <= 'F')
         digit = (unsigned)(c - 'A') + 10;
       else
         break;

       if (digit >= radix)   /* hex letter inside a decimal token */
         break;
       /* Would num * radix + digit exceed UINT64_MAX? */
       if (num > (UINT64_MAX - digit) / radix)
         return UINT64_MAX;
       num = num * radix + digit;
     }

   return num;
 }
 297
 /* Token to String: return a freshly allocated, NUL-terminated copy of the
  * first len characters of str.
  *
  * The copy is heap-allocated with exactly len + 1 bytes, so tokens of any
  * length are handled.  The previous version staged the text in a
  * MAX_TOK_LEN-byte stack buffer without checking len, and wrote the
  * terminator at index len + 1, leaving the byte at index len uninitialised.
  *
  * Returns NULL if str is NULL, len is negative, or the allocation fails.
  * The caller owns the result and releases it with free().
  */
 char* ttos(const char* str, int len)
 {
   char *copy;

   if (str == NULL || len < 0)
     return NULL;

   copy = malloc((size_t)len + 1);
   if (copy == NULL)
     return NULL;

   memcpy(copy, str, (size_t)len);
   copy[len] = '\0';

   return copy;
 }