\0 is a delimiter
[henge/apc.git] / src / lexer.rl
1 #include <stdio.h>
2 #include <stdint.h>
3 #include <stdlib.h>
4 #include <errno.h>
5 #include "parser.tab.h"
6 #include "apc.h"
7 #include <unistdio.h>
8 #include <unistr.h>
/* Public entry points of the lexer.
   lexer_init/lexer_quit create and destroy the parser state objects; the
   lexer_lex* functions tokenize a name and push the tokens to the parser;
   lexer_closedir pushes the token that closes a directory. */
int  lexer_init(void);
void lexer_quit(void);
int  lexer_lexfile(uint8_t*);
int  lexer_lexdir(uint8_t*);
int  lexer_lexstring(uint8_t*, int);
int  lexer_closedir(void);
//apc.c
16 static
17 yypstate* pstate;
18 static
19 yycstate* cstate;
20 /* Ring buffer for keeping lexical tokens valid for up to 255 tokens */
21 static
22 YYSTYPE lval_stack[0xFF + 1];
23 static
24 uint8_t lval_offs;
/* Push token T, with L pointing at its semantic value, to the push parser. */
#define PUSHTOK(T,L) yypush_parse(pstate, (T), (L), cstate)

/* Store value L in member Y of the current ring-buffer slot, push token T
   with a pointer to that slot, then advance the ring buffer.
   Expands to exactly one statement: the caller supplies the semicolon, which
   keeps the macro safe inside an unbraced if/else.
   Increments a variable named `ntok`, which must be in scope wherever the
   macro is used. */
#define LEXTOK(T,Y,L) do {                  \
    lval_stack[lval_offs].Y = (L);          \
    PUSHTOK(T, lval_stack + lval_offs);     \
    lval_offs++;                            \
    ntok++;                                 \
  } while (0)

/* One wrapper per token kind; each selects the YYSTYPE member to fill. */
#define PUSHFACE(F) LEXTOK(FACING, face, F)
#define PUSHREF(R)  LEXTOK(REF, ref, R)
#define PUSHLINK()  LEXTOK(LINK, val, 0)
#define PUSHNUM(N)  LEXTOK(NUM, val, N)
#define PUSHNAME(N) LEXTOK(NAME, str, N)
#define PUSHOP(O)   LEXTOK(O, val, 0)
#define PUSHPATH(P) LEXTOK(PATH, str, P)

/* Non-zero enables the printf tracing used throughout the lexer. */
#define DEBUG 1
41
/* Lexstring is the main lexer for APC and is generated by ragel.  It lexes the
   names of files that have been scanned and pushes each token's type and value
   to the push parser (through the PUSH* macros) as soon as the token has been
   recognized. */

%%{
  machine lexstring;

  # Actions run inside lexer_lexstring and use its locals: ts (token start),
  # p (current byte), lval (scratch value), str (whole input) and ntok.
  action push_ref  { errno = 0;
                     lval.ref = strtoll((char*)ts,NULL,16);
                     if (errno)
                       { fprintf(stderr, "Invalid hex number in file %s\n",(char*)str);
                         exit(1);
                       }
                     PUSHREF(lval.ref);
                   }
  action push_link { PUSHLINK(); }
  action push_val  { errno = 0;
                     lval.val = strtoll((char*)ts,NULL,10);
                     if (errno)
                       { fprintf(stderr, "strtoll could not parse %s\n", (char*)str);
                         exit(1);
                       }
                     PUSHNUM(lval.val);
                   }
  action push_name { if(DEBUG) printf("Lexer_lexstring:: action:push_name: from %s to %s\n", ts, p);
                     PUSHNAME(ts);
                   }
  action push_map  { if(DEBUG) printf("Lexer_lexstring:: action:push_map: pushing map token\n");
                     PUSHOP(MAP);
                   }
  action set_ts    { if(DEBUG) printf("Lexer_lexstring:: action:set_ts. ts = %s\n", p);
                     ts = p; }
  action push_SS   { if(DEBUG) printf("Lexer_lexstring:: action:push_SS. p = %s\n",p);
                     PUSHOP(SS);
                   }
  action push_S    { if(DEBUG) printf("Lexer_lexstring:: action:push_S. p = %s\n", p);
                     PUSHFACE(SFACE);
                   }
  action push_SW   { if(DEBUG) printf("Lexer_lexstring:: action:push_SW. p = %s\n", p);
                     PUSHFACE(SWFACE);
                   }
  action push_W    { if(DEBUG) printf("Lexer_lexstring:: action:push_W. p = %s\n", p);
                     PUSHFACE(WFACE);
                   }
  action push_NW   { if(DEBUG) printf("Lexer_lexstring:: action:push_NW. p = %s\n", p);
                     PUSHFACE(NWFACE);
                   }
  action push_N    { if(DEBUG) printf("Lexer_lexstring:: action:push_N. p = %s\n", p);
                     PUSHFACE(NFACE);
                   }
  action push_NE   { if(DEBUG) printf("Lexer_lexstring:: action:push_NE. p = %s\n", p);
                     PUSHFACE(NEFACE);
                   }
  action push_E    { if(DEBUG) printf("Lexer_lexstring:: action:push_E. p = %s\n", p);
                     PUSHFACE(EFACE);
                   }
  action push_SE   { if(DEBUG) printf("Lexer_lexstring:: action:push_SE. p = %s\n", p);
                     PUSHFACE(SEFACE);
                   }
  # The message is printed only when DEBUG is set; the exit is unconditional.
  action ref_error { if(DEBUG) printf("ref from %s to %s has an inappropriate amount of hex digits, it must have eight.\n", ts, p);
                     exit(1);
                   }
  action p         { if(DEBUG) printf("Lexer_lexstring:: p = %s\n", p);
                   }

  # Facing tokens: each one pushes its own FACING value on leaving.
  N  = 'N'  %push_N;
  W  = 'W'  %push_W;
  S  = 'S'  %push_S;
  E  = 'E'  %push_E;
  NW = 'NW' %push_NW;
  NE = 'NE' %push_NE;
  SW = 'SW' %push_SW;
  SE = 'SE' %push_SE;

  # Tokens are separated by '_' or by a NUL byte.
  tok_delimiter = [_\0];

  direction  = (N | W | S | E | NW | NE | SW | SE) ;
  dimensions = (digit+ - '0') >set_ts %push_val 'x' (digit+ - '0') >set_ts %push_val;
  link       = '#' %push_link;
  SS         = ('+SS' %to(push_SS)) | ('+SS' %to(push_SS) link ) ;
  ref        = '0x' >set_ts alnum{8} $err(ref_error) %push_ref ;
  val        = digit+ >set_ts %push_val ;
  name       = lower >set_ts (lower | digit)* %push_name ;
  map        = '+MAP' %to(push_map);
  tok        = (name | val | ref | dimensions | map | link | SS | direction);


  # The scanned range must end with a NUL byte after the last token.
  main := (tok tok_delimiter)* tok [\0];

  write data nofinal noerror noprefix;

}%%
135
136 int lexer_init
137 ( void )
138 { pstate = yypstate_new();
139 cstate = yycstate_new();
140 lval_offs = 0;
141 return !pstate || !cstate;
142 }
143
144 void lexer_quit
145 ( void )
146 { if (pstate) yypstate_delete(pstate);
147 if (cstate) yycstate_delete(cstate);
148 }
149
150 int lexer_lexstring
151 ( uint8_t* str,
152 int size
153 )
154 { uint8_t *p;
155 uint8_t *ts, *pe, *eof;
156 int cs, ntok;
157 YYSTYPE lval;
158
159 ntok = 0;
160 p = ts = str;
161 pe = eof = p + size + 1;
162
163 if(DEBUG) printf("|---Begin lexstring on p = %s, pe = %s.\n",p, pe);
164
165 %%write init;
166 %%write exec;
167
168 if(DEBUG) printf("Ending lexstring of file %s, pushed %d tokens.\n",str, ntok);
169
170 return ntok;
171 }
172
173 /* Lexical analysis of a file
174 Strips a filename to its base name, then sends it to lexer_lexstring before
175 pushing a PATH token with the filename
176 Returns the number of tokens pushed to the parser.
177 */
178 int lexer_lexfile
179 ( uint8_t* filename )
180 { uint8_t* last_period,* iter,* filename_end;
181 int ntok;
182 last_period = NULL;
183 for (iter = filename; *iter; iter++)
184 switch (*iter)
185 { // Keep track of the last 'dot' in the name
186 case '.' : last_period = iter; continue;
187 // replace '_' with '\0' so bison can use strlen on them as tokens.
188 case '_' : *iter = '\0';
189 default: continue;
190 }
191 // Mark the end of the filename
192 filename_end = iter;
193 // Lex from either the last period, if present, or filename end
194 ntok = (last_period) ?
195 lexer_lexstring(filename, (int)(last_period - filename))
196 : lexer_lexstring(filename, (int)(iter - filename));
197 // Replace nulls with their original '_'
198 for (iter = filename; iter < filename_end; iter++)
199 if (*iter == '\0')
200 *iter = '_';
201 PUSHPATH(filename);
202 return ntok + 1;
203 return en_main == 1;
204 }
205
206 int lexer_lexdir
207 ( uint8_t* dirname )
208 { uint8_t* de = dirname;
209 int ntok;
210 ntok = 0;
211 de = dirname;
212 if (*de) while (*++de);
213 ntok = lexer_lexstring(dirname, (int)(de - dirname));
214 PUSHOP(CLOPEN);
215 return ntok;
216 }
217
218 int lexer_closedir
219 ( void )
220 { int ntok = 0;
221 PUSHOP(CLCLOSE);
222 return ntok;
223 }