2 \brief lexical analyzer implementation for APC
3 \details The lexer manages two FIFO stacks. One for maintaining tokens, the
4 other for maintaining a list of files to be scanned. During
5 execution, the lexer will return a token from its token queue if any
6 are present. If not, the lexer will pop an element from its
7 file queue to 'scanner' to be tokenized. If the file queue is empty,
8 the lexer will instead call 'parsedir' to traverse the directory tree
9 and tokenize the results. If 'parsedir' does not generate any new
11 \author Jordan Lavatai
13 ----------------------------------------------------------------------------*/
27 #include <limits.h> //realpath, NAME_MAX, FPATH_MAX
32 #include "parser.tab.h"
34 #define DE_STACKSIZE 1024
37 #define TK_STACKSIZE 1024
44 int lexer_lexfile(const uint8_t*);
45 void lexer_pushtok(int, YYSTYPE
);
46 uint8_t const* lexer_get_current_filepath(void);
47 int lexer_lexfilename(uint8_t*);
48 struct dirent
* lexer_direntpa
[DE_STACKSIZE
],** lexer_direntpp
,** lexer_direntpb
;
51 int lexer_lexstring(uint8_t*, int);
53 int lexer_setstr(uint8_t*, int);
55 int scanner_init(void);
61 uint8_t const* current_filename
;
65 { YYSTYPE lval
; //token val
66 int tok_t
; //token type
67 } token_stack
[TK_STACKSIZE
], *tsp
, *tsx
;
69 /* Directory Entity Array/Stack
70 Simple array for keeping track of dirents yet to be processed by the scanner.
71 If this list is empty and there are no tokens, the lexer is done.
72 This array is populated by the scanner as an array, and popped locally by the
73 lexer in FIFO order (the oldest entry is popped first).
75 #define DE_STACK (lexer_direntpa) //backing array of dirent pointers (DE_STACKSIZE slots)
76 #define DE_STACKP (lexer_direntpp) //upper cursor: DE_LEN() measures up to this pointer
77 #define DE_STACKB (lexer_direntpb) //lower cursor: DE_POP() reads from and advances this pointer
78 #define DE_LEN() (DE_STACKP - DE_STACKB) //number of entries lying between the two cursors
79 #define DE_INIT() (DE_STACKP = DE_STACKB = DE_STACK) //reset both cursors to the start of the array
80 #define DE_POP() (*DE_STACKB++) //yield the entry at the lower cursor, then step past it; capture the result, since DE_STACKB no longer points at the popped entry afterwards
83 This is a FIFO stack whose pointers are a union of either a pointer to an
84 integer, or a pointer to two integers (a struct tok). This way, integers may
85 be added or removed from the stack either singularly (IPUSH/IPOP), or as a
86 full token of two integers (PUSH/POP).
87 An alignment error will occur if IPOP or IPUSH are used a non-even number of
90 #define TK_STACK (token_stack) //backing array of tokens (TK_STACKSIZE slots)
91 #define TK_STACKP (tsp) //read cursor: the token TK_POP() hands out next
92 #define TK_STACKX (tsx) //write cursor: the slot TK_PUSH() fills next
93 #define TK_LEN() (TK_STACKX - TK_STACKP) //tokens pushed but not yet popped
94 #define TK_INIT() (TK_STACKP = TK_STACKX = TK_STACK) //reset both cursors to the start of the array
95 #define TK_POP() (*TK_STACKP++) //yield the token at the read cursor, then advance the cursor
96 #define TK_PUSH(T,L) (*TK_STACKX++ = (struct tok){L,T}) //store value L and type T (struct order is value, then type); performs no bounds check itself
99 The initializer returns boolean true if an error occurs, which may be handled
106 return scanner_init();
110 If the token buffer is empty, 'lexer' will initialize the token buffer and
111 call 'lexer_scandir'. If SCAN_ERROR is returned, an error is printed
112 before sending a null return to bison. If 0 tokens are generated, the error
113 printing is skipped. In all other cases, 'yylval' is set, and the token's
114 integer representation is returned.
117 #define SCAN_ERROR -1 //value the lexer's switch treats as a scanner failure ("Scanner error")
118 #define TK_EMPTY (TK_STACKP == TK_STACKX) //true when the read cursor has caught up with the write cursor, i.e. no tokens are pending
121 fprintf(stderr,__VA_ARGS__); \
128 while (DE_LEN() > 0)//lex any directory entries in our stack
130 if (lexer_lexfile((uint8_t*)DE_POP()->d_name
) == 0)
131 FAIL("Lexer failed to tokenize [%s]\n",(*DE_STACKB
)->d_name
);
133 if (TK_EMPTY
) //if there are no tokens,
134 { TK_INIT(); //initialize the token stack back to 0
136 { case SCAN_ERROR
: //if an error occurred,
137 FAIL("Scanner error\n");
138 case 0: //if the the scanner finds no dirents,
139 goto done
; //then we are done
140 default: //if we found some elements to scan,
141 goto start
; //start over and lex them
154 This receiver takes a struct tok and pushes it to the FIFO stack.
//Stringifier. '#' does not macro-expand its operand, so a single-level
//stringifier turns $(TK_STACKSIZE) into the text "TK_STACKSIZE" instead of the
//configured number. Routing through a helper forces the argument to be
//expanded first, so the message reports the actual limit.
//The '$' macro name is kept for compatibility with existing uses; note that
//'$' in identifiers is a compiler extension rather than standard C.
#define LEXER_STRINGIFY_(X) #X        //stringify X exactly as written
#define $(X) LEXER_STRINGIFY_(X)      //expand X, then stringify the result
//Error text printed when more than TK_STACKSIZE tokens are pending at once.
#define ERR_TK "Fatal: Generated over " $(TK_STACKSIZE) " tokens in one pass."
162 { if (TK_LEN() >= TK_STACKSIZE
)
163 { fprintf(stderr
, ERR_TK
);
169 /* Lexical analysis of a file
170 Strips a filename to its base name, then sends it to lexer_lex
172 #define HIDDEN_WARNING "%s is hidden and will not be parsed!\n", filename
174 ( const uint8_t *filename
176 { static uint8_t fname
[FNAME_MAX
];
177 uint8_t *last_period
= NULL
, *iter
;
179 if (*filename
== '.')
180 { fprintf (stderr
, HIDDEN_WARNING
);
183 /* Copy the filename and remove its suffix */
184 u8_strncpy(fname
,filename
,FNAME_MAX
);
186 for (iter
= fname
; *iter
; iter
++) //find the last '.' char
189 if (last_period
) //if we found one,
190 *last_period
= 0; //truncate the string there
191 /* Register the current_filename */
192 current_filename
= filename
;
193 printf("lexer_lexfilename(%s)\n",fname
);
194 return lexer_lexfilename(fname
);
197 uint8_t const* lexer_get_current_filepath
199 { static uint8_t current_path
[FPATH_MAX
];
200 static uint8_t const* last_filename
;
201 if ((!last_filename
|| last_filename
!= current_filename
) &&
202 ((uint8_t*) realpath((char*)current_filename
, (char*)current_path
) != (uint8_t*) current_path
))
203 { perror("realpath: ");
206 return (const uint8_t*)current_path
;
209 /* Scan filename and push its tokens
211 int lexer_lexfilename
218 printf("|---- Begin lexerfilename on %s ----|\n", str
);
221 perror("Lexfilename:: str is NULL so fail\n");
223 /* Determine the filetype of str */
224 len
= u8_strlen(str
);
226 ntok
= lexer_lexstring(str
, len
);
228 /* Pass back filepath as end of statement operator */
229 filepath
= u8_strdup(lexer_get_current_filepath());
230 yylval
.str
= filepath
;
231 lexer_pushtok(NAME
, yylval
);
232 printf("Pushing filepath %s\n", filepath
);
235 printf("|---- Ending lexer_lexfilename on %s, %d tokens were lexed ----|\n", str
, ntok
);
239 /**************************/
240 /****Abandon All Hope******/
241 /**************************/
251 { int setname_len
, elename_len
, strlen
;
252 uint8_t* setname_end
, *elename_end
, *newstrt
;
253 uint8_t curr_setname
[MAX_SETNAME_LEN
] = {0};
254 uint8_t curr_elename
[MAX_ELENAME_LEN
] = {0};
258 SET_CURR_SETNAME(newstrt
);
259 SET_CURR_ELENAME(newstrt
);
261 { printf("Lexer_lexelemap:: previous file was mapfile*\n");
266 if(SETNAME_MATCHES())
270 printf("Lexer_lexelemap:: setname matches\n");
271 if(ELENAME_MATCHES())
278 UPDATE_PREV_ELENAME(newstrt
);
279 UPDATE_PREV_SETNAME(newstrt
);
281 return newstrt
- str
;
289 { int setname_len
, elename_len
;
290 uint8_t* setname_end
, *elename_end
, *newstrt
;
291 uint8_t curr_setname
[MAX_SETNAME_LEN
] = {0};
292 uint8_t curr_elename
[MAX_ELENAME_LEN
] = {0};
294 printf("Lexer_lexelemodel:: Begin str is %s\n", str
);
298 SET_CURR_SETNAME(newstrt
);
299 SET_CURR_ELENAME(newstrt
);
300 if(SETNAME_MATCHES())
301 { printf("Lexer_lexelemodel:: curr_setname(%s) matches prev_setname (%s)\n", curr_setname
, prev_setname
);
303 printf("Lexer_lexelemodel:: Deleted setname, newstrt is now %s\n", newstrt
);
306 if(ELENAME_MATCHES())
307 { printf("Lexer_lexelemodel:: elename matches\n");
313 UPDATE_PREV_ELENAME(newstrt
);
314 UPDATE_PREV_SETNAME(newstrt
);
316 return newstrt
- str
;
322 { int setname_len
, elename_len
;
323 uint8_t* setname_end
, *elename_end
, *newstrt
;
324 uint8_t curr_setname
[MAX_SETNAME_LEN
] = {0};
325 uint8_t curr_elename
[MAX_ELENAME_LEN
] = {0};
329 SET_CURR_SETNAME(newstrt
);
333 if( SETNAME_MATCHES())
338 UPDATE_PREV_SETNAME(newstrt
);
340 return newstrt
- str
;
346 { int setname_len
, elename_len
;
347 uint8_t* setname_end
, *elename_end
, *newstrt
;
348 uint8_t curr_setname
[MAX_SETNAME_LEN
] = {0};
349 uint8_t curr_elename
[MAX_ELENAME_LEN
] = {0};
353 SET_CURR_SETNAME(newstrt
);
354 if( SETNAME_MATCHES())
358 UPDATE_PREV_SETNAME(newstrt
);
360 return newstrt
- str
;
367 { int setname_len
, elename_len
;
368 uint8_t* setname_end
, *elename_end
, *newstrt
;
369 uint8_t curr_setname
[MAX_SETNAME_LEN
] = {0};
370 uint8_t curr_elename
[MAX_ELENAME_LEN
] = {0};
374 SET_CURR_SETNAME(newstrt
);
375 if( SETNAME_MATCHES())
377 if(REF((NEXT_TOK(newstrt
)))) //if NAME REF REF
379 UPDATE_PREV_SETNAME(newstrt
);
381 return newstrt
- str
;
388 { int setname_len
, elename_len
;
389 uint8_t* setname_end
, *elename_end
, *newstrt
;
390 uint8_t curr_setname
[MAX_SETNAME_LEN
] = {0};
391 uint8_t curr_elename
[MAX_ELENAME_LEN
] = {0};
395 SET_CURR_SETNAME(newstrt
);
396 SET_CURR_ELENAME(newstrt
);
397 if(SETNAME_MATCHES())
399 if(REF(NEXT_TOK(newstrt
))) //NAME REF REF, where is set_label
403 return newstrt
- str
;
409 { int setname_len
, elename_len
;
410 uint8_t* setname_end
, *elename_end
;
411 uint8_t curr_setname
[MAX_SETNAME_LEN
] = {0};
412 uint8_t curr_elename
[MAX_ELENAME_LEN
] = {0};
422 { int setname_len
, elename_len
;
423 uint8_t* setname_end
, *elename_end
, *newstrt
;
424 uint8_t curr_setname
[MAX_SETNAME_LEN
] = {0};
425 uint8_t curr_elename
[MAX_ELENAME_LEN
] = {0};
429 SET_CURR_SETNAME(newstrt
);
430 printf("prev_setname %s, curr_setname %s\n", prev_setname
, curr_setname
);
431 if(SETNAME_MATCHES())
437 return newstrt
- str
;
//Filename-token helpers. Tokens inside a filename are separated by '_'.
//Every macro argument is parenthesized so that expression arguments such as
//REF(p + 1) index the intended pointer instead of expanding to p + 1[0].

//REF(STR): true when the first byte of STR is an ASCII digit ('0'..'9').
//STR is evaluated twice, so do not pass an expression with side effects.
#define REF(STR) ((STR)[0] >= 0x30 && (STR)[0] <= 0x39)

//DEL_FTOK(STR): advance the lvalue STR to the byte after its first '_'.
//NOTE(review): u8_strchr presumably yields NULL when no '_' is present, in
//which case the "+ 1" produces an invalid pointer - confirm callers guarantee
//a separator.
#define DEL_FTOK(STR) ((STR) = u8_strchr((STR), '_') + 1)

//NEXT_TOK(STR): pointer to the byte after the first '_' in STR, leaving STR
//itself untouched. The same missing-separator caveat as DEL_FTOK applies.
#define NEXT_TOK(STR) (u8_strchr((STR), '_') + 1)
447 #define SET_CURR_SETNAME(STR) \
449 printf("Lexer_lexX:: setting curr_setname of str(%s)\n", STR); \
450 setname_end = u8_chr(STR, FNAME_MAX, '_'); \
451 setname_len = setname_end - str; \
452 u8_move(curr_setname, STR, setname_len); \
453 printf("Lexer_lexX:: curr_setname is now %s\n",curr_setname); \
455 #define SET_CURR_ELENAME(STR) \
457 printf("Lexer_lexX:: setting curr_elename of str(%s)\n", STR); \
458 setname_end = u8_chr(STR, FNAME_MAX, '_') + 1; \
459 if(REF(setname_end)) \
460 setname_end = u8_chr(setname_end, FNAME_MAX, '_') + 1; \
461 elename_end = u8_chr(setname_end, FNAME_MAX, '_'); \
462 elename_len = elename_end - setname_end; \
463 u8_move(curr_elename, setname_end, elename_len); \
464 printf("Lexer_lexX:: curr_elename is now %s\n", curr_elename); \
467 #define SETNAME_MATCHES() (u8_strcmp(curr_setname, prev_setname) == 0) //true when curr_setname (a local of the expanding function) compares equal to prev_setname
468 #define ELENAME_MATCHES() (u8_strcmp(curr_elename, prev_elename) == 0) //true when curr_elename (a local of the expanding function) compares equal to prev_elename
469 #define UPDATE_PREV_SETNAME(STR) \
471 printf("Lexer_lexX:: updating prev_setname from (%s)", prev_setname); \
472 u8_set(prev_setname , (ucs4_t) 0, MAX_SETNAME_LEN ); \
473 u8_move(prev_setname, curr_setname, setname_len); \
474 printf(" to %s\n", prev_setname); \
476 #define UPDATE_PREV_ELENAME(STR) \
478 u8_set(prev_elename , (ucs4_t) 0, MAX_ELENAME_LEN ); \
479 u8_move(prev_elename, curr_elename, elename_len); \
//PREV_MAPFILE(): true when the token 5 slots or 3 slots below the write
//cursor has type MOPEN. The whole expansion is parenthesized so it behaves as
//a single operand: previously !PREV_MAPFILE() negated only the first
//comparison, and "x && PREV_MAPFILE()" bound to the first comparison alone.
//NOTE(review): reads below TK_STACKX without checking how many tokens have
//been pushed - confirm callers only use this once at least 5 are present.
#define PREV_MAPFILE() (((TK_STACKX - 5)->tok_t == MOPEN) || ((TK_STACKX - 3)->tok_t == MOPEN))

//SET_MAPSTR(STR): reassign the lvalue STR to the first occurrence of map_key
//inside it, as returned by u8_strstr.
#define SET_MAPSTR(STR) ((STR) = u8_strstr((STR), map_key))
488 /* int lexer_lexmapfile */
489 /* #define INC_X() */
490 /* (int height, int width) */
494 /* /\* Give scanner_scanpixels a buffer and a len. Iterate through */
495 /* buf with buf[n]. If n == 0, do nothing. if n has a value, push x, */
496 /* push y, push (z = n << 24), push (ref_id = n >> 8) *\/ */
497 /* //scanner_scanpixels() */
499 /* for(i = 0; i < len; i++) */
500 /* if(buf[i] == 0) */
509 /* fname_bytes = (uint8_t*)(DE_POP()->d_name); */
510 /* printf("d_name is %s\n", fname_bytes); */
511 /* for (fnp = filename, i = 0; i < FNAME_MAX; i += unit_size, fnp++) */
512 /* { unit_size = u8_mblen(fname_bytes + i, min(4, FNAME_MAX - i)); */
513 /* if (u8_mbtouc(fnp, fname_bytes + i, unit_size) == -1) //add ucs4 char to the filename */
514 /* FAIL("Lexer failed to convert ^%s to unicode\n", (fname_bytes + i)); */
515 /* if (*fnp == 0) //added a terminating char */
518 /* if(u8_mbtouc(filename, DE_POP()->d_name, FNAME_MAXy) == -1) */
519 /* FAIL("Lexer failed to convert d_name into uint8_t\n"); */
520 /* ulc_fprintf(stdout, "filename is %11U\n c", filename); */