/*!@file
\brief lexical analyzer implementation for APC
\details The lexer manages two FIFO stacks. One for maintaining tokens, the
         other for maintaining a list of files to be scanned. During
         execution, the lexer will return a token from its token queue if any
         are present. If not, the lexer will pop an element from its
         file queue to 'scanner' to be tokenized. If the file queue is empty,
         the lexer will instead call 'parsedir' to traverse the directory tree
         and tokenize the results. If 'parsedir' does not generate any new
         tokens, we are done.
\author Jordan Lavatai
\date Aug 2016
----------------------------------------------------------------------------*/
#include <stdio.h>
#include <stdlib.h>
#include <dirent.h>
/* Local */
//#include "parser.tab.h" //NOTE(review): re-enable once the parser is generated
#define S(X) #X //stringizer, for splicing macro constants into error messages
#ifndef DP_STACKSIZE
#define DP_STACKSIZE 1024
#endif
#ifndef TK_STACKSIZE
#define TK_STACKSIZE 1024
#endif
/* Public */
/* A token is an (lval, tok) pair; the int-view pop order is lval then tok. */
struct tok
{ int lval;
  int tok;
};
int lexer_init(void);
int lexer(void);
inline
void lexer_pushtok(struct tok);
int yylval; //normally declared by parser.tab.h; placeholder while it is disabled
struct dirent* lexer_direntpa[DP_STACKSIZE];
+/* Private */
static inline
int scan(void);
static inline
-int qcomp(const void*, const void*);
-
-/* Directory Listing Stack
- FILO Stack for keeping an open DIR* at each directory depth for treewalk.
- This stack is depth-safe, checking its depth during push operations, but not
- during pop operations, to ensure the thread doesn't open too many files at
- once (512 in c runtime), or traverse too far through symbolic links.
- A directory listing includes a DIR* and all DIR-typed entity in the directory
- as recognized by dirent, populated externally (and optionally).
- This stack behaves abnormally by incrementing its PUSH operation prior to
- evaluation, and the POP operations after evaluation. This behavior allows
- the 'DL_CURDEPTH' operation to map to the current element in the 'dl_stack'
- array, and it is always treated as the "current depth". This also allows us
- to init the root directory to 'directory_list_stack'[0] and pop it in a safe
- and explicit manner.
-*/
-#define S(S)#S
-#ifndef DL_DEPTHMAX
-#define DL_DEPTHMAX 64
-#endif
-#ifndef DL_CHILDMAX
-#define DL_CHILDMAX DL_DEPTHMAX //square tree
-#endif
-#define DL_STACK (directory_list_stack)
-#define DL_STACKP (dls)
-#define DL_STACKSIZE (DL_DEPTHMAX + 1) //+1 because push increments early
-#define DL_LEN (DL_STACKP - DL_STACK)
-#define DL_CURDEPTH (DL_LEN)
-#define DL_CD_STACK ((*DL_STACKP).child_entity_stack)
-#define DL_CD_STACKP ((*DL_STACKP).cds)
-#define DL_CD_STACKSIZE (DL_DEPTHMAX) //square tree
-#define DL_CD_LEN (DL_CD_STACKP - DL_CD_STACK)
-#define ERR_CHILD "Fatal: Maximum of " S(MAX_CHILD) \
- " child directories exceeded for directory %s at depth %i\n" \
- ,DL_STACK[DL_DEPTH()].label,DL_DEPTH()
-#define ERR_DEPTH "Fatal: Maximum directory depth of " S(DL_DEPTHMAX) \
- " exceeded during directory scan\n"
-#define DL_INIT() (DL_STACKP = DL_STACK)
-#define DL_CD_INIT() (DL_CD_STACKP = DL_CD_STACK)
-#define DL_POP() ((DIR*)(*DL_STACKP--))
-#define DL_CD_POP() (*--DL_CD_STACKP)
-#define DL_PUSH(D) (*(DIR**)(++DL_STACKP) = DIRP)
-#define DL_CD_PUSH(E) (*DL_CD_STACKP++ = E)
-#define DL_SAFEPUSH(D) \
- do { \
- if (DL_DEPTH() >= DL_DEPTHMAX) \
- { fprintf(stderr, ERR_DEPTH); \
- exit(EXIT_FAILURE); \
- } \
- if ((*(DIR**)(++DL_STACKP) = D) == NULL) \
- { perror("DL_SAFEPUSH"); \
- exit(EXIT_FAILURE); \
- } \
- } while(0)
-#define DL_CD_POPULATE() \
- do { \
- DL_CD_INIT(); \
- while ((*DL_CD_STACKP = readdir(*DL_STACKP)) != NULL) \
- { switch((*DL_CD_STACKP)->d_type) \
- { case DT_REG: \
- case DT_DIR: \
- DL_CD_STACKP++; \
- break; \
- default: \
- } \
- } \
- qsort(DL_CD_STACKP, DL_CD_LEN, sizeof *DL_CD_STACKP, qcomp); \
- } while (0)
-
+int dredge_current_depth(void);
+static
+struct dirent** dps;
static
-struct dirlist
-{ DIR* dirp;
- struct dirent** child_directory_stack[DL_CD_STACKSIZE],*** cds;
- const char* label;
-} directory_list_stack[DL_STACKSIZE],* dls;
+struct tok token_stack[TK_STACKSIZE];
static
+union tokp
+{ int* i;
+ struct tok* t;
+} tks, tkx;
/* Directory Entity Array/Stack
   Simple array for keeping track of dirents yet to be processed by the scanner.
   If this list is empty and there are no tokens, the lexer is done.
*/
#define DP_STACK (lexer_direntpa)
#define DP_STACKP (dps)
#define DP_LEN() (DP_STACKP - DP_STACK)
#define DP_INIT() (DP_STACKP = DP_STACK)
#define DP_POP() (*--DP_STACKP)
/* Token Stack
   This is a FIFO queue whose heads are a union of either a pointer to a
   'struct tok' or a pointer to int -- the int view must be popped twice
   in a sequence (lval first, then tok)!
   TK_LEN() is the number of tokens queued but not yet consumed.
*/
#define TK_STACK (token_stack)
#define TK_STACKP (tks.t)
#define TK_STACKPI (tks.i)
#define TK_STACKX (tkx.t)
#define TK_STACKXI (tkx.i)
#define TK_LEN() (TK_STACKX - TK_STACKP)
#define TK_INIT() (TK_STACKP = TK_STACKX = TK_STACK)
#define TK_POP() (*TK_STACKP++)
#define TK_IPOP() (*TK_STACKPI++)
#define TK_PUSH(T) (*TK_STACKX++ = (T))
#define TK_IPUSH(I) (*TK_STACKXI++ = (I))
extern //main.c
const char* cargs['Z'];

extern //scanner.c
int scanner_init(void);
extern //scanner.c
int scanner(struct dirent**);
/* Initializer
   Resets the token FIFO and the dirent stack, then delegates to the scanner's
   initializer.  Returns nonzero (true) if an error occurs, which may be
   handled with standard errno.
*/
int lexer_init
()
{ TK_INIT();
  DP_INIT();
  return scanner_init();
}
/* Lexer
printing is skipped. In all other cases, 'yylval' is set, and the token's
integer representation is returned.
*/
-int lexer()
-{ if (TOK_BUF_EMPTY)
- { TOK_BUF_INIT();
- switch (lexer_scan)
- { case SCANDIR_ERROR:
+int lexer
+#define SCAN_ERROR -1
+#define TK_EMPTY (TK_STACKP == TK_STACKX)
+()
+{ if (TK_EMPTY)
+ { switch (parsedir())
+ { case SCAN_ERROR:
perror("lexer_scan");
case 0:
yylval = 0;
return 0;
default:
+ break;
}
}
- yylval = TOK_BUF_IPOP();
- return TOK_BUF_IPOP();
+ yylval = TK_IPOP();
+ return TK_IPOP();
}
-
-static inline
-int lexer_scan()
-{ static DIR*
-}
-
-/* Scanner
- Scans a filename from its alphabetically ordered list of file elements
- and tokenizes the result. If the file list is empty, then the stack of
- directory elements will be popped and processed as they are encountered.
-
- Returns the number of tokens generated.
+/* Token Receiver
+ This receiver takes a struct tok and pushes it to the FIFO stack.
*/
-#define MAX_ENTITIES 256
-static
-int lexer_scan()
-{ static struct dirent* entity;
- static struct dirent* files[MAX_ENTITIES] = {0};
- static int num_files = 0;
-#define NO_FILES (num_files == 0)
-
- if (NO_FILES)
-
-
- //sort out files and directories, grow directories from bottom up
- while ((entity = readdir(dirp)) != NULL)
- { switch (entity->d_type)
- { case DT_LNK:
- case DT_REG:
- files[num_files++] = entity;
- break;
- case DT_DIR:
- *(dirs - num_dirs++) = entity;
- break;
- case DT_UNKNOWN:
- default:
- printf("Ignoring unknown file: %s\n", entity->d_name);
- break;
- }
+inline
+void lexer_pushtok
+#define ERR_TK "Fatal: Generated over " S(TK_STACKSIZE) " tokens in one pass."
+( struct tok token )
+{ if (TK_LEN >= TK_STACKSIZE)
+ { fprintf(stderr, ERR_TK);
+ exit(EXIT_FAILURE);
}
- if (errno)
- perror("readdir");
- qsort(&files[0], num_files, sizeof struct dirent*, qalpha);
- num_ents = scandirat(dirfd, ".", &namelist, scanfilter, scancompar);
- if (num_ents < 0)
- { perror("scandirat");
- return -1;
- }
- //process files
-
- //recurse into directories
- return tbx - tbp;
+ TK_PUSH(token);
}
-/* Quicksort comparison function
- sort each dirent encountered first by its file type (regular files first)
- and then by their alphabetical ordering.
-*/
-static
-int qcompar
-( const void* a,
- const void* b
- )
-{
+/* Lexical Analysis
+ Ragel state machine for tokenizing text.
+ */
+void lexer_lex
+(const char* str)
+{ struct tok token;
+ token.TOK = 1;
+ token.LVAL = 2;
+ lexer_pushtok(token);
+ printf (str);
}
+/* init_file:
+ if (lsp != NULL)
+ while ((c = *lsp++) == *csp)
+ { switch (c)
+ { case DELIM:
          delimiters_skipped++;
+ default:
+ csp++; //delayed to ensure csp is the start of scannable text
+ break;
+ }
+ }
+ last_string = string;
+ scan_text:
+ return scanner_tokenize(csp);
+*/