Debugging text for lexer + directory spec CLI
[henge/webcc.git] / src / apc / scanner.c
/*!@file
  \brief   APC Directory Scanner
  \details This hand-written parser/scanner traverses a directory tree and
           tokenizes elements of the structure which correspond to APC grammar.
           The parser is implemented as a 2D stack which populates a list of
           child directories at each depth, handling only the leaf nodes
           (regular files) of the directory open at the current depth to
           conserve memory and speed up traversal.
           The scanner works with the lexer to lexically analyze text, and
           assumes the existence of the external 'lexer_lex' and
           'lexer_pushtok' functions.
  \author  Jordan Lavatai
  \date    Aug 2016
  ----------------------------------------------------------------------------*/
14 /* Standard */
15 #include <stdio.h> //print
16 #include <errno.h> //errno
17 /* Posix */
18 #include <err.h> //warnx
19 #include <stdlib.h> //exit
20 #include <unistd.h> //chdir
21 #include <dirent.h> //opendir
22
23 #include "parser.tab.h"
/* Public entry points defined in this file */
int scanner_init(void);
int scanner(void);

/* Private: compile-time stack limits (override with -D on the command line) */
#ifndef DL_STACKSIZE
#define DL_STACKSIZE     64
#endif
#ifndef DL_CD_STACKSIZE
#define DL_CD_STACKSIZE  DL_STACKSIZE //square tree
#endif

/* Private: forward declaration of the per-directory filter defined below */
static int dredge_current_depth(void);

/* Symbols provided by lexer.c */
extern int            lexer_lex(const char*);
extern void           lexer_pushtok(int, int);
extern struct dirent* lexer_direntpa[];

/* Command-line arguments, provided by SRC_DIR/bin/tools/apc.c.
   NOTE(review): this declaration gives the array 'Z' (90) elements, yet this
   file reads cargs['d'] and 'd' is 100 -- confirm the definition in apc.c is
   large enough for lowercase option letters. */
extern const char*    cargs['Z'];
44
45 struct dirlist
46 { DIR* dirp;
47 struct dirent* child_directory_stack[DL_CD_STACKSIZE],** cds;
48 } directory_list_stack[DL_STACKSIZE + 1],* dls; //+1 for the root dir
49
/* Directory Listing Stack
   FILO stack that keeps one open DIR* per directory depth during the treewalk.
   Each element also carries the stack of child directories found at that
   depth (populated by 'dredge_current_depth').

   These macros perform NO bounds checking themselves. Callers must compare
   DL_LEN() against DL_STACKSIZE before DL_PUSH, and DL_CD_LEN() against
   DL_CD_STACKSIZE before DL_CD_PUSH, so that the walk neither opens too many
   directories at once nor runs past the arrays.

   The outer stack pre-increments on DL_PUSH and post-decrements on DL_POP, so
   DL_STACKP always addresses the element for the current depth and
   DL_CURDIR() is always the directory being scanned. The root directory lives
   in 'directory_list_stack'[0] and is installed directly rather than pushed.
   After the root is popped DL_LEN() evaluates to -1, which the scanner uses as
   its "walk finished" state; nothing may be read through DL_STACKP after that
   until DL_INIT() is called again.

   The child stack is conventional: DL_CD_PUSH stores then advances,
   DL_CD_POP retreats then loads, and DL_CD_STACKP points one past the top.
*/
#define DL_STACK        (directory_list_stack)
#define DL_STACKP       (dls)
#define DL_CD_STACK     ((*DL_STACKP).child_directory_stack)
#define DL_CD_STACKP    ((*DL_STACKP).cds)
#define DL_CURDIR()     ((*DL_STACKP).dirp)
#define DL_LEN()        (DL_STACKP - DL_STACK)       //current depth, root is 0
#define DL_CD_LEN()     (DL_CD_STACKP - DL_CD_STACK) //children left at depth
#define DL_INIT()       (DL_STACKP = DL_STACK)
#define DL_CD_INIT()    (DL_CD_STACKP = DL_CD_STACK)
#define DL_POP()        ((*DL_STACKP--).dirp)        //yields the DIR* left
#define DL_CD_POP()     (*--DL_CD_STACKP)
#define DL_PUSH(D)      ((*++DL_STACKP).dirp = (D))  //argument parenthesized
#define DL_CD_PUSH(E)   (*DL_CD_STACKP++ = (E))      //argument parenthesized
77
78
79 /* Initializer
80 Initializer expects a function pointer to its lexical analysis function.
81 Sets up stack pointers and returns boolean true if 'opendir' encounters an
82 error, or if dredge_current_depth returns boolean true.
83 */
84 int scanner_init
85 #define CWDSTR "./"
86 #define ROOTDIR (cargs['d'] ? cargs['d'] : CWDSTR)
87 ()
88 { DL_INIT();
89 DL_STACK[0].dirp = opendir(ROOTDIR);
90 return !chdir(ROOTDIR) && (DL_STACK[0].dirp == NULL || dredge_current_depth() == 0);
91 }
92
93 /* Scanner
94 The main driver of the scanner will advance the current treewalk state and
95 tokenize tree-based push/pop operations. It will call 'lexer_lex' to
96 tokenize directory names prior to making a push operation. safe checking for
97 all returns from the filesystem handler will exit on serious system errors.
98
99 after pushing a new directory to the directory list, the scanner will dredge
100 the directory and alphabetically sort all file entries into the lexer's file
101 array, while placing all subdirectory entries in the current depth's child
102 directory stack to the scanned later.
103
104 Returns the number of elements added to the lexer's file array.
105 */
106 int scanner
107 #define $($)#$ //stringifier
108 #define ERR_CHILD "Fatal: Maximum of " $(DL_CD_STACKSIZE) \
109 " child directories exceeded for directory at depth %i\n" \
110 ,DL_LEN()
111 #define ERR_DEPTH "Fatal: Maximum directory depth of " $(DL_STACKSIZE) \
112 " exceeded during directory scan\n"
113 #define ERR_DL "Fatal: Directory List Stack Corruption %x\n", DL_LEN()
114 #define TOK_CLOPEN 0x55, 0 //TODO
115 #define TOK_CLCLOSE 0x56, 0 //TODO
116 ()
117 { struct dirent* direntp;
118 struct DIR* DIRp;
119 parse:
120 if (DL_CD_LEN() >= DL_CD_STACKSIZE)//fail if maxchildren exceeded
121 { fprintf(stderr, ERR_CHILD);
122 goto fail;
123 }
124 if (DL_CD_LEN() > 0) //There are entities to process at this depth
125 { if ((direntp = DL_CD_POP()) == NULL) //If the dirent is null, the library
126 goto libfail; //function in dirent has failed
127 lexer_lex(direntp->d_name); //lex the directory name
128 if (DL_LEN() >= DL_STACKSIZE) //fail if maxdepth exceeded
129 { fprintf(stderr, ERR_DEPTH);
130 goto fail;
131 }
132 if (chdir(direntp->d_name)) //move into the new directory
133 goto libfail;
134 DL_PUSH(opendir(CWDSTR));
135 if (DL_CURDIR() == NULL) //open the cwd
136 goto libfail;
137 lexer_pushtok(TOK_CLOPEN); //Push "Open Directory" token
138 return dredge_current_depth(); //Filter and sort the current depth
139 }
140 else if (DL_LEN() >= 0) //Any dirs left? (Including root)
141 { if (closedir(DL_POP())) //close the directory we just left
142 goto libfail;
143 lexer_pushtok(TOK_CLCLOSE); //Push "Close Directory" token
144 if (DL_LEN() == -1) //If we just popped root, we're done
145 return 0;
146 if (!chdir("..")) //Move up a directory and start over
147 goto parse;
148 }
149 fprintf(stderr, ERR_DL);
150 libfail:
151 perror("parsedir");
152 fail:
153 exit(EXIT_FAILURE);
154 }
155
156 /* Directory Entity Sort and Filter (Dredge)
157 This filter removes all unhandled file types, and places any 'DT_DIR' type
158 files in the current Directory List's directory stack. Upon finishing,
159 the 'CE_STACK' is sorted alphabetically, and the current 'DL_CD_STACK' is
160 populated. Prints warnings for unhandled files.
161
162 Returns -1 if 'readdir' encounters an error, otherwise returns the number of
163 directory entries sent to the external 'lexer_direntpa' array.
164 */
165 typedef
166 int (*qcomp)(const void*, const void*);
167 static inline
168 int dredge_current_depth
169 #define READDIR_ERROR (-1)
170 #define READDIR_DONE (0)
171 #define DPS_LEN() (direntpp - lexer_direntpa)
172 #define DPS_PUSH(E) (*direntpp++ = E)
173 ()
174 { struct dirent** direntpp = lexer_direntpa;
175 DIR* cwd = DL_CURDIR();
176 struct dirent* direntp;
177 DL_CD_INIT();
178 scan_next:
179 if ((direntp = readdir(cwd)) != NULL)
180 { switch (direntp->d_type)
181 { case DT_REG:
182 printf("String to tokenize %s\n", direntp->d_name);
183 DPS_PUSH(direntp);
184 goto scan_next;
185 case DT_DIR:
186 if (*(direntp->d_name) == '.') //skip hidden files and relative dirs
187 goto scan_next;
188 printf("Pushing child directory %s\n", direntp->d_name);
189 DL_CD_PUSH(direntp);
190 goto scan_next;
191 case DT_UNKNOWN:
192 warnx("unknown file %s: ignoring", direntp->d_name);
193 default:
194 goto scan_next;
195 }
196 }
197 if (errno)
198 return -1;
199 qsort(lexer_direntpa, DPS_LEN(), sizeof direntp, (qcomp)alphasort);
200 return DPS_LEN();
201 }
202