a60de185e8b405998089bf7ebed2bb5a129d7e19
[henge/webcc.git] / src / apc / scanner.c
1 /*!@file
2 \brief APC Directory Scanner
3 \details This hand-written parser/scanner traverses a directory tree and
4 tokenizes elements of the structure which correspond to APC grammar.
5 The parser is implemented as a 2D stack which populates a list of
6 child directories at each depth, handling only the leaf nodes
7 (regular files) of the directory open at the current depth to
8 conserve memory and speed up traversal.
9 The scanner works with the lexer to lexically analyze text, and
10 assumes the existence of an external 'lex' function
11 \author Jordan Lavatai
12 \date Aug 2016
13 ----------------------------------------------------------------------------*/
14 /* Standard */
15 #include <stdio.h> //print
16 #include <errno.h> //errno
17 /* Posix */
18 #include <err.h> //warnx
19 #include <stdlib.h> //exit
20 #include <unistd.h> //chdir
21 #include <dirent.h> //opendir
22
23 #include "parser.tab.h"
24 /* Public */
25 int scanner_init(void);
26 void scanner_quit(void);
27 int scanner(void);
28 /* Private */
29 #ifndef DL_STACKSIZE
30 #define DL_STACKSIZE 64
31 #endif
32 #ifndef DL_CD_STACKSIZE
33 #define DL_CD_STACKSIZE DL_STACKSIZE //square tree
34 #endif
35 extern //lexer.c
36 int lexer_lex(const char*);
37 extern //lexer.c
38 void lexer_pushtok(int, int);
39 static
40 int dredge_current_depth(void);
41 extern //lexer.c
42 struct dirent* lexer_direntpa[], **lexer_direntpp;
43 extern //SRC_DIR/bin/tools/apc.c
44 const char* cargs['Z'];
45
46 struct dirlist
47 { DIR* dirp;
48 struct dirent* child_directory_stack[DL_CD_STACKSIZE],** cds;
49 } directory_list_stack[DL_STACKSIZE + 1],* dls; //+1 for the root dir
50
51 /* Directory Listing Stack
52 FILO Stack for keeping an open DIR* at each directory depth for treewalk.
53 This stack is depth-safe, checking its depth during push operations, but not
54 during pop operations, to ensure the thread doesn't open too many files at
55 once (512 in c runtime), or traverse too far through symbolic links.
56 A directory listing includes a DIR* and all DIR-typed entity in the directory
57 as recognized by dirent, populated externally (and optionally).
58 This stack behaves abnormally by incrementing its PUSH operation prior to
59 evaluation, and the POP operations after evaluation. This behavior allows
60 the 'DL_CURDEPTH' operation to map to the current element in the 'dl_stack'
61 array, and it is always treated as the "current depth". This also allows us
62 to init the root directory to 'directory_list_stack'[0] and pop it in a safe
63 and explicit manner.
64 */
65 #define DL_STACK (directory_list_stack)
66 #define DL_STACKP (dls)
67 #define DL_CD_STACK ((*DL_STACKP).child_directory_stack)
68 #define DL_CD_STACKP ((*DL_STACKP).cds)
69 #define DL_CURDIR() ((*DL_STACKP).dirp)
70 #define DL_LEN() (DL_STACKP - DL_STACK)
71 #define DL_CD_LEN() (DL_CD_STACKP - DL_CD_STACK)
72 #define DL_INIT() (DL_STACKP = DL_STACK)
73 #define DL_CD_INIT() (DL_CD_STACKP = DL_CD_STACK)
74 #define DL_POP() ((*DL_STACKP--).dirp)
75 #define DL_CD_POP() (*--DL_CD_STACKP)
76 #define DL_PUSH(D) ((*++DL_STACKP).dirp = D)
77 #define DL_CD_PUSH(E) (*DL_CD_STACKP++ = E)
78
79
80 /* Initializer
81 Initializer expects a function pointer to its lexical analysis function.
82 Sets up stack pointers and returns boolean true if 'opendir' encounters an
83 error, or if dredge_current_depth returns boolean true.
84 */
85 int scanner_init
86 #define CWDSTR "./"
87 #define ROOTDIR (cargs['d'] ? cargs['d'] : CWDSTR)
88 ()
89 { DL_INIT();
90 DL_STACK[0].dirp = opendir(ROOTDIR);
91 printf("Root dir %s\n",ROOTDIR);
92 return !chdir(ROOTDIR) && (DL_STACK[0].dirp == NULL || dredge_current_depth() == -1);
93 }
94
95 /* Quit */
96 void scanner_quit
97 ()
98 { if (DL_CURDIR())
99 closedir(DL_CURDIR());
100 }
101
102 /* Scanner
103 The main driver of the scanner will advance the current treewalk state and
104 tokenize tree-based push/pop operations. It will call 'lexer_lex' to
105 tokenize directory names prior to making a push operation. safe checking for
106 all returns from the filesystem handler will exit on serious system errors.
107
108 after pushing a new directory to the directory list, the scanner will dredge
109 the directory and alphabetically sort all file entries into the lexer's file
110 array, while placing all subdirectory entries in the current depth's child
111 directory stack to be scanned later.
112
113 Returns the number of tokens generated on success, -1 on error.
114 */
115 int scanner
116 #define $($)#$ //stringifier
117 #define ERR_CHILD "Fatal: Maximum of " $(DL_CD_STACKSIZE) \
118 " child directories exceeded for directory at depth %i\n" \
119 ,DL_LEN()
120 #define ERR_DEPTH "Fatal: Maximum directory depth of " $(DL_STACKSIZE) \
121 " exceeded during directory scan\n"
122 #define ERR_DL "Fatal: Directory List Stack Corruption %x\n", DL_LEN()
123 ()
124 { struct dirent* direntp;
125 struct DIR* DIRp;
126 int ntok = 0;
127 scan:
128 if (DL_CD_LEN() >= DL_CD_STACKSIZE)//fail if maxchildren exceeded
129 { fprintf(stderr, ERR_CHILD);
130 goto fail;
131 }
132 if (DL_CD_LEN() > 0) //There are entities to process
133 { if ((direntp = DL_CD_POP()) == NULL)//If the dirent is null, the library
134 goto libfail; //function in dirent has failed
135 ntok += lexer_lex(direntp->d_name); //lex the directory name
136 if (DL_LEN() >= DL_STACKSIZE) //fail if maxdepth exceeded
137 { fprintf(stderr, ERR_DEPTH);
138 goto fail;
139 }
140 if (chdir(direntp->d_name)) //move into the new directory
141 goto libfail;
142 DL_PUSH(opendir(CWDSTR));
143 if (DL_CURDIR() == NULL) //open the cwd
144 goto libfail;
145 lexer_pushtok(CLOPEN, 0); //Push "Open Directory" token
146 ntok++;
147 return dredge_current_depth(); //Filter and sort the current depth
148 }
149 else if (DL_LEN() >= 0) //Any dirs left? (Including root)
150 { if (closedir(DL_POP())) //close the directory we just left
151 goto libfail;
152 if (DL_LEN() == -1) //If we just popped root,
153 goto done; //we're done
154 lexer_pushtok(CLCLOSE, 0); //Else push "Close Directory" token,
155 ntok++;
156 if (!chdir("..")) //move up a directory and
157 goto scan; //start over
158 }
159 fprintf(stderr, ERR_DL);
160 libfail:
161 perror("parsedir");
162 fail:
163 return -1;
164 done:
165 return ntok;
166 }
167
168 /* Directory Entity Sort and Filter (Dredge)
169 This filter removes all unhandled file types, and places any 'DT_DIR' type
170 files in the current Directory List's directory stack. Upon finishing,
171 the 'CE_STACK' is sorted alphabetically, and the current 'DL_CD_STACK' is
172 populated. Prints warnings for unhandled files.
173
174 Returns -1 if 'readdir' encounters an error, otherwise returns the number of
175 directory entries sent to the external 'lexer_direntpa' array.
176 */
177 typedef //so we can typecast dirent's 'alphasort()' to take const void*s
178 int (*qcomp)(const void*, const void*);
179 static inline
180 int dredge_current_depth
181 #define READDIR_ERROR (-1)
182 #define READDIR_DONE (0)
183 #define DPS_LEN() (lexer_direntpp - lexer_direntpa)
184 #define DPS_PUSH(E) (*lexer_direntpp++ = E)
185 ()
186 { struct dirent** direntpp = lexer_direntpa;
187 DIR* cwd = DL_CURDIR();
188 struct dirent* direntp;
189 DL_CD_INIT();
190 scan_next:
191 if ((direntp = readdir(cwd)) != NULL)
192 { switch (direntp->d_type)
193 { case DT_REG:
194 DPS_PUSH(direntp);
195 goto scan_next;
196 case DT_DIR:
197 if (*(direntp->d_name) == '.') //skip hidden files and relative dirs
198 goto scan_next;
199 DL_CD_PUSH(direntp);
200 goto scan_next;
201 case DT_UNKNOWN:
202 warnx("unknown file %s: ignoring", direntp->d_name);
203 default:
204 goto scan_next;
205 }
206 }
207 if (errno)
208 return -1;
209 qsort(lexer_direntpa, DPS_LEN(), sizeof direntp, (qcomp)alphasort);
210 return DPS_LEN();
211 }
212
213