07f18ce113effebd6c4414eee8d5bc83fa329f25
[henge/webcc.git] / src / apc / scanner.c
1 /*!@file
2 \brief APC Directory Scanner
3 \details This hand-written parser/scanner traverses a directory tree and
4 tokenizes elements of the structure which correspond to APC grammar.
5 The parser is implemented as a 2D stack which populates a list of
6 child directories at each depth, handling only the leaf nodes
7 (regular files) of the directory open at the current depth to
8 conserve memory and speed up traversal.
9 The scanner works with the lexer to lexically analyze text, and
10 assumes the existence of an external 'lex' function
11 \author Jordan Lavatai
12 \date Aug 2016
13 ----------------------------------------------------------------------------*/
14 /* Standard */
15 #include <stdio.h> //print
16 #include <string.h> //strncmp
17 #include <errno.h> //errno
18 #include <ctype.h> //tolower
19 /* Posix */
20 #include <err.h> //warnx
21 #include <stdlib.h> //exit
22 #include <unistd.h> //chdir
23 #include <dirent.h> //opendir
24 #include <unistr.h> //unicode strings
25 /* Internal */
26 #include "parser.tab.h"
27 /* Public */
28 int scanner_init(void);
29 void scanner_quit(void);
30 int scanner(void);
31 int scanner_scanpixels(int*,int);
32 /* Private */
33 extern //lexer.c
34 int lexer_lexstring(const ucs4_t*);
35 extern //lexer.c
36 void lexer_pushtok(int, int);
37 static
38 int dredge_current_depth(void);
39 /* Mem */
40 extern //lexer.c
41 struct dirent* lexer_direntpa[], **lexer_direntpp;
42 extern //SRC_DIR/bin/tools/apc.c
43 const char* cargs['Z'];
44 #ifndef DL_STACKSIZE
45 #define DL_STACKSIZE 64
46 #endif
47 #ifndef DL_CD_STACKSIZE
48 #define DL_CD_STACKSIZE DL_STACKSIZE //square tree
49 #endif
50 static
51 struct dirlist
52 { DIR* dirp;
53 struct dirent* child_directory_stack[DL_CD_STACKSIZE],** cds;
54 } directory_list_stack[DL_STACKSIZE + 1],* dls; //+1 for the root dir
55 static
56 FILE* current_open_file = NULL;
57
/* Directory Listing Stack
   FILO stack keeping an open DIR* at each directory depth for the treewalk.
   Each element also carries its own stack of child directory entries (the
   "CD" stack), populated while the directory is dredged.

   The macros themselves perform no bounds checking in either direction:
   callers must compare DL_LEN()/DL_CD_LEN() against DL_STACKSIZE and
   DL_CD_STACKSIZE before pushing, so the process neither opens too many
   directories at once nor walks arbitrarily deep through symbolic links.

   DL_PUSH increments the stack pointer BEFORE storing and DL_POP decrements
   it AFTER reading, so the stack pointer always addresses the current depth:
   DL_CURDIR() is the directory being scanned and DL_LEN() is its depth
   (0 for the root stored in 'directory_list_stack'[0]). Popping the root
   leaves DL_LEN() == -1, which is how callers detect the end of the walk.

   The child stack uses the opposite convention: DL_CD_PUSH stores and then
   increments, DL_CD_POP decrements and then reads, so after a DL_CD_POP the
   popped entry is still reachable through DL_CD() / DL_CD_CURNAME().

   DL_LEN() and DL_CD_LEN() are cast to int so they can be handed directly to
   printf-style "%i"/"%x" conversions (a raw pointer difference is ptrdiff_t).
*/
#define DL_STACK        (directory_list_stack)
#define DL_STACKP       (dls)
#define DL_CD_STACK     ((*DL_STACKP).child_directory_stack)
#define DL_CD_STACKP    ((*DL_STACKP).cds)
#define DL_CURDIR()     ((*DL_STACKP).dirp)
#define DL_LEN()        ((int)(DL_STACKP - DL_STACK))
#define DL_CD_LEN()     ((int)(DL_CD_STACKP - DL_CD_STACK))
#define DL_INIT()       (DL_STACKP = DL_STACK)
#define DL_CD_INIT()    (DL_CD_STACKP = DL_CD_STACK)
#define DL_POP()        ((*DL_STACKP--).dirp)
#define DL_CD()         (*DL_CD_STACKP)
#define DL_CD_CURNAME() (DL_CD()->d_name)
#define DL_CD_POP()     (*--DL_CD_STACKP)
#define DL_PUSH(D)      ((*++DL_STACKP).dirp = (D))
#define DL_CD_PUSH(E)   (*DL_CD_STACKP++ = (E))
87
88
89 /* Initializer
90 Initializer expects a function pointer to its lexical analysis function.
91 Sets up stack pointers and returns boolean true if 'opendir' encounters an
92 error, or if dredge_current_depth returns boolean true.
93 */
94 int scanner_init
95 #define CWDSTR "./"
96 #define ROOTDIR (cargs['d'] ? cargs['d'] : CWDSTR)
97 ()
98 { DL_INIT();
99 DL_STACK[0].dirp = opendir(ROOTDIR);
100 if (current_open_file != NULL)
101 { fclose(current_open_file);
102 current_open_file = NULL;
103 }
104 printf("Root dir %s\n",ROOTDIR);
105 return !chdir(ROOTDIR) && (DL_STACK[0].dirp == NULL || dredge_current_depth() == -1);
106 }
107
108 /* Quit */
109 void scanner_quit
110 ()
111 { if (DL_CURDIR())
112 closedir(DL_CURDIR());
113 }
114
115 /* Scanner
116 The main driver of the scanner will advance the current treewalk state and
117 tokenize tree-based push/pop operations. It will call 'lexer_lex' to
118 tokenize directory names prior to making a push operation. safe checking for
119 all returns from the filesystem handler will exit on serious system errors.
120
121 after pushing a new directory to the directory list, the scanner will dredge
122 the directory and alphabetically sort all file entries into the lexer's file
123 array, while placing all subdirectory entries in the current depth's child
124 directory stack to be scanned later.
125
126 Returns the number of tokens generated on success, -1 on error.
127 */
128 int scanner
129 #define $($)#$ //stringifier
130 #ifdef _DIRENT_HAVE_D_NAMLEN
131 #define MAX_DNAME _D_ALLOC_NAMLEN(DL_CD())
132 #else
133 #define MAX_DNAME 1024
134 #endif
135 #define ERR_CHILD "Fatal: Maximum of " $(DL_CD_STACKSIZE) \
136 " child directories exceeded for directory at depth %i\n" \
137 ,DL_LEN()
138 #define ERR_DEPTH "Fatal: Maximum directory depth of " $(DL_STACKSIZE) \
139 " exceeded during directory scan\n"
140 #define ERR_DL "Fatal: Directory List Stack Corruption %x\n", DL_LEN()
141 ()
142 { static ucs4_t uc_dname[MAX_DNAME] = {0};
143 int ntok = 0;
144 scan:
145 if (DL_CD_LEN() >= DL_CD_STACKSIZE)//fail if maxchildren exceeded
146 { fprintf(stderr, ERR_CHILD);
147 goto fail;
148 }
149 if (DL_CD_LEN() > 0) //There are entities to process
150 { if (DL_CD_POP() == NULL) //If the dirent is null, then the
151 goto libfail; //lib function in dirent has failed
152 if (u8_mbtouc(uc_dname, DL_CD_CURNAME(), MAX_DNAME) < 0) //convert to ucs4
153 goto libfail;
154 ntok += lexer_lexstring(uc_dname); //lex the directory name
155 if (DL_LEN() >= DL_STACKSIZE) //fail if maxdepth exceeded
156 { fprintf(stderr, ERR_DEPTH);
157 goto fail;
158 }
159 if (chdir(DL_CD_CURNAME())) //move into the new directory
160 goto libfail;
161 DL_PUSH(opendir(CWDSTR));
162 if (DL_CURDIR() == NULL) //open the cwd
163 goto libfail;
164 lexer_pushtok(CLOPEN, 0); //Push "Open Directory" token
165 ntok++;
166 return dredge_current_depth(); //Filter and sort the current depth
167 }
168 else if (DL_LEN() >= 0) //Any dirs left? (Including root)
169 { if (closedir(DL_POP())) //close the directory we just left
170 goto libfail;
171 if (DL_LEN() == -1) //If we just popped root,
172 goto done; //we're done
173 lexer_pushtok(CLCLOSE, 0); //Else push "Close Directory" token,
174 ntok++;
175 if (!chdir("..")) //move up a directory and
176 goto scan; //start over
177 }
178 fprintf(stderr, ERR_DL);
179 libfail:
180 perror("scanner: ");
181 fail:
182 return -1;
183 done:
184 return ntok;
185 }
186
187 /* Scan Pixels
188 Scans up to 'len' pixels from the current file into 'buf'.
189 Returns the number of pixels scanned from the file, or -1 on error
190 */
191 int scanner_scanpixels
192 ( int* buf,
193 int max_len
194 )
195 { static int col_len, row_len, row;
196 //Open the current file if not yet open
197 if (current_open_file == NULL)
198 { if ((current_open_file = fopen(DL_CD_CURNAME(),"rb")) == NULL)
199 { perror("fopen: ");
200 return -1;
201 }
202 //Verify file header, get row_len/col_len
203 if (read_img_header(&row_len, &col_len))
204 return -1;
205 row = 0;
206 }
207 //Read pixels into the buffer if there are rows left in the image
208 if (row++ < row_len)
209 //TODO: return read_img_pixels(buf, col_len);
210 printf("SCANPIXELS NOT IMPLEMENTED\n.");
211 //Close the file and return 0
212 fclose(current_open_file);
213 current_open_file = NULL;
214 return 0;
215 }
216
217 /* Directory Entity Sort and Filter (Dredge)
218 This filter removes all unhandled file types, and places any 'DT_DIR' type
219 files in the current Directory List's directory stack. Upon finishing,
220 the 'CE_STACK' is sorted alphabetically, and the current 'DL_CD_STACK' is
221 populated. Prints warnings for unhandled files.
222
223 Returns -1 if 'readdir' encounters an error, otherwise returns the number of
224 directory entries sent to the external 'lexer_direntpa' array.
225 */
226 typedef //so we can typecast dirent's 'alphasort()' to take const void*s
227 int (*qcomp)(const void*, const void*);
228 static inline
229 int dredge_current_depth
230 #define READDIR_ERROR (-1)
231 #define READDIR_DONE (0)
232 #define DPS_LEN() (lexer_direntpp - lexer_direntpa)
233 #define DPS_PUSH(E) (*lexer_direntpp++ = E)
234 ()
235 { struct dirent** direntpp = lexer_direntpa;
236 DIR* cwd = DL_CURDIR();
237 struct dirent* direntp;
238 DL_CD_INIT();
239 scan_next:
240 if ((direntp = readdir(cwd)) != NULL)
241 { switch (direntp->d_type)
242 { case DT_REG:
243 DPS_PUSH(direntp);
244 goto scan_next;
245 case DT_DIR:
246 if (*(direntp->d_name) == '.') //skip hidden files and relative dirs
247 goto scan_next;
248 DL_CD_PUSH(direntp);
249 goto scan_next;
250 case DT_UNKNOWN:
251 warnx("unknown file %s: ignoring", direntp->d_name);
252 default:
253 goto scan_next;
254 }
255 }
256 if (errno)
257 return -1;
258 qsort(lexer_direntpa, DPS_LEN(), sizeof direntp, (qcomp)alphasort);
259 return DPS_LEN();
260 }