wip
[henge/apc.git] / src / scanner.c
1 /*!@file
2 \brief APC Directory Scanner
3 \details This hand-written parser/scanner traverses a directory tree and
4 tokenizes elements of the structure which correspond to APC grammar.
5 The parser is implemented as a 2D stack which populates a list of
6 child directories at each depth, handling only the leaf nodes
7 (regular files) of the directory open at the current depth to
8 conserve memory and speed up traversal.
9 The scanner works with the lexer to lexically analyze text, and
10 assumes the existence of an external 'lex' function
11 \author Jordan Lavatai
12 \date Aug 2016
13 ----------------------------------------------------------------------------*/
14 /* Standard */
15 #include <stdio.h> //print
16 #include <string.h> //strncmp
17 #include <errno.h> //errno
18 #include <ctype.h> //tolower
19 /* Posix */
20 #include <err.h> //warnx
21 #include <stdlib.h> //exit
22 #include <unistd.h> //chdir
23 #include <dirent.h> //opendir
24 #include <unistr.h> //unicode strings
25 /* Internal */
26 #include "parser.tab.h"
/* Public interface of the scanner */
int  scanner_init(void);
void scanner_quit(void);
int  scanner(void);
int  scanner_scanpixels(int*, int);

/* Private: lexer entry points (lexer.c) and the local dredge helper */
extern int  lexer_lexstring(const char*);
extern void lexer_pushtok(int, int);
static int  dredge_current_depth(void);

/* Mem */
extern struct dirent*  lexer_direntpa[];  //lexer.c: array of regular-file entries
extern struct dirent** lexer_direntpp;    //lexer.c: push pointer into the array above
/* NOTE(review): the extent declared here is 'Z' (90 in ASCII), but ROOTDIR
   indexes this array with 'd' (100 in ASCII) -- confirm the definition in
   apc.c is large enough for lowercase option letters. */
extern const char* cargs['Z'];            //SRC_DIR/bin/tools/apc.c

#define DL_STACKSIZE    64
#define DL_CD_STACKSIZE DL_STACKSIZE //square tree

/* One directory listing: the open stream for a directory plus a stack of the
   subdirectory entries found in it ('cds' is that stack's pointer). */
struct dirlist
{ DIR*            dirp;
  struct dirent*  child_directory_stack[DL_CD_STACKSIZE];
  struct dirent** cds;
};

static struct dirlist  directory_list_stack[DL_STACKSIZE + 1]; //+1 for the root dir
static struct dirlist* dls;

/* File currently being read by scanner_scanpixels, or NULL when none is open */
static FILE* current_open_file = NULL;
54
/* Directory Listing Stack
   FILO stack holding one open DIR* per directory depth during the treewalk.
   Depth is checked by the caller before a push (not on pop), which bounds
   the number of directory streams held open at once and limits how far the
   walk can follow nested or symbolically linked directories.

   Each element ('struct dirlist') holds the DIR* of that depth and a nested
   stack of the DIR-typed entries found in it (the "child directory" stack),
   which is populated by 'dredge_current_depth'.

   Pointer discipline:
     DL_PUSH     pre-increments  DL_STACKP, then stores the DIR*.
     DL_POP      yields the current DIR*, then post-decrements DL_STACKP.
     DL_CD_PUSH  stores at DL_CD_STACKP, then post-increments it.
     DL_CD_POP   pre-decrements DL_CD_STACKP, then yields that entry.
   As a result DL_STACKP always addresses the "current depth" element of
   'directory_list_stack' (so DL_CURDIR() is the directory being walked), the
   root lives at 'directory_list_stack'[0], and DL_LEN() is 0 at the root and
   -1 once the root itself has been popped. After a DL_CD_POP, DL_CD() and
   DL_CD_CURNAME() refer to the entry that was just popped.
*/
#define DL_STACK        (directory_list_stack)
#define DL_STACKP       (dls)
#define DL_CD_STACK     ((*DL_STACKP).child_directory_stack)
#define DL_CD_STACKP    ((*DL_STACKP).cds)
#define DL_CURDIR()     ((*DL_STACKP).dirp)
#define DL_LEN()        ((int)(DL_STACKP - DL_STACK))
#define DL_CD_LEN()     ((int)(DL_CD_STACKP - DL_CD_STACK))
#define DL_INIT()       (DL_STACKP = DL_STACK)
#define DL_CD_INIT()    (DL_CD_STACKP = DL_CD_STACK)
#define DL_POP()        ((*DL_STACKP--).dirp)
#define DL_CD()         (*DL_CD_STACKP)
#define DL_CD_CURNAME() (DL_CD()->d_name)
#define DL_CD_POP()     (*--DL_CD_STACKP)
#define DL_PUSH(D)      ((*++DL_STACKP).dirp = (D))   //argument parenthesized
#define DL_CD_PUSH(E)   (*DL_CD_STACKP++ = (E))       //argument parenthesized
84
85
86 /* Initializer
87 Initializer expects a function pointer to its lexical analysis function.
88 Sets up stack pointers and returns boolean true if 'opendir' encounters an
89 error, or if dredge_current_depth returns boolean true.
90 */
91 int scanner_init
92 #define CWDSTR "./"
93 #define ROOTDIR (cargs['d'] ? cargs['d'] : CWDSTR)
94 ()
95 { DL_INIT();
96 DL_STACK[0].dirp = opendir(ROOTDIR);
97 if (current_open_file != NULL)
98 { fclose(current_open_file);
99 current_open_file = NULL;
100 }
101 printf("Root dir %s\n",ROOTDIR);
102 return !chdir(ROOTDIR) && (DL_STACK[0].dirp == NULL || dredge_current_depth() == -1);
103 }
104
105 /* Quit */
106 void scanner_quit
107 ()
108 { if (DL_CURDIR())
109 closedir(DL_CURDIR());
110 }
111
112 /* Scanner
113 The main driver of the scanner will advance the current treewalk state and
114 tokenize tree-based push/pop operations. It will call 'lexer_lex' to
115 tokenize directory names prior to making a push operation. safe checking for
116 all returns from the filesystem handler will exit on serious system errors.
117
118 after pushing a new directory to the directory list, the scanner will dredge
119 the directory and alphabetically sort all file entries into the lexer's file
120 array, while placing all subdirectory entries in the current depth's child
121 directory stack to be scanned later.
122
123 Returns the number of tokens generated on success, -1 on error.
124 */
125 int scanner
126 #define $($)#$ //stringifier
127 #define ERR_CHILD "Fatal: Maximum of " $(DL_CD_STACKSIZE) " child " \
128 "directories exceeded for directory at depth %i\n",DL_LEN()
129 #define ERR_DEPTH "Fatal: Maximum directory depth of " $(DL_STACKSIZE) \
130 " exceeded during directory scan\n"
131 #define ERR_DL "Fatal: Directory List Stack Corruption %i\n", DL_LEN()
132 ()
133 { int ntok = 0;
134 scan:
135 if (DL_CD_LEN() >= DL_CD_STACKSIZE)//fail if maxchildren exceeded
136 { fprintf(stderr, ERR_CHILD);
137 goto fail;
138 }
139 if (DL_CD_LEN() > 0) //There are entities to process
140 { if (DL_CD_POP() == NULL) //If the dirent is null, then the
141 goto libfail; //lib function in dirent has failed
142 ntok += lexer_lexstring(DL_CD_CURNAME());//lex the directory name
143 if (DL_LEN() >= DL_STACKSIZE) //fail if maxdepth exceeded
144 { fprintf(stderr, ERR_DEPTH);
145 goto fail;
146 }
147 if (chdir(DL_CD_CURNAME())) //move into the new directory
148 goto libfail;
149 if (DL_CURDIR() == NULL) //open the cwd
150 goto libfail;
151 lexer_pushtok(CLOPEN, 0); //Push "Open Directory" token
152 ntok++;
153 return dredge_current_depth(); //Filter and sort the current depth
154 }
155 else if (DL_LEN() >= 0) //Any dirs left? (Including root)
156 { if (closedir(DL_POP())) //close the directory we just left
157 goto libfail;
158 if (DL_LEN() == -1) //If we just popped root,
159 goto done; //we're done
160 lexer_pushtok(CLCLOSE, 0); //Else push "Close Directory" token,
161 ntok++;
162 if (!chdir("..")) //move up a directory and
163 goto scan; //start over
164 }
165 fprintf(stderr, ERR_DL);
166 libfail:
167 perror("scanner: ");
168 fail:
169 return -1;
170 done:
171 return ntok;
172 }
173
174 /* Scan Pixels
175 Scans up to 'len' pixels from the current file into 'buf'.
176 Returns the number of pixels scanned from the file, or -1 on error
177 */
178 int scanner_scanpixels
179 ( int* buf,
180 int max_len
181 )
182 { static int /*col_len,*/ row_len = 0, row;
183 //Open the current file if not yet open
184 if (current_open_file == NULL)
185 { if ((current_open_file = fopen(DL_CD_CURNAME(),"rb")) == NULL)
186 { perror("fopen: ");
187 return -1;
188 }
189 //Verify file header, get row_len/col_len
190 //if (read_img_header(&row_len, &col_len))
191 //return -1;
192 row = 0;
193 }
194 //Read pixels into the buffer if there are rows left in the image
195 if (row++ < row_len)
196 //TODO: return read_img_pixels(buf, col_len);
197 printf("SCANPIXELS NOT IMPLEMENTED\n.");
198 //Close the file and return 0
199 fclose(current_open_file);
200 current_open_file = NULL;
201 return 0;
202 }
203
204 /* Directory Entity Sort and Filter (Dredge)
205 This filter removes all unhandled file types, and places any 'DT_DIR' type
206 files in the current Directory List's directory stack. Upon finishing,
207 the 'CE_STACK' is sorted alphabetically, and the current 'DL_CD_STACK' is
208 populated. Prints warnings for unhandled files.
209
210 Returns -1 if 'readdir' encounters an error, otherwise returns the number of
211 directory entries sent to the external 'lexer_direntpa' array.
212 */
213 typedef //so we can typecast dirent's 'alphasort()' to take const void*s
214 int (*qcomp)(const void*, const void*);
215 static inline
216 int dredge_current_depth
217 #define READDIR_ERROR (-1)
218 #define READDIR_DONE (0)
219 #define DPS_LEN() (lexer_direntpp - lexer_direntpa)
220 #define DPS_PUSH(E) (*lexer_direntpp++ = E)
221 ()
222 { DIR* cwd = DL_CURDIR();
223 struct dirent* direntp;
224 DL_CD_INIT();
225 scan_next:
226 errno = 0;
227 direntp = readdir(cwd);
228 if (errno)
229 return -1;
230 if (direntp != NULL)
231 { switch (direntp->d_type)
232 { case DT_REG:
233 DPS_PUSH(direntp);
234 goto scan_next;
235 case DT_DIR:
236 if (*(direntp->d_name) == '.') //skip hidden files and relative dirs
237 goto scan_next;
238 DL_CD_PUSH(direntp);
239 goto scan_next;
240 case DT_UNKNOWN:
241 warnx("unknown file %s: ignoring", direntp->d_name);
242 default:
243 goto scan_next;
244 }
245 }
246 qsort(lexer_direntpa, DPS_LEN(), sizeof direntp, (qcomp)alphasort);
247 return DPS_LEN();
248 }