Merge branch 'master' of hack_attack:the_march
[henge/webcc.git] / src / apc / scanner.c
1 /*!@file
2 \brief APC Directory Scanner
3 \details This hand-written parser/scanner traverses a directory tree and
4 tokenizes elements of the structure which correspond to APC grammar.
5 The parser is implemented as a 2D stack which populates a list of
6 child directories at each depth, handling only the leaf nodes
7 (regular files) of the directory open at the current depth to
8 conserve memory and speed up traversal.
9 The scanner works with the lexer to lexically analyze text, and
10 assumes the existence of an external 'lex' function
11 \author Jordan Lavatai
12 \date Aug 2016
13 ----------------------------------------------------------------------------*/
14 /* Standard */
15 #include <stdio.h> //print
16 #include <errno.h> //errno
17 /* Posix */
18 #include <err.h> //warnx
19 #include <stdlib.h> //exit
20 #include <unistd.h> //chdir
21 #include <dirent.h> //opendir
22 /* Public */
23 int scanner_init(void);
24 int scanner(void);
25 /* Private */
26 #ifndef DL_STACKSIZE
27 #define DL_STACKSIZE 64
28 #endif
29 #ifndef DL_CD_STACKSIZE
30 #define DL_CD_STACKSIZE DL_STACKSIZE //square tree
31 #endif
32 extern //lexer.c
33 int lexer_lex(const char*);
34 extern //lexer.c
35 void lexer_pushtok(int, int);
36 static
37 int dredge_current_depth(void);
38 extern //lexer.c
39 struct dirent* lexer_direntpa[];
40 extern //main.c
41 const char* cargs['Z'];
42
43 struct dirlist
44 { DIR* dirp;
45 struct dirent* child_directory_stack[DL_CD_STACKSIZE],** cds;
46 } directory_list_stack[DL_STACKSIZE + 1],* dls; //+1 for the root dir
47
48 /* Directory Listing Stack
49 FILO Stack for keeping an open DIR* at each directory depth for treewalk.
50 This stack is depth-safe, checking its depth during push operations, but not
51 during pop operations, to ensure the thread doesn't open too many files at
52 once (512 in c runtime), or traverse too far through symbolic links.
53 A directory listing includes a DIR* and all DIR-typed entity in the directory
54 as recognized by dirent, populated externally (and optionally).
55 This stack behaves abnormally by incrementing its PUSH operation prior to
56 evaluation, and the POP operations after evaluation. This behavior allows
57 the 'DL_CURDEPTH' operation to map to the current element in the 'dl_stack'
58 array, and it is always treated as the "current depth". This also allows us
59 to init the root directory to 'directory_list_stack'[0] and pop it in a safe
60 and explicit manner.
61 */
62 #define DL_STACK (directory_list_stack)
63 #define DL_STACKP (dls)
64 #define DL_CD_STACK ((*DL_STACKP).child_directory_stack)
65 #define DL_CD_STACKP ((*DL_STACKP).cds)
66 #define DL_CURDIR() ((*DL_STACKP).dirp)
67 #define DL_LEN() (DL_STACKP - DL_STACK)
68 #define DL_CD_LEN() (DL_CD_STACKP - DL_CD_STACK)
69 #define DL_INIT() (DL_STACKP = DL_STACK)
70 #define DL_CD_INIT() (DL_CD_STACKP = DL_CD_STACK)
71 #define DL_POP() ((*DL_STACKP--).dirp)
72 #define DL_CD_POP() (*--DL_CD_STACKP)
73 #define DL_PUSH(D) ((*++DL_STACKP).dirp = D)
74 #define DL_CD_PUSH(E) (*DL_CD_STACKP++ = E)
75
76
77 /* Initializer
78 Initializer expects a function pointer to its lexical analysis function.
79 Sets up stack pointers and returns boolean true if 'opendir' encounters an
80 error, or if dredge_current_depth returns boolean true.
81 */
82 int scanner_init
83 #define CWDSTR "./"
84 #define ROOTDIR (cargs['r'] ? cargs['r'] : CWDSTR)
85 ()
86 { DL_INIT();
87 DL_STACK[0].dirp = opendir(ROOTDIR);
88 return DL_STACK[0].dirp == NULL || dredge_current_depth() == 0;
89 }
90
91 /* Scanner
92 The main driver of the scanner will advance the current treewalk state and
93 tokenize tree-based push/pop operations. It will call 'lexer_lex' to
94 tokenize directory names prior to making a push operation. safe checking for
95 all returns from the filesystem handler will exit on serious system errors.
96
97 after pushing a new directory to the directory list, the scanner will dredge
98 the directory and alphabetically sort all file entries into the lexer's file
99 array, while placing all subdirectory entries in the current depth's child
100 directory stack to the scanned later.
101
102 Returns the number of elements added to the lexer's file array.
103 */
104 int scanner
105 #define S(S)#S //stringifier
106 #define ERR_CHILD "Fatal: Maximum of " S(DL_CD_STACKSIZE) \
107 " child directories exceeded for directory at depth %i\n" \
108 ,DL_LEN()
109 #define ERR_DEPTH "Fatal: Maximum directory depth of " S(DL_STACKSIZE) \
110 " exceeded during directory scan\n"
111 #define ERR_DL "Fatal: Directory List Stack Corruption %x\n", DL_LEN()
112 #define TOK_CLOPEN 0x55, 0 //TODO
113 #define TOK_CLCLOSE 0x56, 0 //TODO
114 ()
115 { struct dirent* direntp;
116 parse:
117 if (DL_CD_LEN() >= DL_CD_STACKSIZE)//fail if maxchildren exceeded
118 { fprintf(stderr, ERR_CHILD);
119 goto fail;
120 }
121 if (DL_CD_LEN() > 0) //There are entities to process at this depth
122 { if (direntp = DL_CD_POP()) //If the dirent is null, the library
123 goto libfail; //function in dirent has failed
124 lexer_lex(direntp->d_name); //lex the directory name
125 if (DL_LEN() >= DL_STACKSIZE) //fail if maxdepth exceeded
126 { fprintf(stderr, ERR_DEPTH);
127 goto fail;
128 }
129 if (chdir(direntp->d_name)) //move into the new directory
130 goto libfail;
131 if (DL_PUSH(opendir(CWDSTR))) //open the cwd
132 goto libfail;
133 lexer_pushtok(TOK_CLOPEN); //Push "Open Directory" token
134 return dredge_current_depth(); //Filter and sort the current depth
135 }
136 else if (DL_LEN() >= 0) //Any dirs left? (Including root)
137 { if (closedir(DL_POP())) //close the directory we just left
138 goto libfail;
139 lexer_pushtok(TOK_CLCLOSE); //Push "Close Directory" token
140 if (DL_LEN() == -1) //If we just popped root, we're done
141 return 0;
142 if (!chdir("..")) //Move up a directory and start over
143 goto parse;
144 }
145 fprintf(stderr, ERR_DL);
146 libfail:
147 perror("parsedir");
148 fail:
149 exit(EXIT_FAILURE);
150 }
151
152 /* Directory Entity Sort and Filter (Dredge)
153 This filter removes all unhandled file types, and places any 'DT_DIR' type
154 files in the current Directory List's directory stack. Upon finishing,
155 the 'CE_STACK' is sorted alphabetically, and the current 'DL_CD_STACK' is
156 populated. Prints warnings for unhandled files.
157
158 Returns -1 if 'readdir' encounters an error, otherwise returns the number of
159 directory entries sent to the external 'lexer_direntpa' array.
160 */
161 typedef
162 int (*qcomp)(const void*, const void*);
163 static inline
164 int dredge_current_depth
165 #define READDIR_ERROR (-1)
166 #define READDIR_DONE (0)
167 #define DPS_LEN() (direntpp - lexer_direntpa)
168 #define DPS_PUSH(E) (*direntpp++ = E)
169 ()
170 { struct dirent** direntpp = lexer_direntpa;
171 DIR* cwd = DL_CURDIR();
172 struct dirent* direntp;
173 DL_CD_INIT();
174 scan_next:
175 if ((direntp = readdir(cwd)) != NULL)
176 { switch (direntp->d_type)
177 { case DT_REG:
178 DPS_PUSH(direntp);
179 goto scan_next;
180 case DT_DIR:
181 DL_CD_PUSH(direntp);
182 goto scan_next;
183 case DT_UNKNOWN:
184 warnx("unknown file %s: ignoring", direntp->d_name);
185 default:
186 goto scan_next;
187 }
188 }
189 if (errno)
190 return -1;
191 qsort(lexer_direntpa, DPS_LEN(), sizeof direntp, (qcomp)alphasort);
192 return DPS_LEN();
193 }
194