d9aa426e5f8b523dd4b8cd48de056f4cf7410e6d
[henge/apc.git] / src / scanner.c
1 /*!@file
2 \brief APC Directory Scanner
3 \details This hand-written parser/scanner traverses a directory tree and
4 tokenizes elements of the structure which correspond to APC grammar.
5 The parser is implemented as a 2D stack which populates a list of
6 child directories at each depth, handling only the leaf nodes
7 (regular files) of the directory open at the current depth to
8 conserve memory and speed up traversal.
9 The scanner works with the lexer to lexically analyze text, and
10 assumes the existence of an external 'lex' function
11 \author Jordan Lavatai
12 \date Aug 2016
13 ----------------------------------------------------------------------------*/
14 /* Standard */
15 #include <stdio.h> //print
16 #include <errno.h> //errno
17 /* Posix */
18 #include <err.h> //warnx
19 #include <stdlib.h> //exit
20 #include <unistd.h> //chdir
21 #include <dirent.h> //opendir
22 #include <unistr.h> //unicode strings
23 #include <string.h> //strlen
24 /* Internal */
25 #include "parser.tab.h"
26 /* Public: scanner entry points and the parser states they manage */
27 int scanner_init(void); //returns nonzero if either state could not be created
28 void scanner_quit(void); //deletes both states and resets them to NULL
29 int scanner_scanpath(char const*); //opens and enters the path, then scans it
30 int scanner_scandir(DIR*); //recursive scan of an open directory stream
31 yypstate* apc_pstate; //set from yypstate_new() in scanner_init
32 yycstate* apc_cstate; //set from yycstate_new() in scanner_init
33 /* Private: lexer entry points called from scanner_scandir */
34 extern //lexer.rl
35 int lexer_lexfile(uint8_t*); //called with the name of each regular file
36 extern //lexer.rl
37 int lexer_lexdir(uint8_t*); //called with the name of each subdirectory
38 extern //lexer.rl
39 void lexer_closedir(void); //called after returning from a subdirectory
40
41 /* Init
42 Establishes yy states
43 */
44 int scanner_init
45 ( void )
46 { if (apc_pstate != NULL || apc_cstate != NULL)
47 scanner_quit();
48 apc_pstate = yypstate_new();
49 apc_cstate = yycstate_new();
50 return (apc_pstate == NULL || apc_cstate == NULL);
51 }
52
53 /* Quit
54 Free initialized memory
55 */
56 void scanner_quit
57 ( void )
58 { yypstate_delete(apc_pstate);
59 yycstate_delete(apc_cstate);
60 apc_pstate = NULL;
61 apc_cstate = NULL;
62 }
63
/* Scan the provided path
   Opens a directory stream on the provided pathname, changes the working
   directory to it and, if both succeed, hands the stream to scanner_scandir.
   Returns -1 if the path cannot be opened or entered; otherwise returns
   whatever scanner_scandir returns.
*/
int scanner_scanpath
( char const* pathname )
{
  DIR* dirp;

  //opendir signals failure by returning NULL; errno is unspecified after a
  //successful call, so it must not be used to second-guess a valid stream
  dirp = opendir(pathname);
  if (dirp == NULL)
    {
      fprintf(stderr, "Path %s could not be accessed\n", pathname);
      return -1;
    }

  if (chdir(pathname))
    {
      closedir(dirp); //the stream is ours until it is handed off; do not leak it
      return -1;
    }

  return scanner_scandir(dirp);
}
80
81 /* Scan directory stream
82 Recursively scans the provided directory, sending CLOPEN and CLCLOSE tokens
83 to the parser when entering new directories (classes)
84 */
85 int scanner_scandir
86 ( DIR* dirp )
87 { DIR* cdirp;
88 struct dirent* direntp;
89 scan_next_dirent:
90 errno = 0;
91 direntp = readdir(dirp);
92 if (errno)
93 goto libfail;
94 if (direntp != NULL)
95 { if (*(direntp->d_name) == '.') //skip hidden or relative files
96 goto scan_next_dirent;
97 switch (direntp->d_type)
98 { case DT_REG:
99 lexer_lexfile((uint8_t*)direntp->d_name);
100 goto scan_next_dirent;
101 case DT_DIR:
102 lexer_lexdir((uint8_t*)direntp->d_name); //lex the dirname
103 if (chdir(direntp->d_name)) //change to the specified dir
104 goto libfail;
105 errno = 0;
106 if ((cdirp = opendir(".")) == NULL || errno) //open it
107 goto libfail;
108 if(scanner_scandir(cdirp)) //scan the directory
109 goto libfail;
110 if (chdir("..")) //return to the parent dir
111 goto libfail;
112 lexer_closedir(); //push "Close Directory" token
113 goto scan_next_dirent; //continue scan
114 case DT_UNKNOWN:
115 warnx("unknown file %s: ignoring", direntp->d_name);
116 default:
117 goto scan_next_dirent;
118 }
119 }
120 return closedir(dirp);
121 libfail:
122 perror("scanner_scandir");
123 return -1;
124 }