prototype IR and parser

[henge/webcc.git] / src / apc / ir.h
diff --git a/src/apc/ir.h b/src/apc/ir.h

index ef54476..d8a7ec3 100644 (file)
--- a/src/apc/ir.h
+++ b/src/apc/ir.h
@@ -1,97 +1,392 @@
-/* Structures allocated for and updated during parse time that
-   are the IR before writing to the output file */
+/*!@file
+  \brief   Intermediate Representation (IR) between Directory Structure and Engine Grammar
+  \details The IR serves as a storage structure that is populated during the
+           parsing of the input directory structure. After parsing is complete,
+           the IR will be condensed (removed of excess allocated space) and then
+           output as the Engine Grammar. In this file we describe the semantic actions
+           that are called at each step, and the memory buffers that they populate.
+           See parser.y for the description on how the input grammar is constructed,
+           and where/when semantic actions are called.
+           TODO: or just write it here.
+  \author  Jordan Lavatai
+  \date    Aug 2016
+  ----------------------------------------------------------------------------*/
+
+
+#include <stdint.h>
+//#include <apc/mem.h>TODO:
  
  #define BUF_SIZE 256
-#define MAX_SUBCLASSES 16
  #define MAX_SETS 256
  #define MAX_ELES 256
-#define MAX_REFS 256
+#define MAX_QUADS 256
  #define MAX_MODELS 256
+#define MAX_POSTS 256
+#define MAX_CLASS_DEPTH 256
+#define MAX_CLASSES 256
+#define MAX_FRAMES 256
+/*  All bufs hold pointers to their respective structs. When a buf is full     */
+/*  (number of struct pointers >= max number of struct pointers),              */
+/*  we need to allocate more pointers for that buf. Allocate these             */
+/*  pointers a page at a time (1024 = page bytes (4096) / bytes per pointer (4)) */
+/*  TODO: Account for different page sizes on different systems                */
+#define PTRS_IN_PAGE 1024
+
+/*  General: All information from the directory structure is stored in        */
+/*  five buffers that comprise the IR: cdat_buf, odat_buf, vdat_buf, ref_buf  */
+/*  and link_buf. Each buf corresponds to the data structure that it stores.  */
+/*  The storage technique for all bufs (except cdat) is the same. Each buf's member first */
+/*  populates its struct and then allocates the space for the next member     */
+/*  and increments the buf index. This means that we have to allocate the     */
+/*  very first member of each buf at ir_init(), so that we don't segfault     */
+/*  as the first member attempts to access memory that its previous member    */
+/*  didn't allocate (because it doesnt exist). We access the buf members      */
+/*  through standard array indexing but conceal the tediousness of array      */
+/*  indexing with macros. E.g. without macros, accessing an element's name    */
+/*  member would look like (split up to not go over line char limit):         */
+/*  (*cdat_stackp)->set_list[(*cdat_stackp)->num_sets] */
+/*     .ele_list[(*cdat_stackp)->set_list[(*cdat_stackp)->num_sets].num_ele].name */
+
+/* For cdats in cdat_buf, we allocate the memory for a cdat once a cdat
+   is recognized in the grammar. Cdat_buf is different from the other bufs
+   because cdats have a root cdat that all cdats are a subclass of. This root
+   cdat can have a set_list like other cdats.                                */
  
  
  
-struct ref {
-  int x, y, z, objref;
-};
+
+/*  Elements: Ele stands for element and has two representations in the IR.   */
+/*  In the cdat_buf eles store their name, cdat_idx (their classes index in   */
+/*  the cdat_buf) and the ref_id (refer to ref ). In the odat_buf, eles store */
+/*  their object data (odat). At output time, the ref_id is dereferenced to   */
+/*  determine the elements odat which is the data that the engine expects     */
+/*  from an element.                                                          */
  
  struct ele {
-  int odat_id;
-  int parent_id;//offset into class set_stack
+  char name[32];
+  uint64_t ref_id;
+  int cdat_idx;
  };
  
+/*  Sets: The set is similar to the ele, but it contains a list of its    */
+/*  elements. The set is populated at parse time AFTER the elements are   */
+/*  populated, due to the nature of bottom up parsing.                    */
+
  struct set {
-  int odat_id;
-  int parent_id;//offset into CB
+  char name[32];
+  uint64_t ref_id;
+  int cdat_idx;
    int num_ele;
-  int ele_index; //same as num_ele?
    struct ele ele_list[MAX_ELES];
  };
  
+/*  Cdats: A cdat is a class data structure. Cdats serve as the central       */
+/*  data types of the IR. At output, the cdat_buf is iterated through and     */
+/*  each is written to the output file. For each cdat, sets and element       */
+/*  ref_ids must be dereferenced to determine the odat information. Cdats     */
+/*  contain pointers to their subclasses so that the relationship between     */
+/*  classes can be determined, but the subclasses are not represented inside  */
+/*  of the cdat itself but rather in the subsequent cdats in cdat_buf. We     */
+/*  can determine the number of subclasses (the last index into cdat_buf      */
+/*  that represents a subclass of some arbitrary cdat) each cdat has by       */
+/*  incrementing num_classes during parse time.                               */
+/*  TODO: Should classes point to their parent class?                         */
  
-//8 ids for each direction
-//fdat_id ordered by alphabetical direction
-struct model {
-  char label[32];
-  int fdat_id[8];
+struct cdat {
+  char name[32];
+  int idx;
+  int num_classes;
+  int num_sets;
+  struct cdat* class_list[MAX_CLASSES];
+  struct set set_list[MAX_SETS];
  };
  
-struct vdat {
-  char label[32];
-  int num_models;
-  int msi; //model_stack_index
-  struct model model_list[MAX_MODELS];
+/* There are an unknown amount of cdats at compile time, so we maintain    */
+/*   a cdat_buf of cdat pointers that can be expanded as needed.           */
+struct cdat* cdat_buf[PTRS_IN_PAGE];
+int num_cdats = 0;
+int curr_max_cdats = PTRS_IN_PAGE;
+
+/* The cdat_stack is a stack of cdat pointers, the top of which is
+   the cdat that is currently being parsed. Whenever a new cdat is recognized
+   by the grammar (CLOPEN), a cdat is pushed onto the cdat_stack, and we refer
+   to this cdat through the macro CURR_CDAT. By keeping a cdat_stack, we have
+   access to the current cdat so that the elements and sets can populate themselves
+   in the cdat accordingly. */
+
+struct cdat* cdat_stack[PTRS_IN_PAGE];
+struct cdat** cdat_stackp;
+
+/* Refs: Each set/ele has a reference to its object data (odat) through a ref_id.
+   Ref_ids are unsigned 64-bit integers that map to the hex values RGBA. During
+   the construction of the directory structure, users can choose a RGBA value for
+   each object that any other object can refer to via links (see link). If a user
+   does not choose an RGBA value, then the object is given one from the system space.
+   We maintain a doubly linked list of refs in the ref_buf at parse time so that
+   links can be resolved after the parsing of the directory structure is complete.
+   For every 16th ref, we create a post so that we can reduce on the search time for
+   a random access. */
+
+struct ref {
+  int type;
+  struct ref* nextref;
+  struct ref* lastref;
+  struct odat* odatp;
+  uint64_t ref_id; //0xFFFFFF->digit
  };
  
-struct cdat {
-  char label[32];
-  int num_subclasses;
-  int num_sets;
-  int subclass_index;
-  int set_index;
-  struct cdat* subclass_list[MAX_SUBCLASSES];
-  struct set set_list[MAX_SETS];
+/* Like the cdat_buf, ref_buf stores pointers to refs and can
+   increase in size */
+struct ref* ref_buf[PTRS_IN_PAGE];
+int num_refs = 0;
+int curr_max_refs = PTRS_IN_PAGE;
+uint64_t ss_ref_id = 0x00FFFFFF; /* system space for ref_ids */
+
+
+/* posts for ref_buf */
+struct ref posts[MAX_POSTS];
+int num_posts;
+
+/* Links: At parse time, a set/ele can include a link in their
+   grammar representation instead of the actual data and this signifies
+   to the APC that that set/ele wishes to use the data of another
+   set/ele, either its video data (vdat) or object data (odat). The link
+   itself contains the type of link it is, the ref_id OR name, and
+   which set/ele created the link. During parse time, links can be made
+   to o/vdats that have yet to be parsed. In order to accommodate this,
+   we resolve all links AFTER parse time by iterating through the link_buf,
+   finding the ref_id that was stored for some object (if the ref_id exists),
+   and creating a relative pointer from the original object to the data that
+   was linked */
+
+/* Svlink stands for short vlink, which is a link to a vdat
+   TODO: difference between svlink and vlink */
+
+struct svlink {
+  uint64_t ref_id;
+};
+
+/* A vlink is what it sounds like, a link to a vdat
+ TODO: model link? */
+struct vlink {
+  uint64_t ref_id;
+  char anim_name[32];
+};
+
+/* Olinks are links to odats */
+struct olink {
+  uint64_t ref_id;
+};
+
+union link_t {
+  struct olink olink;
+  struct vlink vlink;
+  struct svlink svlink;
+};
+
+struct link {
+  int type; //1 = olink, 2 = vlink, 3 = svlink
+  union link_t link_t;
+  int cdat_idx;
+  int set_idx;
+  int ele_idx;
+};
+/* link_buf contains all the links that
+   we encountered during parse time that need
+   to be resolved to an offset at output time.
+   This does not include quad refs, because
+   those are already known to need to be resolved */
+struct link* link_buf[PTRS_IN_PAGE];
+int num_links = 0;
+int curr_max_links = PTRS_IN_PAGE;
+
+
+/* Odats: Odats consist of the object data necessary for
+   each object. Odats are sometimes referred to as archetypes
+   at compile-time, in order to distinguish the difference from
+   a runtime object and a compile-time object.
+   TODO: Need more info about objects at runtime, to describe
+         the reasoning behind odat structure at compile-time */
+
+/* Each set has a quad_list or a list of quads. The quad_list
+   is the ? */
+struct quad {
+  int x, y, z;
+  uint64_t ref_id; //rgba
+};
+
+struct root {
+  int x, y, z;
  };
  
-//Element or a set
  struct odat {
-  char label[32];
+  char name[32];
    int vdat_id;
-  int class_id;
-  int num_ref;
-  int ref_index;
-  struct ref ref_list[MAX_REFS];
+  int cdat_idx;
+  int hitbox;
+  struct root root;
+  struct ref* refp; /* pointer to it's ref on ref_list */
+  int num_quads;
+  struct quad quad_list[MAX_QUADS];
+};
+
+/* Populated and allocated same way as other bufs */
+struct odat* odat_buf[PTRS_IN_PAGE];
+int curr_max_odats = PTRS_IN_PAGE;
+int num_odats = 0;
+
+/* A framesheet is a grouping of animation frames in
+   a single direction (N,W,S,E) */
+struct framesheet {
+  int width;
+  int height;
+  int num_frames;
+  void* frames[MAX_FRAMES];
  };
  
+/* A model is a collection of framesheets for every
+   direction (N,W,S,E,NW,NE,SW,SE)*/
+/* NAMED spritesheet */
+struct model {
+  char name[32];
+  struct framesheet spritesheet[8]; //one for each
+};
+
+/* Vdat: Vdats are the video data of each object. They can not be
+   created as a stand alone object (because they consist solely
+   of animation information and not the skeleton on which the
+   animation manipulates). Vdats have a list of models for every
+   animation that the vdats odat can do for that vdat*/
+struct vdat {
+  struct odat* creator; //pointer to odat that made this vdat
+  int num_models;
+  struct model model_list[MAX_MODELS];
+};
  
-struct cdat cdat_buf[BUF_SIZE];
-struct odat odat_buf[BUF_SIZE];
-struct vdat vdat_buf[BUF_SIZE];
  
-//indexes for buffers
-int cbi = 0;
-int vbi = 0;
-int obi = 0;
+struct vdat* vdat_buf[PTRS_IN_PAGE];
+int curr_max_vdats = PTRS_IN_PAGE;
+int num_vdats = 0;
  
+/* The initialization function of the IR. Mallocs the
+   first c/v/odat and the first links and refs and
+   inits the cdat_stack */
  void
-insert_set(void);
+ir_init(void);
+
+/* mallocs memory for a new cdat. If the cdat_buf
+   is full, mallocs another 1024 cdat pointers. */
+void
+malloc_cdat(void);
  
+/* Called after the cdat open operator has been recognized in grammar. Allocates
+   the space for a cdat on the cdat_buf, pushes that pointer onto
+   the cdat_stack */
  void
-insert_ref(int, int, int, int);
+push_cdat(char*);
  
+/* Called after a cdat end operator has been recognized in grammar. Sets
+   top stack cdat ** to null and decrements stack pointer */
  void
-inc_cbi(void);
+pop_cdat(void);
  
+/* Called after an odat has been populated. Allocates memory for
+   the next odat. */
  void
-set_class_label(char*);
+inc_odat(void);
  
+/* Called after a vdat has been populated. Allocates memory for
+   the next vdat. */
  void
-inc_subclass_index(void);
+inc_vdat(void);
  
  void
-inc_subclass_index(void);
+inc_link(void);
  
  void
  inc_ref(void);
  
+/* Called in the reduction of a set. While both odats (eles and sets)
+   have identical label terminals, we are unable to give a single grammatical rule
+   for both due to how we allocate odats in the odat buf. Due to the
+   nature of bottom up parsing, all the elements will be inserted into the
+   odat_buf first, and then the set that contains these elements is inserted. Since
+   the set's label comes before the element list in the grammar, we would be giving an element
+   a set label in its respective odat, which would then be replaced by the
+   elements label. Instead, we store the label in the sets representation inside
+   CURR_CDAT and after we are done parsing the element_list and know that the CURR_ODAT
+   is the set, we populate the sets label members in CURR_ODAT with the values we stored
+   previously in CURR_CDAT. */
+void
+insert_set_label(char*, uint64_t);
+
+/* Populate the sets representation in CURR_CDAT with a ref_id and insert a link
+   into the link_buf that will resolve the ref_id to an actual odat after parse time. */
  void
-inc_models(void);
+insert_set_olink(uint64_t);
+
+/* Put the vlink in the link_buf to be processed after parsetime */
+void
+insert_set_vlink(uint64_t, char*);
+
+/* Put svlink in the link_buf to be processed after parsetime */
+void
+insert_set_svlink(uint64_t);
+
+/* Called for every set reduction except for sets with olinks. Populates the
+   set data structures in the CDAT and in the ODAT. Uses the name and ref_id
+   from insert_set_label. Also inserts a ref into the ref_buf with the CURR_ODAT
+   pointer so that we can also resolve the odat from its ref_id. */
+void
+insert_set(void);
+
+/* Insertion of eles is practically equivalent to how sets are inserted because both
+   share the same data type (ODAT). Like sets, eles have links, labels
+   and odats. Eles have the added notion of a parent set, and so must be inserted
+   into said parent set, but this is the only place they truly differ from sets. */
+
+void
+insert_ele_label(char*, uint64_t);
+
+void
+insert_ele_olink(uint64_t);
+
+void
+insert_ele_vlink(uint64_t, char*);
+
+void
+insert_ele_svlink(uint64_t);
+
+void
+insert_ele(void);
+
+/* Created as a separate function, instead of setting the ODAT's vdat_id and
+   calling inc_vdat() inside of insert_set(), to account for the set reduction
+   where a vdat is not created (o/v/svlinks). Because insert_set/ele is always
+   called before insert_vdat, and thus increments the CURR_ODAT to be the next
+   ODAT to be populated, insert_vdat() targets the last ODAT that was populated,
+   via PREV_ODAT. */
+void
+insert_vdat(void);
+
+/* Inserts the hitbox into the CURR_ODAT */
+void
+insert_hitbox(int);
+
+/* Inserts the root into the CURR_ODAT */
+void
+insert_root(int, int, int);
+
+/* Inserts a quad into the CURR_ODAT */
+void
+insert_quad(int, int, int, uint64_t);
+
+void
+insert_model(void);
+
+void
+insert_framesheet(char, char*, uint64_t, int, int, int);
+
+void
+insert_frame_pointer(char, void*);
+