struct-vis/main.c


								//

								// @TODO

								// long-term goals:

								//      - support C++!

								//      - support Rust with repr(C) structs to the extent that it is possible!

								//      - support Golang to the extent that it is possible!

								//      - support Odin?!

								//

								// short-term:

								//      - bitfields!

								//      - robustness! change [64] byte name fields to pointers!

								//      - find all files in folder of a given type!

								//


								#define STB_C_LEXER_DEFINITIONS


								#define STB_C_LEX_0_IS_EOF             Y  // if Y, ends parsing at '\0'; if N, returns '\0' as token

								#define STB_C_LEX_USE_STDLIB           Y  // use strtod,strtol for parsing #s; otherwise inaccurate hack

								#define STB_C_LEX_DOLLAR_IDENTIFIER    N  // allow $ as an identifier character


								#define STB_C_LEX_DEFINE_ALL_TOKEN_NAMES  Y   // if Y, all CLEX_ token names are defined, even if never returned

								                                              // leaving it as N should help you catch config bugs


								#define STB_C_LEX_DISCARD_PREPROCESSOR    Y   // discard C-preprocessor directives (e.g. after preprocessing,

								                                              // a file can still have #line, #pragma, etc.)


								#define STB_C_LEX_MULTILINE_DSTRINGS   N  // allow newlines in double-quoted strings

								#define STB_C_LEX_MULTILINE_SSTRINGS   N  // allow newlines in single-quoted strings

								#define STB_C_LEX_FLOAT_NO_DECIMAL     N  // allow floats that have no decimal point if they have an exponent


								#define STB_C_LEX_C_IDENTIFIERS     Y   //  "[_a-zA-Z][_a-zA-Z0-9]*"               CLEX_id

								#define STB_C_LEX_C_COMMENTS        Y   //  "/* comment */"

								#define STB_C_LEX_CPP_COMMENTS      Y   //  "// comment to end of line\n"


								#define STB_C_LEX_INTEGERS_AS_DOUBLES  N  // parses integers as doubles so they can be larger than 'int', but only if STB_C_LEX_STDLIB==N

								#define STB_C_LEX_C_DECIMAL_INTS    N   //  "0|[1-9][0-9]*"                        CLEX_intlit

								#define STB_C_LEX_C_HEX_INTS        N   //  "0x[0-9a-fA-F]+"                       CLEX_intlit

								#define STB_C_LEX_C_OCTAL_INTS      N   //  "[0-7]+"                               CLEX_intlit

								#define STB_C_LEX_C_DECIMAL_FLOATS  N   //  "[0-9]*(.[0-9]*([eE][-+]?[0-9]+)?)     CLEX_floatlit

								#define STB_C_LEX_C99_HEX_FLOATS    N   //  "0x{hex}+(.{hex}*)?[pP][-+]?{hex}+     CLEX_floatlit

								#define STB_C_LEX_C_DQ_STRINGS      N   //  double-quote-delimited strings with escapes  CLEX_dqstring

								#define STB_C_LEX_C_SQ_STRINGS      N   //  single-quote-delimited strings with escapes  CLEX_ssstring

								#define STB_C_LEX_C_CHARS           N   //  single-quote-delimited character with escape CLEX_charlits

								#define STB_C_LEX_C_COMPARISONS     N   //  "==" CLEX_eq  "!=" CLEX_noteq   "<=" CLEX_lesseq  ">=" CLEX_greatereq

								#define STB_C_LEX_C_LOGICAL         N   //  "&&"  CLEX_andand   "||"  CLEX_oror

								#define STB_C_LEX_C_SHIFTS          N   //  "<<"  CLEX_shl      ">>"  CLEX_shr

								#define STB_C_LEX_C_INCREMENTS      N   //  "++"  CLEX_plusplus "--"  CLEX_minusminus

								#define STB_C_LEX_C_ARROW           N   //  "->"  CLEX_arrow

								#define STB_C_LEX_EQUAL_ARROW       N   //  "=>"  CLEX_eqarrow

								#define STB_C_LEX_C_BITWISEEQ       N   //  "&="  CLEX_andeq    "|="  CLEX_oreq     "^="  CLEX_xoreq

								#define STB_C_LEX_C_ARITHEQ         N   //  "+="  CLEX_pluseq   "-="  CLEX_minuseq

								                                        //  "*="  CLEX_muleq    "/="  CLEX_diveq    "%=" CLEX_modeq

								                                        //  if both STB_C_LEX_SHIFTS & STB_C_LEX_ARITHEQ:

								                                        //                      "<<=" CLEX_shleq    ">>=" CLEX_shreq


								#define STB_C_LEX_PARSE_SUFFIXES    N   // letters after numbers are parsed as part of those numbers, and must be in suffix list below

								#define STB_C_LEX_DECIMAL_SUFFIXES  ""  // decimal integer suffixes e.g. "uUlL" -- these are returned as-is in string storage

								#define STB_C_LEX_HEX_SUFFIXES      ""  // e.g. "uUlL"

								#define STB_C_LEX_OCTAL_SUFFIXES    ""  // e.g. "uUlL"

								#define STB_C_LEX_FLOAT_SUFFIXES    ""  //


								#define STB_C_LEXER_IMPLEMENTATION

								#include "stb_c_lexer.h"


								#include <inttypes.h> // strtoimax

								#include <limits.h>

								#include <stdio.h> // fread, fseek, ftell

								#include <stdlib.h> // malloc, free

								#include <stdarg.h> // va_start, va_list, va_end

								#include <stdint.h>

								#include <string.h> // memcmp

								#include <stdbool.h>


								// Reports a fatal error and terminates the process.
								//
								// `format` and the variadic arguments follow printf conventions. The message is
								// written to stderr (not stdout) so it cannot get mixed into regular program
								// output, and it is followed by a newline. This function never returns.
								static inline void die(const char* format, ...) {
								    // make sure everything printed so far appears before the diagnostic
								    fflush(stdout);

								    va_list args;
								    va_start(args, format);
								    vfprintf(stderr, format, args);
								    va_end(args);
								    fputc('\n', stderr);

								    exit(EXIT_FAILURE);
								}


								// Reads the whole file at `filepath` into a freshly malloc'd buffer.
								//
								// The returned buffer is always NUL-terminated (one byte more than the file
								// contents is allocated) and must be released with free() by the caller.
								// If `outSize` is not NULL it receives the number of bytes read, not counting
								// the terminator. Any failure (open, seek, allocation, read) ends the program
								// through die().
								static inline char* readWholeFile(const char* filepath, size_t *outSize) {
								    FILE *fp = fopen(filepath, "rb");
								    if (fp == NULL) {
								        die("failed to open file: %s", filepath);
								    }

								    // ftell reports failure as -1L, so check it before converting to size_t
								    long length = -1;
								    if (fseek(fp, 0, SEEK_END) == 0) {
								        length = ftell(fp);
								    }
								    if (length < 0 || fseek(fp, 0L, SEEK_SET) != 0) {
								        fclose(fp);
								        die("failed to determine the size of file: %s", filepath);
								    }

								    size_t size = (size_t) length;
								    char *buffer = malloc(size + 1);
								    if (buffer == NULL) {
								        fclose(fp);
								        die("out of memory while reading file: %s", filepath);
								    }

								    size_t numRead = fread(buffer, sizeof (char), size, fp);
								    if (numRead != size && ferror(fp)) {
								        free(buffer);
								        fclose(fp);
								        die("failed to read file: %s", filepath);
								    }

								    // a short read without an error just means fewer bytes were available;
								    // terminate the buffer at what was actually read.
								    size = numRead;
								    buffer[size] = '\0';
								    fclose(fp);

								    if (outSize != NULL) *outSize = size;

								    return buffer;
								}


								// Reports whether `c` is one of the whitespace characters recognized here:
								// space, horizontal tab, line feed, form feed or carriage return.
								static inline bool isWhitespace(char c) {
								    switch (c) {
								        case ' ':
								        case '\t':
								        case '\n':
								        case '\f':
								        case '\r':
								            return true;
								        default:
								            return false;
								    }
								}


								// Skips leading whitespace in the NUL-terminated string `input`.
								//
								// Returns a pointer to the first non-whitespace character. When the terminator
								// is reached first (empty or all-whitespace string) the original `input`
								// pointer is returned unchanged.
								static inline char* eatWhitespace(char* input) {
								    for (char* p = input; *p != '\0'; p++) {
								        if (!isWhitespace(*p)) {
								            return p;
								        }
								    }
								    return input;
								}


								// Searches `string` backwards for the n-th last occurrence of `c`.
								//
								// The scan starts at index length - 1 and stops before index 0, so the first
								// character is never examined. Each examined position equal to `c` is counted,
								// and a pointer to the position where the count reaches `n` is returned.
								// After a whitespace position has been examined, the rest of that whitespace
								// run is skipped together with the character right before the run, so a run of
								// whitespace counts at most once. Returns NULL when the count never reaches `n`.
								static inline char* findNthLastCharOccurence(char* string, int length, char c, int n) {
								    int matches = 0;
								    int pos = length - 1;

								    while (pos > 0) {
								        if (string[pos] == c) {
								            matches += 1;
								        }
								        if (matches == n) {
								            return &string[pos];
								        }

								        // walk back to the start of the current whitespace run (if any)
								        while (isWhitespace(string[pos]) && pos > 0) {
								            pos -= 1;
								        }
								        pos -= 1;
								    }

								    return NULL;
								}


								// Copies at most `maxCount` characters from `src` to `dest`, stopping early at
								// the end of `src`, and then always writes a terminating NUL.
								//
								// `dest` therefore needs room for up to maxCount + 1 bytes.
								// Returns the number of characters copied, not counting the terminator.
								static inline int strWrite(char *dest, const char *src, int maxCount) {
								    int written = 0;
								    while (written < maxCount && src[written] != '\0') {
								        dest[written] = src[written];
								        written++;
								    }
								    dest[written] = '\0';
								    return written;
								}

								// One field recorded for a parsed struct (filled in by finalizeDeclaration).
								struct Declaration {

								    // type text of the field; finalizeDeclaration appends one '*' per pointer level
								    char type[64];

								    // field name; finalizeDeclaration appends "[N]" when an array length was parsed
								    char name[64];

								    // size in bytes (element size times array length), or -1 when the type is not in the type table
								    ssize_t size;

								    // alignment in bytes, or -1 when the type is not in the type table
								    ssize_t align;

								    // NOTE(review): not assigned anywhere in this file; bitfields are still on the TODO list
								    bool isBitfield;

								};


								// Everything recorded about one struct definition found by parseStruct.
								struct StructInfo {

								    // struct tag; empty string for an anonymous struct
								    char name[64];

								    // identifier that directly follows the closing brace (e.g. a typedef alias); empty if there is none
								    char alias[64];

								    // path of the file being parsed; the pointer passed to parseStruct is stored as-is, not copied
								    const char *filename;

								    // location reported by the lexer for the token that follows the struct/class keyword
								    int lineNumber, lineOffset;


								    // running sum of the sizes of the recorded declarations (accumulated in finalizeDeclaration)
								    ssize_t size;

								    // fixed-capacity storage for the fields; only the first numDeclarations entries are valid
								    struct Declaration declarations[16];

								    int numDeclarations;

								};


								static inline void printStructInfo(struct StructInfo *structInfo) {

								    printf("%s - %d:%d", structInfo->filename, structInfo->lineNumber, structInfo->lineOffset);

								    printf(" - %s", structInfo->name[0] == '\0' ? "(anonymous struct)" : structInfo->name);

								    printf(", %s\n", structInfo->alias[0] == '\0' ? "(c++ style, no typedef alias)" : structInfo->alias);

								    printf(" - total size: %ld\n", structInfo->size);

								    for (int i = 0; i < structInfo->numDeclarations; i++) {

								        struct Declaration *decl = structInfo->declarations + i;

								        printf("\tdecl name: %s, type: %s, size: %ld, alignment: %ld\n", decl->name, decl->type, decl->size, decl->align);

								    }

								}


								// Global growable array of every struct parsed so far.
								// main() allocates the initial storage and pushStructInfo() appends to it.

								// number of StructInfo slots currently allocated in allStructs
								static int capacityAllStructs = 64;

								// number of slots in allStructs that are in use
								static int numAllStructs = 0;

								// heap storage for the parsed structs
								static struct StructInfo *allStructs;


								void pushStructInfo(struct StructInfo *structInfo) {

								    if (numAllStructs >= capacityAllStructs) {

								        capacityAllStructs *= 1.5;

								        allStructs = realloc(allStructs, sizeof(struct StructInfo) * (capacityAllStructs));

								    }


								    memcpy((void*) &allStructs[numAllStructs++], (void*) structInfo, sizeof(struct StructInfo));

								}


								#include "table.h"

								#include "visualization.h"


								// Scratch buffer handed to the lexer as its string store (see parseFile).
								// The size macro is parenthesized so that it expands safely inside any
								// expression, and it is #undef'd right after use; later code reads the
								// size through store_size.
								#define STORE_SIZE (1024 * 1000)
								static const int store_size = STORE_SIZE;
								static char store[STORE_SIZE] = { 0 };
								#undef STORE_SIZE


								// Generic array header: an untyped data pointer plus length/capacity counters.
								// NOTE(review): nothing in this file reads or writes these fields yet (push() is an empty stub).
								struct Array {

								    unsigned int length;

								    unsigned int capacity;

								    void* data;

								};

								// Unimplemented stub: the body is empty, so calling it has no effect.
								// NOTE(review): presumably meant to append `item` to `array` - confirm before relying on it.
								void push(struct Array* array, void* item) {


								}


								// Unimplemented stub: the body is empty and nothing in this file calls it.
								void parseType() {


								}


								// @HACK skip all instances of 'const'
								//
								// Returns true only when `nullTerminated` is exactly the string "const"
								// (the terminator is part of the comparison, so "constant" does not match).
								static inline bool shouldSkipConst(char* nullTerminated) {
								    return strcmp(nullTerminated, "const") == 0;
								}


								// Turns one collected declaration line into Declaration entries of `structInfo`.
								//
								//   lineBuffer      - the identifiers of the line, each followed by one space
								//                     (as built by parseStructDeclaration), e.g. "unsigned int x "
								//   lookback        - which space, counted from the end, separates the type from
								//                     the field names
								//   numAsterisks    - number of '*' tokens seen on the line
								//   numDeclarations - number of field names on the line
								//   arrayVal        - parsed array length, or -1 when the field is not an array
								//   structInfo      - struct being filled in; its size and numDeclarations grow
								//
								// The size and alignment come from the type table, or from sizeof(void*) when
								// the line contained a '*'. Unknown types are entered into the table with
								// size/alignment -1 and recorded as -1 here.
								//
								// Compared to the previous version, every write is bounded by the destination
								// buffer, fields beyond the capacity of structInfo->declarations are dropped
								// with a warning instead of being written past the array, and every name on a
								// comma-separated line gets the same type, size and alignment.
								static inline void finalizeDeclaration(
								    char lineBuffer[128],
								    int lookback,
								    int numAsterisks,
								    int numDeclarations,
								    int arrayVal,
								    struct StructInfo *structInfo
								) {
								    printf("LINE BUFFER: |%s|, arrayVal: %d\n", lineBuffer, arrayVal);

								    // we're at the end of a line of declarations.
								    // everything before the cursor is the type, everything after it is names.
								    char *cursor = findNthLastCharOccurence(lineBuffer, 128, ' ', lookback);
								    if (cursor == NULL) {
								        die("panic when finalizing a declaration");
								    }

								    // copy the type text, clamped so that it always fits typeBuffer.
								    char typeBuffer[64] = { 0 };
								    const int typeCapacity = (int) sizeof(typeBuffer) - 1;
								    int typeLength = (int)(cursor - lineBuffer);
								    if (typeLength > typeCapacity) {
								        typeLength = typeCapacity;
								    }
								    int count = strWrite(typeBuffer, lineBuffer, typeLength);

								    const int multiplier = (arrayVal != -1) ? arrayVal : 1;

								    // the table is keyed by the type name without any '*'.
								    ssize_t size;
								    ssize_t align;
								    TableEntry *entry = lookup(typeTable, typeBuffer);
								    if (numAsterisks != 0) {
								        size = sizeof(void*) * multiplier;
								        align = sizeof(void*);
								    } else if (entry != NULL) {
								        size = entry->size * multiplier;
								        align = entry->align;
								    } else {
								        // this is likely a new/unknown type in the program. enter it into the type table with an unknown size.
								        printf("warning: unknown field size and alignment in struct field: %s\n", typeBuffer);
								        insertPadZeroes(typeTable, typeBuffer, -1, -1);
								        size = -1;
								        align = -1;
								    }

								    // for looking up size in the table, we don't want to include the '*'
								    // but for storing the type name of the decl, we probably do.
								    // this happens once, so every field on the line gets the same type text.
								    for (int i = 0; i < numAsterisks && count < typeCapacity; i++) {
								        count += strWrite(typeBuffer + count, "*", 1);
								    }

								    const int maxDeclarations =
								        (int)(sizeof(structInfo->declarations) / sizeof(structInfo->declarations[0]));

								    // we could have multiple declarations (comma separated)
								    // they will have to be the same type, except for bitfields (kill me)
								    // so every one of them gets the type/size/align computed above, and we
								    // just move the cursor to find the next name.
								    ssize_t totalSize = 0;
								    for (int i = 0; i < numDeclarations; i++) {
								        if (structInfo->numDeclarations >= maxDeclarations) {
								            printf("warning: struct has more than %d fields, ignoring the rest\n", maxDeclarations);
								            break;
								        }

								        // figure out the name of this field: skip the separator(s), then
								        // take everything up to the next whitespace or the end of the line.
								        while (*cursor != '\0' && isWhitespace(*cursor)) {
								            cursor++;
								        }
								        if (*cursor == '\0') {
								            break; // no further names on this line
								        }
								        char *nameStart = cursor;
								        while (*cursor != '\0' && !isWhitespace(*cursor)) {
								            cursor++;
								        }

								        struct Declaration *decl = structInfo->declarations + structInfo->numDeclarations;
								        structInfo->numDeclarations++;

								        decl->size = size;
								        decl->align = align;
								        strWrite(decl->type, typeBuffer, (int) sizeof(decl->type) - 1);

								        int nameLength = (int)(cursor - nameStart);
								        const int nameCapacity = (int) sizeof(decl->name) - 1;
								        if (nameLength > nameCapacity) {
								            nameLength = nameCapacity;
								        }
								        int nameCount = strWrite(decl->name, nameStart, nameLength);
								        if (arrayVal != -1) {
								            snprintf(decl->name + nameCount, sizeof(decl->name) - (size_t) nameCount, "[%d]", arrayVal);
								        }

								        totalSize += size;
								    }

								    structInfo->size += totalSize;
								}


								// Consumes the tokens of one declaration inside a struct body, starting with
								// the lexer's current token and ending at the next ';', then records the
								// field(s) through finalizeDeclaration.
								//
								// Identifiers (token 260, i.e. CLEX_id) are collected into a line buffer,
								// separated by single spaces; the identifier "const" is dropped. Every ','
								// adds one more declared name, every '*' marks the field as a pointer, and
								// the text after the last '[' is parsed as a decimal array length when ']'
								// is reached. If the stream ends before a ';' nothing is recorded.
								void parseStructDeclaration(struct StructInfo *structInfo, stb_lexer *lexer) {
								    // a real counter: it is incremented for every ',' on the line
								    // (as a bool it could never get past 1).
								    int numDeclarations = 1;
								    int numAsterisks = 0;
								    int soFar = 0;
								    int lookback = 2;

								    // for parsing things like 'char name[12]'
								    char* lastOpenBracket = NULL;
								    int arrayVal = -1;

								    char lineBuffer[128] = { 0 };
								    // highest number of characters lineBuffer can hold besides its terminator
								    const int lineCapacity = (int) sizeof(lineBuffer) - 1;

								    do {
								        switch (lexer->token) {
								            case 260: { // CLEX_id
								                // we don't record const because it's annoying.
								                if (shouldSkipConst(lexer->string)) break;

								                // room for the identifier itself, keeping one slot for the
								                // separating space; identifiers that no longer fit are dropped
								                // instead of being written past the buffer.
								                int room = lineCapacity - soFar - 1;
								                if (room <= 0) break;
								                if (room > 64) room = 64;

								                soFar += strWrite(lineBuffer + soFar, lexer->string, room);
								                soFar += strWrite(lineBuffer + soFar, " ", 1);
								            } break;

								            case ',':
								                numDeclarations++;
								                lookback++;
								                break;

								            case '*':
								                numAsterisks++;
								                break;

								            case '[':
								                lastOpenBracket = lexer->where_firstchar;
								                break;

								            case ']': {
								                // a ']' without a preceding '[' carries no length information
								                if (lastOpenBracket == NULL) break;

								                // integer literals are disabled in the lexer configuration, so
								                // the length is parsed straight from the source text. anything
								                // that is not a positive int (e.g. a macro name) means "unknown".
								                intmax_t parsed = strtoimax(lastOpenBracket + 1, NULL, 10);
								                arrayVal = (parsed > 0 && parsed <= INT_MAX) ? (int) parsed : -1;
								            } break;

								            case ';':
								                finalizeDeclaration(lineBuffer, lookback, numAsterisks, numDeclarations, arrayVal, structInfo);
								                return;

								            default:
								                break;
								        }
								    } while (stb_c_lexer_get_token(lexer) != 0);
								}


								//
								// the token in the lexer is a 'struct' keyword. we want to get the identifiers, and the nested declarations.
								//
								// <struct-or-union-specifier> ::= <struct-or-union> <identifier> { {<struct-declaration>}+ }
								//                               | <struct-or-union> { {<struct-declaration>}+ }
								//                               | <struct-or-union> <identifier>
								//
								// Only the first two forms (the ones with a body) produce a StructInfo, which
								// is handed to pushStructInfo; the third form is skipped. `filename` is stored
								// in the StructInfo as a pointer, so it must stay valid for as long as the
								// recorded structs are used. `isClass` is currently not used.
								// Reaching the end of the stream in the middle of a struct ends the program
								// through die().
								void parseStruct(const char *filename, stb_lexer *lexer, bool isClass) {
								    (void) isClass;

								    int result = stb_c_lexer_get_token(lexer);
								    if (result == 0) die("failed to parse struct");

								    stb_lex_location location = { 0 };
								    stb_c_lexer_get_location(lexer, lexer->where_firstchar, &location);

								    struct StructInfo structInfo = { 0 };
								    structInfo.filename        = filename;
								    structInfo.lineNumber      = location.line_number;
								    structInfo.lineOffset      = location.line_offset;
								    structInfo.numDeclarations = 0;
								    structInfo.size            = 0;

								    switch (lexer->token) {
								        case 260: { // CLEX_id
								            char tempNameBuffer[64] = { 0 };
								            strWrite(tempNameBuffer, lexer->string, 63);

								            // maybe a named struct.
								            result = stb_c_lexer_get_token(lexer);
								            if (result == 0) die("failed to parse struct");

								            // anything but a body means this is just a use of the type.
								            if (lexer->token != '{') {
								                return;
								            }
								            strWrite(structInfo.name, tempNameBuffer, 63);
								        } break;

								        case '{':
								            break;

								        default:
								            return;
								    }

								    // walk the body until the brace that opened it is closed again.
								    // each case ends in its own break: a '}' must only lower the balance
								    // (it used to fall through into the '{' case and raise it right back).
								    int balancer = 1;
								    while (balancer > 0 && stb_c_lexer_get_token(lexer) != 0) {
								        switch (lexer->token) {
								            case '}':
								                --balancer;
								                break;

								            case '{':
								                ++balancer;
								                break;

								            case 260: // CLEX_id
								                parseStructDeclaration(&structInfo, lexer);
								                break;

								            default:
								                break;
								        }
								    }

								    // look at the token right after the body (also reached when the stream
								    // ended inside the body, in which case the check below reports it).
								    result = stb_c_lexer_get_token(lexer);
								    if (result == 0) die("unexpected end of stream when parsing a struct");

								    if (lexer->token == 260) {
								        // we have a type alias for the struct.
								        // @NOTE @TODO this could also conceivably be the __attribute__ thingy: https://stackoverflow.com/questions/14671253/is-there-a-gcc-keyword-to-allow-structure-reordering
								        strWrite(structInfo.alias, lexer->string, 63);
								    }

								    pushStructInfo(&structInfo);
								}


								// Unimplemented stub: parseFile calls this when it sees the identifier
								// "typedef", but the body is empty, so no tokens are consumed here.
								void parseTypedef(stb_lexer *lexer) {


								}


								// Lexes the file at `filepath` and dispatches on the identifiers "typedef",
								// "struct" and "class" to parseTypedef / parseStruct.
								//
								// The file contents are read into a temporary buffer that is freed before
								// returning, and the shared lexer string store is cleared afterwards.
								// `filepath` is passed on to parseStruct, which keeps the pointer.
								void parseFile(const char *filepath) {
								    printf("parsing file %s...\n", filepath);

								    size_t size = 0;
								    char *buffer = readWholeFile(filepath, &size);

								    // the end pointer is one past the last byte of the file contents; the
								    // NUL terminator that follows it is not part of the stream.
								    stb_lexer lexer;
								    stb_c_lexer_init(&lexer, buffer, buffer + size, store, store_size);

								    while (stb_c_lexer_get_token(&lexer) != 0) {
								        if (lexer.token != 260) { // only identifiers (CLEX_id) are of interest
								            continue;
								        }

								        // plain string comparison instead of reading the token through a
								        // uint64_t pointer: same exact-match result, but independent of byte
								        // order and alignment.
								        if (strcmp(lexer.string, "typedef") == 0) {
								            parseTypedef(&lexer);
								        } else if (strcmp(lexer.string, "struct") == 0) {
								            parseStruct(filepath, &lexer, false);
								        } else if (strcmp(lexer.string, "class") == 0) {
								            parseStruct(filepath, &lexer, true);
								        }
								    }

								    free(buffer);
								    memset(store, 0, store_size);
								}


								// http://www.catb.org/esr/structure-packing/
								//
								// Entry point: allocates the global struct array, initializes the type table,
								// parses every file named on the command line and finally calls outputHtml().
								// Without arguments the tool parses its own sources instead.
								int main(int argc, char* argv[]) {
								    // @TODO check for flag -fshort-enums
								    allStructs = malloc(sizeof(struct StructInfo) * capacityAllStructs);
								    if (allStructs == NULL) {
								        fprintf(stderr, "out of memory while allocating struct storage\n");
								        return 1;
								    }
								    typeTable = initTable();

								    if (CHAR_BIT != 8) {
								        printf("warning - CHAR_BIT != 8\n");
								    }

								    // disabled debug output: the limits of the platform's basic types.
								    if (false) {
								        printf("CHAR_BIT       = %d\n", CHAR_BIT);
								        printf("MB_LEN_MAX     = %d\n\n", MB_LEN_MAX);

								        printf("CHAR_MIN       = %+d\n", CHAR_MIN);
								        printf("CHAR_MAX       = %+d\n", CHAR_MAX);
								        printf("SCHAR_MIN      = %+d\n", SCHAR_MIN);
								        printf("SCHAR_MAX      = %+d\n", SCHAR_MAX);
								        // UCHAR_MAX and USHRT_MAX normally have type int, %u wants unsigned
								        printf("UCHAR_MAX      = %u\n\n", (unsigned) UCHAR_MAX);

								        printf("SHRT_MIN       = %+d\n", SHRT_MIN);
								        printf("SHRT_MAX       = %+d\n", SHRT_MAX);
								        printf("USHRT_MAX      = %u\n\n", (unsigned) USHRT_MAX);

								        printf("INT_MIN        = %+d\n", INT_MIN);
								        printf("INT_MAX        = %+d\n", INT_MAX);
								        printf("UINT_MAX       = %u\n\n", UINT_MAX);

								        printf("LONG_MIN       = %+ld\n", LONG_MIN);
								        printf("LONG_MAX       = %+ld\n", LONG_MAX);
								        printf("ULONG_MAX      = %lu\n\n", ULONG_MAX);

								        printf("LLONG_MIN      = %+lld\n", LLONG_MIN);
								        printf("LLONG_MAX      = %+lld\n", LLONG_MAX);
								        printf("ULLONG_MAX     = %llu\n\n", ULLONG_MAX);

								        printf("PTRDIFF_MIN    = %td\n", PTRDIFF_MIN);
								        printf("PTRDIFF_MAX    = %+td\n", PTRDIFF_MAX);
								        printf("SIZE_MAX       = %zu\n", SIZE_MAX);
								        printf("SIG_ATOMIC_MIN = %+jd\n",(intmax_t)SIG_ATOMIC_MIN);
								        printf("SIG_ATOMIC_MAX = %+jd\n",(intmax_t)SIG_ATOMIC_MAX);
								        printf("WCHAR_MIN      = %+jd\n",(intmax_t)WCHAR_MIN);
								        printf("WCHAR_MAX      = %+jd\n",(intmax_t)WCHAR_MAX);
								        printf("WINT_MIN       = %jd\n", (intmax_t)WINT_MIN);
								        printf("WINT_MAX       = %jd\n", (intmax_t)WINT_MAX);
								    }

								    if (argc < 2) {
								        // no files given: instead of bailing out with a usage error,
								        // analyze this program's own sources.
								        parseFile(__FILE__);
								        parseFile("table.h");
								        parseFile("visualization.h");
								        parseFile("stb_c_lexer.h");
								    }

								    for (int i = 1; i < argc; i++) {
								        parseFile(argv[i]);
								    }

								    outputHtml();

								    return 0;
								}