// // @TODO // long-term goals: // - support C++! // - support Rust with repr(C) structs to the extent that it is possible! // - support Golang to the extent that it is possible! // - support Odin?! // // short-term: // - bitfields! // - robustness! change [64] byte name fields to pointers! // - find all files in folder of a given type! // #define STB_C_LEXER_DEFINITIONS #define STB_C_LEX_0_IS_EOF Y // if Y, ends parsing at '\0'; if N, returns '\0' as token #define STB_C_LEX_USE_STDLIB Y // use strtod,strtol for parsing #s; otherwise inaccurate hack #define STB_C_LEX_DOLLAR_IDENTIFIER N // allow $ as an identifier character #define STB_C_LEX_DEFINE_ALL_TOKEN_NAMES Y // if Y, all CLEX_ token names are defined, even if never returned // leaving it as N should help you catch config bugs #define STB_C_LEX_DISCARD_PREPROCESSOR Y // discard C-preprocessor directives (e.g. after prepocess // still have #line, #pragma, etc) #define STB_C_LEX_MULTILINE_DSTRINGS N // allow newlines in double-quoted strings #define STB_C_LEX_MULTILINE_SSTRINGS N // allow newlines in single-quoted strings #define STB_C_LEX_FLOAT_NO_DECIMAL N // allow floats that have no decimal point if they have an exponent #define STB_C_LEX_C_IDENTIFIERS Y // "[_a-zA-Z][_a-zA-Z0-9]*" CLEX_id #define STB_C_LEX_C_COMMENTS Y // "/* comment */" #define STB_C_LEX_CPP_COMMENTS Y // "// comment to end of line\n" #define STB_C_LEX_INTEGERS_AS_DOUBLES N // parses integers as doubles so they can be larger than 'int', but only if STB_C_LEX_STDLIB==N #define STB_C_LEX_C_DECIMAL_INTS N // "0|[1-9][0-9]*" CLEX_intlit #define STB_C_LEX_C_HEX_INTS N // "0x[0-9a-fA-F]+" CLEX_intlit #define STB_C_LEX_C_OCTAL_INTS N // "[0-7]+" CLEX_intlit #define STB_C_LEX_C_DECIMAL_FLOATS N // "[0-9]*(.[0-9]*([eE][-+]?[0-9]+)?) CLEX_floatlit #define STB_C_LEX_C99_HEX_FLOATS N // "0x{hex}+(.{hex}*)?[pP][-+]?{hex}+ CLEX_floatlit #define STB_C_LEX_C_DQ_STRINGS N // double-quote-delimited strings with escapes CLEX_dqstring #define STB_C_LEX_C_SQ_STRINGS N // single-quote-delimited strings with escapes CLEX_ssstring #define STB_C_LEX_C_CHARS N // single-quote-delimited character with escape CLEX_charlits #define STB_C_LEX_C_COMPARISONS N // "==" CLEX_eq "!=" CLEX_noteq "<=" CLEX_lesseq ">=" CLEX_greatereq #define STB_C_LEX_C_LOGICAL N // "&&" CLEX_andand "||" CLEX_oror #define STB_C_LEX_C_SHIFTS N // "<<" CLEX_shl ">>" CLEX_shr #define STB_C_LEX_C_INCREMENTS N // "++" CLEX_plusplus "--" CLEX_minusminus #define STB_C_LEX_C_ARROW N // "->" CLEX_arrow #define STB_C_LEX_EQUAL_ARROW N // "=>" CLEX_eqarrow #define STB_C_LEX_C_BITWISEEQ N // "&=" CLEX_andeq "|=" CLEX_oreq "^=" CLEX_xoreq #define STB_C_LEX_C_ARITHEQ N // "+=" CLEX_pluseq "-=" CLEX_minuseq // "*=" CLEX_muleq "/=" CLEX_diveq "%=" CLEX_modeq // if both STB_C_LEX_SHIFTS & STB_C_LEX_ARITHEQ: // "<<=" CLEX_shleq ">>=" CLEX_shreq #define STB_C_LEX_PARSE_SUFFIXES N // letters after numbers are parsed as part of those numbers, and must be in suffix list below #define STB_C_LEX_DECIMAL_SUFFIXES "" // decimal integer suffixes e.g. "uUlL" -- these are returned as-is in string storage #define STB_C_LEX_HEX_SUFFIXES "" // e.g. "uUlL" #define STB_C_LEX_OCTAL_SUFFIXES "" // e.g. "uUlL" #define STB_C_LEX_FLOAT_SUFFIXES "" // #define STB_C_LEXER_IMPLEMENTATION #include "stb_c_lexer.h" #include // strtoimax #include #include // fread, fseek, ftell #include // malloc, free #include // va_start, va_list, va_end #include #include // memcmp #include static inline void die(const char* format, ...) { va_list args; va_start(args, format); vprintf(format, args); va_end(args); exit(1); } static inline char* readWholeFile(const char* filepath, size_t *outSize) { FILE *fp = fopen(filepath, "rb"); if (fp == NULL) { die("failed to open file: %s", filepath); } fseek(fp, 0, SEEK_END); size_t size = ftell(fp); fseek(fp, 0L, SEEK_SET); char *buffer = (char*) malloc(size + 1); fread(buffer, sizeof (char), size, fp); buffer[size] = '\0'; fclose(fp); if (outSize != NULL) *outSize = size; return buffer; } static inline bool isWhitespace(char c) { return c == ' ' || c == '\r' || c == '\n' || c == '\f' || c == '\t'; } static inline char* eatWhitespace(char* input) { char* orig = input; char c; while ((c = *input) != '\0') { if (!isWhitespace(c)) return input; input++; } return orig; } // de-duplicates whitespace static inline char* findNthLastCharOccurence(char* string, int length, char c, int n) { char* out = NULL; int _n = 0; for (int i = length - 1; i > 0; i--) { if (string[i] == c) _n++; if (_n == n) return string + i; while (isWhitespace(string[i]) && i > 0) { i--; } } return out; } static inline int strWrite(char *dest, const char *src, int maxCount) { int i = 0; for (; i < maxCount; i++) { if (src[i] == '\0') { break; } dest[i] = src[i]; } dest[i] = '\0'; return i; } struct Declaration { char type[64]; char name[64]; ssize_t size; ssize_t align; bool isBitfield; }; struct StructInfo { char name[64]; char alias[64]; const char *filename; int lineNumber, lineOffset; ssize_t size; struct Declaration declarations[16]; int numDeclarations; }; static inline void printStructInfo(struct StructInfo *structInfo) { printf("%s - %d:%d", structInfo->filename, structInfo->lineNumber, structInfo->lineOffset); printf(" - %s", structInfo->name[0] == '\0' ? "(anonymous struct)" : structInfo->name); printf(", %s\n", structInfo->alias[0] == '\0' ? "(c++ style, no typedef alias)" : structInfo->alias); printf(" - total size: %ld\n", structInfo->size); for (int i = 0; i < structInfo->numDeclarations; i++) { struct Declaration *decl = structInfo->declarations + i; printf("\tdecl name: %s, type: %s, size: %ld, alignment: %ld\n", decl->name, decl->type, decl->size, decl->align); } } static int capacityAllStructs = 64; static int numAllStructs = 0; static struct StructInfo *allStructs; void pushStructInfo(struct StructInfo *structInfo) { if (numAllStructs >= capacityAllStructs) { capacityAllStructs *= 1.5; allStructs = realloc(allStructs, sizeof(struct StructInfo) * (capacityAllStructs)); } memcpy((void*) &allStructs[numAllStructs++], (void*) structInfo, sizeof(struct StructInfo)); } #include "table.h" #include "visualization.h" #define STORE_SIZE 1024*1000 static const int store_size = STORE_SIZE; static char store[STORE_SIZE] = { 0 }; #undef STORE_SIZE struct Array { unsigned int length; unsigned int capacity; void* data; }; void push(struct Array* array, void* item) { } void parseType() { } static inline bool shouldSkipConst(char* nullTerminated) { // @HACK skip all instances of 'const' size_t bounds = sizeof("const"); for (int i = 0; i < bounds; i++) { char c = nullTerminated[i]; if (c != "const"[i]) return false; } return true; } static inline void finalizeDeclaration( char lineBuffer[128], int lookback, int numAsterisks, int numDeclarations, int arrayVal, struct StructInfo *structInfo ) { printf("LINE BUFFER: |%s|, arrayVal: %d\n", lineBuffer, arrayVal); // we're at the end of a line of declarations. // we can learn some interesting stuff by looking back now. char typeBuffer[64] = { 0 }; char *cursor = findNthLastCharOccurence(lineBuffer, 128, ' ', lookback); if (cursor == NULL) { die("panic when finalizing a declaration"); } struct Declaration *decl = structInfo->declarations + structInfo->numDeclarations; int diff = (int)(cursor - lineBuffer); int count = strWrite(typeBuffer, lineBuffer, diff); int multiplier = 1; if (arrayVal != -1) { multiplier = arrayVal; } ssize_t totalSize = 0; TableEntry *entry = lookup(typeTable, typeBuffer); if (numAsterisks == 0) { if (entry == NULL) { // this is likely a new/unknown type in the program. enter it into the type table with an unknown size. printf("warning: unknown field size and alignment in struct field: %s\n", typeBuffer); insertPadZeroes(typeTable, typeBuffer, -1, -1); decl->size = -1; decl->align = -1; } else { decl->size = entry->size * multiplier; decl->align = entry->align; } } else { decl->size = sizeof(void*) * multiplier; decl->align = sizeof(void*); } // we could have multiple declarations (comma separated) // they will have to be the same type, except for bitfields (kill me) // so we'll just copy the type from the first decl, and just move the cursor // to find the other name. for (int i = 0; i < numDeclarations; i++) { decl = structInfo->declarations + structInfo->numDeclarations; totalSize += decl->size; structInfo->numDeclarations++; // write in the type name field. // for looking up size in the table, we don't want to include the '*' // but for storing the type name of the decl, we probably do. for (int i = 0; i < numAsterisks; i++) { count += strWrite(typeBuffer + count, "*", 1); } strWrite(decl->type, typeBuffer, 64); // figure out the name of this field. char* nameStart; char c; while ((c = *cursor) != '\0') { if (!isWhitespace(c)) { nameStart = cursor; break; } cursor++; } char* nameEnd; while ((c = *cursor) != '\0') { if (isWhitespace(c)) { nameEnd = cursor; break; } cursor++; } int count = strWrite(decl->name, nameStart, (int) (nameEnd-nameStart)); if (arrayVal != -1) { snprintf(decl->name + count, 64 - count, "[%d]", arrayVal); } } structInfo->size += totalSize; } void parseStructDeclaration(struct StructInfo *structInfo, stb_lexer *lexer) { bool somethingWasConst = false; bool numDeclarations = 1; int numAsterisks = 0; int soFar = 0; int lookback = 2; // for parsing things like 'char name[12]' char* lastOpenBracket = NULL; int arrayVal = -1; char lineBuffer[128] = { 0 }; do { switch (lexer->token) { case 260: { // we don't record const because it's annoying. if (shouldSkipConst(lexer->string)) { somethingWasConst = true; break; } soFar += strWrite(lineBuffer + soFar, lexer->string, 64); soFar += strWrite(lineBuffer + soFar, " ", 1); } break; case ',': numDeclarations++; lookback++; break; case '*': numAsterisks++; break; case '[': lastOpenBracket = lexer->where_firstchar; break; case ']': arrayVal = strtoimax(lastOpenBracket + 1, &lexer->where_firstchar, 10); if (arrayVal == 0) arrayVal = -1; break; case ';': { finalizeDeclaration(lineBuffer, lookback, numAsterisks, numDeclarations, arrayVal, structInfo); } return; } } while (stb_c_lexer_get_token(lexer) != 0); } // // the token in the lexer is a 'struct' keyword. we want to get the identifiers, and the nested declarations. // // ::= { {}+ } // | { {}+ } // | void parseStruct(const char *filename, stb_lexer *lexer, bool isClass) { int result = stb_c_lexer_get_token(lexer); if (result == 0) die("failed to parse struct"); stb_lex_location location = { 0 }; stb_c_lexer_get_location(lexer, lexer->where_firstchar, &location); struct StructInfo structInfo = { 0 }; structInfo.filename = filename; structInfo.lineNumber = location.line_number; structInfo.lineOffset = location.line_offset; structInfo.numDeclarations = 0; structInfo.size = 0; switch (lexer->token) { case 260: { char tempNameBuffer[64] = { 0 }; strWrite(tempNameBuffer, lexer->string, 64); // maybe a named struct. result = stb_c_lexer_get_token(lexer); if (result == 0) die("failed to parse struct"); if (lexer->token == '{') { strWrite(structInfo.name, tempNameBuffer, 64); } else { return; } } break; case '{': {} break; default: return; } int balancer = 1; while (stb_c_lexer_get_token(lexer) != 0) { switch (lexer->token) { case '}': if (--balancer == 0) goto checkTypeAlias; case '{': ++balancer; break; case 260: { parseStructDeclaration(&structInfo, lexer); } break; } } checkTypeAlias: result = stb_c_lexer_get_token(lexer); if (result == 0) die("unexpected end of stream when parsing a struct"); if (lexer->token == 260) { // we have a type alias for the struct. // @NOTE @TODO this could also conceivably by the __attribute__ thingy: https://stackoverflow.com/questions/14671253/is-there-a-gcc-keyword-to-allow-structure-reordering strWrite(structInfo.alias, lexer->string, 64); } pushStructInfo(&structInfo); } void parseTypedef(stb_lexer *lexer) { } void parseFile(const char *filepath) { printf("parsing file %s...\n", filepath); size_t size; char *buffer = readWholeFile(filepath, &size); stb_lexer lexer; stb_c_lexer_init(&lexer, buffer, buffer + size + 1, store, store_size); while (stb_c_lexer_get_token(&lexer) != 0) { switch (lexer.token) { case 260: { // token is a string const uint64_t LE_STRUCT = 0x0000746375727473U; const uint64_t LE_CLASS = 0x0000007373616C63U; const uint64_t LE_TYPEDEF = 0x0066656465707974U; uint64_t t = *((uint64_t*)(lexer.string)); if ((t ) == LE_TYPEDEF) { parseTypedef(&lexer); } else if ((t & 0x00FFFFFFFFFFFFFF) == LE_STRUCT) { parseStruct(filepath, &lexer, false); } else if ((t & 0x0000FFFFFFFFFFFF) == LE_CLASS) { parseStruct(filepath, &lexer, true); } } break; } } free(buffer); memset(store, 0, store_size); } // http://www.catb.org/esr/structure-packing/ int main(int argc, char* argv[]) { // @TODO check for flag -fshort-enums allStructs = malloc(sizeof(struct StructInfo) * capacityAllStructs); typeTable = initTable(); if (CHAR_BIT != 8) { printf("warning - CHAR_BIT != 8\n"); } if (false) { printf("CHAR_BIT = %d\n", CHAR_BIT); printf("MB_LEN_MAX = %d\n\n", MB_LEN_MAX); printf("CHAR_MIN = %+d\n", CHAR_MIN); printf("CHAR_MAX = %+d\n", CHAR_MAX); printf("SCHAR_MIN = %+d\n", SCHAR_MIN); printf("SCHAR_MAX = %+d\n", SCHAR_MAX); printf("UCHAR_MAX = %u\n\n", UCHAR_MAX); printf("SHRT_MIN = %+d\n", SHRT_MIN); printf("SHRT_MAX = %+d\n", SHRT_MAX); printf("USHRT_MAX = %u\n\n", USHRT_MAX); printf("INT_MIN = %+d\n", INT_MIN); printf("INT_MAX = %+d\n", INT_MAX); printf("UINT_MAX = %u\n\n", UINT_MAX); printf("LONG_MIN = %+ld\n", LONG_MIN); printf("LONG_MAX = %+ld\n", LONG_MAX); printf("ULONG_MAX = %lu\n\n", ULONG_MAX); printf("LLONG_MIN = %+lld\n", LLONG_MIN); printf("LLONG_MAX = %+lld\n", LLONG_MAX); printf("ULLONG_MAX = %llu\n\n", ULLONG_MAX); printf("PTRDIFF_MIN = %td\n", PTRDIFF_MIN); printf("PTRDIFF_MAX = %+td\n", PTRDIFF_MAX); printf("SIZE_MAX = %zu\n", SIZE_MAX); printf("SIG_ATOMIC_MIN = %+jd\n",(intmax_t)SIG_ATOMIC_MIN); printf("SIG_ATOMIC_MAX = %+jd\n",(intmax_t)SIG_ATOMIC_MAX); printf("WCHAR_MIN = %+jd\n",(intmax_t)WCHAR_MIN); printf("WCHAR_MAX = %+jd\n",(intmax_t)WCHAR_MAX); printf("WINT_MIN = %jd\n", (intmax_t)WINT_MIN); printf("WINT_MAX = %jd\n", (intmax_t)WINT_MAX); } if (argc < 2) { //die("provide a list of c/c++ files and/or headers to anaylze."); parseFile(__FILE__); parseFile("table.h"); parseFile("visualization.h"); parseFile("stb_c_lexer.h"); } for (int i = 1; i < argc; i++) { const char *filepath = argv[i]; parseFile(filepath); } outputHtml(); return 0; }