You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
516 lines
18 KiB
516 lines
18 KiB
//
|
|
// @TODO
|
|
// long-term goals:
|
|
// - support C++!
|
|
// - support Rust with repr(C) structs to the extent that it is possible!
|
|
// - support Golang to the extent that it is possible!
|
|
// - support Odin?!
|
|
//
|
|
// short-term:
|
|
// - bitfields!
|
|
// - robustness! change [64] byte name fields to pointers!
|
|
// - find all files in folder of a given type!
|
|
//
|
|
|
|
|
|
#define STB_C_LEXER_DEFINITIONS
|
|
|
|
#define STB_C_LEX_0_IS_EOF Y // if Y, ends parsing at '\0'; if N, returns '\0' as token
|
|
#define STB_C_LEX_USE_STDLIB Y // use strtod,strtol for parsing #s; otherwise inaccurate hack
|
|
#define STB_C_LEX_DOLLAR_IDENTIFIER N // allow $ as an identifier character
|
|
|
|
#define STB_C_LEX_DEFINE_ALL_TOKEN_NAMES Y // if Y, all CLEX_ token names are defined, even if never returned
|
|
// leaving it as N should help you catch config bugs
|
|
|
|
#define STB_C_LEX_DISCARD_PREPROCESSOR Y // discard C-preprocessor directives (e.g. after preprocessing you may
|
|
// still have #line, #pragma, etc)
|
|
|
|
#define STB_C_LEX_MULTILINE_DSTRINGS N // allow newlines in double-quoted strings
|
|
#define STB_C_LEX_MULTILINE_SSTRINGS N // allow newlines in single-quoted strings
|
|
#define STB_C_LEX_FLOAT_NO_DECIMAL N // allow floats that have no decimal point if they have an exponent
|
|
|
|
|
|
#define STB_C_LEX_C_IDENTIFIERS Y // "[_a-zA-Z][_a-zA-Z0-9]*" CLEX_id
|
|
#define STB_C_LEX_C_COMMENTS Y // "/* comment */"
|
|
#define STB_C_LEX_CPP_COMMENTS Y // "// comment to end of line\n"
|
|
|
|
#define STB_C_LEX_INTEGERS_AS_DOUBLES N // parses integers as doubles so they can be larger than 'int', but only if STB_C_LEX_STDLIB==N
|
|
#define STB_C_LEX_C_DECIMAL_INTS N // "0|[1-9][0-9]*" CLEX_intlit
|
|
#define STB_C_LEX_C_HEX_INTS N // "0x[0-9a-fA-F]+" CLEX_intlit
|
|
#define STB_C_LEX_C_OCTAL_INTS N // "[0-7]+" CLEX_intlit
|
|
#define STB_C_LEX_C_DECIMAL_FLOATS N // "[0-9]*(.[0-9]*([eE][-+]?[0-9]+)?) CLEX_floatlit
|
|
#define STB_C_LEX_C99_HEX_FLOATS N // "0x{hex}+(.{hex}*)?[pP][-+]?{hex}+ CLEX_floatlit
|
|
#define STB_C_LEX_C_DQ_STRINGS N // double-quote-delimited strings with escapes CLEX_dqstring
|
|
#define STB_C_LEX_C_SQ_STRINGS N // single-quote-delimited strings with escapes CLEX_ssstring
|
|
#define STB_C_LEX_C_CHARS N // single-quote-delimited character with escape CLEX_charlits
|
|
#define STB_C_LEX_C_COMPARISONS N // "==" CLEX_eq "!=" CLEX_noteq "<=" CLEX_lesseq ">=" CLEX_greatereq
|
|
#define STB_C_LEX_C_LOGICAL N // "&&" CLEX_andand "||" CLEX_oror
|
|
#define STB_C_LEX_C_SHIFTS N // "<<" CLEX_shl ">>" CLEX_shr
|
|
#define STB_C_LEX_C_INCREMENTS N // "++" CLEX_plusplus "--" CLEX_minusminus
|
|
#define STB_C_LEX_C_ARROW N // "->" CLEX_arrow
|
|
#define STB_C_LEX_EQUAL_ARROW N // "=>" CLEX_eqarrow
|
|
#define STB_C_LEX_C_BITWISEEQ N // "&=" CLEX_andeq "|=" CLEX_oreq "^=" CLEX_xoreq
|
|
#define STB_C_LEX_C_ARITHEQ N // "+=" CLEX_pluseq "-=" CLEX_minuseq
|
|
// "*=" CLEX_muleq "/=" CLEX_diveq "%=" CLEX_modeq
|
|
// if both STB_C_LEX_SHIFTS & STB_C_LEX_ARITHEQ:
|
|
// "<<=" CLEX_shleq ">>=" CLEX_shreq
|
|
|
|
#define STB_C_LEX_PARSE_SUFFIXES N // letters after numbers are parsed as part of those numbers, and must be in suffix list below
|
|
#define STB_C_LEX_DECIMAL_SUFFIXES "" // decimal integer suffixes e.g. "uUlL" -- these are returned as-is in string storage
|
|
#define STB_C_LEX_HEX_SUFFIXES "" // e.g. "uUlL"
|
|
#define STB_C_LEX_OCTAL_SUFFIXES "" // e.g. "uUlL"
|
|
#define STB_C_LEX_FLOAT_SUFFIXES "" //
|
|
|
|
#define STB_C_LEXER_IMPLEMENTATION
|
|
#include "stb_c_lexer.h"
|
|
|
|
#include <inttypes.h> // strtoimax
|
|
#include <limits.h>
|
|
#include <stdio.h> // fread, fseek, ftell
|
|
#include <stdlib.h> // malloc, free
|
|
#include <stdarg.h> // va_start, va_list, va_end
|
|
#include <stdint.h>
|
|
#include <string.h> // memcmp
|
|
#include <stdbool.h>
|
|
|
|
|
|
// Reports a fatal error and terminates the process.
//
// `format` and the variadic arguments follow printf conventions. The message
// is written to stderr (so it does not get mixed into the tool's normal
// output) followed by a newline, and the process exits with EXIT_FAILURE.
// This function never returns.
static inline void die(const char* format, ...) {
    va_list args;
    va_start(args, format);
    vfprintf(stderr, format, args);
    va_end(args);
    fputc('\n', stderr);
    exit(EXIT_FAILURE);
}
|
|
|
|
// Reads the entire file at `filepath` into a freshly malloc'd buffer.
//
// The returned buffer is always NUL-terminated (one extra byte is allocated
// for the terminator) and is owned by the caller, who must free() it.
// If `outSize` is not NULL it receives the number of bytes read, excluding
// the terminator.
//
// Any failure (open, seek, tell, allocation, read) is fatal and reported
// through die(); this function does not return NULL.
static inline char* readWholeFile(const char* filepath, size_t *outSize) {
    FILE *fp = fopen(filepath, "rb");
    if (fp == NULL) {
        die("failed to open file: %s", filepath);
    }

    if (fseek(fp, 0, SEEK_END) != 0) {
        die("failed to seek to end of file: %s", filepath);
    }

    // ftell reports failure as -1L; keep the result signed until checked so
    // an error cannot turn into an enormous allocation size.
    long length = ftell(fp);
    if (length < 0) {
        die("failed to determine size of file: %s", filepath);
    }

    if (fseek(fp, 0L, SEEK_SET) != 0) {
        die("failed to rewind file: %s", filepath);
    }

    size_t size = (size_t) length;
    char *buffer = malloc(size + 1);
    if (buffer == NULL) {
        die("out of memory while reading file: %s", filepath);
    }

    size_t bytesRead = fread(buffer, sizeof (char), size, fp);
    if (bytesRead != size && ferror(fp)) {
        die("failed to read file: %s", filepath);
    }

    // Terminate after what was actually read, so a short read never exposes
    // uninitialized bytes to the caller.
    buffer[bytesRead] = '\0';
    fclose(fp);

    if (outSize != NULL) {
        *outSize = bytesRead;
    }

    return buffer;
}
|
|
|
|
// Returns true when `c` is a space, tab, newline, form feed or carriage
// return. Vertical tab is intentionally not in the set.
static inline bool isWhitespace(char c) {
    switch (c) {
        case ' ':
        case '\t':
        case '\n':
        case '\f':
        case '\r':
            return true;
        default:
            return false;
    }
}
|
|
|
|
// Skips leading whitespace in the NUL-terminated string `input`.
//
// Returns a pointer to the first non-whitespace character. If the string is
// empty or consists only of whitespace, the ORIGINAL `input` pointer is
// returned (not a pointer to the terminator).
static inline char* eatWhitespace(char* input) {
    for (char* p = input; *p != '\0'; p++) {
        if (!isWhitespace(*p)) {
            return p;
        }
    }
    return input;
}
|
|
|
|
// Finds the n-th occurrence of `c` in `string`, counting from the end.
//
// The scan walks backwards from index `length - 1` down to index 1; index 0
// is never examined. Whenever the scan lands on a whitespace character, the
// whole run of consecutive whitespace is skipped, so a run contributes at
// most one match. Note that the character immediately to the left of such a
// run is stepped over without being compared against `c`.
//
// Returns a pointer to the matching position, or NULL if fewer than `n`
// matches were found.
static inline char* findNthLastCharOccurence(char* string, int length, char c, int n) {
    int matches = 0;
    int idx = length - 1;

    while (idx > 0) {
        if (string[idx] == c) {
            matches++;
        }
        if (matches == n) {
            return &string[idx];
        }

        // collapse a run of whitespace into a single step
        while (idx > 0 && isWhitespace(string[idx])) {
            idx--;
        }
        idx--;
    }

    return NULL;
}
|
|
|
|
// Copies at most `maxCount` characters from `src` into `dest`, stopping early
// at the end of `src`, and then always appends a NUL terminator.
//
// Because the terminator is written after the copied characters, `dest` must
// have room for `maxCount + 1` bytes.
//
// Returns the number of characters copied, not counting the terminator.
static inline int strWrite(char *dest, const char *src, int maxCount) {
    int written = 0;
    while (written < maxCount && src[written] != '\0') {
        dest[written] = src[written];
        written++;
    }
    dest[written] = '\0';
    return written;
}
|
|
// One field of a parsed struct.
struct Declaration {
    // type name as written in the source; pointer fields get '*' appended
    char type[64];
    // field name; array fields get a "[N]" suffix
    char name[64];
    // size in bytes, or -1 when the type is not in the type table
    ssize_t size;
    // alignment in bytes, or -1 when the type is not in the type table
    ssize_t align;
    // bitfields are not parsed yet (see the @TODO list at the top of the file)
    bool isBitfield;
};
|
|
|
|
struct StructInfo {
|
|
char name[64];
|
|
char alias[64];
|
|
const char *filename;
|
|
int lineNumber, lineOffset;
|
|
|
|
ssize_t size;
|
|
struct Declaration declarations[16];
|
|
int numDeclarations;
|
|
};
|
|
|
|
static inline void printStructInfo(struct StructInfo *structInfo) {
|
|
printf("%s - %d:%d", structInfo->filename, structInfo->lineNumber, structInfo->lineOffset);
|
|
printf(" - %s", structInfo->name[0] == '\0' ? "(anonymous struct)" : structInfo->name);
|
|
printf(", %s\n", structInfo->alias[0] == '\0' ? "(c++ style, no typedef alias)" : structInfo->alias);
|
|
printf(" - total size: %ld\n", structInfo->size);
|
|
for (int i = 0; i < structInfo->numDeclarations; i++) {
|
|
struct Declaration *decl = structInfo->declarations + i;
|
|
printf("\tdecl name: %s, type: %s, size: %ld, alignment: %ld\n", decl->name, decl->type, decl->size, decl->align);
|
|
}
|
|
}
|
|
|
|
static int capacityAllStructs = 64;
|
|
static int numAllStructs = 0;
|
|
static struct StructInfo *allStructs;
|
|
|
|
void pushStructInfo(struct StructInfo *structInfo) {
|
|
if (numAllStructs >= capacityAllStructs) {
|
|
capacityAllStructs *= 1.5;
|
|
allStructs = realloc(allStructs, sizeof(struct StructInfo) * (capacityAllStructs));
|
|
}
|
|
|
|
memcpy((void*) &allStructs[numAllStructs++], (void*) structInfo, sizeof(struct StructInfo));
|
|
}
|
|
|
|
#include "table.h"
|
|
#include "visualization.h"
|
|
|
|
|
|
// Scratch storage handed to the lexer for token strings.
// The macro only exists to size the array and is removed again right away;
// it is parenthesized so the expansion stays a single operand wherever it is
// used.
#define STORE_SIZE (1024 * 1000)
static const int store_size = STORE_SIZE;
static char store[STORE_SIZE] = { 0 };
#undef STORE_SIZE
|
|
|
|
// Generic growable array header. Nothing in this file fills it in yet.
struct Array {
    // number of items currently stored
    unsigned int length;
    // number of items `data` has room for
    unsigned int capacity;
    // backing storage
    void* data;
};

// Placeholder: appending to an Array is not implemented yet, so this does
// nothing with either argument.
void push(struct Array* array, void* item) {
    (void) array;
    (void) item;
}
|
|
|
|
// Placeholder for type parsing; not implemented yet.
// Declared with (void) so the definition is a real prototype and calls with
// stray arguments are rejected by the compiler.
void parseType(void) {
}
|
|
|
|
// @HACK skip all instances of 'const'
//
// Returns true only when `nullTerminated` is exactly the string "const"
// (the terminator is part of the comparison, so "constant" or "cons" do not
// match).
static inline bool shouldSkipConst(char* nullTerminated) {
    return strcmp(nullTerminated, "const") == 0;
}
|
|
|
|
static inline void finalizeDeclaration(
|
|
char lineBuffer[128],
|
|
int lookback,
|
|
int numAsterisks,
|
|
int numDeclarations,
|
|
int arrayVal,
|
|
struct StructInfo *structInfo
|
|
) {
|
|
printf("LINE BUFFER: |%s|, arrayVal: %d\n", lineBuffer, arrayVal);
|
|
// we're at the end of a line of declarations.
|
|
// we can learn some interesting stuff by looking back now.
|
|
char typeBuffer[64] = { 0 };
|
|
char *cursor = findNthLastCharOccurence(lineBuffer, 128, ' ', lookback);
|
|
if (cursor == NULL) {
|
|
die("panic when finalizing a declaration");
|
|
}
|
|
|
|
struct Declaration *decl = structInfo->declarations + structInfo->numDeclarations;
|
|
int diff = (int)(cursor - lineBuffer);
|
|
int count = strWrite(typeBuffer, lineBuffer, diff);
|
|
int multiplier = 1;
|
|
if (arrayVal != -1) {
|
|
multiplier = arrayVal;
|
|
}
|
|
|
|
ssize_t totalSize = 0;
|
|
TableEntry *entry = lookup(typeTable, typeBuffer);
|
|
if (numAsterisks == 0) {
|
|
if (entry == NULL) {
|
|
// this is likely a new/unknown type in the program. enter it into the type table with an unknown size.
|
|
printf("warning: unknown field size and alignment in struct field: %s\n", typeBuffer);
|
|
insertPadZeroes(typeTable, typeBuffer, -1, -1);
|
|
decl->size = -1;
|
|
decl->align = -1;
|
|
|
|
} else {
|
|
decl->size = entry->size * multiplier;
|
|
decl->align = entry->align;
|
|
}
|
|
} else {
|
|
decl->size = sizeof(void*) * multiplier;
|
|
decl->align = sizeof(void*);
|
|
}
|
|
|
|
// we could have multiple declarations (comma separated)
|
|
// they will have to be the same type, except for bitfields (kill me)
|
|
// so we'll just copy the type from the first decl, and just move the cursor
|
|
// to find the other name.
|
|
for (int i = 0; i < numDeclarations; i++) {
|
|
decl = structInfo->declarations + structInfo->numDeclarations;
|
|
totalSize += decl->size;
|
|
structInfo->numDeclarations++;
|
|
|
|
// write in the type name field.
|
|
// for looking up size in the table, we don't want to include the '*'
|
|
// but for storing the type name of the decl, we probably do.
|
|
for (int i = 0; i < numAsterisks; i++) {
|
|
count += strWrite(typeBuffer + count, "*", 1);
|
|
}
|
|
strWrite(decl->type, typeBuffer, 64);
|
|
|
|
// figure out the name of this field.
|
|
char* nameStart;
|
|
char c;
|
|
while ((c = *cursor) != '\0') {
|
|
if (!isWhitespace(c)) {
|
|
nameStart = cursor;
|
|
break;
|
|
}
|
|
cursor++;
|
|
}
|
|
char* nameEnd;
|
|
while ((c = *cursor) != '\0') {
|
|
if (isWhitespace(c)) {
|
|
nameEnd = cursor;
|
|
break;
|
|
}
|
|
cursor++;
|
|
}
|
|
int count = strWrite(decl->name, nameStart, (int) (nameEnd-nameStart));
|
|
if (arrayVal != -1) {
|
|
snprintf(decl->name + count, 64 - count, "[%d]", arrayVal);
|
|
}
|
|
}
|
|
|
|
structInfo->size += totalSize;
|
|
}
|
|
|
|
// Parses one declaration line inside a struct body, starting at the token
// currently held by `lexer` and consuming tokens up to and including the
// terminating ';'. The collected information is handed to
// finalizeDeclaration(), which appends the field(s) to `structInfo`.
//
// If the token stream ends before a ';' is seen, nothing is recorded.
//
// Token value 260 is the lexer's identifier token (presumably CLEX_id in
// stb_c_lexer.h - confirm against the header).
void parseStructDeclaration(struct StructInfo *structInfo, stb_lexer *lexer) {
    // number of comma-separated names on the line; must be an int, a bool
    // would stay at 1 no matter how often it is incremented
    int numDeclarations = 1;
    int numAsterisks = 0;
    int soFar = 0;
    int lookback = 2;

    // for parsing things like 'char name[12]'
    char* lastOpenBracket = NULL;
    int arrayVal = -1;

    char lineBuffer[128] = { 0 };
    const int lineCapacity = (int) sizeof lineBuffer;

    do {
        switch (lexer->token) {
            case 260: {
                // we don't record const because it's annoying.
                if (shouldSkipConst(lexer->string)) {
                    break;
                }

                // Reserve room for the separating space and the terminator
                // so a long line cannot run past the end of lineBuffer.
                int room = lineCapacity - soFar - 2;
                if (room <= 0) {
                    break;
                }
                if (room > 64) {
                    room = 64;
                }

                soFar += strWrite(lineBuffer + soFar, lexer->string, room);
                soFar += strWrite(lineBuffer + soFar, " ", 1);
            } break;

            case ',':
                numDeclarations++;
                lookback++;
                break;

            case '*':
                numAsterisks++;
                break;

            case '[':
                lastOpenBracket = lexer->where_firstchar;
                break;

            case ']':
                // Parse the text after the matching '[' as the array length.
                // A ']' without a preceding '[' is ignored. The end pointer
                // goes into a local so the lexer's own state is not touched.
                if (lastOpenBracket != NULL) {
                    char *end = NULL;
                    intmax_t parsed = strtoimax(lastOpenBracket + 1, &end, 10);
                    // 0 means "no number found"; treat it (and anything that
                    // does not fit a positive int) as "not an array"
                    arrayVal = (parsed > 0 && parsed <= INT_MAX) ? (int) parsed : -1;
                }
                break;

            case ';':
                finalizeDeclaration(lineBuffer, lookback, numAsterisks, numDeclarations, arrayVal, structInfo);
                return;

            default:
                break;
        }
    } while (stb_c_lexer_get_token(lexer) != 0);
}
|
|
|
|
//
|
|
// the token in the lexer is a 'struct' keyword. we want to get the identifiers, and the nested declarations.
|
|
//
|
|
// <struct-or-union-specifier> ::= <struct-or-union> <identifier> { {<struct-declaration>}+ }
|
|
// | <struct-or-union> { {<struct-declaration>}+ }
|
|
// | <struct-or-union> <identifier>
|
|
// Parses a struct (or class) definition. On entry the lexer's current token
// is the 'struct'/'class' keyword; the tokens after it are consumed here.
//
//   filename - recorded in the resulting StructInfo (pointer is stored, not
//              copied)
//   lexer    - token source
//   isClass  - true when the keyword was 'class'; currently not used
//
// Only definitions with a body are recorded: `struct Name { ... }` or
// `struct { ... }`. Anything else (e.g. `struct Name x;`) returns without
// recording. An identifier directly after the closing brace is stored as the
// alias. The finished StructInfo is appended via pushStructInfo().
//
// Hitting the end of the token stream inside the definition is fatal.
// Token value 260 is the lexer's identifier token (presumably CLEX_id in
// stb_c_lexer.h - confirm against the header).
void parseStruct(const char *filename, stb_lexer *lexer, bool isClass) {
    (void) isClass;

    if (stb_c_lexer_get_token(lexer) == 0) {
        die("failed to parse struct");
    }

    stb_lex_location location = { 0 };
    stb_c_lexer_get_location(lexer, lexer->where_firstchar, &location);

    struct StructInfo structInfo = { 0 };
    structInfo.filename = filename;
    structInfo.lineNumber = location.line_number;
    structInfo.lineOffset = location.line_offset;
    structInfo.numDeclarations = 0;
    structInfo.size = 0;

    if (lexer->token == 260) {
        // maybe a named struct. The lexer's string is overwritten by the
        // next token, so copy the name now. strWrite adds a terminator after
        // the copied characters, hence the -1.
        strWrite(structInfo.name, lexer->string, (int) sizeof structInfo.name - 1);

        if (stb_c_lexer_get_token(lexer) == 0) {
            die("failed to parse struct");
        }
        if (lexer->token != '{') {
            // a use of the struct type, not a definition
            return;
        }
    } else if (lexer->token != '{') {
        return;
    }

    // Walk the body until the brace that opened it is closed again.
    int depth = 1;
    while (depth > 0 && stb_c_lexer_get_token(lexer) != 0) {
        switch (lexer->token) {
            case '{':
                depth++;
                break;

            case '}':
                // must not fall through into the '{' case, otherwise the
                // decrement is immediately undone
                depth--;
                break;

            case 260:
                parseStructDeclaration(&structInfo, lexer);
                break;

            default:
                break;
        }
    }
    if (depth > 0) {
        die("unexpected end of stream when parsing a struct");
    }

    if (stb_c_lexer_get_token(lexer) == 0) {
        die("unexpected end of stream when parsing a struct");
    }

    if (lexer->token == 260) {
        // we have a type alias for the struct.
        // @NOTE @TODO this could also conceivably be the __attribute__ thingy: https://stackoverflow.com/questions/14671253/is-there-a-gcc-keyword-to-allow-structure-reordering
        strWrite(structInfo.alias, lexer->string, (int) sizeof structInfo.alias - 1);
    }

    pushStructInfo(&structInfo);
}
|
|
|
|
// Placeholder for typedef handling; not implemented yet. The lexer is left
// untouched, so the caller simply continues with the token after 'typedef'.
void parseTypedef(stb_lexer *lexer) {
    (void) lexer;
}
|
|
|
|
|
|
void parseFile(const char *filepath) {
|
|
printf("parsing file %s...\n", filepath);
|
|
size_t size;
|
|
char *buffer = readWholeFile(filepath, &size);
|
|
|
|
stb_lexer lexer;
|
|
stb_c_lexer_init(&lexer, buffer, buffer + size + 1, store, store_size);
|
|
|
|
while (stb_c_lexer_get_token(&lexer) != 0) {
|
|
switch (lexer.token) {
|
|
case 260: { // token is a string
|
|
const uint64_t LE_STRUCT = 0x0000746375727473U;
|
|
const uint64_t LE_CLASS = 0x0000007373616C63U;
|
|
const uint64_t LE_TYPEDEF = 0x0066656465707974U;
|
|
|
|
uint64_t t = *((uint64_t*)(lexer.string));
|
|
if ((t ) == LE_TYPEDEF) { parseTypedef(&lexer); }
|
|
else if ((t & 0x00FFFFFFFFFFFFFF) == LE_STRUCT) { parseStruct(filepath, &lexer, false); }
|
|
else if ((t & 0x0000FFFFFFFFFFFF) == LE_CLASS) { parseStruct(filepath, &lexer, true); }
|
|
} break;
|
|
}
|
|
}
|
|
free(buffer);
|
|
memset(store, 0, store_size);
|
|
}
|
|
|
|
// http://www.catb.org/esr/structure-packing/
|
|
int main(int argc, char* argv[]) {
|
|
// @TODO check for flag -fshort-enums
|
|
allStructs = malloc(sizeof(struct StructInfo) * capacityAllStructs);
|
|
typeTable = initTable();
|
|
|
|
if (CHAR_BIT != 8) {
|
|
printf("warning - CHAR_BIT != 8\n");
|
|
}
|
|
|
|
if (false) {
|
|
printf("CHAR_BIT = %d\n", CHAR_BIT);
|
|
printf("MB_LEN_MAX = %d\n\n", MB_LEN_MAX);
|
|
|
|
printf("CHAR_MIN = %+d\n", CHAR_MIN);
|
|
printf("CHAR_MAX = %+d\n", CHAR_MAX);
|
|
printf("SCHAR_MIN = %+d\n", SCHAR_MIN);
|
|
printf("SCHAR_MAX = %+d\n", SCHAR_MAX);
|
|
printf("UCHAR_MAX = %u\n\n", UCHAR_MAX);
|
|
|
|
printf("SHRT_MIN = %+d\n", SHRT_MIN);
|
|
printf("SHRT_MAX = %+d\n", SHRT_MAX);
|
|
printf("USHRT_MAX = %u\n\n", USHRT_MAX);
|
|
|
|
printf("INT_MIN = %+d\n", INT_MIN);
|
|
printf("INT_MAX = %+d\n", INT_MAX);
|
|
printf("UINT_MAX = %u\n\n", UINT_MAX);
|
|
|
|
printf("LONG_MIN = %+ld\n", LONG_MIN);
|
|
printf("LONG_MAX = %+ld\n", LONG_MAX);
|
|
printf("ULONG_MAX = %lu\n\n", ULONG_MAX);
|
|
|
|
printf("LLONG_MIN = %+lld\n", LLONG_MIN);
|
|
printf("LLONG_MAX = %+lld\n", LLONG_MAX);
|
|
printf("ULLONG_MAX = %llu\n\n", ULLONG_MAX);
|
|
|
|
printf("PTRDIFF_MIN = %td\n", PTRDIFF_MIN);
|
|
printf("PTRDIFF_MAX = %+td\n", PTRDIFF_MAX);
|
|
printf("SIZE_MAX = %zu\n", SIZE_MAX);
|
|
printf("SIG_ATOMIC_MIN = %+jd\n",(intmax_t)SIG_ATOMIC_MIN);
|
|
printf("SIG_ATOMIC_MAX = %+jd\n",(intmax_t)SIG_ATOMIC_MAX);
|
|
printf("WCHAR_MIN = %+jd\n",(intmax_t)WCHAR_MIN);
|
|
printf("WCHAR_MAX = %+jd\n",(intmax_t)WCHAR_MAX);
|
|
printf("WINT_MIN = %jd\n", (intmax_t)WINT_MIN);
|
|
printf("WINT_MAX = %jd\n", (intmax_t)WINT_MAX);
|
|
}
|
|
|
|
if (argc < 2) {
|
|
//die("provide a list of c/c++ files and/or headers to anaylze.");
|
|
parseFile(__FILE__);
|
|
parseFile("table.h");
|
|
parseFile("visualization.h");
|
|
parseFile("stb_c_lexer.h");
|
|
}
|
|
|
|
for (int i = 1; i < argc; i++) {
|
|
const char *filepath = argv[i];
|
|
parseFile(filepath);
|
|
}
|
|
|
|
outputHtml();
|
|
|
|
return 0;
|
|
}
|
|
|