visualize the data structures in a C program
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

536 lines
18 KiB

2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
  1. //
  2. // @TODO
  3. // long-term goals:
  4. // - support C++!
  5. // - support Rust with repr(C) structs to the extent that it is possible!
  6. // - support Golang to the extent that it is possible!
  7. // - support Odin?!
  8. //
  9. // short-term:
  10. // - bitfields!
  11. // - robustness! change [64] byte name fields to pointers!
  12. // - find all files in folder of a given type!
  13. //
  14. #define STB_C_LEXER_DEFINITIONS
  15. #define STB_C_LEX_0_IS_EOF Y // if Y, ends parsing at '\0'; if N, returns '\0' as token
  16. #define STB_C_LEX_USE_STDLIB Y // use strtod,strtol for parsing #s; otherwise inaccurate hack
  17. #define STB_C_LEX_DOLLAR_IDENTIFIER N // allow $ as an identifier character
  18. #define STB_C_LEX_DEFINE_ALL_TOKEN_NAMES Y // if Y, all CLEX_ token names are defined, even if never returned
  19. // leaving it as N should help you catch config bugs
#define STB_C_LEX_DISCARD_PREPROCESSOR Y // discard C-preprocessor directives (e.g. after preprocessing
// you may still have #line, #pragma, etc.)
  22. #define STB_C_LEX_MULTILINE_DSTRINGS N // allow newlines in double-quoted strings
  23. #define STB_C_LEX_MULTILINE_SSTRINGS N // allow newlines in single-quoted strings
  24. #define STB_C_LEX_FLOAT_NO_DECIMAL N // allow floats that have no decimal point if they have an exponent
  25. #define STB_C_LEX_C_IDENTIFIERS Y // "[_a-zA-Z][_a-zA-Z0-9]*" CLEX_id
  26. #define STB_C_LEX_C_COMMENTS Y // "/* comment */"
  27. #define STB_C_LEX_CPP_COMMENTS Y // "// comment to end of line\n"
  28. #define STB_C_LEX_INTEGERS_AS_DOUBLES N // parses integers as doubles so they can be larger than 'int', but only if STB_C_LEX_STDLIB==N
  29. #define STB_C_LEX_C_DECIMAL_INTS N // "0|[1-9][0-9]*" CLEX_intlit
  30. #define STB_C_LEX_C_HEX_INTS N // "0x[0-9a-fA-F]+" CLEX_intlit
  31. #define STB_C_LEX_C_OCTAL_INTS N // "[0-7]+" CLEX_intlit
  32. #define STB_C_LEX_C_DECIMAL_FLOATS N // "[0-9]*(.[0-9]*([eE][-+]?[0-9]+)?) CLEX_floatlit
  33. #define STB_C_LEX_C99_HEX_FLOATS N // "0x{hex}+(.{hex}*)?[pP][-+]?{hex}+ CLEX_floatlit
  34. #define STB_C_LEX_C_DQ_STRINGS N // double-quote-delimited strings with escapes CLEX_dqstring
  35. #define STB_C_LEX_C_SQ_STRINGS N // single-quote-delimited strings with escapes CLEX_ssstring
  36. #define STB_C_LEX_C_CHARS N // single-quote-delimited character with escape CLEX_charlits
  37. #define STB_C_LEX_C_COMPARISONS N // "==" CLEX_eq "!=" CLEX_noteq "<=" CLEX_lesseq ">=" CLEX_greatereq
  38. #define STB_C_LEX_C_LOGICAL N // "&&" CLEX_andand "||" CLEX_oror
  39. #define STB_C_LEX_C_SHIFTS N // "<<" CLEX_shl ">>" CLEX_shr
  40. #define STB_C_LEX_C_INCREMENTS N // "++" CLEX_plusplus "--" CLEX_minusminus
  41. #define STB_C_LEX_C_ARROW N // "->" CLEX_arrow
  42. #define STB_C_LEX_EQUAL_ARROW N // "=>" CLEX_eqarrow
  43. #define STB_C_LEX_C_BITWISEEQ N // "&=" CLEX_andeq "|=" CLEX_oreq "^=" CLEX_xoreq
  44. #define STB_C_LEX_C_ARITHEQ N // "+=" CLEX_pluseq "-=" CLEX_minuseq
  45. // "*=" CLEX_muleq "/=" CLEX_diveq "%=" CLEX_modeq
  46. // if both STB_C_LEX_SHIFTS & STB_C_LEX_ARITHEQ:
  47. // "<<=" CLEX_shleq ">>=" CLEX_shreq
  48. #define STB_C_LEX_PARSE_SUFFIXES N // letters after numbers are parsed as part of those numbers, and must be in suffix list below
  49. #define STB_C_LEX_DECIMAL_SUFFIXES "" // decimal integer suffixes e.g. "uUlL" -- these are returned as-is in string storage
  50. #define STB_C_LEX_HEX_SUFFIXES "" // e.g. "uUlL"
  51. #define STB_C_LEX_OCTAL_SUFFIXES "" // e.g. "uUlL"
  52. #define STB_C_LEX_FLOAT_SUFFIXES "" //
  53. #define STB_C_LEXER_IMPLEMENTATION
  54. #include "stb_c_lexer.h"
  55. #include <inttypes.h> // strtoimax
  56. #include <limits.h>
  57. #include <assert.h> //assert
  58. #include <stdio.h> // fread, fseek, ftell
  59. #include <stdlib.h> // malloc, free
  60. #include <stdarg.h> // va_start, va_list, va_end
  61. #include <stdint.h> // intxx_t, etc.
  62. #include <string.h> // memcmp
  63. #include <stdbool.h> // bool
// Small sample struct with a pointer, a char and an int member.
// NOTE(review): nothing in the visible code references it; presumably it is a
// test subject, since main() parses this very file (__FILE__) when run without
// arguments — confirm before removing.
struct Dummy {
    char *p;
    char c;
    int x;
};
  69. static inline void die(const char* format, ...) {
  70. va_list args;
  71. va_start(args, format);
  72. vprintf(format, args);
  73. va_end(args);
  74. exit(1);
  75. }
  76. static inline char* readWholeFile(const char* filepath, size_t *outSize) {
  77. FILE *fp = fopen(filepath, "rb");
  78. if (fp == NULL) {
  79. die("failed to open file: %s", filepath);
  80. }
  81. fseek(fp, 0, SEEK_END);
  82. size_t size = ftell(fp);
  83. fseek(fp, 0L, SEEK_SET);
  84. char *buffer = (char*) malloc(size + 1);
  85. fread(buffer, sizeof (char), size, fp);
  86. buffer[size] = '\0';
  87. fclose(fp);
  88. if (outSize != NULL) *outSize = size;
  89. return buffer;
  90. }
  91. static inline bool isWhitespace(char c) {
  92. return c == ' ' || c == '\r' || c == '\n' || c == '\f' || c == '\t';
  93. }
  94. static inline char* eatWhitespace(char* input) {
  95. char* orig = input;
  96. char c;
  97. while ((c = *input) != '\0') {
  98. if (!isWhitespace(c)) return input;
  99. input++;
  100. }
  101. return orig;
  102. }
  103. // de-duplicates whitespace
  104. static inline char* findNthLastCharOccurence(char* string, int length, char c, int n) {
  105. char* out = NULL;
  106. int _n = 0;
  107. for (int i = length - 1; i > 0; i--) {
  108. if (string[i] == c) _n++;
  109. if (_n == n) return string + i;
  110. while (isWhitespace(string[i]) && i > 0) {
  111. i--;
  112. }
  113. }
  114. return out;
  115. }
  116. static inline int strWrite(char *dest, const char *src, int maxCount) {
  117. int i = 0;
  118. for (; i < maxCount; i++) {
  119. if (src[i] == '\0') {
  120. break;
  121. }
  122. dest[i] = src[i];
  123. }
  124. dest[i] = '\0';
  125. return i;
  126. }
  127. static ssize_t alignForward(ssize_t ptr, ssize_t align) {
  128. ssize_t p, a, modulo;
  129. p = ptr;
  130. a = align;
  131. modulo = p % a;
  132. if (modulo != 0) {
  133. p += a - modulo;
  134. }
  135. return p;
  136. }
// One field (member declaration) of a parsed struct.
struct Declaration {
    // Type name as text; finalizeDeclaration() appends one '*' per pointer level.
    char type[64];
    // Field name; finalizeDeclaration() appends "[N]" for array fields.
    char name[64];
    // Size of the field; -1 when the type is not in the type table.
    ssize_t size;
    // Alignment of the field; -1 when the type is not in the type table.
    ssize_t align;
    // NOTE(review): never written in the visible code; bitfields are listed as a TODO.
    bool isBitfield;
};
// Everything recorded about one struct definition found in a source file.
struct StructInfo {
    // Struct tag; empty string for an anonymous struct.
    char name[64];
    // Identifier that follows the closing brace (typedef alias); empty if none.
    char alias[64];
    // File the struct was found in; the pointer is stored as passed to parseStruct(), not copied.
    const char *filename;
    // Location reported by stb_c_lexer_get_location() for the token after the struct keyword.
    int lineNumber, lineOffset;
    // Sum of the sizes of all recorded declarations (see finalizeDeclaration()).
    ssize_t size;
    // Fixed-capacity list of fields; only the first numDeclarations entries are valid.
    struct Declaration declarations[16];
    int numDeclarations;
};
  153. static inline void printStructInfo(struct StructInfo *structInfo) {
  154. printf("%s - %d:%d", structInfo->filename, structInfo->lineNumber, structInfo->lineOffset);
  155. printf(" - %s", structInfo->name[0] == '\0' ? "(anonymous struct)" : structInfo->name);
  156. printf(", %s\n", structInfo->alias[0] == '\0' ? "(c++ style, no typedef alias)" : structInfo->alias);
  157. printf(" - total size: %ld\n", structInfo->size);
  158. for (int i = 0; i < structInfo->numDeclarations; i++) {
  159. struct Declaration *decl = structInfo->declarations + i;
  160. printf("\tdecl name: %s, type: %s, size: %ld, alignment: %ld\n", decl->name, decl->type, decl->size, decl->align);
  161. }
  162. }
  163. static int capacityAllStructs = 64;
  164. static int numAllStructs = 0;
  165. static struct StructInfo *allStructs;
  166. void pushStructInfo(struct StructInfo *structInfo) {
  167. if (numAllStructs >= capacityAllStructs) {
  168. capacityAllStructs *= 1.5;
  169. allStructs = realloc(allStructs, sizeof(struct StructInfo) * (capacityAllStructs));
  170. }
  171. memcpy((void*) &allStructs[numAllStructs++], (void*) structInfo, sizeof(struct StructInfo));
  172. }
  173. #include "table.h"
  174. #include "visualization.h"
  175. #define STORE_SIZE 1024*1000
  176. static const int store_size = STORE_SIZE;
  177. static char store[STORE_SIZE] = { 0 };
  178. #undef STORE_SIZE
// Generic array header: element count, capacity and an untyped data pointer.
// NOTE(review): only referenced by the empty push() stub in the visible code;
// the element size is not recorded anywhere — presumably work in progress.
struct Array {
    unsigned int length;
    unsigned int capacity;
    void* data;
};
// Placeholder: intended to append `item` to `array`, but currently does nothing.
void push(struct Array* array, void* item) {
}
// Placeholder with an empty body; not called anywhere in the visible code.
void parseType() {
}
  188. static inline bool shouldSkipConst(char* nullTerminated) {
  189. // @HACK skip all instances of 'const'
  190. size_t bounds = sizeof("const");
  191. for (int i = 0; i < bounds; i++) {
  192. char c = nullTerminated[i];
  193. if (c != "const"[i]) return false;
  194. }
  195. return true;
  196. }
  197. static inline void finalizeDeclaration(
  198. char lineBuffer[128],
  199. int lookback,
  200. int numAsterisks,
  201. int numDeclarations,
  202. int arrayVal,
  203. struct StructInfo *structInfo
  204. ) {
  205. printf("LINE BUFFER: |%s|, arrayVal: %d\n", lineBuffer, arrayVal);
  206. // we're at the end of a line of declarations.
  207. // we can learn some interesting stuff by looking back now.
  208. char typeBuffer[64] = { 0 };
  209. char *cursor = findNthLastCharOccurence(lineBuffer, 128, ' ', lookback);
  210. if (cursor == NULL) {
  211. die("panic when finalizing a declaration");
  212. }
  213. struct Declaration *decl = structInfo->declarations + structInfo->numDeclarations;
  214. int diff = (int)(cursor - lineBuffer);
  215. int count = strWrite(typeBuffer, lineBuffer, diff);
  216. int multiplier = 1;
  217. if (arrayVal != -1) {
  218. multiplier = arrayVal;
  219. }
  220. ssize_t totalSize = 0;
  221. TableEntry *entry = lookup(typeTable, typeBuffer);
  222. if (numAsterisks == 0) {
  223. if (entry == NULL) {
  224. // this is likely a new/unknown type in the program. enter it into the type table with an unknown size.
  225. printf("warning: unknown field size and alignment in struct field: %s\n", typeBuffer);
  226. insertPadZeroes(typeTable, typeBuffer, -1, -1);
  227. decl->size = -1;
  228. decl->align = -1;
  229. } else {
  230. decl->size = entry->size * multiplier;
  231. decl->align = entry->align;
  232. }
  233. } else {
  234. decl->size = sizeof(void*) * multiplier;
  235. decl->align = sizeof(void*);
  236. }
  237. // we could have multiple declarations (comma separated)
  238. // they will have to be the same type, except for bitfields (kill me)
  239. // so we'll just copy the type from the first decl, and just move the cursor
  240. // to find the other name.
  241. for (int i = 0; i < numDeclarations; i++) {
  242. decl = structInfo->declarations + structInfo->numDeclarations;
  243. totalSize += decl->size;
  244. structInfo->numDeclarations++;
  245. // write in the type name field.
  246. // for looking up size in the table, we don't want to include the '*'
  247. // but for storing the type name of the decl, we probably do.
  248. for (int i = 0; i < numAsterisks; i++) {
  249. count += strWrite(typeBuffer + count, "*", 1);
  250. }
  251. strWrite(decl->type, typeBuffer, 64);
  252. // figure out the name of this field.
  253. char* nameStart;
  254. char c;
  255. while ((c = *cursor) != '\0') {
  256. if (!isWhitespace(c)) {
  257. nameStart = cursor;
  258. break;
  259. }
  260. cursor++;
  261. }
  262. char* nameEnd;
  263. while ((c = *cursor) != '\0') {
  264. if (isWhitespace(c)) {
  265. nameEnd = cursor;
  266. break;
  267. }
  268. cursor++;
  269. }
  270. int count = strWrite(decl->name, nameStart, (int) (nameEnd-nameStart));
  271. if (arrayVal != -1) {
  272. snprintf(decl->name + count, 64 - count, "[%d]", arrayVal);
  273. }
  274. }
  275. structInfo->size += totalSize;
  276. }
  277. void parseStructDeclaration(struct StructInfo *structInfo, stb_lexer *lexer) {
  278. bool somethingWasConst = false;
  279. bool numDeclarations = 1;
  280. int numAsterisks = 0;
  281. int soFar = 0;
  282. int lookback = 2;
  283. // for parsing things like 'char name[12]'
  284. char* lastOpenBracket = NULL;
  285. int arrayVal = -1;
  286. char lineBuffer[128] = { 0 };
  287. do {
  288. switch (lexer->token) {
  289. case 260: {
  290. // we don't record const because it's annoying.
  291. if (shouldSkipConst(lexer->string)) { somethingWasConst = true; break; }
  292. soFar += strWrite(lineBuffer + soFar, lexer->string, 64);
  293. soFar += strWrite(lineBuffer + soFar, " ", 1);
  294. } break;
  295. case ',':
  296. numDeclarations++;
  297. lookback++;
  298. break;
  299. case '*':
  300. numAsterisks++;
  301. break;
  302. case '[':
  303. lastOpenBracket = lexer->where_firstchar;
  304. break;
  305. case ']':
  306. arrayVal = strtoimax(lastOpenBracket + 1, &lexer->where_firstchar, 10);
  307. // happens if there is no value between, in which case it's a "name[]" decl
  308. if (arrayVal == 0) arrayVal = -1;
  309. break;
  310. case ';': {
  311. finalizeDeclaration(lineBuffer, lookback, numAsterisks, numDeclarations, arrayVal, structInfo);
  312. } return;
  313. }
  314. } while (stb_c_lexer_get_token(lexer) != 0);
  315. }
  316. //
  317. // the token in the lexer is a 'struct' keyword. we want to get the identifiers, and the nested declarations.
  318. //
  319. // <struct-or-union-specifier> ::= <struct-or-union> <identifier> { {<struct-declaration>}+ }
  320. // | <struct-or-union> { {<struct-declaration>}+ }
  321. // | <struct-or-union> <identifier>
  322. void parseStruct(const char *filename, stb_lexer *lexer, bool isClass) {
  323. int result = stb_c_lexer_get_token(lexer);
  324. if (result == 0) die("failed to parse struct");
  325. stb_lex_location location = { 0 };
  326. stb_c_lexer_get_location(lexer, lexer->where_firstchar, &location);
  327. struct StructInfo structInfo = { 0 };
  328. structInfo.filename = filename;
  329. structInfo.lineNumber = location.line_number;
  330. structInfo.lineOffset = location.line_offset;
  331. structInfo.numDeclarations = 0;
  332. structInfo.size = 0;
  333. switch (lexer->token) {
  334. case 260: {
  335. char tempNameBuffer[64] = { 0 };
  336. strWrite(tempNameBuffer, lexer->string, 64);
  337. // maybe a named struct.
  338. result = stb_c_lexer_get_token(lexer);
  339. if (result == 0) die("failed to parse struct");
  340. if (lexer->token == '{') {
  341. strWrite(structInfo.name, tempNameBuffer, 64);
  342. } else {
  343. return;
  344. }
  345. } break;
  346. case '{': {} break;
  347. default: return;
  348. }
  349. int balancer = 1;
  350. while (stb_c_lexer_get_token(lexer) != 0) {
  351. switch (lexer->token) {
  352. case '}': if (--balancer == 0) goto checkTypeAlias;
  353. case '{': ++balancer; break;
  354. case 260: {
  355. parseStructDeclaration(&structInfo, lexer);
  356. } break;
  357. }
  358. }
  359. checkTypeAlias:
  360. result = stb_c_lexer_get_token(lexer);
  361. if (result == 0) die("unexpected end of stream when parsing a struct");
  362. if (lexer->token == 260) {
  363. // we have a type alias for the struct.
  364. // @NOTE @TODO this could also conceivably by the __attribute__ thingy: https://stackoverflow.com/questions/14671253/is-there-a-gcc-keyword-to-allow-structure-reordering
  365. strWrite(structInfo.alias, lexer->string, 64);
  366. }
  367. pushStructInfo(&structInfo);
  368. }
// Placeholder: called by parseFile() when a `typedef` identifier is seen, but
// currently does nothing, so the lexer position is left untouched.
void parseTypedef(stb_lexer *lexer) {
}
  371. void parseFile(const char *filepath) {
  372. printf("parsing file %s...\n", filepath);
  373. size_t size;
  374. char *buffer = readWholeFile(filepath, &size);
  375. stb_lexer lexer;
  376. stb_c_lexer_init(&lexer, buffer, buffer + size + 1, store, store_size);
  377. while (stb_c_lexer_get_token(&lexer) != 0) {
  378. switch (lexer.token) {
  379. case 260: { // token is a string
  380. const uint64_t LE_STRUCT = 0x0000746375727473U;
  381. const uint64_t LE_CLASS = 0x0000007373616C63U;
  382. const uint64_t LE_TYPEDEF = 0x0066656465707974U;
  383. uint64_t t = *((uint64_t*)(lexer.string));
  384. if ((t ) == LE_TYPEDEF) { parseTypedef(&lexer); }
  385. else if ((t & 0x00FFFFFFFFFFFFFF) == LE_STRUCT) { parseStruct(filepath, &lexer, false); }
  386. else if ((t & 0x0000FFFFFFFFFFFF) == LE_CLASS) { parseStruct(filepath, &lexer, true); }
  387. } break;
  388. }
  389. }
  390. free(buffer);
  391. memset(store, 0, store_size);
  392. }
  393. // http://www.catb.org/esr/structure-packing/
  394. int main(int argc, char* argv[]) {
  395. // @TODO check for flag -fshort-enums
  396. allStructs = malloc(sizeof(struct StructInfo) * capacityAllStructs);
  397. typeTable = initTable();
  398. if (CHAR_BIT != 8) {
  399. printf("warning - CHAR_BIT != 8\n");
  400. }
  401. if (false) {
  402. printf("CHAR_BIT = %d\n", CHAR_BIT);
  403. printf("MB_LEN_MAX = %d\n\n", MB_LEN_MAX);
  404. printf("CHAR_MIN = %+d\n", CHAR_MIN);
  405. printf("CHAR_MAX = %+d\n", CHAR_MAX);
  406. printf("SCHAR_MIN = %+d\n", SCHAR_MIN);
  407. printf("SCHAR_MAX = %+d\n", SCHAR_MAX);
  408. printf("UCHAR_MAX = %u\n\n", UCHAR_MAX);
  409. printf("SHRT_MIN = %+d\n", SHRT_MIN);
  410. printf("SHRT_MAX = %+d\n", SHRT_MAX);
  411. printf("USHRT_MAX = %u\n\n", USHRT_MAX);
  412. printf("INT_MIN = %+d\n", INT_MIN);
  413. printf("INT_MAX = %+d\n", INT_MAX);
  414. printf("UINT_MAX = %u\n\n", UINT_MAX);
  415. printf("LONG_MIN = %+ld\n", LONG_MIN);
  416. printf("LONG_MAX = %+ld\n", LONG_MAX);
  417. printf("ULONG_MAX = %lu\n\n", ULONG_MAX);
  418. printf("LLONG_MIN = %+lld\n", LLONG_MIN);
  419. printf("LLONG_MAX = %+lld\n", LLONG_MAX);
  420. printf("ULLONG_MAX = %llu\n\n", ULLONG_MAX);
  421. printf("PTRDIFF_MIN = %td\n", PTRDIFF_MIN);
  422. printf("PTRDIFF_MAX = %+td\n", PTRDIFF_MAX);
  423. printf("SIZE_MAX = %zu\n", SIZE_MAX);
  424. printf("SIG_ATOMIC_MIN = %+jd\n",(intmax_t)SIG_ATOMIC_MIN);
  425. printf("SIG_ATOMIC_MAX = %+jd\n",(intmax_t)SIG_ATOMIC_MAX);
  426. printf("WCHAR_MIN = %+jd\n",(intmax_t)WCHAR_MIN);
  427. printf("WCHAR_MAX = %+jd\n",(intmax_t)WCHAR_MAX);
  428. printf("WINT_MIN = %jd\n", (intmax_t)WINT_MIN);
  429. printf("WINT_MAX = %jd\n", (intmax_t)WINT_MAX);
  430. }
  431. if (argc < 2) {
  432. //die("provide a list of c/c++ files and/or headers to anaylze.");
  433. parseFile(__FILE__);
  434. parseFile("table.h");
  435. parseFile("visualization.h");
  436. parseFile("stb_c_lexer.h");
  437. }
  438. for (int i = 1; i < argc; i++) {
  439. const char *filepath = argv[i];
  440. parseFile(filepath);
  441. }
  442. outputHtml();
  443. return 0;
  444. }