visualize the data structures in a C program
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

516 lines
18 KiB

2 years ago
  1. //
  2. // @TODO
  3. // long-term goals:
  4. // - support C++!
  5. // - support Rust with repr(C) structs to the extent that it is possible!
  6. // - support Golang to the extent that it is possible!
  7. // - support Odin?!
  8. //
  9. // short-term:
  10. // - bitfields!
  11. // - robustness! change [64] byte name fields to pointers!
  12. // - find all files in folder of a given type!
  13. //
  14. #define STB_C_LEXER_DEFINITIONS
  15. #define STB_C_LEX_0_IS_EOF Y // if Y, ends parsing at '\0'; if N, returns '\0' as token
  16. #define STB_C_LEX_USE_STDLIB Y // use strtod,strtol for parsing #s; otherwise inaccurate hack
  17. #define STB_C_LEX_DOLLAR_IDENTIFIER N // allow $ as an identifier character
  18. #define STB_C_LEX_DEFINE_ALL_TOKEN_NAMES Y // if Y, all CLEX_ token names are defined, even if never returned
  19. // leaving it as N should help you catch config bugs
  20. #define STB_C_LEX_DISCARD_PREPROCESSOR Y // discard C-preprocessor directives (e.g. after prepocess
  21. // still have #line, #pragma, etc)
  22. #define STB_C_LEX_MULTILINE_DSTRINGS N // allow newlines in double-quoted strings
  23. #define STB_C_LEX_MULTILINE_SSTRINGS N // allow newlines in single-quoted strings
  24. #define STB_C_LEX_FLOAT_NO_DECIMAL N // allow floats that have no decimal point if they have an exponent
  25. #define STB_C_LEX_C_IDENTIFIERS Y // "[_a-zA-Z][_a-zA-Z0-9]*" CLEX_id
  26. #define STB_C_LEX_C_COMMENTS Y // "/* comment */"
  27. #define STB_C_LEX_CPP_COMMENTS Y // "// comment to end of line\n"
  28. #define STB_C_LEX_INTEGERS_AS_DOUBLES N // parses integers as doubles so they can be larger than 'int', but only if STB_C_LEX_STDLIB==N
  29. #define STB_C_LEX_C_DECIMAL_INTS N // "0|[1-9][0-9]*" CLEX_intlit
  30. #define STB_C_LEX_C_HEX_INTS N // "0x[0-9a-fA-F]+" CLEX_intlit
  31. #define STB_C_LEX_C_OCTAL_INTS N // "[0-7]+" CLEX_intlit
  32. #define STB_C_LEX_C_DECIMAL_FLOATS N // "[0-9]*(.[0-9]*([eE][-+]?[0-9]+)?) CLEX_floatlit
  33. #define STB_C_LEX_C99_HEX_FLOATS N // "0x{hex}+(.{hex}*)?[pP][-+]?{hex}+ CLEX_floatlit
  34. #define STB_C_LEX_C_DQ_STRINGS N // double-quote-delimited strings with escapes CLEX_dqstring
  35. #define STB_C_LEX_C_SQ_STRINGS N // single-quote-delimited strings with escapes CLEX_ssstring
  36. #define STB_C_LEX_C_CHARS N // single-quote-delimited character with escape CLEX_charlits
  37. #define STB_C_LEX_C_COMPARISONS N // "==" CLEX_eq "!=" CLEX_noteq "<=" CLEX_lesseq ">=" CLEX_greatereq
  38. #define STB_C_LEX_C_LOGICAL N // "&&" CLEX_andand "||" CLEX_oror
  39. #define STB_C_LEX_C_SHIFTS N // "<<" CLEX_shl ">>" CLEX_shr
  40. #define STB_C_LEX_C_INCREMENTS N // "++" CLEX_plusplus "--" CLEX_minusminus
  41. #define STB_C_LEX_C_ARROW N // "->" CLEX_arrow
  42. #define STB_C_LEX_EQUAL_ARROW N // "=>" CLEX_eqarrow
  43. #define STB_C_LEX_C_BITWISEEQ N // "&=" CLEX_andeq "|=" CLEX_oreq "^=" CLEX_xoreq
  44. #define STB_C_LEX_C_ARITHEQ N // "+=" CLEX_pluseq "-=" CLEX_minuseq
  45. // "*=" CLEX_muleq "/=" CLEX_diveq "%=" CLEX_modeq
  46. // if both STB_C_LEX_SHIFTS & STB_C_LEX_ARITHEQ:
  47. // "<<=" CLEX_shleq ">>=" CLEX_shreq
  48. #define STB_C_LEX_PARSE_SUFFIXES N // letters after numbers are parsed as part of those numbers, and must be in suffix list below
  49. #define STB_C_LEX_DECIMAL_SUFFIXES "" // decimal integer suffixes e.g. "uUlL" -- these are returned as-is in string storage
  50. #define STB_C_LEX_HEX_SUFFIXES "" // e.g. "uUlL"
  51. #define STB_C_LEX_OCTAL_SUFFIXES "" // e.g. "uUlL"
  52. #define STB_C_LEX_FLOAT_SUFFIXES "" //
  53. #define STB_C_LEXER_IMPLEMENTATION
  54. #include "stb_c_lexer.h"
  55. #include <inttypes.h> // strtoimax
  56. #include <limits.h>
  57. #include <stdio.h> // fread, fseek, ftell
  58. #include <stdlib.h> // malloc, free
  59. #include <stdarg.h> // va_start, va_list, va_end
  60. #include <stdint.h>
  61. #include <string.h> // memcmp
  62. #include <stdbool.h>
  63. static inline void die(const char* format, ...) {
  64. va_list args;
  65. va_start(args, format);
  66. vprintf(format, args);
  67. va_end(args);
  68. exit(1);
  69. }
  70. static inline char* readWholeFile(const char* filepath, size_t *outSize) {
  71. FILE *fp = fopen(filepath, "rb");
  72. if (fp == NULL) {
  73. die("failed to open file: %s", filepath);
  74. }
  75. fseek(fp, 0, SEEK_END);
  76. size_t size = ftell(fp);
  77. fseek(fp, 0L, SEEK_SET);
  78. char *buffer = (char*) malloc(size + 1);
  79. fread(buffer, sizeof (char), size, fp);
  80. buffer[size] = '\0';
  81. fclose(fp);
  82. if (outSize != NULL) *outSize = size;
  83. return buffer;
  84. }
  85. static inline bool isWhitespace(char c) {
  86. return c == ' ' || c == '\r' || c == '\n' || c == '\f' || c == '\t';
  87. }
  88. static inline char* eatWhitespace(char* input) {
  89. char* orig = input;
  90. char c;
  91. while ((c = *input) != '\0') {
  92. if (!isWhitespace(c)) return input;
  93. input++;
  94. }
  95. return orig;
  96. }
  97. // de-duplicates whitespace
  98. static inline char* findNthLastCharOccurence(char* string, int length, char c, int n) {
  99. char* out = NULL;
  100. int _n = 0;
  101. for (int i = length - 1; i > 0; i--) {
  102. if (string[i] == c) _n++;
  103. if (_n == n) return string + i;
  104. while (isWhitespace(string[i]) && i > 0) {
  105. i--;
  106. }
  107. }
  108. return out;
  109. }
  110. static inline int strWrite(char *dest, const char *src, int maxCount) {
  111. int i = 0;
  112. for (; i < maxCount; i++) {
  113. if (src[i] == '\0') {
  114. break;
  115. }
  116. dest[i] = src[i];
  117. }
  118. dest[i] = '\0';
  119. return i;
  120. }
  121. struct Declaration {
  122. char type[64];
  123. char name[64];
  124. ssize_t size;
  125. ssize_t align;
  126. bool isBitfield;
  127. };
  128. struct StructInfo {
  129. char name[64];
  130. char alias[64];
  131. const char *filename;
  132. int lineNumber, lineOffset;
  133. ssize_t size;
  134. struct Declaration declarations[16];
  135. int numDeclarations;
  136. };
  137. static inline void printStructInfo(struct StructInfo *structInfo) {
  138. printf("%s - %d:%d", structInfo->filename, structInfo->lineNumber, structInfo->lineOffset);
  139. printf(" - %s", structInfo->name[0] == '\0' ? "(anonymous struct)" : structInfo->name);
  140. printf(", %s\n", structInfo->alias[0] == '\0' ? "(c++ style, no typedef alias)" : structInfo->alias);
  141. printf(" - total size: %ld\n", structInfo->size);
  142. for (int i = 0; i < structInfo->numDeclarations; i++) {
  143. struct Declaration *decl = structInfo->declarations + i;
  144. printf("\tdecl name: %s, type: %s, size: %ld, alignment: %ld\n", decl->name, decl->type, decl->size, decl->align);
  145. }
  146. }
  147. static int capacityAllStructs = 64;
  148. static int numAllStructs = 0;
  149. static struct StructInfo *allStructs;
  150. void pushStructInfo(struct StructInfo *structInfo) {
  151. if (numAllStructs >= capacityAllStructs) {
  152. capacityAllStructs *= 1.5;
  153. allStructs = realloc(allStructs, sizeof(struct StructInfo) * (capacityAllStructs));
  154. }
  155. memcpy((void*) &allStructs[numAllStructs++], (void*) structInfo, sizeof(struct StructInfo));
  156. }
  157. #include "table.h"
  158. #include "visualization.h"
  159. #define STORE_SIZE 1024*1000
  160. static const int store_size = STORE_SIZE;
  161. static char store[STORE_SIZE] = { 0 };
  162. #undef STORE_SIZE
  163. struct Array {
  164. unsigned int length;
  165. unsigned int capacity;
  166. void* data;
  167. };
  168. void push(struct Array* array, void* item) {
  169. }
  170. void parseType() {
  171. }
  172. static inline bool shouldSkipConst(char* nullTerminated) {
  173. // @HACK skip all instances of 'const'
  174. size_t bounds = sizeof("const");
  175. for (int i = 0; i < bounds; i++) {
  176. char c = nullTerminated[i];
  177. if (c != "const"[i]) return false;
  178. }
  179. return true;
  180. }
  181. static inline void finalizeDeclaration(
  182. char lineBuffer[128],
  183. int lookback,
  184. int numAsterisks,
  185. int numDeclarations,
  186. int arrayVal,
  187. struct StructInfo *structInfo
  188. ) {
  189. printf("LINE BUFFER: |%s|, arrayVal: %d\n", lineBuffer, arrayVal);
  190. // we're at the end of a line of declarations.
  191. // we can learn some interesting stuff by looking back now.
  192. char typeBuffer[64] = { 0 };
  193. char *cursor = findNthLastCharOccurence(lineBuffer, 128, ' ', lookback);
  194. if (cursor == NULL) {
  195. die("panic when finalizing a declaration");
  196. }
  197. struct Declaration *decl = structInfo->declarations + structInfo->numDeclarations;
  198. int diff = (int)(cursor - lineBuffer);
  199. int count = strWrite(typeBuffer, lineBuffer, diff);
  200. int multiplier = 1;
  201. if (arrayVal != -1) {
  202. multiplier = arrayVal;
  203. }
  204. ssize_t totalSize = 0;
  205. TableEntry *entry = lookup(typeTable, typeBuffer);
  206. if (numAsterisks == 0) {
  207. if (entry == NULL) {
  208. // this is likely a new/unknown type in the program. enter it into the type table with an unknown size.
  209. printf("warning: unknown field size and alignment in struct field: %s\n", typeBuffer);
  210. insertPadZeroes(typeTable, typeBuffer, -1, -1);
  211. decl->size = -1;
  212. decl->align = -1;
  213. } else {
  214. decl->size = entry->size * multiplier;
  215. decl->align = entry->align;
  216. }
  217. } else {
  218. decl->size = sizeof(void*) * multiplier;
  219. decl->align = sizeof(void*);
  220. }
  221. // we could have multiple declarations (comma separated)
  222. // they will have to be the same type, except for bitfields (kill me)
  223. // so we'll just copy the type from the first decl, and just move the cursor
  224. // to find the other name.
  225. for (int i = 0; i < numDeclarations; i++) {
  226. decl = structInfo->declarations + structInfo->numDeclarations;
  227. totalSize += decl->size;
  228. structInfo->numDeclarations++;
  229. // write in the type name field.
  230. // for looking up size in the table, we don't want to include the '*'
  231. // but for storing the type name of the decl, we probably do.
  232. for (int i = 0; i < numAsterisks; i++) {
  233. count += strWrite(typeBuffer + count, "*", 1);
  234. }
  235. strWrite(decl->type, typeBuffer, 64);
  236. // figure out the name of this field.
  237. char* nameStart;
  238. char c;
  239. while ((c = *cursor) != '\0') {
  240. if (!isWhitespace(c)) {
  241. nameStart = cursor;
  242. break;
  243. }
  244. cursor++;
  245. }
  246. char* nameEnd;
  247. while ((c = *cursor) != '\0') {
  248. if (isWhitespace(c)) {
  249. nameEnd = cursor;
  250. break;
  251. }
  252. cursor++;
  253. }
  254. int count = strWrite(decl->name, nameStart, (int) (nameEnd-nameStart));
  255. if (arrayVal != -1) {
  256. snprintf(decl->name + count, 64 - count, "[%d]", arrayVal);
  257. }
  258. }
  259. structInfo->size += totalSize;
  260. }
  261. void parseStructDeclaration(struct StructInfo *structInfo, stb_lexer *lexer) {
  262. bool somethingWasConst = false;
  263. bool numDeclarations = 1;
  264. int numAsterisks = 0;
  265. int soFar = 0;
  266. int lookback = 2;
  267. // for parsing things like 'char name[12]'
  268. char* lastOpenBracket = NULL;
  269. int arrayVal = -1;
  270. char lineBuffer[128] = { 0 };
  271. do {
  272. switch (lexer->token) {
  273. case 260: {
  274. // we don't record const because it's annoying.
  275. if (shouldSkipConst(lexer->string)) { somethingWasConst = true; break; }
  276. soFar += strWrite(lineBuffer + soFar, lexer->string, 64);
  277. soFar += strWrite(lineBuffer + soFar, " ", 1);
  278. } break;
  279. case ',':
  280. numDeclarations++;
  281. lookback++;
  282. break;
  283. case '*':
  284. numAsterisks++;
  285. break;
  286. case '[':
  287. lastOpenBracket = lexer->where_firstchar;
  288. break;
  289. case ']':
  290. arrayVal = strtoimax(lastOpenBracket + 1, &lexer->where_firstchar, 10);
  291. if (arrayVal == 0) arrayVal = -1;
  292. break;
  293. case ';': {
  294. finalizeDeclaration(lineBuffer, lookback, numAsterisks, numDeclarations, arrayVal, structInfo);
  295. } return;
  296. }
  297. } while (stb_c_lexer_get_token(lexer) != 0);
  298. }
  299. //
  300. // the token in the lexer is a 'struct' keyword. we want to get the identifiers, and the nested declarations.
  301. //
  302. // <struct-or-union-specifier> ::= <struct-or-union> <identifier> { {<struct-declaration>}+ }
  303. // | <struct-or-union> { {<struct-declaration>}+ }
  304. // | <struct-or-union> <identifier>
  305. void parseStruct(const char *filename, stb_lexer *lexer, bool isClass) {
  306. int result = stb_c_lexer_get_token(lexer);
  307. if (result == 0) die("failed to parse struct");
  308. stb_lex_location location = { 0 };
  309. stb_c_lexer_get_location(lexer, lexer->where_firstchar, &location);
  310. struct StructInfo structInfo = { 0 };
  311. structInfo.filename = filename;
  312. structInfo.lineNumber = location.line_number;
  313. structInfo.lineOffset = location.line_offset;
  314. structInfo.numDeclarations = 0;
  315. structInfo.size = 0;
  316. switch (lexer->token) {
  317. case 260: {
  318. char tempNameBuffer[64] = { 0 };
  319. strWrite(tempNameBuffer, lexer->string, 64);
  320. // maybe a named struct.
  321. result = stb_c_lexer_get_token(lexer);
  322. if (result == 0) die("failed to parse struct");
  323. if (lexer->token == '{') {
  324. strWrite(structInfo.name, tempNameBuffer, 64);
  325. } else {
  326. return;
  327. }
  328. } break;
  329. case '{': {} break;
  330. default: return;
  331. }
  332. int balancer = 1;
  333. while (stb_c_lexer_get_token(lexer) != 0) {
  334. switch (lexer->token) {
  335. case '}': if (--balancer == 0) goto checkTypeAlias;
  336. case '{': ++balancer; break;
  337. case 260: {
  338. parseStructDeclaration(&structInfo, lexer);
  339. } break;
  340. }
  341. }
  342. checkTypeAlias:
  343. result = stb_c_lexer_get_token(lexer);
  344. if (result == 0) die("unexpected end of stream when parsing a struct");
  345. if (lexer->token == 260) {
  346. // we have a type alias for the struct.
  347. // @NOTE @TODO this could also conceivably by the __attribute__ thingy: https://stackoverflow.com/questions/14671253/is-there-a-gcc-keyword-to-allow-structure-reordering
  348. strWrite(structInfo.alias, lexer->string, 64);
  349. }
  350. pushStructInfo(&structInfo);
  351. }
  352. void parseTypedef(stb_lexer *lexer) {
  353. }
  354. void parseFile(const char *filepath) {
  355. printf("parsing file %s...\n", filepath);
  356. size_t size;
  357. char *buffer = readWholeFile(filepath, &size);
  358. stb_lexer lexer;
  359. stb_c_lexer_init(&lexer, buffer, buffer + size + 1, store, store_size);
  360. while (stb_c_lexer_get_token(&lexer) != 0) {
  361. switch (lexer.token) {
  362. case 260: { // token is a string
  363. const uint64_t LE_STRUCT = 0x0000746375727473U;
  364. const uint64_t LE_CLASS = 0x0000007373616C63U;
  365. const uint64_t LE_TYPEDEF = 0x0066656465707974U;
  366. uint64_t t = *((uint64_t*)(lexer.string));
  367. if ((t ) == LE_TYPEDEF) { parseTypedef(&lexer); }
  368. else if ((t & 0x00FFFFFFFFFFFFFF) == LE_STRUCT) { parseStruct(filepath, &lexer, false); }
  369. else if ((t & 0x0000FFFFFFFFFFFF) == LE_CLASS) { parseStruct(filepath, &lexer, true); }
  370. } break;
  371. }
  372. }
  373. free(buffer);
  374. memset(store, 0, store_size);
  375. }
  376. // http://www.catb.org/esr/structure-packing/
  377. int main(int argc, char* argv[]) {
  378. // @TODO check for flag -fshort-enums
  379. allStructs = malloc(sizeof(struct StructInfo) * capacityAllStructs);
  380. typeTable = initTable();
  381. if (CHAR_BIT != 8) {
  382. printf("warning - CHAR_BIT != 8\n");
  383. }
  384. if (false) {
  385. printf("CHAR_BIT = %d\n", CHAR_BIT);
  386. printf("MB_LEN_MAX = %d\n\n", MB_LEN_MAX);
  387. printf("CHAR_MIN = %+d\n", CHAR_MIN);
  388. printf("CHAR_MAX = %+d\n", CHAR_MAX);
  389. printf("SCHAR_MIN = %+d\n", SCHAR_MIN);
  390. printf("SCHAR_MAX = %+d\n", SCHAR_MAX);
  391. printf("UCHAR_MAX = %u\n\n", UCHAR_MAX);
  392. printf("SHRT_MIN = %+d\n", SHRT_MIN);
  393. printf("SHRT_MAX = %+d\n", SHRT_MAX);
  394. printf("USHRT_MAX = %u\n\n", USHRT_MAX);
  395. printf("INT_MIN = %+d\n", INT_MIN);
  396. printf("INT_MAX = %+d\n", INT_MAX);
  397. printf("UINT_MAX = %u\n\n", UINT_MAX);
  398. printf("LONG_MIN = %+ld\n", LONG_MIN);
  399. printf("LONG_MAX = %+ld\n", LONG_MAX);
  400. printf("ULONG_MAX = %lu\n\n", ULONG_MAX);
  401. printf("LLONG_MIN = %+lld\n", LLONG_MIN);
  402. printf("LLONG_MAX = %+lld\n", LLONG_MAX);
  403. printf("ULLONG_MAX = %llu\n\n", ULLONG_MAX);
  404. printf("PTRDIFF_MIN = %td\n", PTRDIFF_MIN);
  405. printf("PTRDIFF_MAX = %+td\n", PTRDIFF_MAX);
  406. printf("SIZE_MAX = %zu\n", SIZE_MAX);
  407. printf("SIG_ATOMIC_MIN = %+jd\n",(intmax_t)SIG_ATOMIC_MIN);
  408. printf("SIG_ATOMIC_MAX = %+jd\n",(intmax_t)SIG_ATOMIC_MAX);
  409. printf("WCHAR_MIN = %+jd\n",(intmax_t)WCHAR_MIN);
  410. printf("WCHAR_MAX = %+jd\n",(intmax_t)WCHAR_MAX);
  411. printf("WINT_MIN = %jd\n", (intmax_t)WINT_MIN);
  412. printf("WINT_MAX = %jd\n", (intmax_t)WINT_MAX);
  413. }
  414. if (argc < 2) {
  415. //die("provide a list of c/c++ files and/or headers to anaylze.");
  416. parseFile(__FILE__);
  417. parseFile("table.h");
  418. parseFile("visualization.h");
  419. parseFile("stb_c_lexer.h");
  420. }
  421. for (int i = 1; i < argc; i++) {
  422. const char *filepath = argv[i];
  423. parseFile(filepath);
  424. }
  425. outputHtml();
  426. return 0;
  427. }