From de51c93eafd40238285452a8298321f8f8ec0f6b Mon Sep 17 00:00:00 2001 From: churchianity Date: Mon, 17 Apr 2023 09:29:16 -0400 Subject: [PATCH] initial --- .gitignore | 3 + base-index.html | 143 ++++++++ index.html | 144 ++++++++ main.c | 516 ++++++++++++++++++++++++++ stb_c_lexer.h | 940 ++++++++++++++++++++++++++++++++++++++++++++++++ table.h | 166 +++++++++ visualization.h | 95 +++++ 7 files changed, 2007 insertions(+) create mode 100644 .gitignore create mode 100644 base-index.html create mode 100644 index.html create mode 100644 main.c create mode 100644 stb_c_lexer.h create mode 100644 table.h create mode 100644 visualization.h diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..86e11b5 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +.DS_Store +*.out + diff --git a/base-index.html b/base-index.html new file mode 100644 index 0000000..8049732 --- /dev/null +++ b/base-index.html @@ -0,0 +1,143 @@ + + + + + Struct Visualization + + + + + + + + diff --git a/index.html b/index.html new file mode 100644 index 0000000..19389a4 --- /dev/null +++ b/index.html @@ -0,0 +1,144 @@ + + + + + Struct Visualization + + + + + + + + +
char type[64]
...[32]
char name[64]
...[32]
ssize_t size
ssize_t align
bool isBitfield
char name[64]
...[32]
char alias[64]
...[32]
char* filename
int lineNumber
ssize_t size
?
struct Declaration declarations[16]
...
int numDeclarations
unsigned int length
unsigned int capacity
void* data
struct TableEntry* next
?
char key TABLE_KEY_SIZE
...
ssize_t size
ssize_t align
TableEntry** entries
char* input_stream
char* eof
char* parse_point
char* string_storage
int string_storage_len
char* where_firstchar
char* where_lastchar
long token
double real_number
long int_number
char* string
int string_len
int line_number
int line_offset
\ No newline at end of file diff --git a/main.c b/main.c new file mode 100644 index 0000000..d4171ec --- /dev/null +++ b/main.c @@ -0,0 +1,516 @@ +// +// @TODO +// long-term goals: +// - support C++! +// - support Rust with repr(C) structs to the extent that it is possible! +// - support Golang to the extent that it is possible! +// - support Odin?! +// +// short-term: +// - bitfields! +// - robustness! change [64] byte name fields to pointers! +// - find all files in folder of a given type! +// + + +#define STB_C_LEXER_DEFINITIONS + +#define STB_C_LEX_0_IS_EOF Y // if Y, ends parsing at '\0'; if N, returns '\0' as token +#define STB_C_LEX_USE_STDLIB Y // use strtod,strtol for parsing #s; otherwise inaccurate hack +#define STB_C_LEX_DOLLAR_IDENTIFIER N // allow $ as an identifier character + +#define STB_C_LEX_DEFINE_ALL_TOKEN_NAMES Y // if Y, all CLEX_ token names are defined, even if never returned + // leaving it as N should help you catch config bugs + +#define STB_C_LEX_DISCARD_PREPROCESSOR Y // discard C-preprocessor directives (e.g. after prepocess + // still have #line, #pragma, etc) + +#define STB_C_LEX_MULTILINE_DSTRINGS N // allow newlines in double-quoted strings +#define STB_C_LEX_MULTILINE_SSTRINGS N // allow newlines in single-quoted strings +#define STB_C_LEX_FLOAT_NO_DECIMAL N // allow floats that have no decimal point if they have an exponent + + +#define STB_C_LEX_C_IDENTIFIERS Y // "[_a-zA-Z][_a-zA-Z0-9]*" CLEX_id +#define STB_C_LEX_C_COMMENTS Y // "/* comment */" +#define STB_C_LEX_CPP_COMMENTS Y // "// comment to end of line\n" + +#define STB_C_LEX_INTEGERS_AS_DOUBLES N // parses integers as doubles so they can be larger than 'int', but only if STB_C_LEX_STDLIB==N +#define STB_C_LEX_C_DECIMAL_INTS N // "0|[1-9][0-9]*" CLEX_intlit +#define STB_C_LEX_C_HEX_INTS N // "0x[0-9a-fA-F]+" CLEX_intlit +#define STB_C_LEX_C_OCTAL_INTS N // "[0-7]+" CLEX_intlit +#define STB_C_LEX_C_DECIMAL_FLOATS N // "[0-9]*(.[0-9]*([eE][-+]?[0-9]+)?) CLEX_floatlit +#define STB_C_LEX_C99_HEX_FLOATS N // "0x{hex}+(.{hex}*)?[pP][-+]?{hex}+ CLEX_floatlit +#define STB_C_LEX_C_DQ_STRINGS N // double-quote-delimited strings with escapes CLEX_dqstring +#define STB_C_LEX_C_SQ_STRINGS N // single-quote-delimited strings with escapes CLEX_ssstring +#define STB_C_LEX_C_CHARS N // single-quote-delimited character with escape CLEX_charlits +#define STB_C_LEX_C_COMPARISONS N // "==" CLEX_eq "!=" CLEX_noteq "<=" CLEX_lesseq ">=" CLEX_greatereq +#define STB_C_LEX_C_LOGICAL N // "&&" CLEX_andand "||" CLEX_oror +#define STB_C_LEX_C_SHIFTS N // "<<" CLEX_shl ">>" CLEX_shr +#define STB_C_LEX_C_INCREMENTS N // "++" CLEX_plusplus "--" CLEX_minusminus +#define STB_C_LEX_C_ARROW N // "->" CLEX_arrow +#define STB_C_LEX_EQUAL_ARROW N // "=>" CLEX_eqarrow +#define STB_C_LEX_C_BITWISEEQ N // "&=" CLEX_andeq "|=" CLEX_oreq "^=" CLEX_xoreq +#define STB_C_LEX_C_ARITHEQ N // "+=" CLEX_pluseq "-=" CLEX_minuseq + // "*=" CLEX_muleq "/=" CLEX_diveq "%=" CLEX_modeq + // if both STB_C_LEX_SHIFTS & STB_C_LEX_ARITHEQ: + // "<<=" CLEX_shleq ">>=" CLEX_shreq + +#define STB_C_LEX_PARSE_SUFFIXES N // letters after numbers are parsed as part of those numbers, and must be in suffix list below +#define STB_C_LEX_DECIMAL_SUFFIXES "" // decimal integer suffixes e.g. "uUlL" -- these are returned as-is in string storage +#define STB_C_LEX_HEX_SUFFIXES "" // e.g. "uUlL" +#define STB_C_LEX_OCTAL_SUFFIXES "" // e.g. "uUlL" +#define STB_C_LEX_FLOAT_SUFFIXES "" // + +#define STB_C_LEXER_IMPLEMENTATION +#include "stb_c_lexer.h" + +#include // strtoimax +#include +#include // fread, fseek, ftell +#include // malloc, free +#include // va_start, va_list, va_end +#include +#include // memcmp +#include + + +static inline void die(const char* format, ...) { + va_list args; + va_start(args, format); + vprintf(format, args); + va_end(args); + exit(1); +} + +static inline char* readWholeFile(const char* filepath, size_t *outSize) { + FILE *fp = fopen(filepath, "rb"); + if (fp == NULL) { + die("failed to open file: %s", filepath); + } + fseek(fp, 0, SEEK_END); + size_t size = ftell(fp); + fseek(fp, 0L, SEEK_SET); + char *buffer = (char*) malloc(size + 1); + fread(buffer, sizeof (char), size, fp); + buffer[size] = '\0'; + fclose(fp); + + if (outSize != NULL) *outSize = size; + + return buffer; +} + +static inline bool isWhitespace(char c) { + return c == ' ' || c == '\r' || c == '\n' || c == '\f' || c == '\t'; +} + +static inline char* eatWhitespace(char* input) { + char* orig = input; + char c; + while ((c = *input) != '\0') { + if (!isWhitespace(c)) return input; + input++; + } + return orig; +} + +// de-duplicates whitespace +static inline char* findNthLastCharOccurence(char* string, int length, char c, int n) { + char* out = NULL; + int _n = 0; + for (int i = length - 1; i > 0; i--) { + if (string[i] == c) _n++; + if (_n == n) return string + i; + while (isWhitespace(string[i]) && i > 0) { + i--; + } + } + return out; +} + +static inline int strWrite(char *dest, const char *src, int maxCount) { + int i = 0; + for (; i < maxCount; i++) { + if (src[i] == '\0') { + break; + } + + dest[i] = src[i]; + } + dest[i] = '\0'; + return i; +} +struct Declaration { + char type[64]; + char name[64]; + ssize_t size; + ssize_t align; + bool isBitfield; +}; + +struct StructInfo { + char name[64]; + char alias[64]; + const char *filename; + int lineNumber, lineOffset; + + ssize_t size; + struct Declaration declarations[16]; + int numDeclarations; +}; + +static inline void printStructInfo(struct StructInfo *structInfo) { + printf("%s - %d:%d", structInfo->filename, structInfo->lineNumber, structInfo->lineOffset); + printf(" - %s", structInfo->name[0] == '\0' ? "(anonymous struct)" : structInfo->name); + printf(", %s\n", structInfo->alias[0] == '\0' ? "(c++ style, no typedef alias)" : structInfo->alias); + printf(" - total size: %ld\n", structInfo->size); + for (int i = 0; i < structInfo->numDeclarations; i++) { + struct Declaration *decl = structInfo->declarations + i; + printf("\tdecl name: %s, type: %s, size: %ld, alignment: %ld\n", decl->name, decl->type, decl->size, decl->align); + } +} + +static int capacityAllStructs = 64; +static int numAllStructs = 0; +static struct StructInfo *allStructs; + +void pushStructInfo(struct StructInfo *structInfo) { + if (numAllStructs >= capacityAllStructs) { + capacityAllStructs *= 1.5; + allStructs = realloc(allStructs, sizeof(struct StructInfo) * (capacityAllStructs)); + } + + memcpy((void*) &allStructs[numAllStructs++], (void*) structInfo, sizeof(struct StructInfo)); +} + +#include "table.h" +#include "visualization.h" + + +#define STORE_SIZE 1024*1000 +static const int store_size = STORE_SIZE; +static char store[STORE_SIZE] = { 0 }; +#undef STORE_SIZE + +struct Array { + unsigned int length; + unsigned int capacity; + void* data; +}; +void push(struct Array* array, void* item) { + +} + +void parseType() { + +} + +static inline bool shouldSkipConst(char* nullTerminated) { + // @HACK skip all instances of 'const' + size_t bounds = sizeof("const"); + for (int i = 0; i < bounds; i++) { + char c = nullTerminated[i]; + if (c != "const"[i]) return false; + } + + return true; +} + +static inline void finalizeDeclaration( + char lineBuffer[128], + int lookback, + int numAsterisks, + int numDeclarations, + int arrayVal, + struct StructInfo *structInfo +) { + printf("LINE BUFFER: |%s|, arrayVal: %d\n", lineBuffer, arrayVal); + // we're at the end of a line of declarations. + // we can learn some interesting stuff by looking back now. + char typeBuffer[64] = { 0 }; + char *cursor = findNthLastCharOccurence(lineBuffer, 128, ' ', lookback); + if (cursor == NULL) { + die("panic when finalizing a declaration"); + } + + struct Declaration *decl = structInfo->declarations + structInfo->numDeclarations; + int diff = (int)(cursor - lineBuffer); + int count = strWrite(typeBuffer, lineBuffer, diff); + int multiplier = 1; + if (arrayVal != -1) { + multiplier = arrayVal; + } + + ssize_t totalSize = 0; + TableEntry *entry = lookup(typeTable, typeBuffer); + if (numAsterisks == 0) { + if (entry == NULL) { + // this is likely a new/unknown type in the program. enter it into the type table with an unknown size. + printf("warning: unknown field size and alignment in struct field: %s\n", typeBuffer); + insertPadZeroes(typeTable, typeBuffer, -1, -1); + decl->size = -1; + decl->align = -1; + + } else { + decl->size = entry->size * multiplier; + decl->align = entry->align; + } + } else { + decl->size = sizeof(void*) * multiplier; + decl->align = sizeof(void*); + } + + // we could have multiple declarations (comma separated) + // they will have to be the same type, except for bitfields (kill me) + // so we'll just copy the type from the first decl, and just move the cursor + // to find the other name. + for (int i = 0; i < numDeclarations; i++) { + decl = structInfo->declarations + structInfo->numDeclarations; + totalSize += decl->size; + structInfo->numDeclarations++; + + // write in the type name field. + // for looking up size in the table, we don't want to include the '*' + // but for storing the type name of the decl, we probably do. + for (int i = 0; i < numAsterisks; i++) { + count += strWrite(typeBuffer + count, "*", 1); + } + strWrite(decl->type, typeBuffer, 64); + + // figure out the name of this field. + char* nameStart; + char c; + while ((c = *cursor) != '\0') { + if (!isWhitespace(c)) { + nameStart = cursor; + break; + } + cursor++; + } + char* nameEnd; + while ((c = *cursor) != '\0') { + if (isWhitespace(c)) { + nameEnd = cursor; + break; + } + cursor++; + } + int count = strWrite(decl->name, nameStart, (int) (nameEnd-nameStart)); + if (arrayVal != -1) { + snprintf(decl->name + count, 64 - count, "[%d]", arrayVal); + } + } + + structInfo->size += totalSize; +} + +void parseStructDeclaration(struct StructInfo *structInfo, stb_lexer *lexer) { + bool somethingWasConst = false; + bool numDeclarations = 1; + int numAsterisks = 0; + int soFar = 0; + int lookback = 2; + + // for parsing things like 'char name[12]' + char* lastOpenBracket = NULL; + int arrayVal = -1; + + char lineBuffer[128] = { 0 }; + do { + switch (lexer->token) { + case 260: { + // we don't record const because it's annoying. + if (shouldSkipConst(lexer->string)) { somethingWasConst = true; break; } + + soFar += strWrite(lineBuffer + soFar, lexer->string, 64); + soFar += strWrite(lineBuffer + soFar, " ", 1); + } break; + + case ',': + numDeclarations++; + lookback++; + break; + + case '*': + numAsterisks++; + break; + + case '[': + lastOpenBracket = lexer->where_firstchar; + break; + + case ']': + arrayVal = strtoimax(lastOpenBracket + 1, &lexer->where_firstchar, 10); + if (arrayVal == 0) arrayVal = -1; + break; + + case ';': { + finalizeDeclaration(lineBuffer, lookback, numAsterisks, numDeclarations, arrayVal, structInfo); + } return; + } + } while (stb_c_lexer_get_token(lexer) != 0); +} + +// +// the token in the lexer is a 'struct' keyword. we want to get the identifiers, and the nested declarations. +// +// ::= { {}+ } +// | { {}+ } +// | +void parseStruct(const char *filename, stb_lexer *lexer, bool isClass) { + int result = stb_c_lexer_get_token(lexer); + if (result == 0) die("failed to parse struct"); + + stb_lex_location location = { 0 }; + stb_c_lexer_get_location(lexer, lexer->where_firstchar, &location); + + struct StructInfo structInfo = { 0 }; + structInfo.filename = filename; + structInfo.lineNumber = location.line_number; + structInfo.lineOffset = location.line_offset; + structInfo.numDeclarations = 0; + structInfo.size = 0; + + switch (lexer->token) { + case 260: { + char tempNameBuffer[64] = { 0 }; + strWrite(tempNameBuffer, lexer->string, 64); + + // maybe a named struct. + result = stb_c_lexer_get_token(lexer); + if (result == 0) die("failed to parse struct"); + + if (lexer->token == '{') { + strWrite(structInfo.name, tempNameBuffer, 64); + + } else { + return; + } + } break; + + case '{': {} break; + + default: return; + } + + int balancer = 1; + while (stb_c_lexer_get_token(lexer) != 0) { + switch (lexer->token) { + case '}': if (--balancer == 0) goto checkTypeAlias; + case '{': ++balancer; break; + + case 260: { + parseStructDeclaration(&structInfo, lexer); + } break; + } + } + +checkTypeAlias: + result = stb_c_lexer_get_token(lexer); + if (result == 0) die("unexpected end of stream when parsing a struct"); + + if (lexer->token == 260) { + // we have a type alias for the struct. + // @NOTE @TODO this could also conceivably by the __attribute__ thingy: https://stackoverflow.com/questions/14671253/is-there-a-gcc-keyword-to-allow-structure-reordering + strWrite(structInfo.alias, lexer->string, 64); + } + + pushStructInfo(&structInfo); +} + +void parseTypedef(stb_lexer *lexer) { + +} + + +void parseFile(const char *filepath) { + printf("parsing file %s...\n", filepath); + size_t size; + char *buffer = readWholeFile(filepath, &size); + + stb_lexer lexer; + stb_c_lexer_init(&lexer, buffer, buffer + size + 1, store, store_size); + + while (stb_c_lexer_get_token(&lexer) != 0) { + switch (lexer.token) { + case 260: { // token is a string + const uint64_t LE_STRUCT = 0x0000746375727473U; + const uint64_t LE_CLASS = 0x0000007373616C63U; + const uint64_t LE_TYPEDEF = 0x0066656465707974U; + + uint64_t t = *((uint64_t*)(lexer.string)); + if ((t ) == LE_TYPEDEF) { parseTypedef(&lexer); } + else if ((t & 0x00FFFFFFFFFFFFFF) == LE_STRUCT) { parseStruct(filepath, &lexer, false); } + else if ((t & 0x0000FFFFFFFFFFFF) == LE_CLASS) { parseStruct(filepath, &lexer, true); } + } break; + } + } + free(buffer); + memset(store, 0, store_size); +} + +// http://www.catb.org/esr/structure-packing/ +int main(int argc, char* argv[]) { + // @TODO check for flag -fshort-enums + allStructs = malloc(sizeof(struct StructInfo) * capacityAllStructs); + typeTable = initTable(); + + if (CHAR_BIT != 8) { + printf("warning - CHAR_BIT != 8\n"); + } + + if (false) { + printf("CHAR_BIT = %d\n", CHAR_BIT); + printf("MB_LEN_MAX = %d\n\n", MB_LEN_MAX); + + printf("CHAR_MIN = %+d\n", CHAR_MIN); + printf("CHAR_MAX = %+d\n", CHAR_MAX); + printf("SCHAR_MIN = %+d\n", SCHAR_MIN); + printf("SCHAR_MAX = %+d\n", SCHAR_MAX); + printf("UCHAR_MAX = %u\n\n", UCHAR_MAX); + + printf("SHRT_MIN = %+d\n", SHRT_MIN); + printf("SHRT_MAX = %+d\n", SHRT_MAX); + printf("USHRT_MAX = %u\n\n", USHRT_MAX); + + printf("INT_MIN = %+d\n", INT_MIN); + printf("INT_MAX = %+d\n", INT_MAX); + printf("UINT_MAX = %u\n\n", UINT_MAX); + + printf("LONG_MIN = %+ld\n", LONG_MIN); + printf("LONG_MAX = %+ld\n", LONG_MAX); + printf("ULONG_MAX = %lu\n\n", ULONG_MAX); + + printf("LLONG_MIN = %+lld\n", LLONG_MIN); + printf("LLONG_MAX = %+lld\n", LLONG_MAX); + printf("ULLONG_MAX = %llu\n\n", ULLONG_MAX); + + printf("PTRDIFF_MIN = %td\n", PTRDIFF_MIN); + printf("PTRDIFF_MAX = %+td\n", PTRDIFF_MAX); + printf("SIZE_MAX = %zu\n", SIZE_MAX); + printf("SIG_ATOMIC_MIN = %+jd\n",(intmax_t)SIG_ATOMIC_MIN); + printf("SIG_ATOMIC_MAX = %+jd\n",(intmax_t)SIG_ATOMIC_MAX); + printf("WCHAR_MIN = %+jd\n",(intmax_t)WCHAR_MIN); + printf("WCHAR_MAX = %+jd\n",(intmax_t)WCHAR_MAX); + printf("WINT_MIN = %jd\n", (intmax_t)WINT_MIN); + printf("WINT_MAX = %jd\n", (intmax_t)WINT_MAX); + } + + if (argc < 2) { + //die("provide a list of c/c++ files and/or headers to anaylze."); + parseFile(__FILE__); + parseFile("table.h"); + parseFile("visualization.h"); + parseFile("stb_c_lexer.h"); + } + + for (int i = 1; i < argc; i++) { + const char *filepath = argv[i]; + parseFile(filepath); + } + + outputHtml(); + + return 0; +} + diff --git a/stb_c_lexer.h b/stb_c_lexer.h new file mode 100644 index 0000000..bf89dca --- /dev/null +++ b/stb_c_lexer.h @@ -0,0 +1,940 @@ +// stb_c_lexer.h - v0.12 - public domain Sean Barrett 2013 +// lexer for making little C-like languages with recursive-descent parsers +// +// This file provides both the interface and the implementation. +// To instantiate the implementation, +// #define STB_C_LEXER_IMPLEMENTATION +// in *ONE* source file, before #including this file. +// +// The default configuration is fairly close to a C lexer, although +// suffixes on integer constants are not handled (you can override this). +// +// History: +// 0.12 fix compilation bug for NUL support; better support separate inclusion +// 0.11 fix clang static analysis warning +// 0.10 fix warnings +// 0.09 hex floats, no-stdlib fixes +// 0.08 fix bad pointer comparison +// 0.07 fix mishandling of hexadecimal constants parsed by strtol +// 0.06 fix missing next character after ending quote mark (Andreas Fredriksson) +// 0.05 refixed get_location because github version had lost the fix +// 0.04 fix octal parsing bug +// 0.03 added STB_C_LEX_DISCARD_PREPROCESSOR option +// refactor API to simplify (only one struct instead of two) +// change literal enum names to have 'lit' at the end +// 0.02 first public release +// +// Status: +// - haven't tested compiling as C++ +// - haven't tested the float parsing path +// - haven't tested the non-default-config paths (e.g. non-stdlib) +// - only tested default-config paths by eyeballing output of self-parse +// +// - haven't implemented multiline strings +// - haven't implemented octal/hex character constants +// - haven't implemented support for unicode CLEX_char +// - need to expand error reporting so you don't just get "CLEX_parse_error" +// +// Contributors: +// Arpad Goretity (bugfix) +// Alan Hickman (hex floats) +// +// LICENSE +// +// See end of file for license information. + +#ifdef STB_C_LEXER_IMPLEMENTATION +#ifndef STB_C_LEXER_DEFINITIONS +// to change the default parsing rules, copy the following lines +// into your C/C++ file *before* including this, and then replace +// the Y's with N's for the ones you don't want. This needs to be +// set to the same values for every place in your program where +// stb_c_lexer.h is included. +// --BEGIN-- + +#if defined(Y) || defined(N) +#error "Can only use stb_c_lexer in contexts where the preprocessor symbols 'Y' and 'N' are not defined" +#endif + +#define STB_C_LEX_C_DECIMAL_INTS Y // "0|[1-9][0-9]*" CLEX_intlit +#define STB_C_LEX_C_HEX_INTS Y // "0x[0-9a-fA-F]+" CLEX_intlit +#define STB_C_LEX_C_OCTAL_INTS Y // "[0-7]+" CLEX_intlit +#define STB_C_LEX_C_DECIMAL_FLOATS Y // "[0-9]*(.[0-9]*([eE][-+]?[0-9]+)?) CLEX_floatlit +#define STB_C_LEX_C99_HEX_FLOATS N // "0x{hex}+(.{hex}*)?[pP][-+]?{hex}+ CLEX_floatlit +#define STB_C_LEX_C_IDENTIFIERS Y // "[_a-zA-Z][_a-zA-Z0-9]*" CLEX_id +#define STB_C_LEX_C_DQ_STRINGS Y // double-quote-delimited strings with escapes CLEX_dqstring +#define STB_C_LEX_C_SQ_STRINGS N // single-quote-delimited strings with escapes CLEX_ssstring +#define STB_C_LEX_C_CHARS Y // single-quote-delimited character with escape CLEX_charlits +#define STB_C_LEX_C_COMMENTS Y // "/* comment */" +#define STB_C_LEX_CPP_COMMENTS Y // "// comment to end of line\n" +#define STB_C_LEX_C_COMPARISONS Y // "==" CLEX_eq "!=" CLEX_noteq "<=" CLEX_lesseq ">=" CLEX_greatereq +#define STB_C_LEX_C_LOGICAL Y // "&&" CLEX_andand "||" CLEX_oror +#define STB_C_LEX_C_SHIFTS Y // "<<" CLEX_shl ">>" CLEX_shr +#define STB_C_LEX_C_INCREMENTS Y // "++" CLEX_plusplus "--" CLEX_minusminus +#define STB_C_LEX_C_ARROW Y // "->" CLEX_arrow +#define STB_C_LEX_EQUAL_ARROW N // "=>" CLEX_eqarrow +#define STB_C_LEX_C_BITWISEEQ Y // "&=" CLEX_andeq "|=" CLEX_oreq "^=" CLEX_xoreq +#define STB_C_LEX_C_ARITHEQ Y // "+=" CLEX_pluseq "-=" CLEX_minuseq + // "*=" CLEX_muleq "/=" CLEX_diveq "%=" CLEX_modeq + // if both STB_C_LEX_SHIFTS & STB_C_LEX_ARITHEQ: + // "<<=" CLEX_shleq ">>=" CLEX_shreq + +#define STB_C_LEX_PARSE_SUFFIXES N // letters after numbers are parsed as part of those numbers, and must be in suffix list below +#define STB_C_LEX_DECIMAL_SUFFIXES "" // decimal integer suffixes e.g. "uUlL" -- these are returned as-is in string storage +#define STB_C_LEX_HEX_SUFFIXES "" // e.g. "uUlL" +#define STB_C_LEX_OCTAL_SUFFIXES "" // e.g. "uUlL" +#define STB_C_LEX_FLOAT_SUFFIXES "" // + +#define STB_C_LEX_0_IS_EOF N // if Y, ends parsing at '\0'; if N, returns '\0' as token +#define STB_C_LEX_INTEGERS_AS_DOUBLES N // parses integers as doubles so they can be larger than 'int', but only if STB_C_LEX_STDLIB==N +#define STB_C_LEX_MULTILINE_DSTRINGS N // allow newlines in double-quoted strings +#define STB_C_LEX_MULTILINE_SSTRINGS N // allow newlines in single-quoted strings +#define STB_C_LEX_USE_STDLIB Y // use strtod,strtol for parsing #s; otherwise inaccurate hack +#define STB_C_LEX_DOLLAR_IDENTIFIER Y // allow $ as an identifier character +#define STB_C_LEX_FLOAT_NO_DECIMAL Y // allow floats that have no decimal point if they have an exponent + +#define STB_C_LEX_DEFINE_ALL_TOKEN_NAMES N // if Y, all CLEX_ token names are defined, even if never returned + // leaving it as N should help you catch config bugs + +#define STB_C_LEX_DISCARD_PREPROCESSOR Y // discard C-preprocessor directives (e.g. after prepocess + // still have #line, #pragma, etc) + +//#define STB_C_LEX_ISWHITE(str) ... // return length in bytes of whitespace characters if first char is whitespace + +#define STB_C_LEXER_DEFINITIONS // This line prevents the header file from replacing your definitions +// --END-- +#endif +#endif + +#ifndef INCLUDE_STB_C_LEXER_H +#define INCLUDE_STB_C_LEXER_H + +typedef struct +{ + // lexer variables + char *input_stream; + char *eof; + char *parse_point; + char *string_storage; + int string_storage_len; + + // lexer parse location for error messages + char *where_firstchar; + char *where_lastchar; + + // lexer token variables + long token; + double real_number; + long int_number; + char *string; + int string_len; +} stb_lexer; + +typedef struct +{ + int line_number; + int line_offset; +} stb_lex_location; + +#ifdef __cplusplus +extern "C" { +#endif + +extern void stb_c_lexer_init(stb_lexer *lexer, const char *input_stream, const char *input_stream_end, char *string_store, int store_length); +// this function initialize the 'lexer' structure +// Input: +// - input_stream points to the file to parse, loaded into memory +// - input_stream_end points to the end of the file, or NULL if you use 0-for-EOF +// - string_store is storage the lexer can use for storing parsed strings and identifiers +// - store_length is the length of that storage + +extern int stb_c_lexer_get_token(stb_lexer *lexer); +// this function returns non-zero if a token is parsed, or 0 if at EOF +// Output: +// - lexer->token is the token ID, which is unicode code point for a single-char token, < 0 for a multichar or eof or error +// - lexer->real_number is a double constant value for CLEX_floatlit, or CLEX_intlit if STB_C_LEX_INTEGERS_AS_DOUBLES +// - lexer->int_number is an integer constant for CLEX_intlit if !STB_C_LEX_INTEGERS_AS_DOUBLES, or character for CLEX_charlit +// - lexer->string is a 0-terminated string for CLEX_dqstring or CLEX_sqstring or CLEX_identifier +// - lexer->string_len is the byte length of lexer->string + +extern void stb_c_lexer_get_location(const stb_lexer *lexer, const char *where, stb_lex_location *loc); +// this inefficient function returns the line number and character offset of a +// given location in the file as returned by stb_lex_token. Because it's inefficient, +// you should only call it for errors, not for every token. +// For error messages of invalid tokens, you typically want the location of the start +// of the token (which caused the token to be invalid). For bugs involving legit +// tokens, you can report the first or the range. +// Output: +// - loc->line_number is the line number in the file, counting from 1, of the location +// - loc->line_offset is the char-offset in the line, counting from 0, of the location + + +#ifdef __cplusplus +} +#endif + +enum +{ + CLEX_eof = 256, + CLEX_parse_error, + CLEX_intlit , + CLEX_floatlit , + CLEX_id , + CLEX_dqstring , + CLEX_sqstring , + CLEX_charlit , + CLEX_eq , + CLEX_noteq , + CLEX_lesseq , + CLEX_greatereq , + CLEX_andand , + CLEX_oror , + CLEX_shl , + CLEX_shr , + CLEX_plusplus , + CLEX_minusminus , + CLEX_pluseq , + CLEX_minuseq , + CLEX_muleq , + CLEX_diveq , + CLEX_modeq , + CLEX_andeq , + CLEX_oreq , + CLEX_xoreq , + CLEX_arrow , + CLEX_eqarrow , + CLEX_shleq, CLEX_shreq, + + CLEX_first_unused_token + +}; +#endif // INCLUDE_STB_C_LEXER_H + +#ifdef STB_C_LEXER_IMPLEMENTATION + +// Hacky definitions so we can easily #if on them +#define Y(x) 1 +#define N(x) 0 + +#if STB_C_LEX_INTEGERS_AS_DOUBLES(x) +typedef double stb__clex_int; +#define intfield real_number +#define STB__clex_int_as_double +#else +typedef long stb__clex_int; +#define intfield int_number +#endif + +// Convert these config options to simple conditional #defines so we can more +// easily test them once we've change the meaning of Y/N + +#if STB_C_LEX_PARSE_SUFFIXES(x) +#define STB__clex_parse_suffixes +#endif + +#if STB_C_LEX_C99_HEX_FLOATS(x) +#define STB__clex_hex_floats +#endif + +#if STB_C_LEX_C_HEX_INTS(x) +#define STB__clex_hex_ints +#endif + +#if STB_C_LEX_C_DECIMAL_INTS(x) +#define STB__clex_decimal_ints +#endif + +#if STB_C_LEX_C_OCTAL_INTS(x) +#define STB__clex_octal_ints +#endif + +#if STB_C_LEX_C_DECIMAL_FLOATS(x) +#define STB__clex_decimal_floats +#endif + +#if STB_C_LEX_DISCARD_PREPROCESSOR(x) +#define STB__clex_discard_preprocessor +#endif + +#if STB_C_LEX_USE_STDLIB(x) && (!defined(STB__clex_hex_floats) || __STDC_VERSION__ >= 199901L) +#define STB__CLEX_use_stdlib +#include +#endif + +// Now for the rest of the file we'll use the basic definition where +// where Y expands to its contents and N expands to nothing +#undef Y +#define Y(a) a +#undef N +#define N(a) + +// API function +void stb_c_lexer_init(stb_lexer *lexer, const char *input_stream, const char *input_stream_end, char *string_store, int store_length) +{ + lexer->input_stream = (char *) input_stream; + lexer->eof = (char *) input_stream_end; + lexer->parse_point = (char *) input_stream; + lexer->string_storage = string_store; + lexer->string_storage_len = store_length; +} + +// API function +void stb_c_lexer_get_location(const stb_lexer *lexer, const char *where, stb_lex_location *loc) +{ + char *p = lexer->input_stream; + int line_number = 1; + int char_offset = 0; + while (*p && p < where) { + if (*p == '\n' || *p == '\r') { + p += (p[0]+p[1] == '\r'+'\n' ? 2 : 1); // skip newline + line_number += 1; + char_offset = 0; + } else { + ++p; + ++char_offset; + } + } + loc->line_number = line_number; + loc->line_offset = char_offset; +} + +// main helper function for returning a parsed token +static int stb__clex_token(stb_lexer *lexer, int token, char *start, char *end) +{ + lexer->token = token; + lexer->where_firstchar = start; + lexer->where_lastchar = end; + lexer->parse_point = end+1; + return 1; +} + +// helper function for returning eof +static int stb__clex_eof(stb_lexer *lexer) +{ + lexer->token = CLEX_eof; + return 0; +} + +static int stb__clex_iswhite(int x) +{ + return x == ' ' || x == '\t' || x == '\r' || x == '\n' || x == '\f'; +} + +static const char *stb__strchr(const char *str, int ch) +{ + for (; *str; ++str) + if (*str == ch) + return str; + return 0; +} + +// parse suffixes at the end of a number +static int stb__clex_parse_suffixes(stb_lexer *lexer, long tokenid, char *start, char *cur, const char *suffixes) +{ + #ifdef STB__clex_parse_suffixes + lexer->string = lexer->string_storage; + lexer->string_len = 0; + + while ((*cur >= 'a' && *cur <= 'z') || (*cur >= 'A' && *cur <= 'Z')) { + if (stb__strchr(suffixes, *cur) == 0) + return stb__clex_token(lexer, CLEX_parse_error, start, cur); + if (lexer->string_len+1 >= lexer->string_storage_len) + return stb__clex_token(lexer, CLEX_parse_error, start, cur); + lexer->string[lexer->string_len++] = *cur++; + } + #else + suffixes = suffixes; // attempt to suppress warnings + #endif + return stb__clex_token(lexer, tokenid, start, cur-1); +} + +#ifndef STB__CLEX_use_stdlib +static double stb__clex_pow(double base, unsigned int exponent) +{ + double value=1; + for ( ; exponent; exponent >>= 1) { + if (exponent & 1) + value *= base; + base *= base; + } + return value; +} + +static double stb__clex_parse_float(char *p, char **q) +{ + char *s = p; + double value=0; + int base=10; + int exponent=0; + +#ifdef STB__clex_hex_floats + if (*p == '0') { + if (p[1] == 'x' || p[1] == 'X') { + base=16; + p += 2; + } + } +#endif + + for (;;) { + if (*p >= '0' && *p <= '9') + value = value*base + (*p++ - '0'); +#ifdef STB__clex_hex_floats + else if (base == 16 && *p >= 'a' && *p <= 'f') + value = value*base + 10 + (*p++ - 'a'); + else if (base == 16 && *p >= 'A' && *p <= 'F') + value = value*base + 10 + (*p++ - 'A'); +#endif + else + break; + } + + if (*p == '.') { + double pow, addend = 0; + ++p; + for (pow=1; ; pow*=base) { + if (*p >= '0' && *p <= '9') + addend = addend*base + (*p++ - '0'); +#ifdef STB__clex_hex_floats + else if (base == 16 && *p >= 'a' && *p <= 'f') + addend = addend*base + 10 + (*p++ - 'a'); + else if (base == 16 && *p >= 'A' && *p <= 'F') + addend = addend*base + 10 + (*p++ - 'A'); +#endif + else + break; + } + value += addend / pow; + } +#ifdef STB__clex_hex_floats + if (base == 16) { + // exponent required for hex float literal + if (*p != 'p' && *p != 'P') { + *q = s; + return 0; + } + exponent = 1; + } else +#endif + exponent = (*p == 'e' || *p == 'E'); + + if (exponent) { + int sign = p[1] == '-'; + unsigned int exponent=0; + double power=1; + ++p; + if (*p == '-' || *p == '+') + ++p; + while (*p >= '0' && *p <= '9') + exponent = exponent*10 + (*p++ - '0'); + +#ifdef STB__clex_hex_floats + if (base == 16) + power = stb__clex_pow(2, exponent); + else +#endif + power = stb__clex_pow(10, exponent); + if (sign) + value /= power; + else + value *= power; + } + *q = p; + return value; +} +#endif + +static int stb__clex_parse_char(char *p, char **q) +{ + if (*p == '\\') { + *q = p+2; // tentatively guess we'll parse two characters + switch(p[1]) { + case '\\': return '\\'; + case '\'': return '\''; + case '"': return '"'; + case 't': return '\t'; + case 'f': return '\f'; + case 'n': return '\n'; + case 'r': return '\r'; + case '0': return '\0'; // @TODO ocatal constants + case 'x': case 'X': return -1; // @TODO hex constants + case 'u': return -1; // @TODO unicode constants + } + } + *q = p+1; + return (unsigned char) *p; +} + +static int stb__clex_parse_string(stb_lexer *lexer, char *p, int type) +{ + char *start = p; + char delim = *p++; // grab the " or ' for later matching + char *out = lexer->string_storage; + char *outend = lexer->string_storage + lexer->string_storage_len; + while (*p != delim) { + int n; + if (*p == '\\') { + char *q; + n = stb__clex_parse_char(p, &q); + if (n < 0) + return stb__clex_token(lexer, CLEX_parse_error, start, q); + p = q; + } else { + // @OPTIMIZE: could speed this up by looping-while-not-backslash + n = (unsigned char) *p++; + } + if (out+1 > outend) + return stb__clex_token(lexer, CLEX_parse_error, start, p); + // @TODO expand unicode escapes to UTF8 + *out++ = (char) n; + } + *out = 0; + lexer->string = lexer->string_storage; + lexer->string_len = (int) (out - lexer->string_storage); + return stb__clex_token(lexer, type, start, p); +} + +int stb_c_lexer_get_token(stb_lexer *lexer) +{ + char *p = lexer->parse_point; + + // skip whitespace and comments + for (;;) { + #ifdef STB_C_LEX_ISWHITE + while (p != lexer->stream_end) { + int n; + n = STB_C_LEX_ISWHITE(p); + if (n == 0) break; + if (lexer->eof && lexer->eof - lexer->parse_point < n) + return stb__clex_token(tok, CLEX_parse_error, p,lexer->eof-1); + p += n; + } + #else + while (p != lexer->eof && stb__clex_iswhite(*p)) + ++p; + #endif + + STB_C_LEX_CPP_COMMENTS( + if (p != lexer->eof && p[0] == '/' && p[1] == '/') { + while (p != lexer->eof && *p != '\r' && *p != '\n') + ++p; + continue; + } + ) + + STB_C_LEX_C_COMMENTS( + if (p != lexer->eof && p[0] == '/' && p[1] == '*') { + char *start = p; + p += 2; + while (p != lexer->eof && (p[0] != '*' || p[1] != '/')) + ++p; + if (p == lexer->eof) + return stb__clex_token(lexer, CLEX_parse_error, start, p-1); + p += 2; + continue; + } + ) + + #ifdef STB__clex_discard_preprocessor + // @TODO this discards everything after a '#', regardless + // of where in the line the # is, rather than requiring it + // be at the start. (because this parser doesn't otherwise + // check for line breaks!) + if (p != lexer->eof && p[0] == '#') { + while (p != lexer->eof && *p != '\r' && *p != '\n') + ++p; + continue; + } + #endif + + break; + } + + if (p == lexer->eof) + return stb__clex_eof(lexer); + + switch (*p) { + default: + if ( (*p >= 'a' && *p <= 'z') + || (*p >= 'A' && *p <= 'Z') + || *p == '_' || (unsigned char) *p >= 128 // >= 128 is UTF8 char + STB_C_LEX_DOLLAR_IDENTIFIER( || *p == '$' ) ) + { + int n = 0; + lexer->string = lexer->string_storage; + lexer->string_len = n; + do { + if (n+1 >= lexer->string_storage_len) + return stb__clex_token(lexer, CLEX_parse_error, p, p+n); + lexer->string[n] = p[n]; + ++n; + } while ( + (p[n] >= 'a' && p[n] <= 'z') + || (p[n] >= 'A' && p[n] <= 'Z') + || (p[n] >= '0' && p[n] <= '9') // allow digits in middle of identifier + || p[n] == '_' || (unsigned char) p[n] >= 128 + STB_C_LEX_DOLLAR_IDENTIFIER( || p[n] == '$' ) + ); + lexer->string[n] = 0; + return stb__clex_token(lexer, CLEX_id, p, p+n-1); + } + + // check for EOF + STB_C_LEX_0_IS_EOF( + if (*p == 0) + return stb__clex_eof(lexer); + ) + + single_char: + // not an identifier, return the character as itself + return stb__clex_token(lexer, *p, p, p); + + case '+': + if (p+1 != lexer->eof) { + STB_C_LEX_C_INCREMENTS(if (p[1] == '+') return stb__clex_token(lexer, CLEX_plusplus, p,p+1);) + STB_C_LEX_C_ARITHEQ( if (p[1] == '=') return stb__clex_token(lexer, CLEX_pluseq , p,p+1);) + } + goto single_char; + case '-': + if (p+1 != lexer->eof) { + STB_C_LEX_C_INCREMENTS(if (p[1] == '-') return stb__clex_token(lexer, CLEX_minusminus, p,p+1);) + STB_C_LEX_C_ARITHEQ( if (p[1] == '=') return stb__clex_token(lexer, CLEX_minuseq , p,p+1);) + STB_C_LEX_C_ARROW( if (p[1] == '>') return stb__clex_token(lexer, CLEX_arrow , p,p+1);) + } + goto single_char; + case '&': + if (p+1 != lexer->eof) { + STB_C_LEX_C_LOGICAL( if (p[1] == '&') return stb__clex_token(lexer, CLEX_andand, p,p+1);) + STB_C_LEX_C_BITWISEEQ(if (p[1] == '=') return stb__clex_token(lexer, CLEX_andeq , p,p+1);) + } + goto single_char; + case '|': + if (p+1 != lexer->eof) { + STB_C_LEX_C_LOGICAL( if (p[1] == '|') return stb__clex_token(lexer, CLEX_oror, p,p+1);) + STB_C_LEX_C_BITWISEEQ(if (p[1] == '=') return stb__clex_token(lexer, CLEX_oreq, p,p+1);) + } + goto single_char; + case '=': + if (p+1 != lexer->eof) { + STB_C_LEX_C_COMPARISONS(if (p[1] == '=') return stb__clex_token(lexer, CLEX_eq, p,p+1);) + STB_C_LEX_EQUAL_ARROW( if (p[1] == '>') return stb__clex_token(lexer, CLEX_eqarrow, p,p+1);) + } + goto single_char; + case '!': + STB_C_LEX_C_COMPARISONS(if (p+1 != lexer->eof && p[1] == '=') return stb__clex_token(lexer, CLEX_noteq, p,p+1);) + goto single_char; + case '^': + STB_C_LEX_C_BITWISEEQ(if (p+1 != lexer->eof && p[1] == '=') return stb__clex_token(lexer, CLEX_xoreq, p,p+1)); + goto single_char; + case '%': + STB_C_LEX_C_ARITHEQ(if (p+1 != lexer->eof && p[1] == '=') return stb__clex_token(lexer, CLEX_modeq, p,p+1)); + goto single_char; + case '*': + STB_C_LEX_C_ARITHEQ(if (p+1 != lexer->eof && p[1] == '=') return stb__clex_token(lexer, CLEX_muleq, p,p+1)); + goto single_char; + case '/': + STB_C_LEX_C_ARITHEQ(if (p+1 != lexer->eof && p[1] == '=') return stb__clex_token(lexer, CLEX_diveq, p,p+1)); + goto single_char; + case '<': + if (p+1 != lexer->eof) { + STB_C_LEX_C_COMPARISONS(if (p[1] == '=') return stb__clex_token(lexer, CLEX_lesseq, p,p+1);) + STB_C_LEX_C_SHIFTS( if (p[1] == '<') { + STB_C_LEX_C_ARITHEQ(if (p+2 != lexer->eof && p[2] == '=') + return stb__clex_token(lexer, CLEX_shleq, p,p+2);) + return stb__clex_token(lexer, CLEX_shl, p,p+1); + } + ) + } + goto single_char; + case '>': + if (p+1 != lexer->eof) { + STB_C_LEX_C_COMPARISONS(if (p[1] == '=') return stb__clex_token(lexer, CLEX_greatereq, p,p+1);) + STB_C_LEX_C_SHIFTS( if (p[1] == '>') { + STB_C_LEX_C_ARITHEQ(if (p+2 != lexer->eof && p[2] == '=') + return stb__clex_token(lexer, CLEX_shreq, p,p+2);) + return stb__clex_token(lexer, CLEX_shr, p,p+1); + } + ) + } + goto single_char; + + case '"': + STB_C_LEX_C_DQ_STRINGS(return stb__clex_parse_string(lexer, p, CLEX_dqstring);) + goto single_char; + case '\'': + STB_C_LEX_C_SQ_STRINGS(return stb__clex_parse_string(lexer, p, CLEX_sqstring);) + STB_C_LEX_C_CHARS( + { + char *start = p; + lexer->int_number = stb__clex_parse_char(p+1, &p); + if (lexer->int_number < 0) + return stb__clex_token(lexer, CLEX_parse_error, start,start); + if (p == lexer->eof || *p != '\'') + return stb__clex_token(lexer, CLEX_parse_error, start,p); + return stb__clex_token(lexer, CLEX_charlit, start, p+1); + }) + goto single_char; + + case '0': + #if defined(STB__clex_hex_ints) || defined(STB__clex_hex_floats) + if (p+1 != lexer->eof) { + if (p[1] == 'x' || p[1] == 'X') { + char *q; + + #ifdef STB__clex_hex_floats + for (q=p+2; + q != lexer->eof && ((*q >= '0' && *q <= '9') || (*q >= 'a' && *q <= 'f') || (*q >= 'A' && *q <= 'F')); + ++q); + if (q != lexer->eof) { + if (*q == '.' STB_C_LEX_FLOAT_NO_DECIMAL(|| *q == 'p' || *q == 'P')) { + #ifdef STB__CLEX_use_stdlib + lexer->real_number = strtod((char *) p, (char**) &q); + #else + lexer->real_number = stb__clex_parse_float(p, &q); + #endif + + if (p == q) + return stb__clex_token(lexer, CLEX_parse_error, p,q); + return stb__clex_parse_suffixes(lexer, CLEX_floatlit, p,q, STB_C_LEX_FLOAT_SUFFIXES); + + } + } + #endif // STB__CLEX_hex_floats + + #ifdef STB__clex_hex_ints + #ifdef STB__CLEX_use_stdlib + lexer->int_number = strtol((char *) p, (char **) &q, 16); + #else + { + stb__clex_int n=0; + for (q=p+2; q != lexer->eof; ++q) { + if (*q >= '0' && *q <= '9') + n = n*16 + (*q - '0'); + else if (*q >= 'a' && *q <= 'f') + n = n*16 + (*q - 'a') + 10; + else if (*q >= 'A' && *q <= 'F') + n = n*16 + (*q - 'A') + 10; + else + break; + } + lexer->int_number = n; + } + #endif + if (q == p+2) + return stb__clex_token(lexer, CLEX_parse_error, p-2,p-1); + return stb__clex_parse_suffixes(lexer, CLEX_intlit, p,q, STB_C_LEX_HEX_SUFFIXES); + #endif + } + } + #endif // defined(STB__clex_hex_ints) || defined(STB__clex_hex_floats) + // can't test for octal because we might parse '0.0' as float or as '0' '.' '0', + // so have to do float first + + /* FALL THROUGH */ + case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': + + #ifdef STB__clex_decimal_floats + { + char *q = p; + while (q != lexer->eof && (*q >= '0' && *q <= '9')) + ++q; + if (q != lexer->eof) { + if (*q == '.' STB_C_LEX_FLOAT_NO_DECIMAL(|| *q == 'e' || *q == 'E')) { + #ifdef STB__CLEX_use_stdlib + lexer->real_number = strtod((char *) p, (char**) &q); + #else + lexer->real_number = stb__clex_parse_float(p, &q); + #endif + + return stb__clex_parse_suffixes(lexer, CLEX_floatlit, p,q, STB_C_LEX_FLOAT_SUFFIXES); + + } + } + } + #endif // STB__clex_decimal_floats + + #ifdef STB__clex_octal_ints + if (p[0] == '0') { + char *q = p; + #ifdef STB__CLEX_use_stdlib + lexer->int_number = strtol((char *) p, (char **) &q, 8); + #else + stb__clex_int n=0; + while (q != lexer->eof) { + if (*q >= '0' && *q <= '7') + n = n*8 + (*q - '0'); + else + break; + ++q; + } + if (q != lexer->eof && (*q == '8' || *q=='9')) + return stb__clex_token(lexer, CLEX_parse_error, p, q); + lexer->int_number = n; + #endif + return stb__clex_parse_suffixes(lexer, CLEX_intlit, p,q, STB_C_LEX_OCTAL_SUFFIXES); + } + #endif // STB__clex_octal_ints + + #ifdef STB__clex_decimal_ints + { + char *q = p; + #ifdef STB__CLEX_use_stdlib + lexer->int_number = strtol((char *) p, (char **) &q, 10); + #else + stb__clex_int n=0; + while (q != lexer->eof) { + if (*q >= '0' && *q <= '9') + n = n*10 + (*q - '0'); + else + break; + ++q; + } + lexer->int_number = n; + #endif + return stb__clex_parse_suffixes(lexer, CLEX_intlit, p,q, STB_C_LEX_OCTAL_SUFFIXES); + } + #endif // STB__clex_decimal_ints + goto single_char; + } +} +#endif // STB_C_LEXER_IMPLEMENTATION + +#ifdef STB_C_LEXER_SELF_TEST +#define _CRT_SECURE_NO_WARNINGS +#include +#include + +static void print_token(stb_lexer *lexer) +{ + switch (lexer->token) { + case CLEX_id : printf("_%s", lexer->string); break; + case CLEX_eq : printf("=="); break; + case CLEX_noteq : printf("!="); break; + case CLEX_lesseq : printf("<="); break; + case CLEX_greatereq : printf(">="); break; + case CLEX_andand : printf("&&"); break; + case CLEX_oror : printf("||"); break; + case CLEX_shl : printf("<<"); break; + case CLEX_shr : printf(">>"); break; + case CLEX_plusplus : printf("++"); break; + case CLEX_minusminus: printf("--"); break; + case CLEX_arrow : printf("->"); break; + case CLEX_andeq : printf("&="); break; + case CLEX_oreq : printf("|="); break; + case CLEX_xoreq : printf("^="); break; + case CLEX_pluseq : printf("+="); break; + case CLEX_minuseq : printf("-="); break; + case CLEX_muleq : printf("*="); break; + case CLEX_diveq : printf("/="); break; + case CLEX_modeq : printf("%%="); break; + case CLEX_shleq : printf("<<="); break; + case CLEX_shreq : printf(">>="); break; + case CLEX_eqarrow : printf("=>"); break; + case CLEX_dqstring : printf("\"%s\"", lexer->string); break; + case CLEX_sqstring : printf("'\"%s\"'", lexer->string); break; + case CLEX_charlit : printf("'%s'", lexer->string); break; + #if defined(STB__clex_int_as_double) && !defined(STB__CLEX_use_stdlib) + case CLEX_intlit : printf("#%g", lexer->real_number); break; + #else + case CLEX_intlit : printf("#%ld", lexer->int_number); break; + #endif + case CLEX_floatlit : printf("%g", lexer->real_number); break; + default: + if (lexer->token >= 0 && lexer->token < 256) + printf("%c", (int) lexer->token); + else { + printf("<<>>\n", lexer->token); + } + break; + } +} + +/* Force a test +of parsing +multiline comments */ + +/*/ comment /*/ +/**/ extern /**/ + +void dummy(void) +{ + double some_floats[] = { + 1.0501, -10.4e12, 5E+10, +#if 0 // not supported in C++ or C-pre-99, so don't try to compile it, but let our parser test it + 0x1.0p+24, 0xff.FP-8, 0x1p-23, +#endif + 4. + }; + (void) sizeof(some_floats); + (void) some_floats[1]; + + printf("test %d",1); // https://github.com/nothings/stb/issues/13 +} + +int main(int argc, char **argv) +{ + FILE *f = fopen("stb_c_lexer.h","rb"); + char *text = (char *) malloc(1 << 20); + int len = f ? (int) fread(text, 1, 1<<20, f) : -1; + stb_lexer lex; + if (len < 0) { + fprintf(stderr, "Error opening file\n"); + free(text); + fclose(f); + return 1; + } + fclose(f); + + stb_c_lexer_init(&lex, text, text+len, (char *) malloc(0x10000), 0x10000); + while (stb_c_lexer_get_token(&lex)) { + if (lex.token == CLEX_parse_error) { + printf("\n<<>>\n"); + break; + } + print_token(&lex); + printf(" "); + } + return 0; +} +#endif +/* +------------------------------------------------------------------------------ +This software is available under 2 licenses -- choose whichever you prefer. +------------------------------------------------------------------------------ +ALTERNATIVE A - MIT License +Copyright (c) 2017 Sean Barrett +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +------------------------------------------------------------------------------ +ALTERNATIVE B - Public Domain (www.unlicense.org) +This is free and unencumbered software released into the public domain. +Anyone is free to copy, modify, publish, use, compile, sell, or distribute this +software, either in source code form or as a compiled binary, for any purpose, +commercial or non-commercial, and by any means. +In jurisdictions that recognize copyright laws, the author or authors of this +software dedicate any and all copyright interest in the software to the public +domain. We make this dedication for the benefit of the public at large and to +the detriment of our heirs and successors. We intend this dedication to be an +overt act of relinquishment in perpetuity of all present and future rights to +this software under copyright law. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +------------------------------------------------------------------------------ +*/ diff --git a/table.h b/table.h new file mode 100644 index 0000000..79a80bf --- /dev/null +++ b/table.h @@ -0,0 +1,166 @@ + +#define TABLE_KEY_SIZE 64 +#define TABLE_NUM_LANES 32 +static inline unsigned int APHash(const char* str, unsigned int length) { + unsigned int hash = 0xAAAAAAAA; + unsigned int i = 0; + + for (i = 0; i < length; ++str, ++i) + { + hash ^= ((i & 1) == 0) ? ( (hash << 7) ^ (*str) * (hash >> 3)) : + (~((hash << 11) + ((*str) ^ (hash >> 5)))); + } + + return hash; +} + +static inline uint32_t fastModuloReductionDanielLemire(uint32_t v, uint32_t c) { + return (((uint64_t)v) * ((uint64_t)c)) >> 32; +} + +static inline uint32_t hash(const char* key, uint32_t keyLength, uint32_t capacity) { + uint32_t value = APHash(key, keyLength); + + return fastModuloReductionDanielLemire(value, capacity); +} + +typedef struct TableEntry { + struct TableEntry* next; + + const char key[TABLE_KEY_SIZE]; + ssize_t size; + ssize_t align; +} TableEntry; + +typedef struct Table { + TableEntry **entries; +} Table; + +static inline TableEntry* lookup(Table *table, const char key[TABLE_KEY_SIZE]) { + TableEntry *entry = table->entries[hash(key, TABLE_KEY_SIZE, TABLE_NUM_LANES)]; + + for (; entry != NULL; entry = entry->next) { + if (memcmp(entry->key, key, TABLE_KEY_SIZE) == 0) { + return entry; + } + } + + return NULL; +} + +static inline void insert(Table *table, const char key[TABLE_KEY_SIZE], ssize_t size, ssize_t align) { + TableEntry *entry = lookup(table, key); + + if (!entry) { // no entry with that key exists + entry = (TableEntry*) calloc(sizeof(TableEntry), 1); + strWrite((char*)entry->key, key, TABLE_KEY_SIZE); + entry->size = size; + entry->align = align; + + unsigned int hashValue = hash(key, TABLE_KEY_SIZE, TABLE_NUM_LANES); + entry->next = table->entries[hashValue]; + table->entries[hashValue] = entry; + + } else { // entry already exists, replace its value + entry->size = size; + entry->align = align; + } +} + +static inline void insertPadZeroes(Table* table, const char* keyToPad, ssize_t size, ssize_t align) { + char scratch[64] = { 0 }; + for (int i = 0; i < TABLE_KEY_SIZE; i++) { + if (keyToPad[i] == '\0') break; + scratch[i] = keyToPad[i]; + } + + insert(table, scratch, size, align); +} + +static inline void traversePrint(Table *table) { + for (unsigned int i = 0; i < TABLE_NUM_LANES; i++) { + TableEntry *entry = table->entries[i]; + + while (entry != NULL) { + printf("entry key: %-64s, size: %lu, align: %lu\n", entry->key, entry->size, entry->align); + entry = entry->next; + } + } +} + +static inline Table* initTable() { + Table *table = malloc(sizeof(Table)); + table->entries = (TableEntry**) calloc(sizeof(TableEntry*), TABLE_NUM_LANES); + + insertPadZeroes(table, "char", sizeof(char), sizeof(char)); + insertPadZeroes(table, "signed char", sizeof(signed char), sizeof(signed char)); + insertPadZeroes(table, "unsigned char", sizeof(unsigned char), sizeof(unsigned char)); + insertPadZeroes(table, "short", sizeof(short), sizeof(short)); + insertPadZeroes(table, "short int", sizeof(short int), sizeof(short int)); + insertPadZeroes(table, "signed short", sizeof(signed short), sizeof(signed short)); + insertPadZeroes(table, "signed short int", sizeof(signed short int), sizeof(signed short int)); + insertPadZeroes(table, "unsigned short", sizeof(unsigned short), sizeof(unsigned short)); + insertPadZeroes(table, "unsigned short int", sizeof(unsigned short int), sizeof(unsigned short int)); + insertPadZeroes(table, "int", sizeof(int), sizeof(int)); + insertPadZeroes(table, "signed", sizeof(signed), sizeof(signed)); + insertPadZeroes(table, "signed int", sizeof(signed int), sizeof(signed int)); + insertPadZeroes(table, "unsigned", sizeof(unsigned), sizeof(unsigned)); + insertPadZeroes(table, "unsigned int", sizeof(unsigned int), sizeof(unsigned int)); + insertPadZeroes(table, "long", sizeof(long), sizeof(long)); + insertPadZeroes(table, "long int", sizeof(long int), sizeof(long int)); + insertPadZeroes(table, "signed long", sizeof(signed long), sizeof(signed long)); + insertPadZeroes(table, "signed long int", sizeof(signed long int), sizeof(signed long int)); + insertPadZeroes(table, "unsigned long", sizeof(unsigned long), sizeof(unsigned long)); + insertPadZeroes(table, "unsigned long int", sizeof(unsigned long int), sizeof(unsigned long int)); + insertPadZeroes(table, "long long", sizeof(long long), sizeof(long long)); + insertPadZeroes(table, "long long int", sizeof(long long int), sizeof(long long int)); + insertPadZeroes(table, "signed long long", sizeof(signed long long), sizeof(signed long long)); + insertPadZeroes(table, "signed long long int", sizeof(signed long long int), sizeof(signed long long int)); + insertPadZeroes(table, "unsigned long long", sizeof(unsigned long long), sizeof(unsigned long long)); + insertPadZeroes(table, "unsigned long long int", sizeof(unsigned long long int), sizeof(unsigned long long int)); + insertPadZeroes(table, "float", sizeof(float), sizeof(float)); + insertPadZeroes(table, "double", sizeof(double), sizeof(double)); + insertPadZeroes(table, "long double", sizeof(long double), sizeof(long double)); + + insertPadZeroes(table, "size_t", sizeof(size_t), sizeof(size_t)); + insertPadZeroes(table, "ssize_t", sizeof(ssize_t), sizeof(ssize_t)); + insertPadZeroes(table, "bool", sizeof(bool), sizeof(bool)); + insertPadZeroes(table, "_Bool", sizeof(_Bool), sizeof(_Bool)); + + //char + //signed char + //unsigned char + //short + //short int + //signed short + //signed short int + //unsigned short + //unsigned short int + //int + //signed + //signed int + //unsigned + //unsigned int + //long + //long int + //signed long + //signed long int + //unsigned long + //unsigned long int + //long long + //long long int + //signed long long + //signed long long int + //unsigned long long + //unsigned long long int + //float + //double + //long double + + //traversePrint(table); + + return table; +} + +static Table *typeTable; + diff --git a/visualization.h b/visualization.h new file mode 100644 index 0000000..fc651a4 --- /dev/null +++ b/visualization.h @@ -0,0 +1,95 @@ + +static size_t indexHtmlSize = 0; +static char* indexHtml = NULL; + +#define sb_concatf(fmt, ...) \ + if ((sbc - sbi) < 1024) { \ + sbc *= 1.5; \ + stringBuffer = realloc(stringBuffer, sbc); \ + } \ + if ((result = snprintf(stringBuffer + sbi, sbc - sbi, fmt, ##__VA_ARGS__)) > 0) sbi += result; \ + else die("fatal error concating to string"); + + +static void outputHtml() { + static int sbi = 0; + static int sbc = 50 * 1024; + static char* stringBuffer = NULL; + + if (stringBuffer == NULL) stringBuffer = malloc(sizeof(char) * sbc); + if (indexHtml == NULL) indexHtml = readWholeFile("base-index.html", &indexHtmlSize); + + int result = 0; + sb_concatf("%s", indexHtml); + + for (int i = 0; i < numAllStructs; i++) { + struct StructInfo *structInfo = allStructs + i; + printStructInfo(structInfo); + + ssize_t byteCounter = 0; + sb_concatf( + "
" + "" + "
" + , structInfo->name, structInfo->alias); + for (int d = 0; d < structInfo->numDeclarations; d++) { + struct Declaration *decl = structInfo->declarations + d; + bool truncate32 = false; + if (decl->size > 32) { + truncate32 = true; + } + + sb_concatf("%s", "
"); + + bool first = (d % 2) == 0; + const char* positionClass = first ? "struct-info-declaration-top" : "struct-info-declaration-bottom"; + if (decl->size == -1) { + sb_concatf( + "
?" + "
" + "%s %s" + "
" + "
" + "
...
" + , positionClass, decl->type, decl->name); + } else { + for (int b = 0; b < decl->size; b++) { + if (b == 0) { + sb_concatf( + "
" + "
" + "%s %s" + "
" + "
" + , positionClass, decl->type, decl->name); + } else if (truncate32 && b == 32) { + sb_concatf("
...[%ld]
", decl->size - b); + break; + + } else { + sb_concatf("%s", "
"); + } + } + } + sb_concatf("%s", "
"); + } + sb_concatf("%s", "
"); + } + + // don't forget the closing body and html tags + sb_concatf("%s", ""); + + // write the index.html file out to disk + FILE* fp = fopen("index.html", "wb"); + if (fp == NULL) { + die("failed to open the file index.html"); + } + + size_t writtenCount = fwrite(stringBuffer, 1, sbi, fp); + fclose(fp); + + if (writtenCount != sbi) { + die("wrote only partially"); + } +} +