churchianity
2 years ago
7 changed files with 2007 additions and 0 deletions
-
3.gitignore
-
143base-index.html
-
144index.html
-
516main.c
-
940stb_c_lexer.h
-
166table.h
-
95visualization.h
@ -0,0 +1,3 @@ |
|||||
|
.DS_Store |
||||
|
*.out |
||||
|
|
@ -0,0 +1,143 @@ |
|||||
|
<!doctype html>
<html lang="en">
<head>
    <meta charset="utf-8">
    <title>Struct Visualization</title>
    <base href="/">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <link rel="icon" type="image/x-icon" href="favicon.ico">

    <!-- The stylesheet and script live inside <head>: elements placed between
         </head> and <body> are not valid HTML and only work through parser recovery. -->
    <style>
        /* palette variables */
        :root {
            --r1: pink;
            --r2: blue;
            --r3: magenta;
            --r4: yellow;
            --r5: cyan;
            --r6: orange;
            --r7: lime;
            --r8: olive;
        }

        *::before,
        *::after,
        * {
            margin: 0;
            padding: 0;
            box-sizing: border-box;
        }

        html, body {
            min-height: 100vh;
            height: 100vh;
            overscroll-behavior: none;
            background: #F0EAD6;

            direction: ltr;
            font: 16px HelveticaNeue-Light,Helvetica Neue Light,Helvetica Neue,Helvetica,Arial,Lucida Grande,sans-serif;
        }

        .struct-info {
            padding: 0 20px 20px 20px;
            border-bottom: 1px solid silver;
            margin-bottom: 20px;
        }

        .struct-info-declaration {
            position: absolute;
            font-family: monospace;
            min-width: min-content;
            max-width: 20em;
            text-overflow: ellipsis;
            white-space: nowrap;
            font-size: 0.9em;
            transform: translateX(calc(50% - 10px));
            color: black;
            font-weight: normal;
        }
        .struct-info-declaration-name {
            font: 16px HelveticaNeue-Light,Helvetica Neue Light,Helvetica Neue,Helvetica,Arial,Lucida Grande,sans-serif;
        }

        .struct-info-declaration-top {
            top: -30px;
        }
        .struct-info-declaration-bottom {
            bottom: -30px;
        }

        .struct-info-byte-row-ellipsis {
            position: absolute;
            right: 1em;
        }

        .struct-info-byte-rows {
            display: flex;
            flex-direction: column;
            justify-content: flex-start;
            align-items: flex-start;
        }

        .struct-info-byte-row {
            display: flex;
            flex-direction: row;
            justify-content: flex-start;
            align-items: center;
        }

        .struct-info-bytegroup {
            margin: 2em 0;

            display: flex;
            flex-wrap: wrap;
            flex-direction: row;
            justify-content: flex-start;
            align-items: center;
        }

        .struct-info-bytegroup:hover {
            background-color: orange;
        }
        /* Written as a plain descendant selector instead of a nested rule so it
           also works in browsers without CSS nesting support; it matches the
           same elements with the same specificity. */
        .struct-info-bytegroup:hover .struct-info-declaration {
            font-weight: bold;
            color: orange;
        }

        .struct-info-header {
            font-weight: bold;
            font-family: monospace;
        }

        .struct-info-byte {
            position: relative;
            display: flex;
            justify-content: center;
            align-items: center;
            border: 1px solid black;
            width: 30px;
            height: 45px;
        }
        .struct-info-byte-first {
            border-left-width: 3px;
        }
        .struct-info-byte-unknown {
            border-left-width: 3px;
            background-color: maroon;
            font-weight: bold;
            color: white;
        }
        .struct-info-byte-counted-ellipsis {
            background-color: #808000;
            font-weight: bold;
        }
        .struct-info-byte-ellipsis {
            background-color: #400000;
            font-weight: bold;
            color: white;
        }
    </style>
    <script>
    </script>
</head>
<body>
<!-- c code will terminate the body and html tags -->
144
index.html
File diff suppressed because it is too large
View File
File diff suppressed because it is too large
View File
@ -0,0 +1,516 @@ |
|||||
|
// |
||||
|
// @TODO |
||||
|
// long-term goals: |
||||
|
// - support C++! |
||||
|
// - support Rust with repr(C) structs to the extent that it is possible! |
||||
|
// - support Golang to the extent that it is possible! |
||||
|
// - support Odin?! |
||||
|
// |
||||
|
// short-term: |
||||
|
// - bitfields! |
||||
|
// - robustness! change [64] byte name fields to pointers! |
||||
|
// - find all files in folder of a given type! |
||||
|
// |
||||
|
|
||||
|
|
||||
|
#define STB_C_LEXER_DEFINITIONS |
||||
|
|
||||
|
#define STB_C_LEX_0_IS_EOF Y // if Y, ends parsing at '\0'; if N, returns '\0' as token |
||||
|
#define STB_C_LEX_USE_STDLIB Y // use strtod,strtol for parsing #s; otherwise inaccurate hack |
||||
|
#define STB_C_LEX_DOLLAR_IDENTIFIER N // allow $ as an identifier character |
||||
|
|
||||
|
#define STB_C_LEX_DEFINE_ALL_TOKEN_NAMES Y // if Y, all CLEX_ token names are defined, even if never returned |
||||
|
// leaving it as N should help you catch config bugs |
||||
|
|
||||
|
#define STB_C_LEX_DISCARD_PREPROCESSOR Y // discard C-preprocessor directives (e.g. after prepocess |
||||
|
// still have #line, #pragma, etc) |
||||
|
|
||||
|
#define STB_C_LEX_MULTILINE_DSTRINGS N // allow newlines in double-quoted strings |
||||
|
#define STB_C_LEX_MULTILINE_SSTRINGS N // allow newlines in single-quoted strings |
||||
|
#define STB_C_LEX_FLOAT_NO_DECIMAL N // allow floats that have no decimal point if they have an exponent |
||||
|
|
||||
|
|
||||
|
#define STB_C_LEX_C_IDENTIFIERS Y // "[_a-zA-Z][_a-zA-Z0-9]*" CLEX_id |
||||
|
#define STB_C_LEX_C_COMMENTS Y // "/* comment */" |
||||
|
#define STB_C_LEX_CPP_COMMENTS Y // "// comment to end of line\n" |
||||
|
|
||||
|
#define STB_C_LEX_INTEGERS_AS_DOUBLES N // parses integers as doubles so they can be larger than 'int', but only if STB_C_LEX_STDLIB==N |
||||
|
#define STB_C_LEX_C_DECIMAL_INTS N // "0|[1-9][0-9]*" CLEX_intlit |
||||
|
#define STB_C_LEX_C_HEX_INTS N // "0x[0-9a-fA-F]+" CLEX_intlit |
||||
|
#define STB_C_LEX_C_OCTAL_INTS N // "[0-7]+" CLEX_intlit |
||||
|
#define STB_C_LEX_C_DECIMAL_FLOATS N // "[0-9]*(.[0-9]*([eE][-+]?[0-9]+)?) CLEX_floatlit |
||||
|
#define STB_C_LEX_C99_HEX_FLOATS N // "0x{hex}+(.{hex}*)?[pP][-+]?{hex}+ CLEX_floatlit |
||||
|
#define STB_C_LEX_C_DQ_STRINGS N // double-quote-delimited strings with escapes CLEX_dqstring |
||||
|
#define STB_C_LEX_C_SQ_STRINGS N // single-quote-delimited strings with escapes CLEX_ssstring |
||||
|
#define STB_C_LEX_C_CHARS N // single-quote-delimited character with escape CLEX_charlits |
||||
|
#define STB_C_LEX_C_COMPARISONS N // "==" CLEX_eq "!=" CLEX_noteq "<=" CLEX_lesseq ">=" CLEX_greatereq |
||||
|
#define STB_C_LEX_C_LOGICAL N // "&&" CLEX_andand "||" CLEX_oror |
||||
|
#define STB_C_LEX_C_SHIFTS N // "<<" CLEX_shl ">>" CLEX_shr |
||||
|
#define STB_C_LEX_C_INCREMENTS N // "++" CLEX_plusplus "--" CLEX_minusminus |
||||
|
#define STB_C_LEX_C_ARROW N // "->" CLEX_arrow |
||||
|
#define STB_C_LEX_EQUAL_ARROW N // "=>" CLEX_eqarrow |
||||
|
#define STB_C_LEX_C_BITWISEEQ N // "&=" CLEX_andeq "|=" CLEX_oreq "^=" CLEX_xoreq |
||||
|
#define STB_C_LEX_C_ARITHEQ N // "+=" CLEX_pluseq "-=" CLEX_minuseq |
||||
|
// "*=" CLEX_muleq "/=" CLEX_diveq "%=" CLEX_modeq |
||||
|
// if both STB_C_LEX_SHIFTS & STB_C_LEX_ARITHEQ: |
||||
|
// "<<=" CLEX_shleq ">>=" CLEX_shreq |
||||
|
|
||||
|
#define STB_C_LEX_PARSE_SUFFIXES N // letters after numbers are parsed as part of those numbers, and must be in suffix list below |
||||
|
#define STB_C_LEX_DECIMAL_SUFFIXES "" // decimal integer suffixes e.g. "uUlL" -- these are returned as-is in string storage |
||||
|
#define STB_C_LEX_HEX_SUFFIXES "" // e.g. "uUlL" |
||||
|
#define STB_C_LEX_OCTAL_SUFFIXES "" // e.g. "uUlL" |
||||
|
#define STB_C_LEX_FLOAT_SUFFIXES "" // |
||||
|
|
||||
|
#define STB_C_LEXER_IMPLEMENTATION |
||||
|
#include "stb_c_lexer.h" |
||||
|
|
||||
|
#include <inttypes.h> // strtoimax |
||||
|
#include <limits.h> |
||||
|
#include <stdio.h> // fread, fseek, ftell |
||||
|
#include <stdlib.h> // malloc, free |
||||
|
#include <stdarg.h> // va_start, va_list, va_end |
||||
|
#include <stdint.h> |
||||
|
#include <string.h> // memcmp |
||||
|
#include <stdbool.h> |
||||
|
|
||||
|
|
||||
|
// Reports a fatal error and terminates the process.
//
//   format, ... - printf-style format string and its arguments
//
// The message is written to stderr (so it is never interleaved with regular
// output on stdout), followed by a newline. Never returns: the process exits
// with EXIT_FAILURE.
static inline void die(const char* format, ...) {
    va_list args;
    va_start(args, format);
    vfprintf(stderr, format, args);
    va_end(args);
    fputc('\n', stderr);
    exit(EXIT_FAILURE);
}
||||
|
|
||||
|
// Reads the whole file at 'filepath' into a freshly allocated buffer and
// appends a terminating '\0'.
//
//   filepath - path of the file to read (opened in binary mode)
//   outSize  - if non-NULL, receives the number of bytes read
//              (the terminator is not counted)
//
// Returns the buffer; the caller owns it and must free() it.
// Every failure (open, seek, tell, allocation, read error) is fatal and is
// reported through die().
static inline char* readWholeFile(const char* filepath, size_t *outSize) {
    FILE *fp = fopen(filepath, "rb");
    if (fp == NULL) {
        die("failed to open file: %s", filepath);
    }

    // ftell() reports failure as -1L; check it before converting to size_t,
    // otherwise the error would turn into an enormous allocation request.
    long end = -1L;
    if (fseek(fp, 0, SEEK_END) == 0) {
        end = ftell(fp);
    }
    if (end < 0 || fseek(fp, 0L, SEEK_SET) != 0) {
        fclose(fp);
        die("failed to determine size of file: %s", filepath);
    }

    size_t size = (size_t) end;
    char *buffer = malloc(size + 1);
    if (buffer == NULL) {
        fclose(fp);
        die("out of memory reading file: %s", filepath);
    }

    size_t numRead = fread(buffer, sizeof (char), size, fp);
    if (numRead != size && ferror(fp)) {
        free(buffer);
        fclose(fp);
        die("failed to read file: %s", filepath);
    }
    // A short read without an error is kept: terminate after the bytes we
    // actually have so the buffer never exposes uninitialized memory.
    size = numRead;
    buffer[size] = '\0';
    fclose(fp);

    if (outSize != NULL) *outSize = size;

    return buffer;
}
||||
|
|
||||
|
// Reports whether 'c' is one of the whitespace characters this tool
// recognises: space, carriage return, line feed, form feed or horizontal tab.
// (Vertical tab is not treated as whitespace.)
static inline bool isWhitespace(char c) {
    switch (c) {
        case ' ':
        case '\r':
        case '\n':
        case '\f':
        case '\t':
            return true;
        default:
            return false;
    }
}
||||
|
|
||||
|
// Skips leading whitespace in the NUL-terminated string 'input'.
//
// Returns a pointer to the first non-whitespace character. If the string is
// empty or consists only of whitespace, the ORIGINAL pointer is returned
// unchanged (not a pointer to the terminator).
static inline char* eatWhitespace(char* input) {
    for (char* p = input; *p != '\0'; ++p) {
        if (!isWhitespace(*p)) {
            return p;
        }
    }
    return input;
}
||||
|
|
||||
|
// Scans 'string' backwards from index length-1 and returns a pointer to the
// n-th occurrence of 'c' counted from the end, or NULL if there are fewer
// than n occurrences (or if string is NULL or n <= 0).
//
// Runs of consecutive whitespace are collapsed: only the last character of a
// run (the first one met when scanning backwards) is tested against 'c', so a
// run of spaces counts as a single occurrence.
//
//   string - buffer to search; it must have at least 'length' readable bytes
//   length - number of bytes of 'string' to consider
//   c      - character to look for
//   n      - 1-based occurrence index, counted from the end
static inline char* findNthLastCharOccurence(char* string, int length, char c, int n) {
    if (string == NULL || n <= 0) return NULL;

    int seen = 0;
    int i = length - 1;
    while (i >= 0) { // index 0 is examined too
        if (string[i] == c && ++seen == n) {
            return string + i;
        }

        if (isWhitespace(string[i])) {
            // Step over the whole run; stop ON the first non-whitespace
            // character so that it is still tested against 'c'.
            while (i >= 0 && isWhitespace(string[i])) {
                i--;
            }
        } else {
            i--;
        }
    }
    return NULL;
}
||||
|
|
||||
|
// Copies at most 'maxCount' characters of the NUL-terminated string 'src'
// into 'dest' and always appends a terminating '\0'.
//
//   dest     - destination buffer; must have room for maxCount + 1 bytes,
//              because the terminator is written AFTER the copied characters
//   src      - NUL-terminated source string
//   maxCount - maximum number of characters to copy (terminator not included)
//
// Returns the number of characters copied, not counting the terminator.
static inline int strWrite(char *dest, const char *src, int maxCount) {
    int written = 0;
    while (written < maxCount && src[written] != '\0') {
        dest[written] = src[written];
        written++;
    }
    dest[written] = '\0';
    return written;
}
||||
|
// One field (member declaration) of a parsed struct.
struct Declaration {
    // Type as written in the source, NUL-terminated (e.g. "char*").
    char type[64];
    // Field name, NUL-terminated; array fields carry a "[N]" suffix.
    char name[64];
    // Size of the field in bytes; -1 when the type is unknown.
    ssize_t size;
    // Alignment requirement in bytes; -1 when the type is unknown.
    ssize_t align;
    // NOTE(review): presumably marks bitfield members; nothing visible here
    // sets it yet (bitfields are on the TODO list) -- confirm.
    bool isBitfield;
};
||||
|
|
||||
|
struct StructInfo { |
||||
|
char name[64]; |
||||
|
char alias[64]; |
||||
|
const char *filename; |
||||
|
int lineNumber, lineOffset; |
||||
|
|
||||
|
ssize_t size; |
||||
|
struct Declaration declarations[16]; |
||||
|
int numDeclarations; |
||||
|
}; |
||||
|
|
||||
|
static inline void printStructInfo(struct StructInfo *structInfo) { |
||||
|
printf("%s - %d:%d", structInfo->filename, structInfo->lineNumber, structInfo->lineOffset); |
||||
|
printf(" - %s", structInfo->name[0] == '\0' ? "(anonymous struct)" : structInfo->name); |
||||
|
printf(", %s\n", structInfo->alias[0] == '\0' ? "(c++ style, no typedef alias)" : structInfo->alias); |
||||
|
printf(" - total size: %ld\n", structInfo->size); |
||||
|
for (int i = 0; i < structInfo->numDeclarations; i++) { |
||||
|
struct Declaration *decl = structInfo->declarations + i; |
||||
|
printf("\tdecl name: %s, type: %s, size: %ld, alignment: %ld\n", decl->name, decl->type, decl->size, decl->align); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
static int capacityAllStructs = 64; |
||||
|
static int numAllStructs = 0; |
||||
|
static struct StructInfo *allStructs; |
||||
|
|
||||
|
void pushStructInfo(struct StructInfo *structInfo) { |
||||
|
if (numAllStructs >= capacityAllStructs) { |
||||
|
capacityAllStructs *= 1.5; |
||||
|
allStructs = realloc(allStructs, sizeof(struct StructInfo) * (capacityAllStructs)); |
||||
|
} |
||||
|
|
||||
|
memcpy((void*) &allStructs[numAllStructs++], (void*) structInfo, sizeof(struct StructInfo)); |
||||
|
} |
||||
|
|
||||
|
#include "table.h" |
||||
|
#include "visualization.h" |
||||
|
|
||||
|
|
||||
|
// Fixed scratch buffer (1024 * 1000 bytes) and its length as an int.
// The macro is only needed to size the array and is removed again afterwards.
#define STORE_SIZE (1024 * 1000)
static char store[STORE_SIZE] = { 0 };
static const int store_size = STORE_SIZE;
#undef STORE_SIZE
||||
|
|
||||
|
// Header for a generic array: element counts plus an untyped data pointer.
// NOTE(review): nothing visible here uses it yet apart from the empty push() stub.
struct Array {
    unsigned int length;    // presumably the number of elements in use -- confirm
    unsigned int capacity;  // presumably the number of elements allocated -- confirm
    void* data;             // element storage
};
||||
|
// Unimplemented stub: the body is empty, so calling it has no effect on
// 'array' or 'item'.
void push(struct Array* array, void* item) {
    (void) array;
    (void) item;
}
||||
|
|
||||
|
// Unimplemented stub: takes no arguments and does nothing.
// Declared with (void) so the compiler rejects calls that pass arguments;
// an empty parameter list in C leaves the parameters unspecified.
void parseType(void) {
}
||||
|
|
||||
|
// @HACK skip all instances of 'const'
//
// Returns true only when 'nullTerminated' is exactly the string "const"
// (the terminator takes part in the comparison, so "constant" and "cons"
// do not match).
static inline bool shouldSkipConst(char* nullTerminated) {
    return strcmp(nullTerminated, "const") == 0;
}
||||
|
|
||||
|
static inline void finalizeDeclaration( |
||||
|
char lineBuffer[128], |
||||
|
int lookback, |
||||
|
int numAsterisks, |
||||
|
int numDeclarations, |
||||
|
int arrayVal, |
||||
|
struct StructInfo *structInfo |
||||
|
) { |
||||
|
printf("LINE BUFFER: |%s|, arrayVal: %d\n", lineBuffer, arrayVal); |
||||
|
// we're at the end of a line of declarations. |
||||
|
// we can learn some interesting stuff by looking back now. |
||||
|
char typeBuffer[64] = { 0 }; |
||||
|
char *cursor = findNthLastCharOccurence(lineBuffer, 128, ' ', lookback); |
||||
|
if (cursor == NULL) { |
||||
|
die("panic when finalizing a declaration"); |
||||
|
} |
||||
|
|
||||
|
struct Declaration *decl = structInfo->declarations + structInfo->numDeclarations; |
||||
|
int diff = (int)(cursor - lineBuffer); |
||||
|
int count = strWrite(typeBuffer, lineBuffer, diff); |
||||
|
int multiplier = 1; |
||||
|
if (arrayVal != -1) { |
||||
|
multiplier = arrayVal; |
||||
|
} |
||||
|
|
||||
|
ssize_t totalSize = 0; |
||||
|
TableEntry *entry = lookup(typeTable, typeBuffer); |
||||
|
if (numAsterisks == 0) { |
||||
|
if (entry == NULL) { |
||||
|
// this is likely a new/unknown type in the program. enter it into the type table with an unknown size. |
||||
|
printf("warning: unknown field size and alignment in struct field: %s\n", typeBuffer); |
||||
|
insertPadZeroes(typeTable, typeBuffer, -1, -1); |
||||
|
decl->size = -1; |
||||
|
decl->align = -1; |
||||
|
|
||||
|
} else { |
||||
|
decl->size = entry->size * multiplier; |
||||
|
decl->align = entry->align; |
||||
|
} |
||||
|
} else { |
||||
|
decl->size = sizeof(void*) * multiplier; |
||||
|
decl->align = sizeof(void*); |
||||
|
} |
||||
|
|
||||
|
// we could have multiple declarations (comma separated) |
||||
|
// they will have to be the same type, except for bitfields (kill me) |
||||
|
// so we'll just copy the type from the first decl, and just move the cursor |
||||
|
// to find the other name. |
||||
|
for (int i = 0; i < numDeclarations; i++) { |
||||
|
decl = structInfo->declarations + structInfo->numDeclarations; |
||||
|
totalSize += decl->size; |
||||
|
structInfo->numDeclarations++; |
||||
|
|
||||
|
// write in the type name field. |
||||
|
// for looking up size in the table, we don't want to include the '*' |
||||
|
// but for storing the type name of the decl, we probably do. |
||||
|
for (int i = 0; i < numAsterisks; i++) { |
||||
|
count += strWrite(typeBuffer + count, "*", 1); |
||||
|
} |
||||
|
strWrite(decl->type, typeBuffer, 64); |
||||
|
|
||||
|
// figure out the name of this field. |
||||
|
char* nameStart; |
||||
|
char c; |
||||
|
while ((c = *cursor) != '\0') { |
||||
|
if (!isWhitespace(c)) { |
||||
|
nameStart = cursor; |
||||
|
break; |
||||
|
} |
||||
|
cursor++; |
||||
|
} |
||||
|
char* nameEnd; |
||||
|
while ((c = *cursor) != '\0') { |
||||
|
if (isWhitespace(c)) { |
||||
|
nameEnd = cursor; |
||||
|
break; |
||||
|
} |
||||
|
cursor++; |
||||
|
} |
||||
|
int count = strWrite(decl->name, nameStart, (int) (nameEnd-nameStart)); |
||||
|
if (arrayVal != -1) { |
||||
|
snprintf(decl->name + count, 64 - count, "[%d]", arrayVal); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
structInfo->size += totalSize; |
||||
|
} |
||||
|
|
||||
|
// Parses one member declaration line of a struct body, starting at the
// lexer's current token and consuming tokens up to and including the
// terminating ';'. The result is recorded through finalizeDeclaration().
//
//   structInfo - struct that receives the parsed field(s)
//   lexer      - lexer positioned on the first identifier of the declaration
//
// Identifiers (except 'const') are collected, space separated, in a line
// buffer; ',' counts additional names, '*' counts pointer levels and
// '[' ... ']' provides an array length. If the input ends before a ';' is
// seen, nothing is recorded.
void parseStructDeclaration(struct StructInfo *structInfo, stb_lexer *lexer) {
    int numDeclarations = 1; // an int: a bool could never count past 1
    int numAsterisks = 0;
    int soFar = 0;
    int lookback = 2;

    // for parsing things like 'char name[12]'
    char* lastOpenBracket = NULL;
    int arrayVal = -1;

    char lineBuffer[128] = { 0 };
    do {
        switch (lexer->token) {
            case 260: { // 260 == CLEX_id
                // we don't record const because it's annoying.
                if (shouldSkipConst(lexer->string)) break;

                // At most 64 characters of an identifier are kept; make sure
                // they, the separating space and the terminator still fit.
                size_t length = strlen(lexer->string);
                if (length > 64) length = 64;
                if ((size_t) soFar + length + 2 > sizeof lineBuffer) {
                    die("declaration too long to parse: %s", lineBuffer);
                }

                soFar += strWrite(lineBuffer + soFar, lexer->string, (int) length);
                soFar += strWrite(lineBuffer + soFar, " ", 1);
            } break;

            case ',':
                numDeclarations++;
                lookback++;
                break;

            case '*':
                numAsterisks++;
                break;

            case '[':
                lastOpenBracket = lexer->where_firstchar;
                break;

            case ']': {
                // Ignore a ']' that has no matching '['.
                if (lastOpenBracket != NULL) {
                    // Parse the digits between the brackets directly from the
                    // input; use a local end pointer so the lexer's own
                    // bookkeeping is left untouched.
                    char *end = NULL;
                    intmax_t length = strtoimax(lastOpenBracket + 1, &end, 10);
                    // 0 means "no number there" (e.g. a macro name): treat as unknown.
                    arrayVal = (length > 0 && length <= INT_MAX) ? (int) length : -1;
                    lastOpenBracket = NULL;
                }
            } break;

            case ';':
                finalizeDeclaration(lineBuffer, lookback, numAsterisks, numDeclarations, arrayVal, structInfo);
                return;

            default:
                break;
        }
    } while (stb_c_lexer_get_token(lexer) != 0);
}
||||
|
|
||||
|
// |
||||
|
// the token in the lexer is a 'struct' keyword. we want to get the identifiers, and the nested declarations. |
||||
|
// |
||||
|
// <struct-or-union-specifier> ::= <struct-or-union> <identifier> { {<struct-declaration>}+ } |
||||
|
// | <struct-or-union> { {<struct-declaration>}+ } |
||||
|
// | <struct-or-union> <identifier> |
||||
|
void parseStruct(const char *filename, stb_lexer *lexer, bool isClass) { |
||||
|
int result = stb_c_lexer_get_token(lexer); |
||||
|
if (result == 0) die("failed to parse struct"); |
||||
|
|
||||
|
stb_lex_location location = { 0 }; |
||||
|
stb_c_lexer_get_location(lexer, lexer->where_firstchar, &location); |
||||
|
|
||||
|
struct StructInfo structInfo = { 0 }; |
||||
|
structInfo.filename = filename; |
||||
|
structInfo.lineNumber = location.line_number; |
||||
|
structInfo.lineOffset = location.line_offset; |
||||
|
structInfo.numDeclarations = 0; |
||||
|
structInfo.size = 0; |
||||
|
|
||||
|
switch (lexer->token) { |
||||
|
case 260: { |
||||
|
char tempNameBuffer[64] = { 0 }; |
||||
|
strWrite(tempNameBuffer, lexer->string, 64); |
||||
|
|
||||
|
// maybe a named struct. |
||||
|
result = stb_c_lexer_get_token(lexer); |
||||
|
if (result == 0) die("failed to parse struct"); |
||||
|
|
||||
|
if (lexer->token == '{') { |
||||
|
strWrite(structInfo.name, tempNameBuffer, 64); |
||||
|
|
||||
|
} else { |
||||
|
return; |
||||
|
} |
||||
|
} break; |
||||
|
|
||||
|
case '{': {} break; |
||||
|
|
||||
|
default: return; |
||||
|
} |
||||
|
|
||||
|
int balancer = 1; |
||||
|
while (stb_c_lexer_get_token(lexer) != 0) { |
||||
|
switch (lexer->token) { |
||||
|
case '}': if (--balancer == 0) goto checkTypeAlias; |
||||
|
case '{': ++balancer; break; |
||||
|
|
||||
|
case 260: { |
||||
|
parseStructDeclaration(&structInfo, lexer); |
||||
|
} break; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
checkTypeAlias: |
||||
|
result = stb_c_lexer_get_token(lexer); |
||||
|
if (result == 0) die("unexpected end of stream when parsing a struct"); |
||||
|
|
||||
|
if (lexer->token == 260) { |
||||
|
// we have a type alias for the struct. |
||||
|
// @NOTE @TODO this could also conceivably by the __attribute__ thingy: https://stackoverflow.com/questions/14671253/is-there-a-gcc-keyword-to-allow-structure-reordering |
||||
|
strWrite(structInfo.alias, lexer->string, 64); |
||||
|
} |
||||
|
|
||||
|
pushStructInfo(&structInfo); |
||||
|
} |
||||
|
|
||||
|
// Unimplemented stub: called when a 'typedef' keyword is seen, but the body
// is empty, so no tokens are consumed and nothing is recorded.
void parseTypedef(stb_lexer *lexer) {
    (void) lexer;
}
||||
|
|
||||
|
|
||||
|
void parseFile(const char *filepath) { |
||||
|
printf("parsing file %s...\n", filepath); |
||||
|
size_t size; |
||||
|
char *buffer = readWholeFile(filepath, &size); |
||||
|
|
||||
|
stb_lexer lexer; |
||||
|
stb_c_lexer_init(&lexer, buffer, buffer + size + 1, store, store_size); |
||||
|
|
||||
|
while (stb_c_lexer_get_token(&lexer) != 0) { |
||||
|
switch (lexer.token) { |
||||
|
case 260: { // token is a string |
||||
|
const uint64_t LE_STRUCT = 0x0000746375727473U; |
||||
|
const uint64_t LE_CLASS = 0x0000007373616C63U; |
||||
|
const uint64_t LE_TYPEDEF = 0x0066656465707974U; |
||||
|
|
||||
|
uint64_t t = *((uint64_t*)(lexer.string)); |
||||
|
if ((t ) == LE_TYPEDEF) { parseTypedef(&lexer); } |
||||
|
else if ((t & 0x00FFFFFFFFFFFFFF) == LE_STRUCT) { parseStruct(filepath, &lexer, false); } |
||||
|
else if ((t & 0x0000FFFFFFFFFFFF) == LE_CLASS) { parseStruct(filepath, &lexer, true); } |
||||
|
} break; |
||||
|
} |
||||
|
} |
||||
|
free(buffer); |
||||
|
memset(store, 0, store_size); |
||||
|
} |
||||
|
|
||||
|
// http://www.catb.org/esr/structure-packing/ |
||||
|
int main(int argc, char* argv[]) { |
||||
|
// @TODO check for flag -fshort-enums |
||||
|
allStructs = malloc(sizeof(struct StructInfo) * capacityAllStructs); |
||||
|
typeTable = initTable(); |
||||
|
|
||||
|
if (CHAR_BIT != 8) { |
||||
|
printf("warning - CHAR_BIT != 8\n"); |
||||
|
} |
||||
|
|
||||
|
if (false) { |
||||
|
printf("CHAR_BIT = %d\n", CHAR_BIT); |
||||
|
printf("MB_LEN_MAX = %d\n\n", MB_LEN_MAX); |
||||
|
|
||||
|
printf("CHAR_MIN = %+d\n", CHAR_MIN); |
||||
|
printf("CHAR_MAX = %+d\n", CHAR_MAX); |
||||
|
printf("SCHAR_MIN = %+d\n", SCHAR_MIN); |
||||
|
printf("SCHAR_MAX = %+d\n", SCHAR_MAX); |
||||
|
printf("UCHAR_MAX = %u\n\n", UCHAR_MAX); |
||||
|
|
||||
|
printf("SHRT_MIN = %+d\n", SHRT_MIN); |
||||
|
printf("SHRT_MAX = %+d\n", SHRT_MAX); |
||||
|
printf("USHRT_MAX = %u\n\n", USHRT_MAX); |
||||
|
|
||||
|
printf("INT_MIN = %+d\n", INT_MIN); |
||||
|
printf("INT_MAX = %+d\n", INT_MAX); |
||||
|
printf("UINT_MAX = %u\n\n", UINT_MAX); |
||||
|
|
||||
|
printf("LONG_MIN = %+ld\n", LONG_MIN); |
||||
|
printf("LONG_MAX = %+ld\n", LONG_MAX); |
||||
|
printf("ULONG_MAX = %lu\n\n", ULONG_MAX); |
||||
|
|
||||
|
printf("LLONG_MIN = %+lld\n", LLONG_MIN); |
||||
|
printf("LLONG_MAX = %+lld\n", LLONG_MAX); |
||||
|
printf("ULLONG_MAX = %llu\n\n", ULLONG_MAX); |
||||
|
|
||||
|
printf("PTRDIFF_MIN = %td\n", PTRDIFF_MIN); |
||||
|
printf("PTRDIFF_MAX = %+td\n", PTRDIFF_MAX); |
||||
|
printf("SIZE_MAX = %zu\n", SIZE_MAX); |
||||
|
printf("SIG_ATOMIC_MIN = %+jd\n",(intmax_t)SIG_ATOMIC_MIN); |
||||
|
printf("SIG_ATOMIC_MAX = %+jd\n",(intmax_t)SIG_ATOMIC_MAX); |
||||
|
printf("WCHAR_MIN = %+jd\n",(intmax_t)WCHAR_MIN); |
||||
|
printf("WCHAR_MAX = %+jd\n",(intmax_t)WCHAR_MAX); |
||||
|
printf("WINT_MIN = %jd\n", (intmax_t)WINT_MIN); |
||||
|
printf("WINT_MAX = %jd\n", (intmax_t)WINT_MAX); |
||||
|
} |
||||
|
|
||||
|
if (argc < 2) { |
||||
|
//die("provide a list of c/c++ files and/or headers to anaylze."); |
||||
|
parseFile(__FILE__); |
||||
|
parseFile("table.h"); |
||||
|
parseFile("visualization.h"); |
||||
|
parseFile("stb_c_lexer.h"); |
||||
|
} |
||||
|
|
||||
|
for (int i = 1; i < argc; i++) { |
||||
|
const char *filepath = argv[i]; |
||||
|
parseFile(filepath); |
||||
|
} |
||||
|
|
||||
|
outputHtml(); |
||||
|
|
||||
|
return 0; |
||||
|
} |
||||
|
|
@ -0,0 +1,940 @@ |
|||||
|
// stb_c_lexer.h - v0.12 - public domain Sean Barrett 2013 |
||||
|
// lexer for making little C-like languages with recursive-descent parsers |
||||
|
// |
||||
|
// This file provides both the interface and the implementation. |
||||
|
// To instantiate the implementation, |
||||
|
// #define STB_C_LEXER_IMPLEMENTATION |
||||
|
// in *ONE* source file, before #including this file. |
||||
|
// |
||||
|
// The default configuration is fairly close to a C lexer, although |
||||
|
// suffixes on integer constants are not handled (you can override this). |
||||
|
// |
||||
|
// History: |
||||
|
// 0.12 fix compilation bug for NUL support; better support separate inclusion |
||||
|
// 0.11 fix clang static analysis warning |
||||
|
// 0.10 fix warnings |
||||
|
// 0.09 hex floats, no-stdlib fixes |
||||
|
// 0.08 fix bad pointer comparison |
||||
|
// 0.07 fix mishandling of hexadecimal constants parsed by strtol |
||||
|
// 0.06 fix missing next character after ending quote mark (Andreas Fredriksson) |
||||
|
// 0.05 refixed get_location because github version had lost the fix |
||||
|
// 0.04 fix octal parsing bug |
||||
|
// 0.03 added STB_C_LEX_DISCARD_PREPROCESSOR option |
||||
|
// refactor API to simplify (only one struct instead of two) |
||||
|
// change literal enum names to have 'lit' at the end |
||||
|
// 0.02 first public release |
||||
|
// |
||||
|
// Status: |
||||
|
// - haven't tested compiling as C++ |
||||
|
// - haven't tested the float parsing path |
||||
|
// - haven't tested the non-default-config paths (e.g. non-stdlib) |
||||
|
// - only tested default-config paths by eyeballing output of self-parse |
||||
|
// |
||||
|
// - haven't implemented multiline strings |
||||
|
// - haven't implemented octal/hex character constants |
||||
|
// - haven't implemented support for unicode CLEX_char |
||||
|
// - need to expand error reporting so you don't just get "CLEX_parse_error" |
||||
|
// |
||||
|
// Contributors: |
||||
|
// Arpad Goretity (bugfix) |
||||
|
// Alan Hickman (hex floats) |
||||
|
// |
||||
|
// LICENSE |
||||
|
// |
||||
|
// See end of file for license information. |
||||
|
|
||||
|
#ifdef STB_C_LEXER_IMPLEMENTATION |
||||
|
#ifndef STB_C_LEXER_DEFINITIONS |
||||
|
// to change the default parsing rules, copy the following lines |
||||
|
// into your C/C++ file *before* including this, and then replace |
||||
|
// the Y's with N's for the ones you don't want. This needs to be |
||||
|
// set to the same values for every place in your program where |
||||
|
// stb_c_lexer.h is included. |
||||
|
// --BEGIN-- |
||||
|
|
||||
|
#if defined(Y) || defined(N) |
||||
|
#error "Can only use stb_c_lexer in contexts where the preprocessor symbols 'Y' and 'N' are not defined" |
||||
|
#endif |
||||
|
|
||||
|
#define STB_C_LEX_C_DECIMAL_INTS Y // "0|[1-9][0-9]*" CLEX_intlit |
||||
|
#define STB_C_LEX_C_HEX_INTS Y // "0x[0-9a-fA-F]+" CLEX_intlit |
||||
|
#define STB_C_LEX_C_OCTAL_INTS Y // "[0-7]+" CLEX_intlit |
||||
|
#define STB_C_LEX_C_DECIMAL_FLOATS Y // "[0-9]*(.[0-9]*([eE][-+]?[0-9]+)?) CLEX_floatlit |
||||
|
#define STB_C_LEX_C99_HEX_FLOATS N // "0x{hex}+(.{hex}*)?[pP][-+]?{hex}+ CLEX_floatlit |
||||
|
#define STB_C_LEX_C_IDENTIFIERS Y // "[_a-zA-Z][_a-zA-Z0-9]*" CLEX_id |
||||
|
#define STB_C_LEX_C_DQ_STRINGS Y // double-quote-delimited strings with escapes CLEX_dqstring |
||||
|
#define STB_C_LEX_C_SQ_STRINGS N // single-quote-delimited strings with escapes CLEX_ssstring |
||||
|
#define STB_C_LEX_C_CHARS Y // single-quote-delimited character with escape CLEX_charlits |
||||
|
#define STB_C_LEX_C_COMMENTS Y // "/* comment */" |
||||
|
#define STB_C_LEX_CPP_COMMENTS Y // "// comment to end of line\n" |
||||
|
#define STB_C_LEX_C_COMPARISONS Y // "==" CLEX_eq "!=" CLEX_noteq "<=" CLEX_lesseq ">=" CLEX_greatereq |
||||
|
#define STB_C_LEX_C_LOGICAL Y // "&&" CLEX_andand "||" CLEX_oror |
||||
|
#define STB_C_LEX_C_SHIFTS Y // "<<" CLEX_shl ">>" CLEX_shr |
||||
|
#define STB_C_LEX_C_INCREMENTS Y // "++" CLEX_plusplus "--" CLEX_minusminus |
||||
|
#define STB_C_LEX_C_ARROW Y // "->" CLEX_arrow |
||||
|
#define STB_C_LEX_EQUAL_ARROW N // "=>" CLEX_eqarrow |
||||
|
#define STB_C_LEX_C_BITWISEEQ Y // "&=" CLEX_andeq "|=" CLEX_oreq "^=" CLEX_xoreq |
||||
|
#define STB_C_LEX_C_ARITHEQ Y // "+=" CLEX_pluseq "-=" CLEX_minuseq |
||||
|
// "*=" CLEX_muleq "/=" CLEX_diveq "%=" CLEX_modeq |
||||
|
// if both STB_C_LEX_SHIFTS & STB_C_LEX_ARITHEQ: |
||||
|
// "<<=" CLEX_shleq ">>=" CLEX_shreq |
||||
|
|
||||
|
#define STB_C_LEX_PARSE_SUFFIXES N // letters after numbers are parsed as part of those numbers, and must be in suffix list below |
||||
|
#define STB_C_LEX_DECIMAL_SUFFIXES "" // decimal integer suffixes e.g. "uUlL" -- these are returned as-is in string storage |
||||
|
#define STB_C_LEX_HEX_SUFFIXES "" // e.g. "uUlL" |
||||
|
#define STB_C_LEX_OCTAL_SUFFIXES "" // e.g. "uUlL" |
||||
|
#define STB_C_LEX_FLOAT_SUFFIXES "" // |
||||
|
|
||||
|
#define STB_C_LEX_0_IS_EOF N // if Y, ends parsing at '\0'; if N, returns '\0' as token |
||||
|
#define STB_C_LEX_INTEGERS_AS_DOUBLES N // parses integers as doubles so they can be larger than 'int', but only if STB_C_LEX_STDLIB==N |
||||
|
#define STB_C_LEX_MULTILINE_DSTRINGS N // allow newlines in double-quoted strings |
||||
|
#define STB_C_LEX_MULTILINE_SSTRINGS N // allow newlines in single-quoted strings |
||||
|
#define STB_C_LEX_USE_STDLIB Y // use strtod,strtol for parsing #s; otherwise inaccurate hack |
||||
|
#define STB_C_LEX_DOLLAR_IDENTIFIER Y // allow $ as an identifier character |
||||
|
#define STB_C_LEX_FLOAT_NO_DECIMAL Y // allow floats that have no decimal point if they have an exponent |
||||
|
|
||||
|
#define STB_C_LEX_DEFINE_ALL_TOKEN_NAMES N // if Y, all CLEX_ token names are defined, even if never returned |
||||
|
// leaving it as N should help you catch config bugs |
||||
|
|
||||
|
#define STB_C_LEX_DISCARD_PREPROCESSOR Y // discard C-preprocessor directives (e.g. after prepocess |
||||
|
// still have #line, #pragma, etc) |
||||
|
|
||||
|
//#define STB_C_LEX_ISWHITE(str) ... // return length in bytes of whitespace characters if first char is whitespace |
||||
|
|
||||
|
#define STB_C_LEXER_DEFINITIONS // This line prevents the header file from replacing your definitions |
||||
|
// --END-- |
||||
|
#endif |
||||
|
#endif |
||||
|
|
||||
|
#ifndef INCLUDE_STB_C_LEXER_H |
||||
|
#define INCLUDE_STB_C_LEXER_H |
||||
|
|
||||
|
// Complete state of one lexer instance: the input being scanned, the storage
// used for token text, and the most recently parsed token.
typedef struct {
    // --- lexer variables ---
    char *input_stream;
    char *eof;
    char *parse_point;
    char *string_storage;
    int   string_storage_len;

    // --- lexer parse location for error messages ---
    char *where_firstchar;
    char *where_lastchar;

    // --- lexer token variables ---
    long   token;
    double real_number;
    long   int_number;
    char  *string;
    int    string_len;
} stb_lexer;
||||
|
|
||||
|
// A position inside the input, expressed as line and offset within that line.
typedef struct {
    int line_number;
    int line_offset;
} stb_lex_location;
||||
|
|
||||
|
#ifdef __cplusplus |
||||
|
extern "C" { |
||||
|
#endif |
||||
|
|
||||
|
extern void stb_c_lexer_init(stb_lexer *lexer, const char *input_stream, const char *input_stream_end, char *string_store, int store_length); |
||||
|
// this function initializes the 'lexer' structure |
||||
|
// Input: |
||||
|
// - input_stream points to the file to parse, loaded into memory |
||||
|
// - input_stream_end points to the end of the file, or NULL if you use 0-for-EOF |
||||
|
// - string_store is storage the lexer can use for storing parsed strings and identifiers |
||||
|
// - store_length is the length of that storage |
||||
|
|
||||
|
extern int stb_c_lexer_get_token(stb_lexer *lexer); |
||||
|
// this function returns non-zero if a token is parsed, or 0 if at EOF |
||||
|
// Output: |
||||
|
// - lexer->token is the token ID, which is unicode code point for a single-char token, < 0 for a multichar or eof or error |
||||
|
// - lexer->real_number is a double constant value for CLEX_floatlit, or CLEX_intlit if STB_C_LEX_INTEGERS_AS_DOUBLES |
||||
|
// - lexer->int_number is an integer constant for CLEX_intlit if !STB_C_LEX_INTEGERS_AS_DOUBLES, or character for CLEX_charlit |
||||
|
// - lexer->string is a 0-terminated string for CLEX_dqstring or CLEX_sqstring or CLEX_identifier |
||||
|
// - lexer->string_len is the byte length of lexer->string |
||||
|
|
||||
|
extern void stb_c_lexer_get_location(const stb_lexer *lexer, const char *where, stb_lex_location *loc); |
||||
|
// this inefficient function returns the line number and character offset of a |
||||
|
// given location in the file as returned by stb_lex_token. Because it's inefficient, |
||||
|
// you should only call it for errors, not for every token. |
||||
|
// For error messages of invalid tokens, you typically want the location of the start |
||||
|
// of the token (which caused the token to be invalid). For bugs involving legit |
||||
|
// tokens, you can report the first or the range. |
||||
|
// Output: |
||||
|
// - loc->line_number is the line number in the file, counting from 1, of the location |
||||
|
// - loc->line_offset is the char-offset in the line, counting from 0, of the location |
||||
|
|
||||
|
|
||||
|
#ifdef __cplusplus |
||||
|
} |
||||
|
#endif |
||||
|
|
||||
|
// Token IDs for everything that is not a plain single character.
// Single-character tokens are reported as the character itself, so these
// values start above the 8-bit range.
enum
{
   CLEX_eof = 256,        // end of input
   CLEX_parse_error,      // malformed token

   // literals and identifiers
   CLEX_intlit,
   CLEX_floatlit,
   CLEX_id,
   CLEX_dqstring,
   CLEX_sqstring,
   CLEX_charlit,

   // comparisons
   CLEX_eq,
   CLEX_noteq,
   CLEX_lesseq,
   CLEX_greatereq,

   // logical operators
   CLEX_andand,
   CLEX_oror,

   // shifts
   CLEX_shl,
   CLEX_shr,

   // increment / decrement
   CLEX_plusplus,
   CLEX_minusminus,

   // compound assignment
   CLEX_pluseq,
   CLEX_minuseq,
   CLEX_muleq,
   CLEX_diveq,
   CLEX_modeq,
   CLEX_andeq,
   CLEX_oreq,
   CLEX_xoreq,

   // arrows
   CLEX_arrow,
   CLEX_eqarrow,

   // shift-assignment
   CLEX_shleq,
   CLEX_shreq,

   // first value available for user-defined tokens
   CLEX_first_unused_token
};
||||
|
#endif // INCLUDE_STB_C_LEXER_H |
||||
|
|
||||
|
#ifdef STB_C_LEXER_IMPLEMENTATION |
||||
|
|
||||
|
// Hacky definitions so we can easily #if on them:
// while these are in effect, a config option written as Y(...) tests as 1
// and one written as N(...) tests as 0
#define Y(x) 1
#define N(x) 0

// stb__clex_int is the type used to accumulate integer literals in the
// hand-rolled (non-stdlib) integer parsers
#if STB_C_LEX_INTEGERS_AS_DOUBLES(x)
typedef double     stb__clex_int;
#define intfield   real_number
#define STB__clex_int_as_double
#else
typedef long       stb__clex_int;
#define intfield   int_number
#endif

// Convert these config options to simple conditional #defines so we can more
// easily test them once we've changed the meaning of Y/N

#if STB_C_LEX_PARSE_SUFFIXES(x)
#define STB__clex_parse_suffixes
#endif

#if STB_C_LEX_C99_HEX_FLOATS(x)
#define STB__clex_hex_floats
#endif

#if STB_C_LEX_C_HEX_INTS(x)
#define STB__clex_hex_ints
#endif

#if STB_C_LEX_C_DECIMAL_INTS(x)
#define STB__clex_decimal_ints
#endif

#if STB_C_LEX_C_OCTAL_INTS(x)
#define STB__clex_octal_ints
#endif

#if STB_C_LEX_C_DECIMAL_FLOATS(x)
#define STB__clex_decimal_floats
#endif

#if STB_C_LEX_DISCARD_PREPROCESSOR(x)
#define STB__clex_discard_preprocessor
#endif

// the stdlib number parsers are only used when hex floats are off or the
// compiler reports C99 or later
#if STB_C_LEX_USE_STDLIB(x) && (!defined(STB__clex_hex_floats) || __STDC_VERSION__ >= 199901L)
#define STB__CLEX_use_stdlib
#include <stdlib.h>
#endif

// Now for the rest of the file we'll use the basic definition
// where Y expands to its contents and N expands to nothing
#undef  Y
#define Y(a) a
#undef N
#define N(a)
||||
|
|
||||
|
// API function |
||||
|
void stb_c_lexer_init(stb_lexer *lexer, const char *input_stream, const char *input_stream_end, char *string_store, int store_length) |
||||
|
{ |
||||
|
lexer->input_stream = (char *) input_stream; |
||||
|
lexer->eof = (char *) input_stream_end; |
||||
|
lexer->parse_point = (char *) input_stream; |
||||
|
lexer->string_storage = string_store; |
||||
|
lexer->string_storage_len = store_length; |
||||
|
} |
||||
|
|
||||
|
// API function |
||||
|
void stb_c_lexer_get_location(const stb_lexer *lexer, const char *where, stb_lex_location *loc) |
||||
|
{ |
||||
|
char *p = lexer->input_stream; |
||||
|
int line_number = 1; |
||||
|
int char_offset = 0; |
||||
|
while (*p && p < where) { |
||||
|
if (*p == '\n' || *p == '\r') { |
||||
|
p += (p[0]+p[1] == '\r'+'\n' ? 2 : 1); // skip newline |
||||
|
line_number += 1; |
||||
|
char_offset = 0; |
||||
|
} else { |
||||
|
++p; |
||||
|
++char_offset; |
||||
|
} |
||||
|
} |
||||
|
loc->line_number = line_number; |
||||
|
loc->line_offset = char_offset; |
||||
|
} |
||||
|
|
||||
|
// main helper function for returning a parsed token |
||||
|
static int stb__clex_token(stb_lexer *lexer, int token, char *start, char *end) |
||||
|
{ |
||||
|
lexer->token = token; |
||||
|
lexer->where_firstchar = start; |
||||
|
lexer->where_lastchar = end; |
||||
|
lexer->parse_point = end+1; |
||||
|
return 1; |
||||
|
} |
||||
|
|
||||
|
// helper function for returning eof:
// sets the token to CLEX_eof and returns 0, the value stb_c_lexer_get_token
// uses to report end of input; the parse point is left untouched
static int stb__clex_eof(stb_lexer *lexer)
{
   lexer->token = CLEX_eof;
   return 0;
}
||||
|
|
||||
|
// returns 1 if x is a space, tab, carriage return, newline or form feed, else 0
static int stb__clex_iswhite(int x)
{
   switch (x) {
      case ' ':
      case '\t':
      case '\r':
      case '\n':
      case '\f':
         return 1;
      default:
         return 0;
   }
}
||||
|
|
||||
|
// returns a pointer to the first occurrence of ch in str, or 0 if there is
// none; unlike the C library strchr, the terminating 0 itself is never matched
static const char *stb__strchr(const char *str, int ch)
{
   while (*str != 0) {
      if (*str == ch)
         return str;
      ++str;
   }
   return 0;
}
||||
|
|
||||
|
// parse suffixes at the end of a number |
||||
|
static int stb__clex_parse_suffixes(stb_lexer *lexer, long tokenid, char *start, char *cur, const char *suffixes) |
||||
|
{ |
||||
|
#ifdef STB__clex_parse_suffixes |
||||
|
lexer->string = lexer->string_storage; |
||||
|
lexer->string_len = 0; |
||||
|
|
||||
|
while ((*cur >= 'a' && *cur <= 'z') || (*cur >= 'A' && *cur <= 'Z')) { |
||||
|
if (stb__strchr(suffixes, *cur) == 0) |
||||
|
return stb__clex_token(lexer, CLEX_parse_error, start, cur); |
||||
|
if (lexer->string_len+1 >= lexer->string_storage_len) |
||||
|
return stb__clex_token(lexer, CLEX_parse_error, start, cur); |
||||
|
lexer->string[lexer->string_len++] = *cur++; |
||||
|
} |
||||
|
#else |
||||
|
suffixes = suffixes; // attempt to suppress warnings |
||||
|
#endif |
||||
|
return stb__clex_token(lexer, tokenid, start, cur-1); |
||||
|
} |
||||
|
|
||||
|
#ifndef STB__CLEX_use_stdlib |
||||
|
// raises 'base' to a non-negative integer power by repeated squaring
static double stb__clex_pow(double base, unsigned int exponent)
{
   double result = 1;
   while (exponent != 0) {
      // fold in the current square whenever the low exponent bit is set
      if ((exponent & 1) != 0)
         result *= base;
      base *= base;
      exponent >>= 1;
   }
   return result;
}
||||
|
|
||||
|
// hand-rolled replacement for strtod, used when the stdlib is not available
//   p - start of the literal
//   q - receives the first character after the literal
// Parses decimal floats and, when hex floats are enabled, "0x" hex floats.
// A hex float without a 'p' exponent is rejected: *q is set back to the
// start of the literal and 0 is returned.
static double stb__clex_parse_float(char *p, char **q)
{
   char *literal_start = p;
   double result=0;
   int base=10;
   int has_exponent=0;

#ifdef STB__clex_hex_floats
   if (p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
      base=16;
      p += 2;
   }
#endif

   // integer part
   for (;;) {
      char c = *p;
      if (c >= '0' && c <= '9')
         result = result*base + (c - '0');
#ifdef STB__clex_hex_floats
      else if (base == 16 && c >= 'a' && c <= 'f')
         result = result*base + 10 + (c - 'a');
      else if (base == 16 && c >= 'A' && c <= 'F')
         result = result*base + 10 + (c - 'A');
#endif
      else
         break;
      ++p;
   }

   // fractional part: gather the digits as an integer, then scale down once
   if (*p == '.') {
      double scale = 1;
      double fraction = 0;
      ++p;
      for (;;) {
         char c = *p;
         if (c >= '0' && c <= '9')
            fraction = fraction*base + (c - '0');
#ifdef STB__clex_hex_floats
         else if (base == 16 && c >= 'a' && c <= 'f')
            fraction = fraction*base + 10 + (c - 'a');
         else if (base == 16 && c >= 'A' && c <= 'F')
            fraction = fraction*base + 10 + (c - 'A');
#endif
         else
            break;
         ++p;
         scale *= base;
      }
      result += fraction / scale;
   }

#ifdef STB__clex_hex_floats
   if (base == 16) {
      // exponent required for hex float literal
      if (*p != 'p' && *p != 'P') {
         *q = literal_start;
         return 0;
      }
      has_exponent = 1;
   } else
#endif
      has_exponent = (*p == 'e' || *p == 'E');

   if (has_exponent) {
      int negative = (p[1] == '-');
      unsigned int exp_value=0;
      double power=1;

      ++p;                          // skip the exponent letter
      if (*p == '-' || *p == '+')   // and an optional sign
         ++p;
      while (*p >= '0' && *p <= '9')
         exp_value = exp_value*10 + (*p++ - '0');

      // hex floats scale by powers of two, decimal floats by powers of ten
#ifdef STB__clex_hex_floats
      if (base == 16)
         power = stb__clex_pow(2, exp_value);
      else
#endif
         power = stb__clex_pow(10, exp_value);

      if (negative)
         result /= power;
      else
         result *= power;
   }

   *q = p;
   return result;
}
||||
|
#endif |
||||
|
|
||||
|
// decodes one (possibly backslash-escaped) character starting at p
//   returns the character value, or -1 for escapes that are not supported yet
//   *q is set to the first character after what was consumed
// A backslash followed by an unrecognized character is returned as a plain
// backslash, consuming only the backslash.
static int stb__clex_parse_char(char *p, char **q)
{
   if (*p == '\\') {
      int recognized = 1;
      int value = 0;
      switch (p[1]) {
         case '\\': value = '\\'; break;
         case '\'': value = '\''; break;
         case '"':  value = '"';  break;
         case 't':  value = '\t'; break;
         case 'f':  value = '\f'; break;
         case 'n':  value = '\n'; break;
         case 'r':  value = '\r'; break;
         case '0':  value = '\0'; break; // @TODO octal constants
         case 'x':
         case 'X':  value = -1;   break; // @TODO hex constants
         case 'u':  value = -1;   break; // @TODO unicode constants
         default:   recognized = 0; break;
      }
      if (recognized) {
         *q = p+2;   // backslash plus the escape letter
         return value;
      }
   }
   *q = p+1;
   return (unsigned char) *p;
}
||||
|
|
||||
|
// parses a quoted string starting at the opening delimiter 'p'
//   type - token id to report on success (CLEX_dqstring or CLEX_sqstring)
// On success the unescaped, 0-terminated contents are in lexer->string and
// lexer->string_len is their length. CLEX_parse_error is reported for an
// unsupported escape, for a string that does not fit in the string storage
// (including its terminating 0), and for a string that is still open when
// the end of the input (lexer->eof) is reached.
static int stb__clex_parse_string(stb_lexer *lexer, char *p, int type)
{
   char *start = p;
   char delim = *p++; // grab the " or ' for later matching
   char *out = lexer->string_storage;
   char *outend = lexer->string_storage + lexer->string_storage_len;
   while (p != lexer->eof && *p != delim) {
      int n;
      if (*p == '\\') {
         char *q;
         // a backslash as the very last input character has nothing to escape
         if (p+1 == lexer->eof)
            return stb__clex_token(lexer, CLEX_parse_error, start, p);
         n = stb__clex_parse_char(p, &q);
         if (n < 0)
            return stb__clex_token(lexer, CLEX_parse_error, start, q);
         p = q;
      } else {
         // @OPTIMIZE: could speed this up by looping-while-not-backslash
         n = (unsigned char) *p++;
      }
      // always keep one byte in reserve for the terminating 0
      if (outend - out < 2)
         return stb__clex_token(lexer, CLEX_parse_error, start, p);
      // @TODO expand unicode escapes to UTF8
      *out++ = (char) n;
   }
   // input ended before the closing delimiter was found
   if (p == lexer->eof)
      return stb__clex_token(lexer, CLEX_parse_error, start, p-1);
   // storage too small to hold even the terminator of an empty string
   if (out >= outend)
      return stb__clex_token(lexer, CLEX_parse_error, start, p);
   *out = 0;
   lexer->string = lexer->string_storage;
   lexer->string_len = (int) (out - lexer->string_storage);
   return stb__clex_token(lexer, type, start, p);
}
||||
|
|
||||
|
// API function: scan the next token starting at lexer->parse_point.
//   Returns non-zero when a token (including CLEX_parse_error) was produced
//   and 0 at end of input. The token id is stored in lexer->token, its span in
//   lexer->where_firstchar / lexer->where_lastchar, and its value in
//   lexer->string / lexer->string_len, lexer->int_number or lexer->real_number
//   depending on the kind of token.
//   Whitespace, comments and (if configured) preprocessor lines are skipped.
int stb_c_lexer_get_token(stb_lexer *lexer)
{
   char *p = lexer->parse_point;

   // skip whitespace and comments
   for (;;) {
      #ifdef STB_C_LEX_ISWHITE
      while (p != lexer->eof) {
         int n;
         n = STB_C_LEX_ISWHITE(p);
         if (n == 0) break;
         // the reported whitespace run must not extend beyond the input
         if (lexer->eof && lexer->eof - p < n)
            return stb__clex_token(lexer, CLEX_parse_error, p,lexer->eof-1);
         p += n;
      }
      #else
      while (p != lexer->eof && stb__clex_iswhite(*p))
         ++p;
      #endif

      STB_C_LEX_CPP_COMMENTS(
         if (p != lexer->eof && p[0] == '/' && p[1] == '/') {
            while (p != lexer->eof && *p != '\r' && *p != '\n')
               ++p;
            continue;
         }
      )

      STB_C_LEX_C_COMMENTS(
         if (p != lexer->eof && p[0] == '/' && p[1] == '*') {
            char *start = p;
            p += 2;
            while (p != lexer->eof && (p[0] != '*' || p[1] != '/'))
               ++p;
            if (p == lexer->eof)
               return stb__clex_token(lexer, CLEX_parse_error, start, p-1);
            p += 2;
            continue;
         }
      )

      #ifdef STB__clex_discard_preprocessor
      // @TODO this discards everything after a '#', regardless
      // of where in the line the # is, rather than requiring it
      // be at the start. (because this parser doesn't otherwise
      // check for line breaks!)
      if (p != lexer->eof && p[0] == '#') {
         while (p != lexer->eof && *p != '\r' && *p != '\n')
            ++p;
         continue;
      }
      #endif

      break;
   }

   if (p == lexer->eof)
      return stb__clex_eof(lexer);

   switch (*p) {
      default:
         if (   (*p >= 'a' && *p <= 'z')
             || (*p >= 'A' && *p <= 'Z')
             || *p == '_' || (unsigned char) *p >= 128    // >= 128 is UTF8 char
             STB_C_LEX_DOLLAR_IDENTIFIER( || *p == '$' ) )
         {
            int n = 0;
            lexer->string = lexer->string_storage;
            lexer->string_len = n;
            do {
               if (n+1 >= lexer->string_storage_len)
                  return stb__clex_token(lexer, CLEX_parse_error, p, p+n);
               lexer->string[n] = p[n];
               ++n;
            } while (
                  p+n != lexer->eof && (   // never look at characters beyond the input
                  (p[n] >= 'a' && p[n] <= 'z')
               || (p[n] >= 'A' && p[n] <= 'Z')
               || (p[n] >= '0' && p[n] <= '9') // allow digits in middle of identifier
               || p[n] == '_' || (unsigned char) p[n] >= 128
                STB_C_LEX_DOLLAR_IDENTIFIER( || p[n] == '$' ) )
            );
            lexer->string[n] = 0;
            lexer->string_len = n;   // report the identifier's real length
            return stb__clex_token(lexer, CLEX_id, p, p+n-1);
         }

         // check for EOF
         STB_C_LEX_0_IS_EOF(
            if (*p == 0)
               return stb__clex_eof(lexer);
         )

      single_char:
         // not an identifier, return the character as itself
         return stb__clex_token(lexer, *p, p, p);

      case '+':
         if (p+1 != lexer->eof) {
            STB_C_LEX_C_INCREMENTS(if (p[1] == '+') return stb__clex_token(lexer, CLEX_plusplus, p,p+1);)
            STB_C_LEX_C_ARITHEQ(   if (p[1] == '=') return stb__clex_token(lexer, CLEX_pluseq  , p,p+1);)
         }
         goto single_char;
      case '-':
         if (p+1 != lexer->eof) {
            STB_C_LEX_C_INCREMENTS(if (p[1] == '-') return stb__clex_token(lexer, CLEX_minusminus, p,p+1);)
            STB_C_LEX_C_ARITHEQ(   if (p[1] == '=') return stb__clex_token(lexer, CLEX_minuseq   , p,p+1);)
            STB_C_LEX_C_ARROW(     if (p[1] == '>') return stb__clex_token(lexer, CLEX_arrow     , p,p+1);)
         }
         goto single_char;
      case '&':
         if (p+1 != lexer->eof) {
            STB_C_LEX_C_LOGICAL(  if (p[1] == '&') return stb__clex_token(lexer, CLEX_andand, p,p+1);)
            STB_C_LEX_C_BITWISEEQ(if (p[1] == '=') return stb__clex_token(lexer, CLEX_andeq , p,p+1);)
         }
         goto single_char;
      case '|':
         if (p+1 != lexer->eof) {
            STB_C_LEX_C_LOGICAL(  if (p[1] == '|') return stb__clex_token(lexer, CLEX_oror, p,p+1);)
            STB_C_LEX_C_BITWISEEQ(if (p[1] == '=') return stb__clex_token(lexer, CLEX_oreq, p,p+1);)
         }
         goto single_char;
      case '=':
         if (p+1 != lexer->eof) {
            STB_C_LEX_C_COMPARISONS(if (p[1] == '=') return stb__clex_token(lexer, CLEX_eq, p,p+1);)
            STB_C_LEX_EQUAL_ARROW(  if (p[1] == '>') return stb__clex_token(lexer, CLEX_eqarrow, p,p+1);)
         }
         goto single_char;
      case '!':
         STB_C_LEX_C_COMPARISONS(if (p+1 != lexer->eof && p[1] == '=') return stb__clex_token(lexer, CLEX_noteq, p,p+1);)
         goto single_char;
      case '^':
         STB_C_LEX_C_BITWISEEQ(if (p+1 != lexer->eof && p[1] == '=') return stb__clex_token(lexer, CLEX_xoreq, p,p+1));
         goto single_char;
      case '%':
         STB_C_LEX_C_ARITHEQ(if (p+1 != lexer->eof && p[1] == '=') return stb__clex_token(lexer, CLEX_modeq, p,p+1));
         goto single_char;
      case '*':
         STB_C_LEX_C_ARITHEQ(if (p+1 != lexer->eof && p[1] == '=') return stb__clex_token(lexer, CLEX_muleq, p,p+1));
         goto single_char;
      case '/':
         STB_C_LEX_C_ARITHEQ(if (p+1 != lexer->eof && p[1] == '=') return stb__clex_token(lexer, CLEX_diveq, p,p+1));
         goto single_char;
      case '<':
         if (p+1 != lexer->eof) {
            STB_C_LEX_C_COMPARISONS(if (p[1] == '=') return stb__clex_token(lexer, CLEX_lesseq, p,p+1);)
            STB_C_LEX_C_SHIFTS(     if (p[1] == '<') {
                                       STB_C_LEX_C_ARITHEQ(if (p+2 != lexer->eof && p[2] == '=')
                                                              return stb__clex_token(lexer, CLEX_shleq, p,p+2);)
                                       return stb__clex_token(lexer, CLEX_shl, p,p+1);
                                    }
                              )
         }
         goto single_char;
      case '>':
         if (p+1 != lexer->eof) {
            STB_C_LEX_C_COMPARISONS(if (p[1] == '=') return stb__clex_token(lexer, CLEX_greatereq, p,p+1);)
            STB_C_LEX_C_SHIFTS(     if (p[1] == '>') {
                                       STB_C_LEX_C_ARITHEQ(if (p+2 != lexer->eof && p[2] == '=')
                                                              return stb__clex_token(lexer, CLEX_shreq, p,p+2);)
                                       return stb__clex_token(lexer, CLEX_shr, p,p+1);
                                    }
                              )
         }
         goto single_char;

      case '"':
         STB_C_LEX_C_DQ_STRINGS(return stb__clex_parse_string(lexer, p, CLEX_dqstring);)
         goto single_char;
      case '\'':
         STB_C_LEX_C_SQ_STRINGS(return stb__clex_parse_string(lexer, p, CLEX_sqstring);)
         // after stb__clex_parse_char, p points at what must be the closing
         // quote; the token ends there so that the character following the
         // literal is not skipped
         STB_C_LEX_C_CHARS(
         {
            char *start = p;
            lexer->int_number = stb__clex_parse_char(p+1, &p);
            if (lexer->int_number < 0)
               return stb__clex_token(lexer, CLEX_parse_error, start,start);
            if (p == lexer->eof || *p != '\'')
               return stb__clex_token(lexer, CLEX_parse_error, start,p);
            return stb__clex_token(lexer, CLEX_charlit, start, p);
         })
         goto single_char;

      case '0':
         #if defined(STB__clex_hex_ints) || defined(STB__clex_hex_floats)
         if (p+1 != lexer->eof) {
            if (p[1] == 'x' || p[1] == 'X') {
               char *q;

               #ifdef STB__clex_hex_floats
               for (q=p+2;
                    q != lexer->eof && ((*q >= '0' && *q <= '9') || (*q >= 'a' && *q <= 'f') || (*q >= 'A' && *q <= 'F'));
                    ++q);
               if (q != lexer->eof) {
                  if (*q == '.' STB_C_LEX_FLOAT_NO_DECIMAL(|| *q == 'p' || *q == 'P')) {
                     #ifdef STB__CLEX_use_stdlib
                     lexer->real_number = strtod((char *) p, (char**) &q);
                     #else
                     lexer->real_number = stb__clex_parse_float(p, &q);
                     #endif

                     if (p == q)
                        return stb__clex_token(lexer, CLEX_parse_error, p,q);
                     return stb__clex_parse_suffixes(lexer, CLEX_floatlit, p,q, STB_C_LEX_FLOAT_SUFFIXES);

                  }
               }
               #endif   // STB__CLEX_hex_floats

               #ifdef STB__clex_hex_ints
               #ifdef STB__CLEX_use_stdlib
               lexer->int_number = strtol((char *) p, (char **) &q, 16);
               #else
               {
                  stb__clex_int n=0;
                  for (q=p+2; q != lexer->eof; ++q) {
                     if (*q >= '0' && *q <= '9')
                        n = n*16 + (*q - '0');
                     else if (*q >= 'a' && *q <= 'f')
                        n = n*16 + (*q - 'a') + 10;
                     else if (*q >= 'A' && *q <= 'F')
                        n = n*16 + (*q - 'A') + 10;
                     else
                        break;
                  }
                  lexer->int_number = n;
               }
               #endif
               // "0x" without any hex digit: report the prefix itself as the
               // bad span so the parse point moves past it
               if (q == p+2)
                  return stb__clex_token(lexer, CLEX_parse_error, p,p+1);
               return stb__clex_parse_suffixes(lexer, CLEX_intlit, p,q, STB_C_LEX_HEX_SUFFIXES);
               #endif
            }
         }
         #endif // defined(STB__clex_hex_ints) || defined(STB__clex_hex_floats)
         // can't test for octal because we might parse '0.0' as float or as '0' '.' '0',
         // so have to do float first

         /* FALL THROUGH */
      case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':

         #ifdef STB__clex_decimal_floats
         {
            char *q = p;
            while (q != lexer->eof && (*q >= '0' && *q <= '9'))
               ++q;
            if (q != lexer->eof) {
               if (*q == '.' STB_C_LEX_FLOAT_NO_DECIMAL(|| *q == 'e' || *q == 'E')) {
                  #ifdef STB__CLEX_use_stdlib
                  lexer->real_number = strtod((char *) p, (char**) &q);
                  #else
                  lexer->real_number = stb__clex_parse_float(p, &q);
                  #endif

                  return stb__clex_parse_suffixes(lexer, CLEX_floatlit, p,q, STB_C_LEX_FLOAT_SUFFIXES);

               }
            }
         }
         #endif // STB__clex_decimal_floats

         #ifdef STB__clex_octal_ints
         if (p[0] == '0') {
            char *q = p;
            #ifdef STB__CLEX_use_stdlib
            lexer->int_number = strtol((char *) p, (char **) &q, 8);
            #else
            stb__clex_int n=0;
            while (q != lexer->eof) {
               if (*q >= '0' && *q <= '7')
                  n = n*8 + (*q - '0');
               else
                  break;
               ++q;
            }
            if (q != lexer->eof && (*q == '8' || *q=='9'))
               return stb__clex_token(lexer, CLEX_parse_error, p, q);
            lexer->int_number = n;
            #endif
            return stb__clex_parse_suffixes(lexer, CLEX_intlit, p,q, STB_C_LEX_OCTAL_SUFFIXES);
         }
         #endif // STB__clex_octal_ints

         #ifdef STB__clex_decimal_ints
         {
            char *q = p;
            #ifdef STB__CLEX_use_stdlib
            lexer->int_number = strtol((char *) p, (char **) &q, 10);
            #else
            stb__clex_int n=0;
            while (q != lexer->eof) {
               if (*q >= '0' && *q <= '9')
                  n = n*10 + (*q - '0');
               else
                  break;
               ++q;
            }
            lexer->int_number = n;
            #endif
            // NOTE(review): decimal literals are checked against the OCTAL suffix
            // set here; if the configuration has a separate decimal suffix list,
            // this presumably should use it -- confirm against the config section
            return stb__clex_parse_suffixes(lexer, CLEX_intlit, p,q, STB_C_LEX_OCTAL_SUFFIXES);
         }
         #endif // STB__clex_decimal_ints
         goto single_char;
   }
}
||||
|
#endif // STB_C_LEXER_IMPLEMENTATION |
||||
|
|
||||
|
#ifdef STB_C_LEXER_SELF_TEST |
||||
|
#define _CRT_SECURE_NO_WARNINGS |
||||
|
#include <stdio.h> |
||||
|
#include <stdlib.h> |
||||
|
|
||||
|
// self-test helper: writes a readable form of the lexer's current token to stdout
//   identifiers are prefixed with '_' and integer literals with '#' so they
//   can be told apart in the output; single-character tokens print as themselves
static void print_token(stb_lexer *lexer)
{
   switch (lexer->token) {
      case CLEX_id        : printf("_%s", lexer->string); break;
      case CLEX_eq        : printf("=="); break;
      case CLEX_noteq     : printf("!="); break;
      case CLEX_lesseq    : printf("<="); break;
      case CLEX_greatereq : printf(">="); break;
      case CLEX_andand    : printf("&&"); break;
      case CLEX_oror      : printf("||"); break;
      case CLEX_shl       : printf("<<"); break;
      case CLEX_shr       : printf(">>"); break;
      case CLEX_plusplus  : printf("++"); break;
      case CLEX_minusminus: printf("--"); break;
      case CLEX_arrow     : printf("->"); break;
      case CLEX_andeq     : printf("&="); break;
      case CLEX_oreq      : printf("|="); break;
      case CLEX_xoreq     : printf("^="); break;
      case CLEX_pluseq    : printf("+="); break;
      case CLEX_minuseq   : printf("-="); break;
      case CLEX_muleq     : printf("*="); break;
      case CLEX_diveq     : printf("/="); break;
      case CLEX_modeq     : printf("%%="); break;
      case CLEX_shleq     : printf("<<="); break;
      case CLEX_shreq     : printf(">>="); break;
      case CLEX_eqarrow   : printf("=>"); break;
      case CLEX_dqstring  : printf("\"%s\"", lexer->string); break;
      case CLEX_sqstring  : printf("'\"%s\"'", lexer->string); break;
      case CLEX_charlit   :
         // a character literal's value is delivered in int_number; lexer->string
         // is not set for it, so it must not be printed here
         if (lexer->int_number >= 32 && lexer->int_number < 127)
            printf("'%c'", (int) lexer->int_number);
         else
            printf("'\\x%02lx'", (unsigned long) lexer->int_number);
         break;
      // NOTE(review): the integer parsers visible in this file always store into
      // int_number, so the real_number variant below may print a stale value -- confirm
      #if defined(STB__clex_int_as_double) && !defined(STB__CLEX_use_stdlib)
      case CLEX_intlit    : printf("#%g", lexer->real_number); break;
      #else
      case CLEX_intlit    : printf("#%ld", lexer->int_number); break;
      #endif
      case CLEX_floatlit  : printf("%g", lexer->real_number); break;
      default:
         if (lexer->token >= 0 && lexer->token < 256)
            printf("%c", (int) lexer->token);
         else {
            printf("<<<UNKNOWN TOKEN %ld >>>\n", lexer->token);
         }
         break;
   }
}
||||
|
|
||||
|
// The comments and the function below are input for the self test, whose
// main() opens "stb_c_lexer.h" and lexes it: they put multi-line comments,
// odd comment delimiters and a range of float literals in front of the lexer.

/* Force a test
   of parsing
   multiline comments */

/*/ comment /*/
/**/ extern /**/

void dummy(void)
{
   double some_floats[] = {
      1.0501,   -10.4e12,   5E+10,
#if 0   // not supported in C++ or C-pre-99, so don't try to compile it, but let our parser test it
      0x1.0p+24, 0xff.FP-8, 0x1p-23,
#endif
      4.
   };
   // reference the array so compilers do not warn about it being unused
   (void) sizeof(some_floats);
   (void) some_floats[1];

   printf("test %d",1); // https://github.com/nothings/stb/issues/13
}
||||
|
|
||||
|
int main(int argc, char **argv) |
||||
|
{ |
||||
|
FILE *f = fopen("stb_c_lexer.h","rb"); |
||||
|
char *text = (char *) malloc(1 << 20); |
||||
|
int len = f ? (int) fread(text, 1, 1<<20, f) : -1; |
||||
|
stb_lexer lex; |
||||
|
if (len < 0) { |
||||
|
fprintf(stderr, "Error opening file\n"); |
||||
|
free(text); |
||||
|
fclose(f); |
||||
|
return 1; |
||||
|
} |
||||
|
fclose(f); |
||||
|
|
||||
|
stb_c_lexer_init(&lex, text, text+len, (char *) malloc(0x10000), 0x10000); |
||||
|
while (stb_c_lexer_get_token(&lex)) { |
||||
|
if (lex.token == CLEX_parse_error) { |
||||
|
printf("\n<<<PARSE ERROR>>>\n"); |
||||
|
break; |
||||
|
} |
||||
|
print_token(&lex); |
||||
|
printf(" "); |
||||
|
} |
||||
|
return 0; |
||||
|
} |
||||
|
#endif |
||||
|
/* |
||||
|
------------------------------------------------------------------------------ |
||||
|
This software is available under 2 licenses -- choose whichever you prefer. |
||||
|
------------------------------------------------------------------------------ |
||||
|
ALTERNATIVE A - MIT License |
||||
|
Copyright (c) 2017 Sean Barrett |
||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy of |
||||
|
this software and associated documentation files (the "Software"), to deal in |
||||
|
the Software without restriction, including without limitation the rights to |
||||
|
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies |
||||
|
of the Software, and to permit persons to whom the Software is furnished to do |
||||
|
so, subject to the following conditions: |
||||
|
The above copyright notice and this permission notice shall be included in all |
||||
|
copies or substantial portions of the Software. |
||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
||||
|
SOFTWARE. |
||||
|
------------------------------------------------------------------------------ |
||||
|
ALTERNATIVE B - Public Domain (www.unlicense.org) |
||||
|
This is free and unencumbered software released into the public domain. |
||||
|
Anyone is free to copy, modify, publish, use, compile, sell, or distribute this |
||||
|
software, either in source code form or as a compiled binary, for any purpose, |
||||
|
commercial or non-commercial, and by any means. |
||||
|
In jurisdictions that recognize copyright laws, the author or authors of this |
||||
|
software dedicate any and all copyright interest in the software to the public |
||||
|
domain. We make this dedication for the benefit of the public at large and to |
||||
|
the detriment of our heirs and successors. We intend this dedication to be an |
||||
|
overt act of relinquishment in perpetuity of all present and future rights to |
||||
|
this software under copyright law. |
||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
||||
|
AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN |
||||
|
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
||||
|
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||||
|
------------------------------------------------------------------------------ |
||||
|
*/ |
@ -0,0 +1,166 @@ |
|||||
|
|
||||
|
// width in bytes of every key; keys are hashed and compared over this full width
#define TABLE_KEY_SIZE 64
// number of hash lanes (bucket chains) in a table
#define TABLE_NUM_LANES 32
||||
|
// Hashes 'length' bytes starting at 'str'. Bytes at even and odd positions
// are mixed into the running hash with two different formulas.
static inline unsigned int APHash(const char* str, unsigned int length) {
    unsigned int hash = 0xAAAAAAAA;

    for (unsigned int i = 0; i < length; ++i) {
        if ((i & 1) == 0) {
            // even position
            hash ^= (hash << 7) ^ (str[i]) * (hash >> 3);
        } else {
            // odd position
            hash ^= ~((hash << 11) + ((str[i]) ^ (hash >> 5)));
        }
    }

    return hash;
}
||||
|
|
||||
|
// Maps the 32-bit value v into the range [0, c) without a division:
// the result is the upper 32 bits of the 64-bit product v * c.
static inline uint32_t fastModuloReductionDanielLemire(uint32_t v, uint32_t c) {
    const uint64_t product = (uint64_t)v * (uint64_t)c;

    return (uint32_t)(product >> 32);
}
||||
|
|
||||
|
// Hashes the first keyLength bytes of key and reduces the result to a lane
// index in the range [0, capacity).
static inline uint32_t hash(const char* key, uint32_t keyLength, uint32_t capacity) {
    return fastModuloReductionDanielLemire(APHash(key, keyLength), capacity);
}
||||
|
|
||||
|
// One node in a lane's singly linked chain: a fixed-width key together with
// the size and alignment values stored for it by insert().
typedef struct TableEntry {
    struct TableEntry* next;          // next entry in the same lane, or NULL

    const char key[TABLE_KEY_SIZE];   // compared with memcmp over all TABLE_KEY_SIZE bytes
    ssize_t size;
    ssize_t align;
} TableEntry;
||||
|
|
||||
|
// Chained hash table: entries[i] is the head of lane i (NULL when the lane is
// empty); lanes are indexed by hash(key, TABLE_KEY_SIZE, TABLE_NUM_LANES).
typedef struct Table {
    TableEntry **entries;
} Table;
||||
|
|
||||
|
// Finds the entry whose key equals 'key' over all TABLE_KEY_SIZE bytes.
// Returns the entry, or NULL when the key is not in the table.
static inline TableEntry* lookup(Table *table, const char key[TABLE_KEY_SIZE]) {
    const uint32_t lane = hash(key, TABLE_KEY_SIZE, TABLE_NUM_LANES);
    TableEntry *candidate = table->entries[lane];

    // walk the lane until the key matches or the chain ends
    while (candidate != NULL && memcmp(candidate->key, key, TABLE_KEY_SIZE) != 0) {
        candidate = candidate->next;
    }

    return candidate;
}
||||
|
|
||||
|
// Stores size/align under 'key' (TABLE_KEY_SIZE bytes). An existing entry
// with the same key has its values replaced; otherwise a new entry is
// allocated and pushed onto the front of the key's lane.
// Aborts the program if the new entry cannot be allocated.
static inline void insert(Table *table, const char key[TABLE_KEY_SIZE], ssize_t size, ssize_t align) {
    TableEntry *entry = lookup(table, key);

    if (entry) { // entry already exists, replace its value
        entry->size = size;
        entry->align = align;
        return;
    }

    // no entry with that key exists
    entry = (TableEntry*) calloc(1, sizeof(TableEntry));
    if (!entry) {
        // there is no way to report failure to the caller, so stop explicitly
        // rather than write through a NULL pointer
        fprintf(stderr, "insert: out of memory\n");
        abort();
    }

    // the key member is declared const so that it is only written here, once
    strWrite((char*)entry->key, key, TABLE_KEY_SIZE);
    entry->size = size;
    entry->align = align;

    unsigned int hashValue = hash(key, TABLE_KEY_SIZE, TABLE_NUM_LANES);
    entry->next = table->entries[hashValue];
    table->entries[hashValue] = entry;
}
||||
|
|
||||
|
// Inserts a 0-terminated key of any length: the key is copied into a
// zero-filled buffer of exactly TABLE_KEY_SIZE bytes (and cut off at that
// width if longer) before being handed to insert().
static inline void insertPadZeroes(Table* table, const char* keyToPad, ssize_t size, ssize_t align) {
    // sized by the same constant that bounds the copy loop, so the two cannot drift apart
    char scratch[TABLE_KEY_SIZE] = { 0 };
    for (int i = 0; i < TABLE_KEY_SIZE; i++) {
        if (keyToPad[i] == '\0') break;
        scratch[i] = keyToPad[i];
    }

    insert(table, scratch, size, align);
}
||||
|
|
||||
|
// Prints every entry of the table to stdout, lane by lane.
static inline void traversePrint(Table *table) {
    for (unsigned int i = 0; i < TABLE_NUM_LANES; i++) {
        TableEntry *entry = table->entries[i];

        while (entry != NULL) {
            // the precision stops printf at TABLE_KEY_SIZE bytes, because a key
            // that fills the whole field has no terminating 0; %zd matches the
            // signed ssize_t arguments
            printf("entry key: %-*.*s, size: %zd, align: %zd\n",
                   (int) TABLE_KEY_SIZE, (int) TABLE_KEY_SIZE, entry->key,
                   entry->size, entry->align);
            entry = entry->next;
        }
    }
}
||||
|
|
||||
|
static inline Table* initTable() { |
||||
|
Table *table = malloc(sizeof(Table)); |
||||
|
table->entries = (TableEntry**) calloc(sizeof(TableEntry*), TABLE_NUM_LANES); |
||||
|
|
||||
|
insertPadZeroes(table, "char", sizeof(char), sizeof(char)); |
||||
|
insertPadZeroes(table, "signed char", sizeof(signed char), sizeof(signed char)); |
||||
|
insertPadZeroes(table, "unsigned char", sizeof(unsigned char), sizeof(unsigned char)); |
||||
|
insertPadZeroes(table, "short", sizeof(short), sizeof(short)); |
||||
|
insertPadZeroes(table, "short int", sizeof(short int), sizeof(short int)); |
||||
|
insertPadZeroes(table, "signed short", sizeof(signed short), sizeof(signed short)); |
||||
|
insertPadZeroes(table, "signed short int", sizeof(signed short int), sizeof(signed short int)); |
||||
|
insertPadZeroes(table, "unsigned short", sizeof(unsigned short), sizeof(unsigned short)); |
||||
|
insertPadZeroes(table, "unsigned short int", sizeof(unsigned short int), sizeof(unsigned short int)); |
||||
|
insertPadZeroes(table, "int", sizeof(int), sizeof(int)); |
||||
|
insertPadZeroes(table, "signed", sizeof(signed), sizeof(signed)); |
||||
|
insertPadZeroes(table, "signed int", sizeof(signed int), sizeof(signed int)); |
||||
|
insertPadZeroes(table, "unsigned", sizeof(unsigned), sizeof(unsigned)); |
||||
|
insertPadZeroes(table, "unsigned int", sizeof(unsigned int), sizeof(unsigned int)); |
||||
|
insertPadZeroes(table, "long", sizeof(long), sizeof(long)); |
||||
|
insertPadZeroes(table, "long int", sizeof(long int), sizeof(long int)); |
||||
|
insertPadZeroes(table, "signed long", sizeof(signed long), sizeof(signed long)); |
||||
|
insertPadZeroes(table, "signed long int", sizeof(signed long int), sizeof(signed long int)); |
||||
|
insertPadZeroes(table, "unsigned long", sizeof(unsigned long), sizeof(unsigned long)); |
||||
|
insertPadZeroes(table, "unsigned long int", sizeof(unsigned long int), sizeof(unsigned long int)); |
||||
|
insertPadZeroes(table, "long long", sizeof(long long), sizeof(long long)); |
||||
|
insertPadZeroes(table, "long long int", sizeof(long long int), sizeof(long long int)); |
||||
|
insertPadZeroes(table, "signed long long", sizeof(signed long long), sizeof(signed long long)); |
||||
|
insertPadZeroes(table, "signed long long int", sizeof(signed long long int), sizeof(signed long long int)); |
||||
|
insertPadZeroes(table, "unsigned long long", sizeof(unsigned long long), sizeof(unsigned long long)); |
||||
|
insertPadZeroes(table, "unsigned long long int", sizeof(unsigned long long int), sizeof(unsigned long long int)); |
||||
|
insertPadZeroes(table, "float", sizeof(float), sizeof(float)); |
||||
|
insertPadZeroes(table, "double", sizeof(double), sizeof(double)); |
||||
|
insertPadZeroes(table, "long double", sizeof(long double), sizeof(long double)); |
||||
|
|
||||
|
insertPadZeroes(table, "size_t", sizeof(size_t), sizeof(size_t)); |
||||
|
insertPadZeroes(table, "ssize_t", sizeof(ssize_t), sizeof(ssize_t)); |
||||
|
insertPadZeroes(table, "bool", sizeof(bool), sizeof(bool)); |
||||
|
insertPadZeroes(table, "_Bool", sizeof(_Bool), sizeof(_Bool)); |
||||
|
|
||||
|
//char |
||||
|
//signed char |
||||
|
//unsigned char |
||||
|
//short |
||||
|
//short int |
||||
|
//signed short |
||||
|
//signed short int |
||||
|
//unsigned short |
||||
|
//unsigned short int |
||||
|
//int |
||||
|
//signed |
||||
|
//signed int |
||||
|
//unsigned |
||||
|
//unsigned int |
||||
|
//long |
||||
|
//long int |
||||
|
//signed long |
||||
|
//signed long int |
||||
|
//unsigned long |
||||
|
//unsigned long int |
||||
|
//long long |
||||
|
//long long int |
||||
|
//signed long long |
||||
|
//signed long long int |
||||
|
//unsigned long long |
||||
|
//unsigned long long int |
||||
|
//float |
||||
|
//double |
||||
|
//long double |
||||
|
|
||||
|
//traversePrint(table); |
||||
|
|
||||
|
return table; |
||||
|
} |
||||
|
|
||||
|
// File-scope table handle; NULL until something assigns it.
// NOTE(review): presumably set from initTable() by the including program and
// used to resolve type names — confirm at the call sites.
static Table *typeTable = NULL;
||||
|
|
@ -0,0 +1,95 @@ |
|||||
|
|
||||
|
// Contents of base-index.html as returned by readWholeFile(); loaded by
// outputHtml() the first time it runs and kept for later calls.
static char* indexHtml = NULL;

// Filled in by readWholeFile() through its second argument when the template
// is loaded (presumably the template's length in bytes).
static size_t indexHtmlSize = 0;
||||
|
|
||||
|
// sb_concatf(fmt, ...): printf-style append to a growable string buffer.
//
// The macro operates on four variables that must be in scope where it is used:
//   char *stringBuffer  - heap buffer being appended to (must be non-NULL)
//   int   sbc           - allocated size of stringBuffer in bytes
//   int   sbi           - bytes already used; the next write starts here
//   int   result        - scratch; receives snprintf's return value
//
// The text is formatted at stringBuffer + sbi. If it does not fit (snprintf
// reports a length >= the remaining room), the buffer is grown to 1.5x its
// size, or to exactly what is needed if that is larger, and the formatting is
// retried, so output is never truncated and sbi never runs past the buffer.
// Because of the retry, the arguments may be evaluated more than once; do not
// pass expressions with side effects.
//
// A formatting error or a failed reallocation is reported through die(),
// which is expected not to return. Appending an empty string is not an error.
//
// Wrapped in do { } while (0) so it behaves as a single statement; call sites
// supply the trailing semicolon.
#define sb_concatf(fmt, ...) \
    do { \
        for (;;) { \
            int sbRoom = sbc - sbi; \
            result = snprintf(stringBuffer + sbi, (size_t) sbRoom, fmt, ##__VA_ARGS__); \
            if (result < 0) die("fatal error concating to string"); \
            if (result < sbRoom) break; \
            int sbWanted = sbc + sbc / 2; \
            if (sbWanted < sbi + result + 1) sbWanted = sbi + result + 1; \
            char *sbGrown = realloc(stringBuffer, (size_t) sbWanted); \
            if (sbGrown == NULL) die("fatal error concating to string"); \
            stringBuffer = sbGrown; \
            sbc = sbWanted; \
        } \
        sbi += result; \
    } while (0)
||||
|
|
||||
|
|
||||
|
static void outputHtml() { |
||||
|
static int sbi = 0; |
||||
|
static int sbc = 50 * 1024; |
||||
|
static char* stringBuffer = NULL; |
||||
|
|
||||
|
if (stringBuffer == NULL) stringBuffer = malloc(sizeof(char) * sbc); |
||||
|
if (indexHtml == NULL) indexHtml = readWholeFile("base-index.html", &indexHtmlSize); |
||||
|
|
||||
|
int result = 0; |
||||
|
sb_concatf("%s", indexHtml); |
||||
|
|
||||
|
for (int i = 0; i < numAllStructs; i++) { |
||||
|
struct StructInfo *structInfo = allStructs + i; |
||||
|
printStructInfo(structInfo); |
||||
|
|
||||
|
ssize_t byteCounter = 0; |
||||
|
sb_concatf( |
||||
|
"<div class='struct-info'>" |
||||
|
"<label class='struct-info-header'>struct %s - alias: %s</label>" |
||||
|
"<div class='struct-info-byte-row'>" |
||||
|
, structInfo->name, structInfo->alias); |
||||
|
for (int d = 0; d < structInfo->numDeclarations; d++) { |
||||
|
struct Declaration *decl = structInfo->declarations + d; |
||||
|
bool truncate32 = false; |
||||
|
if (decl->size > 32) { |
||||
|
truncate32 = true; |
||||
|
} |
||||
|
|
||||
|
sb_concatf("%s", "<div class='struct-info-bytegroup'>"); |
||||
|
|
||||
|
bool first = (d % 2) == 0; |
||||
|
const char* positionClass = first ? "struct-info-declaration-top" : "struct-info-declaration-bottom"; |
||||
|
if (decl->size == -1) { |
||||
|
sb_concatf( |
||||
|
"<div class='struct-info-byte struct-info-byte-unknown'>?" |
||||
|
"<div class='struct-info-declaration %s'>" |
||||
|
"%s <span class='struct-info-declaration-name'>%s</span>" |
||||
|
"</div>" |
||||
|
"</div>" |
||||
|
"<div class='struct-info-byte struct-info-byte-ellipsis'>...</div>" |
||||
|
, positionClass, decl->type, decl->name); |
||||
|
} else { |
||||
|
for (int b = 0; b < decl->size; b++) { |
||||
|
if (b == 0) { |
||||
|
sb_concatf( |
||||
|
"<div class='struct-info-byte struct-info-byte-first'>" |
||||
|
"<div class='struct-info-declaration %s'>" |
||||
|
"%s <span class='struct-info-declaration-name'>%s</span>" |
||||
|
"</div>" |
||||
|
"</div>" |
||||
|
, positionClass, decl->type, decl->name); |
||||
|
} else if (truncate32 && b == 32) { |
||||
|
sb_concatf("<div class='struct-info-byte struct-info-byte-counted-ellipsis'>...[%ld]</div>", decl->size - b); |
||||
|
break; |
||||
|
|
||||
|
} else { |
||||
|
sb_concatf("%s", "<div class='struct-info-byte'></div>"); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
sb_concatf("%s", "</div>"); |
||||
|
} |
||||
|
sb_concatf("%s", "</div></div>"); |
||||
|
} |
||||
|
|
||||
|
// don't forget the closing body and html tags |
||||
|
sb_concatf("%s", "</body></html>"); |
||||
|
|
||||
|
// write the index.html file out to disk |
||||
|
FILE* fp = fopen("index.html", "wb"); |
||||
|
if (fp == NULL) { |
||||
|
die("failed to open the file index.html"); |
||||
|
} |
||||
|
|
||||
|
size_t writtenCount = fwrite(stringBuffer, 1, sbi, fp); |
||||
|
fclose(fp); |
||||
|
|
||||
|
if (writtenCount != sbi) { |
||||
|
die("wrote only partially"); |
||||
|
} |
||||
|
} |
||||
|
|
Write
Preview
Loading…
Cancel
Save
Reference in new issue