From 697b4b26bef4c3d0dc3ec6e6e80f44f502ccff08 Mon Sep 17 00:00:00 2001 From: Nick Hayashi <43279719+churchianity@users.noreply.github.com> Date: Sun, 23 Jul 2023 21:35:42 -0400 Subject: [PATCH] allocator, string, print work --- alloc.cpp | 18 ++++++--- alloc.h | 26 ++++++++++-- array.hpp | 69 ++++++++++++++++++++++++-------- file.cpp | 3 +- print.cpp | 17 +++++++- print.h | 5 +++ string.h | 109 +++++++++++++++++++++++++++++++++----------------- table.hpp | 117 ++++++++++++++++++++++++++++++++++++++++++++++++------ 8 files changed, 286 insertions(+), 78 deletions(-) diff --git a/alloc.cpp b/alloc.cpp index e595afa..6333fa6 100644 --- a/alloc.cpp +++ b/alloc.cpp @@ -133,10 +133,10 @@ static void defaultAllocatorInit() { DefaultAllocatorInited = true; } -Allocator* Allocator::GetDefault() { +Allocator& Allocator :: GetDefault() { ULE_TYPES_H_FTAG; if (!DefaultAllocatorInited) defaultAllocatorInit(); - return &DefaultAllocator; + return DefaultAllocator; } //================================================================================ @@ -174,13 +174,18 @@ void* Arena :: Alloc(u32 sizeInBytes) { ULE_TYPES_H_FTAG; u8* p = this->buffer + this->index; + u8* end = this->buffer + this->bufferSizeInBytes; + if ((p + sizeInBytes) >= end) { + return null; + } this->index += sizeInBytes; - //String::memset(p, 0, sizeInBytes); + return (void*) p; /* u8* p = this->buffer + this->index; u32 offset = (u32) alignForward2((u64) p, 64); + //String::memset(p, 0, sizeInBytes); #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wint-to-void-pointer-cast" @@ -201,6 +206,11 @@ void Arena :: Clear() { ULE_TYPES_H_FTAG; this->index = 0; } + +float Arena :: PercentUsed() { + ULE_TYPES_H_FTAG; + return ((float)this->index)/((float)this->bufferSizeInBytes); +} //================================================================================ struct StackAllocator { @@ -213,5 +223,3 @@ struct PoolAllocator { u8* buffer; }; - - diff --git a/alloc.h b/alloc.h index 2130903..9300698 100644 --- a/alloc.h +++ b/alloc.h @@ -20,6 +20,7 @@ typedef void (*clearer) ( void*); typedef void (*destroyer) ( void*); // operating system allocator wrappers +// the overloads with the 'void*' are to make them conform to the allocator interface above. extern void* pMalloc(size_t size); extern void* pMalloc(size_t size, void* allocatorState); extern void* pCalloc(size_t maxNumOfElements, size_t elementSize); @@ -33,15 +34,15 @@ extern void pFree(const void* ptr, void* allocatorState); struct Allocator { - void* state; mallocator mallocate; callocator callocate; reallocator reallocate; - freeer free; // releases a specific piece of memory + freeer free; // releases a specific piece/chunk of memory, identified by pointer. clearer clear; // should release all the memory owned by this allocator at once. destroyer destroy; // releases all the memory owned by this allocator, and also destroys the allocator. + void* state; - static Allocator* GetDefault(); + static Allocator& GetDefault(); Allocator() { this->state = null; @@ -62,6 +63,24 @@ struct Allocator { this->clear = null; this->destroy = null; } + + Allocator( + mallocator mallocate, + callocator callocate = null, + reallocator reallocate = null, + freeer free = null, + clearer clear = null, + destroyer destroy = null, + void* state = null + ) { + this->mallocate = mallocate; + this->callocate = callocate; + this->reallocate = reallocate; + this->free = free; + this->clear = clear; + this->destroy = destroy; + this->state = state; + } }; struct Arena { @@ -73,6 +92,7 @@ struct Arena { void* Alloc(u32 sizeInBytes); void Clear(); + float PercentUsed(); }; #endif diff --git a/array.hpp b/array.hpp index e00feaa..9030e33 100644 --- a/array.hpp +++ b/array.hpp @@ -1,4 +1,5 @@ +#pragma once #ifndef ULE_ARRAY_H #define ULE_ARRAY_H @@ -7,7 +8,7 @@ #include "config.h" #include "alloc.h" // allocators... #include "serialize.h" // serialization -#include "string.h" // String::memcpy +#include "string.h" // String::memcpy, String::memset #include "types.h" // type definitions @@ -15,32 +16,38 @@ // should work with any data type for T including primitive types // some initial |capacity| is heap-allocated and a pointer is stored to it as |data| // the |length| of the array, or number of filled slots is also tracked. -// -// it implements a single constructor, and operator new. no destructor, or operator overloading besides that. -// remember to use ->data or .data to actually access the underlying array. -// -// overhead: -// size of the struct will be 128 bits on 64-bit platforms -// note that if you heap allocate this structure and store it on another struct, you will have to chase two pointers to get at the data. -// to avoid this, I often include this struct in other structs so there's only one pointer dereference, -// just like including a raw pointer array + length in your struct. -// because I like to do this, automatic destructors are not useful. -// template struct Array { + T* data; u32 length; u32 capacity; - T* data; + Allocator* allocator; - Array(u32 _capacity = 8) { + Array(u32 _capacity = 8, Allocator* allocator = &Allocator::GetDefault()) { ULE_TYPES_H_FTAG; + this->data = (T*) allocator->mallocate(sizeof(T) * _capacity, allocator->state); + String::memset(this->data, '\0', sizeof(T) * _capacity); this->length = 0; this->capacity = _capacity; - this->data = (T*) pCalloc(sizeof (T), _capacity); + this->allocator = allocator; + } + static Array* Init(u32 _capacity = 8, Allocator* allocator = &Allocator::GetDefault()) { + Array* array = allocator->mallocate(sizeof(Array), allocator->state); + array->allocator = allocator; + array->length = 0; + array->capacity = _capacity; + const size_t size = sizeof(T) * _capacity; + array->data = (T*) allocator->mallocate(size, array->allocator->state); + String::memset(array->data, '\0', size); + return array; } - void* operator new(size_t size) { + + // function call overhead + dev 'massert' bounds-checking overhead. + // you can use "->data[i]" if you know you can't be out of bounds. + T& Array::operator[](u32 index) const { ULE_TYPES_H_FTAG; - return pMalloc((u32) size); + massert(index >= 0 && index < this->length, "array access out of bounds!"); + return this->data[index]; } void checkIfShouldGrow() { @@ -133,6 +140,10 @@ struct Array { // returns the next address into which you can store a T. makes sure there's enough room first. // it is irresponsible to call this and then not store a T in that address. this increments length, // reserving the next spot for you. + // + // C++ 'placement new' is the more safe/standard way to do this, but it requires your initialization + // logic is in a constructor somewhere, which isn't necessarily the case for you, and isn't for us. + // T* pushNextAddrPromise() { ULE_TYPES_H_FTAG; this->checkIfShouldGrow(); @@ -228,6 +239,21 @@ struct Array { return this->length == this->capacity; } + void sort(bool(*comparator)(T a, T b)) { + ULE_TYPES_H_FTAG; + massert(comparator != null, "can't call sort with a null comparator"); + const auto length = this->length; + for (u32 i = 0; i < length; i++) { + for (u32 j = i + 1; j < length; j++) { + if (comparator(this->data[i], this->data[j])) { + auto temp = this->data[i]; + this->data[i] = this->data[j]; + this->data[j] = temp; + } + } + } + } + void clear() { ULE_TYPES_H_FTAG; this->length = 0; @@ -261,6 +287,15 @@ static void deserialize(char** buffer, Array* array) { ULE_TYPES_H_FTAG; deserialize(buffer, &array->length); deserialize(buffer, &array->capacity); + + // |array| should have already been allocated, including its |data| member, + // but this may not be sufficient to write in the data we now wish to. + // realloc to be sure. + array->data = (T*) pRealloc(array->data, sizeof(T) * array->capacity); + if (array->data == null) { + massert(false, "failed to realloc when deserializing an array."); + } + for (u32 i = 0; i < array->length; i++) { deserialize(buffer, array->data + i); } diff --git a/file.cpp b/file.cpp index b47e456..6a3cc54 100644 --- a/file.cpp +++ b/file.cpp @@ -1,7 +1,8 @@ +#ifndef _CRT_SECURE_NO_WARNINGS #define _CRT_SECURE_NO_WARNINGS #include // fopen, fseek, ftell, fclose -#undef _CRT_SECURE_NO_WARNINGS +#endif #include "alloc.h" #include "array.hpp" diff --git a/print.cpp b/print.cpp index a0bc993..22af3d1 100644 --- a/print.cpp +++ b/print.cpp @@ -48,6 +48,18 @@ void println(const char* format, ...) { va_end(args); } +void printBitsLE(const size_t count, void* ptr) { + u8* b = (u8*) ptr; + u8 byte; + for (s32 i = count - 1; i >= 0; i--) { + for (s32 j = 7; j >= 0; j--) { + byte = (b[i] >> j) & 1; + print("%u", byte); + } + } + println(); +} + /** * Prints a stack trace. * Implementation varies for Win32 vs. *nix @@ -134,6 +146,7 @@ void writeMinidump(void* exceptionPointers) { // 'EXCEPTION_POINTERS*' actually } #else void writeMinidump(void* exceptionPointers) {} // stub... does nothing on Unix + // OSX and Linux stacktrace stuff. #include // backtrace, backtrace_symbols #include // abi::__cxa_demangle @@ -232,14 +245,14 @@ void _debug( ) { ULE_TYPES_H_FTAG; if (format == null) { - print("[%s, %s] [%s:%d] %sdebug:%s null\n", date, time, filename, line, ANSI_BLUE, ANSI_RESET); + print("[%s, %s] [%s:%d] null\n", date, time, filename, line); return; } va_list args; va_start(args, format); - print("[%s, %s] [%s:%d] %sdebug:%s ", date, time, filename, line, ANSI_BLUE, ANSI_RESET); + print("[%s, %s] [%s:%d] ", date, time, filename, line); vprintln(format, args); va_end(args); diff --git a/print.h b/print.h index 32ec6cc..8c55dd6 100644 --- a/print.h +++ b/print.h @@ -81,8 +81,13 @@ extern void vprintln(const char* format, va_list args); extern void print(const char* format, ...); extern void println(const char* format, ...); +// prints the |count| * 8 little endian bits pointed to by |ptr| +extern void printBitsLE(const size_t count, void* ptr); + // Prints a stack trace, or concatenates the stack trace to |string| if it is not null. extern void trace(String* string = null); + +// on windows only extern void writeMinidump(void* exceptionPointers); // This ends the program and calls trace(). generally you should use 'massert' instead diff --git a/string.h b/string.h index e9e0b89..c2f7fce 100644 --- a/string.h +++ b/string.h @@ -7,20 +7,20 @@ #include "types.h" #include "alloc.h" -#include // @TODO remove this +#include // memcpy, memset, memcmp #define STB_SPRINTF_IMPLEMENTATION #define STB_SPRINTF_STATIC #include -#define STR_MEMALLOC pMalloc -#define STR_MEMFREE pFree -#define STR_ASSERT assert +#define STR_ASSERT assert #define STR_IMPLEMENTATION #define STR_SUPPORT_STD_STRING 0 #define STR_DEFINE_STR32 0 // the type Str32, which would normally be available, conflicts with a type in MacTypes.h +static Allocator* _stringClassAllocator = &Allocator::GetDefault(); // only used for the string class, string functions you pass an allocator if it allocates + // 'String' is a datatype, but it also is a namespace for a bunch of static 'char*' operations that // you would normally find in the or header // The datatype is a modified version of a string class developed by Omar Cornut: https://github.com/ocornut/str @@ -99,7 +99,7 @@ public: return c == '0' || c == '1'; } - static inline char* intToString(u64 integer) { + static inline char* intToString(u64 integer) { // @ALLOC ULE_TYPES_H_FTAG; u32 capacity = 10; u32* remainders = (u32*) pMalloc(sizeof (u32) * capacity); @@ -191,23 +191,24 @@ public: } // heap allocates a copy of |string| and returns a pointer to it. - static inline char* cpy(const char* string, u32 length, Allocator* allocator = Allocator::GetDefault()) { + static inline char* cpy(const char* string, u32 length, Allocator& allocator = Allocator::GetDefault()) { ULE_TYPES_H_FTAG; - char* buffer = (char*) allocator->mallocate(sizeof (char) * (length + 1), allocator->state); + assert(&allocator == &Allocator::GetDefault()); + char* buffer = (char*) pMalloc(sizeof(char) * (length+1)); //allocator.mallocate(sizeof (char) * (length + 1), allocator.state); u32 i = 0; for (; i < length; i++) { buffer[i] = string[i]; } buffer[i] = '\0'; + return buffer; } // heap allocates a copy of |string| and returns a pointer to it. - static inline char* cpy(const char* string, Allocator* allocator = Allocator::GetDefault()) { + static inline char* cpy(const char* string, Allocator& allocator = Allocator::GetDefault()) { ULE_TYPES_H_FTAG; u32 len = String::len(string); - return String::cpy(string, len, allocator = Allocator::GetDefault()); } @@ -222,6 +223,30 @@ public: return memeq(m1, m2, l1); } + // assumes null termination. + static inline bool LexographicComparisonASCII(const char* str1, const char* str2) { + u32 min; + + const u32 l1 = String::len(str1); + const u32 l2 = String::len(str2); + if (l1 > l2) { + min = l2; + } else { + min = l1; + } + + for (u32 i = 0; i < min; i++) { + if (str1[i] < str2[i]) { + return true; + + } else if (str1[i] > str2[i]) { + return false; + } + } + + return l1 < l2; + } + #ifdef _WIN32 static inline size_t wcharToChar(wchar_t* wstring, char* buffer, size_t maxBufferLength) { ULE_TYPES_H_FTAG; @@ -239,12 +264,16 @@ public: static inline void memcpy(void* dest, void* src, u32 size) { ULE_TYPES_H_FTAG; - u8* dest_ = (u8*) dest; - u8* src_ = (u8*) src; + // allowing c++ compilers to know we're invoking memcpy allows more aggressive optimizations, sometimes resulting in 8-9x speedup, + // when compared to the horrible, commented out loop below. + std::memcpy(dest, src, size); - for (u32 i = 0; i < size; i++) { - dest_[i] = src_[i]; - } + //u8* dest_ = (u8*) dest; + //u8* src_ = (u8*) src; + + //for (u32 i = 0; i < size; i++) { + // dest_[i] = src_[i]; + //} } // replace all instances of |c1| in |string| with |c2| @@ -340,7 +369,7 @@ public: // @TODO ALL OF THESE TRIMS //static inline char* trimStart(const char* str, u32 count); //static inline char* trimEnd(const char* str, u32 count); - static inline char* trim(const char* str, u32 count, Allocator* allocator = Allocator::GetDefault()) { + static inline char* trim(const char* str, u32 count, Allocator& allocator = Allocator::GetDefault()) { ULE_TYPES_H_FTAG; u32 length = String::len(str); @@ -348,7 +377,7 @@ public: return (char*) ""; } - char* buffer = (char*) allocator->mallocate(sizeof (char) * (length - 1), allocator->state); + char* buffer = (char*) allocator.mallocate(sizeof (char) * (length - 1), allocator.state); u32 i = 0; for (; i < (length - count); i++) { @@ -360,10 +389,10 @@ public: return buffer; } - static inline char* asciiToLower(const char* str, Allocator* allocator = Allocator::GetDefault()) { + static inline char* asciiToLower(const char* str, Allocator& allocator = Allocator::GetDefault()) { ULE_TYPES_H_FTAG; u32 length = String::len(str); - char* buffer = (char*) allocator->mallocate(sizeof (char) * length + 1, allocator->state); + char* buffer = (char*) allocator.mallocate(sizeof (char) * length + 1, allocator.state); u32 i = 0; for (; i < length; i++) { buffer[i] = String::ASCII_LOWER[str[i]]; @@ -372,10 +401,10 @@ public: return buffer; } - static inline char* asciiToUpper(const char* str, Allocator* allocator = Allocator::GetDefault()) { + static inline char* asciiToUpper(const char* str, Allocator& allocator = Allocator::GetDefault()) { ULE_TYPES_H_FTAG; u32 length = String::len(str); - char* buffer = (char*) allocator->mallocate(sizeof (char) * length + 1, allocator->state); + char* buffer = (char*) allocator.mallocate(sizeof (char) * length + 1, allocator.state); u32 i = 0; for (; i < length; i++) { buffer[i] = String::ASCII_LOWER[str[i]]; @@ -384,12 +413,12 @@ public: return buffer; } - static inline char* concat(const char* str1, const char* str2, Allocator* allocator = Allocator::GetDefault()) { + static inline char* concat(const char* str1, const char* str2, Allocator& allocator = Allocator::GetDefault()) { ULE_TYPES_H_FTAG; u32 l1 = String::len(str1); u32 l2 = String::len(str2); u32 newLength = l1 + l2; - char* newBuffer = (char*) allocator->mallocate(sizeof (char) * newLength + 1, allocator->state); + char* newBuffer = (char*) allocator.mallocate(sizeof (char) * newLength + 1, allocator.state); u32 i = 0; for (; i < newLength; i++) { if (i < l1) { @@ -419,9 +448,9 @@ public: return String::write(dest, src, length); } - static inline char* read(const char* buffer, u32 length, Allocator* allocator = Allocator::GetDefault()) { + static inline char* read(const char* buffer, u32 length, Allocator& allocator = Allocator::GetDefault()) { ULE_TYPES_H_FTAG; - char* tk = (char*) allocator->mallocate(sizeof (char) * length + 1, allocator->state); + char* tk = (char*) allocator.mallocate(sizeof (char) * length + 1, allocator.state); u32 i = 0; while (i < length) { tk[i] = *(buffer + i); @@ -431,6 +460,12 @@ public: return tk; } + //--------------------------------------------------------------- + // Begin String class type + static inline void SetStringClassAllocator(Allocator* allocator) { + _stringClassAllocator = allocator; + } + char* Data; // Point to LocalBuf() or heap allocated int Capacity : 21; // Max 2 MB int LocalBufSize : 10; // Max 1023 bytes @@ -469,7 +504,7 @@ public: inline void set_ref(const char* src) { ULE_TYPES_H_FTAG; if (Owned && !is_using_local_buf()) - STR_MEMFREE(Data); + _stringClassAllocator->free(Data, null); Data = src ? (char*)src : EmptyBuffer; Capacity = 0; Owned = 0; @@ -513,7 +548,7 @@ public: inline void clear() { ULE_TYPES_H_FTAG; if (Owned && !is_using_local_buf()) - STR_MEMFREE(Data); + _stringClassAllocator->free(Data, null); if (LocalBufSize) { Data = local_buf(); Data[0] = '\0'; @@ -539,7 +574,7 @@ public: new_capacity = LocalBufSize; } else { // Disowned or LocalBuf -> Heap - new_data = (char*)STR_MEMALLOC((size_t)new_capacity * sizeof(char)); + new_data = (char*)_stringClassAllocator->mallocate((size_t)new_capacity * sizeof(char), null); } // string in Data might be longer than new_capacity if it wasn't owned, don't copy too much @@ -551,7 +586,7 @@ public: new_data[new_capacity - 1] = 0; if (Owned && !is_using_local_buf()) - STR_MEMFREE(Data); + _stringClassAllocator->free(Data, null); Data = new_data; Capacity = new_capacity; @@ -565,7 +600,7 @@ public: return; if (Owned && !is_using_local_buf()) - STR_MEMFREE(Data); + _stringClassAllocator->free(Data, null); if (new_capacity < LocalBufSize) { // Disowned -> LocalBuf @@ -573,7 +608,7 @@ public: Capacity = LocalBufSize; } else { // Disowned or LocalBuf -> Heap - Data = (char*)STR_MEMALLOC((size_t)new_capacity * sizeof(char)); + Data = (char*)_stringClassAllocator->mallocate((size_t)new_capacity * sizeof(char), null); Capacity = new_capacity; } Owned = 1; @@ -585,9 +620,9 @@ public: int new_capacity = length() + 1; if (Capacity <= new_capacity) return; - char* new_data = (char*)STR_MEMALLOC((size_t)new_capacity * sizeof(char)); + char* new_data = (char*)_stringClassAllocator->mallocate((size_t)new_capacity * sizeof(char), null); memcpy(new_data, Data, (size_t)new_capacity); - STR_MEMFREE(Data); + _stringClassAllocator->free(Data, null); Data = new_data; Capacity = new_capacity; } @@ -725,7 +760,7 @@ public: inline ~String() { if (Owned && !is_using_local_buf()) - STR_MEMFREE(Data); + if (_stringClassAllocator->free != null) ::_stringClassAllocator->free(Data, null); } protected: @@ -831,12 +866,12 @@ public: new_data = (char*)this + sizeof(String); \ new_capacity = LocalBufSize; \ } else { \ - new_data = (char*)STR_MEMALLOC((size_t)new_capacity * sizeof(char)); \ + new_data = (char*)_stringClassAllocator->mallocate((size_t)new_capacity * sizeof(char), null); \ } \ strncpy(new_data, Data, (size_t)new_capacity - 1); \ new_data[new_capacity - 1] = 0; \ if (Owned && !is_using_local_buf()) \ - STR_MEMFREE(Data); \ + _stringClassAllocator->free(Data, null); \ Data = new_data; \ Capacity = new_capacity; \ Owned = 1; \ @@ -846,14 +881,14 @@ public: if (new_capacity <= Capacity) \ return; \ if (Owned && !is_using_local_buf()) \ - STR_MEMFREE(Data); \ + _stringClassAllocator->free(Data, null); \ if (new_capacity < LocalBufSize) { \ Data = (char*)this + sizeof(String); \ Capacity = LocalBufSize; \ } else { \ while (Capacity < new_capacity) { \ Capacity = (s32)(Capacity * 1.5f); \ - Data = (char*) STR_MEMALLOC((size_t) Capacity * sizeof(char)); \ + Data = (char*) _stringClassAllocator->mallocate((size_t) Capacity * sizeof(char), null); \ } \ } \ Owned = 1; \ diff --git a/table.hpp b/table.hpp index cf719ff..fabb639 100644 --- a/table.hpp +++ b/table.hpp @@ -1,4 +1,5 @@ +#pragma once #ifndef ULE_TABLE_H #define ULE_TABLE_H @@ -245,6 +246,11 @@ struct Table { return (Table*) pMalloc(sizeof(Table)); } + float LoadFactor() { + ULE_TYPES_H_FTAG; + return ((float)this->length)/((float)this->lanes); + } + V insert(const char* key, u32 keyLength, V value) { ULE_TYPES_H_FTAG; TableEntry* entry = this->lookup(key, keyLength); @@ -295,7 +301,7 @@ struct Table { // do not set |freeValues| to true unless the template parameter 'T' is a pointer, // and the table is responsible for freeing the memory. - void clear(bool freeValues = false) { + void clear(bool freeValues = false, bool freeKeys = true) { ULE_TYPES_H_FTAG; for (u32 i = 0; i < this->lanes; i++) { TableEntry** lane = &this->entries[i]; @@ -304,7 +310,7 @@ struct Table { while (entry != null) { auto next = entry->next; - pFree(entry->key); + if (freeKeys) pFree(entry->key); if (freeValues) { // @HACK - it's only relevant to free the value if it's an owned pointer // (the table is effectively 'responsible' for that memory) @@ -337,7 +343,8 @@ struct Table { void traverse(const std::function *)>& entryCallback) { ULE_TYPES_H_FTAG; - for (u32 i = 0; i < this->lanes; i++) { + const auto lanes = this->lanes; + for (u32 i = 0; i < lanes; i++) { TableEntry* entry = this->entries[i]; while (entry != null) { @@ -437,7 +444,7 @@ static void deserialize(char** buffer, Table** table) { // a key size of 16, 32, or 64. // //================================================================================ -//#include +#include template ::type* = nullptr> static inline bool fixedKeySizeMemEq(u8* m1, u8* m2) { ULE_TYPES_H_FTAG; @@ -515,21 +522,105 @@ struct FixedKeySizeTableEntry { V value; }; +template +struct DenseHashSetEntry { + +}; + +template +struct DenseHashSet { + enum ControlByte { + EMPTY = -128, // 0b10000000 // indicates a free slot, and a place to stop probing + DELETED = -2, // 0b11111110 // indicates there is no value here, but you should keep probing + SENTINEL = -1, // 0b11111111 // indicates end of stream + //FULL = // 0b0xxxxxxx // indicates an occupied slot + }; + + u8* metadata; // parallel array to 'values', each byte is a 'ControlByte'. + u32 length; + u32 capacity; + V* values; + Allocator* allocator; + + // a 'group' represents a capacity for 16 elements in the table. the actual capacity will be |_numGroups * 16| + // we use some SSE instructions that operate on chunks of 16 at a time, so we need this constraint. + DenseHashSet(u32 _numGroups = 4, Allocator* _allocator = &Allocator::GetDefault()) { + ULE_TYPES_H_FTAG; + this->length = 0; + this->capacity = _numGroups; + + const size_t valueSize = sizeof(T) * this->capacity * 16; + const size_t metadataSize = sizeof(u8) * this->capacity * 16; + this->values = (T*) _allocator->mallocate(valueSize, _allocator->state); + this->metadata = (u8*) _allocator->mallocate(metadataSize, _allocator->state); + + String::memset(this->values, '\0', valueSize); + String::memset(this->metadata, (u8) DenseHashSet::EMPTY, metadataSize); + } + + float LoadFactor() { + ULE_TYPES_H_FTAG; + return ((float)this->length)/((float)this->lanes); + } + + u32 H1(u32 h) { + ULE_TYPES_H_FTAG; + return h >> 7; + } + + u8 H2(u32 h) { + ULE_TYPES_H_FTAG; + return (u8) (h & 0x7F); + } + + u32 Match(u8 h2) const { + auto match = _mm_set1_epi8(h2); + return _mm_movemask_epi8(_mm_cmpeq_epi8(match, metadata)); + } + + V Insert(const char* key, size_t keyLength, V value) { + ULE_TYPES_H_FTAG; + // @TODO check if we need to resize + } + + bool Lookup(const char* key, size_t keyLength, V* outValue) { + ULE_TYPES_H_FTAG; + u32 h = APHash(key, keyLength); + size_t group = this->H1(h) % this->capacity; + while (1) { + u8* g = this->metadata + group * 16; + u32 mask = this->Match(this->H2(h)); + for (u32 i = 0; i < 32; i++) { + u32 idx = mask & (1 << i); + } + if (mask == 0) return false; + group = (group + 1) % this->capacity; + } + } +}; + template struct FixedKeySizeTable { + FixedKeySizeTableEntry** entries; u32 lanes; u32 length; - FixedKeySizeTableEntry** entries; + Allocator* allocator; - FixedKeySizeTable(u32 _lanes = 16) { + FixedKeySizeTable(u32 _lanes = 16, Allocator* allocator = &Allocator::GetDefault()) { ULE_TYPES_H_FTAG; this->lanes = _lanes; this->length = 0; - this->entries = (FixedKeySizeTableEntry**) pCalloc(sizeof(FixedKeySizeTableEntry*), this->lanes); + this->allocator = allocator; + this->entries = (FixedKeySizeTableEntry**) allocator->callocate(sizeof(FixedKeySizeTableEntry*), this->lanes, null); } void* operator new(size_t size) { ULE_TYPES_H_FTAG; - return (FixedKeySizeTable*) pMalloc(sizeof(FixedKeySizeTable)); + return (FixedKeySizeTable*) this->allocator->mallocate(sizeof(FixedKeySizeTable), null); + } + + float LoadFactor() { + ULE_TYPES_H_FTAG; + return ((float)this->length)/((float)this->lanes); } V insert(const char* key, V value) { @@ -537,7 +628,7 @@ struct FixedKeySizeTable { FixedKeySizeTableEntry* entry = this->lookup(key); if (!entry) { // no entry with that key exists - entry = (FixedKeySizeTableEntry*) pCalloc(sizeof(FixedKeySizeTableEntry), 1); + entry = (FixedKeySizeTableEntry*) this->allocator->callocate(sizeof(FixedKeySizeTableEntry), 1, null); String::write((char*)entry->key, key, KEY_SIZE); entry->value = value; @@ -604,12 +695,12 @@ struct FixedKeySizeTable { #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wint-to-void-pointer-cast" #endif - pFree((void*) entry->value); + if (this->allocator->free) this->allocator->free((void*) entry->value, null); #ifdef __clang__ #pragma clang diagnostic pop #endif } - pFree(entry); + if (this->allocator->free) this->allocator->free(entry, null); entry = next; } @@ -622,9 +713,9 @@ struct FixedKeySizeTable { void traverse(const std::function *)>& entryCallback) { ULE_TYPES_H_FTAG; - for (u32 i = 0; i < this->lanes; i++) { + const auto lanes = this->lanes; + for (u32 i = 0; i < lanes; i++) { FixedKeySizeTableEntry* entry = this->entries[i]; - while (entry != null) { entryCallback(entry); entry = entry->next;