// ule/table.hpp


#ifndef TABLE_H
#define TABLE_H

#include <new> // new
#include <functional> // std::function for traversal
#include <type_traits> // std::enable_if

#include "alloc.h"
#include "string.h"
#include "types.h"


// what follows is a collection of hash functions taken from: https://www.partow.net/programming/hashfunctions/#:~:text=The%20hash%20functions%20in%20this,containers%20such%20as%20hash%2Dtables.
//
// Available Hash Functions
// The General Hash Functions Library has the following mix of additive and rotative general purpose string hashing algorithms. The following algorithms vary in usefulness and functionality and are mainly intended as an example for learning how hash functions operate and what they basically look like in code form.
//
// 00 - RS Hash Function
// A simple hash function from Robert Sedgwick's "Algorithms in C" book. I've added some simple optimizations to the algorithm in order to speed up its hashing process.
#if 1
// RS hash: folds each byte into the running value with a multiplier that is
// itself re-multiplied by a fixed constant after every byte.
// |str| must point at |length| readable bytes; returns 0 for an empty key.
static unsigned int RSHash(const char* str, unsigned int length)
{
   const unsigned int multiplierStep = 378551;
   unsigned int multiplier = 63689;
   unsigned int result = 0;

   for (unsigned int index = 0; index != length; ++index)
   {
      result = result * multiplier + str[index];
      multiplier *= multiplierStep; // evolve the multiplier for the next byte
   }

   return result;
}
#endif

// 01 - JS Hash Function
// A bitwise hash function written by Justin Sobel
#if 1
// JS hash: XORs the running value with a shift/add mix of itself and each byte.
// |str| must point at |length| readable bytes; an empty key yields the seed 1315423911.
static unsigned int JSHash(const char* str, unsigned int length)
{
   unsigned int result = 1315423911;
   const char* cursor = str;
   unsigned int remaining = length;

   while (remaining-- > 0)
   {
      const unsigned int mixed = (result << 5) + (*cursor++) + (result >> 2);
      result ^= mixed;
   }

   return result;
}
#endif 

// 02 - PJW Hash Function
// This hash algorithm is based on work by Peter J. Weinberger of Renaissance Technologies. The book Compilers (Principles, Techniques and Tools) by Aho, Sethi and Ullman recommends the use of hash functions that employ the hashing methodology found in this particular algorithm.
#if 1
// PJW hash: shifts the running value left by one eighth of the word width per
// byte; whenever bits land in the top eighth they are folded back into the low
// bits and cleared.
// |str| must point at |length| readable bytes; returns 0 for an empty key.
static unsigned int PJWHash(const char* str, unsigned int length)
{
   const unsigned int totalBits = (unsigned int)(sizeof(unsigned int) * 8);
   const unsigned int foldShift = (unsigned int)((totalBits * 3) / 4);
   const unsigned int stepShift = (unsigned int)(totalBits / 8);
   const unsigned int topMask   = (unsigned int)(0xFFFFFFFF) << (totalBits - stepShift);
   unsigned int result = 0;

   for (unsigned int index = 0; index < length; ++index)
   {
      result = (result << stepShift) + str[index];

      const unsigned int overflow = result & topMask;
      if (overflow == 0)
      {
         continue; // nothing reached the top bits yet
      }

      result = (result ^ (overflow >> foldShift)) & ~topMask;
   }

   return result;
}
#endif

// 03 - ELF Hash Function
// Similar to the PJW Hash function, but tweaked for 32-bit processors. It is a widely used hash function on UNIX based systems.
#if 1
// ELF hash: shifts in four bits per byte; any bits that reach the top nibble
// are XORed back in 24 bits lower and then cleared.
// |str| must point at |length| readable bytes; returns 0 for an empty key.
static unsigned int ELFHash(const char* str, unsigned int length)
{
   unsigned int result = 0;
   const char* const end = str + length;

   for (const char* cursor = str; cursor != end; ++cursor)
   {
      result = (result << 4) + (*cursor);

      const unsigned int topNibble = result & 0xF0000000L;
      if (topNibble != 0)
      {
         result ^= (topNibble >> 24);
      }

      result &= ~topNibble; // no-op when the top nibble was empty
   }

   return result;
}
#endif

// 04 - BKDR Hash Function
// This hash function comes from Brian Kernighan and Dennis Ritchie's book "The C Programming Language". It is a simple hash function using a strange set of possible seeds which all constitute a pattern of 31....31...31 etc, it seems to be very similar to the DJB hash function.
//
// @NOTE (nick) - this is basically the one I was using before I found this resource with all these other hash functions. (I took it from K&R)
// except, the seed in the version I used was '31', not '131'.
#if 1
// BKDR hash: polynomial accumulation, result = result * 131 + byte.
// |str| must point at |length| readable bytes; returns 0 for an empty key.
static unsigned int BKDRHash(const char* str, unsigned int length)
{
   const unsigned int seed = 131; /* 31 131 1313 13131 131313 etc.. */
   unsigned int result = 0;

   for (unsigned int remaining = length; remaining != 0; --remaining, ++str)
   {
      result = result * seed + (*str);
   }

   return result;
}
#endif

// 05 - SDBM Hash Function
// This is the algorithm of choice which is used in the open source SDBM project. The hash function seems to have a good over-all distribution for many different data sets. It seems to work well in situations where there is a high variance in the MSBs of the elements in a data set.
#if 1
// SDBM hash: result = byte + (result << 6) + (result << 16) - result.
// |str| must point at |length| readable bytes; returns 0 for an empty key.
static unsigned int SDBMHash(const char* str, unsigned int length)
{
   unsigned int result = 0;

   for (unsigned int index = 0; index != length; ++index)
   {
      // same terms as the classic formulation, summed in a different order
      // (unsigned arithmetic wraps, so the order does not affect the result)
      const unsigned int spread = (result << 6) + (result << 16) - result;
      result = spread + str[index];
   }

   return result;
}
#endif 

// 06 - DJB Hash Function
// An algorithm produced by Professor Daniel J. Bernstein and shown first to the world on the usenet newsgroup comp.lang.c. It is one of the most efficient hash functions ever published.
#if 1
// DJB hash: starts from 5381 and computes result = result * 33 + byte.
// |str| must point at |length| readable bytes; an empty key yields 5381.
static unsigned int DJBHash(const char* str, unsigned int length)
{
   unsigned int result = 5381;
   const char* cursor = str;

   for (unsigned int remaining = length; remaining > 0; --remaining)
   {
      // (result << 5) + result, written as a multiplication; both wrap identically
      result = result * 33u + (*cursor);
      ++cursor;
   }

   return result;
}
#endif

// 07 - DEK Hash Function
// An algorithm proposed by Donald E. Knuth in The Art Of Computer Programming Volume 3, under the topic of sorting and search chapter 6.4.
#if 1
// DEK hash: seeds the value with the key length, then for each byte combines
// (result << 5) ^ (result >> 27) and XORs the byte in.
// |str| must point at |length| readable bytes; returns 0 for an empty key.
static unsigned int DEKHash(const char* str, unsigned int length)
{
   unsigned int result = length;
   unsigned int index = 0;

   while (index < length)
   {
      const unsigned int rotated = (result << 5) ^ (result >> 27);
      result = rotated ^ str[index];
      ++index;
   }

   return result;
}
#endif

// 08 - AP Hash Function
// An algorithm produced by me Arash Partow. I took ideas from all of the above hash functions making a hybrid rotative and additive hash function algorithm. There isn't any real mathematical analysis explaining why one should use this hash function instead of the others described above other than the fact that I tried to resemble the design as close as possible to a simple LFSR. An empirical result which demonstrated the distributive abilities of the hash algorithm was obtained using a hash-table with 100003 buckets, hashing The Project Gutenberg Etext of Webster's Unabridged Dictionary, the longest encountered chain length was 7, the average chain length was 2, the number of empty buckets was 4579.
// @NOTE(nick) - given the test case above of the whole english dictionary, I think this one makes sense for a general purpose, unknown-length key hash table.
#if 1
// AP hash: alternates between two mixing steps depending on whether the byte
// index is even or odd, XORing each step's output into the running value.
// |str| must point at |length| readable bytes; an empty key yields 0xAAAAAAAA.
static unsigned int APHash(const char* str, unsigned int length)
{
   unsigned int result = 0xAAAAAAAA;

   for (unsigned int index = 0; index < length; ++index)
   {
      const char current = str[index];

      if ((index & 1) == 0)
      {
         // even positions: shift-left XOR (byte * shifted-right value)
         result ^= (result << 7) ^ (current * (result >> 3));
      }
      else
      {
         // odd positions: complement of shift-left plus (byte XOR shifted-right value)
         result ^= ~((result << 11) + (current ^ (result >> 5)));
      }
   }

   return result;
}
#endif

// Maps |v| into the range [0, c) by taking the upper 32 bits of the 64-bit
// product v * c. Despite the name this is a range reduction, not `v % c`:
// the result scales with |v| rather than wrapping, and it is 0 whenever c is 0.
static inline u32 fastModuloReductionDanielLemire(u32 v, u32 c) {
    const u64 wideProduct = (u64) v * (u64) c;
    return wideProduct >> 32;
}

// Hashes the |keyLength| bytes at |key| with APHash and reduces the result to a
// lane index below |capacity| (always 0 when |capacity| is 0).
static inline u32 hash(const char* key, u32 keyLength, u32 capacity) {
    TYPES_H_FTAG;

    return fastModuloReductionDanielLemire(APHash(key, keyLength), capacity);
}

// One node in a Table<V> lane; nodes that hash to the same lane are chained
// through |next|.
template <typename V>
struct TableEntry {
    // next node in the same lane, or null at the end of the chain.
    TableEntry<V>* next;

    // key bytes. Table::insert stores a copy made with String::cpy and
    // Table::clear releases it with pFree.
    const char* key;
    // number of key bytes; Table::lookup passes it to String::memeq.
    u32 keyLength;
    // payload stored under |key|.
    V value;
};

// Appends one table entry to |str| as key, keyLength, value (in that order).
// The deserialize() overloads for Table read the fields back in the same order.
// |entry| is dereferenced unconditionally, so it must not be null.
// NOTE(review): how the `const char*` key is encoded depends on the serialize
// overload chosen for it, which is defined outside this file — confirm it
// round-trips keys containing embedded zero bytes.
template <typename T>
static void serialize(String* str, TableEntry<T>* entry) {
    serialize(str, entry->key);
    serialize(str, entry->keyLength);
    serialize(str, entry->value);
}

template <typename V>
struct Table {
    u32 lanes;
    u32 length;
    TableEntry<V>** entries;

    Table<V>(u32 _lanes = 16) {
        TYPES_H_FTAG;
        this->lanes = _lanes;
        this->length = 0;
        this->entries = (TableEntry<V>**) pCalloc(sizeof(TableEntry<V>*), this->lanes);
    }
    void* operator new(size_t size) {
        TYPES_H_FTAG;
        return (Table<V>*) pMalloc(sizeof(Table<V>));
    }

    V insert(const char* key, u32 keyLength, V value) {
        TYPES_H_FTAG;
        TableEntry<V>* entry = this->lookup(key, keyLength);

        if (!entry) { // no entry with that key exists
            entry = (TableEntry<V>*) pMalloc(sizeof (TableEntry<V>));
            entry->key = String::cpy(key, keyLength);
            entry->keyLength = keyLength;
            entry->value = value;

            u32 hashValue = hash(key, keyLength, lanes);
            entry->next = entries[hashValue];
            entries[hashValue] = entry;
            this->length++;

            return (V) 0;

        } else { // entry already exists, replace its value
                 // pFree(entry->value); // @NOTE how to cleanup if overwriting an owned pointer?

            V oldValue = entry->value;
            entry->value = value;
            return oldValue;
        }
    }

    TableEntry<V>* lookup(const char* key, u32 keyLength) {
        TYPES_H_FTAG;
        TableEntry<V>* entry = this->entries[hash(key, keyLength, lanes)];

        for (; entry != null; entry = entry->next) {
            if (String::memeq((unsigned char*)key, keyLength, (unsigned char*)entry->key, entry->keyLength)) {
                return entry;
            }
        }

        return null;
    }

    V lookupWithDefault(const char* key, u32 keyLength, V defaultValue) {
        TYPES_H_FTAG;
        auto entry = this->lookup(key, keyLength);

        if (entry == null) return defaultValue;

        return entry->value;
    }

    // do not set |freeValues| to true unless the template parameter 'T' is a pointer,
    // and the table is responsible for freeing the memory.
    void clear(bool freeValues = false) {
        TYPES_H_FTAG;
        for (u32 i = 0; i < this->lanes; i++) {
            TableEntry<V>** lane = &this->entries[i];
            TableEntry<V>* entry = *lane;

            while (entry != null) {
                auto next = entry->next;

                pFree(entry->key);
                if (freeValues) {
                    // @HACK - it's only relevant to free the value if it's an owned pointer
                    // (the table is effectively 'responsible' for that memory)
                    // but you may have 'V' be a non-pointer value entirely, causing this cast to 
                    // be nonsensical/a bug in other cases.
                    //
                    // make sure you know what you're doing when you set |freeValues| to |true|.
                    //
                    // there's probably a 'better' way to do this using C++ template voodoo,
                    // but I have no interest in digging myself a deeper grave there.
#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wint-to-void-pointer-cast"
#endif
                    pFree((void*) entry->value);
#ifdef __clang__
#pragma clang diagnostic pop
#endif
                }
                pFree(entry);

                entry = next;
            }

            *lane = null;
        }

        this->length = 0;
    }

    void traverse(const std::function <void (TableEntry<V>*)>& entryCallback) {
        TYPES_H_FTAG;
        for (u32 i = 0; i < this->lanes; i++) {
            TableEntry<V>* entry = this->entries[i];

            while (entry != null) {
                entryCallback(entry);
                entry = entry->next;
            }
        }
    }
};

// Appends |table| to |str|: lane count, entry count, then every entry lane by
// lane in chain order. The table is taken by value (a shallow copy of its
// three fields), and no null marker is written.
template <typename T>
static void serialize(String* str, Table<T> table) {
    TYPES_H_FTAG;
    serialize(str, table.lanes);
    serialize(str, table.length);

    for (u32 lane = 0; lane < table.lanes; ++lane) {
        for (TableEntry<T>* node = table.entries[lane]; node; node = node->next) {
            serialize(str, node);
        }
    }
}

// Appends the table behind |table| to |str|. SERIALIZE_HANDLE_NULL deals with a
// null |table| first; otherwise the lane count, the entry count and every
// entry (lane by lane, in chain order) are written.
template <typename T>
static void serialize(String* str, Table<T>* table) {
    TYPES_H_FTAG;
    SERIALIZE_HANDLE_NULL(str, table);
    serialize(str, table->lanes);
    serialize(str, table->length);

    for (u32 lane = 0; lane < table->lanes; ++lane) {
        for (TableEntry<T>* node = table->entries[lane]; node; node = node->next) {
            serialize(str, node);
        }
    }
}

template <typename T>
static void deserialize(char** buffer, Table<T>* table) {
    TYPES_H_FTAG;
    deserialize(buffer, &table->lanes);
    u32 length;
    deserialize(buffer, &length);
    for (u32 i = 0; i < length; i++) {
        TableEntry<T> entry;
        deserialize(buffer, &entry.key);
        deserialize(buffer, &entry.keyLength);
        deserialize(buffer, &entry.value);
        table->insert(entry.key, entry.keyLength, entry.value);
        pFree(entry.key);
    }
    table->length = length;
}

template <typename T>
static void deserialize(char** buffer, Table<T>** table) {
    TYPES_H_FTAG;
    DESERIALIZE_HANDLE_NULL(buffer, table);
    u32 lanes;
    deserialize(buffer, &lanes);
    Table<T>* _table = new Table<T>(lanes);
    u32 length;
    deserialize(buffer, &length);
    for (u32 i = 0; i < length; i++) {
        TableEntry<T> entry;
        deserialize(buffer, &entry.key);
        deserialize(buffer, &entry.keyLength);
        deserialize(buffer, &entry.value);
        _table->insert(entry.key, entry.keyLength, entry.value);
        pFree(entry.key);
    }
    _table->length = length;
    *table = _table;
}

//================================================================================ 
// Fixed-key size table.
//
// Sometimes, you want a hash table and you know for a fact how big the keys will
// be at maximum.
//
// This commonly happens when the keys actually aren't strings, but are integers,
// or other packed values.
//
// You can use this instead in that case, and avoid keeping certain memory and
// some work the more generic table has to deal with.
//
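// The intent is a speedup from MMX/SSE/AVX when they are supported and you use
// a key size of 16, 32, or 64. Those code paths are currently commented out
// below, so for now every key size is compared with String::memeq.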
//
//================================================================================ 
//#include <mmintrin.h>
// Overload selected (via enable_if) when KEY_SIZE == 64.
// Returns String::memeq(m1, m2, KEY_SIZE) for the two KEY_SIZE-byte keys.
// The SIMD experiments kept below are all commented out, so this overload
// currently does the same work as the generic one.
template <size_t KEY_SIZE, typename std::enable_if<KEY_SIZE == 64>::type* = nullptr>
static inline bool fixedKeySizeMemEq(u8* m1, u8* m2) {
    TYPES_H_FTAG;

    // AVX512:
    //__mmask32 result = _mm512_cmpeq_epi16_mask (*((__m512i*)m1), *((__m512i*)m2));

    //sse4.2:
    //int result = 0;
    //for (u32 i = 0; i < 4; i++) {
    //    __m128i s1 = *((__m128i*)(m1+(i*16)));
    //    __m128i s2 = *((__m128i*)(m2+(i*16)));
    //    result = _mm_cmpistro(s1, s2, _SIDD_UBYTE_OPS);
    //}

    // MMX: (this one is barely nanoseconds (~1-10ns) faster than String::memeq)
    // NOTE(review): this loop steps 16 bytes per iteration but loads __m64 values —
    // verify it covers every byte of the key before re-enabling it.
    //for (u32 i = 0; i < 4; i++) {
    //    u32 ii = i*16;
    //    __m64 s1 = *((__m64*)(m1+ii));
    //    __m64 s2 = *((__m64*)(m2+ii));
    //    __m64 result = _mm_cmpeq_pi32(s1, s2);
    //    if (((u64)result) != ~0ULL) {
    //        return false;
    //    }
    //}
    //return true;

    return String::memeq(m1, m2, KEY_SIZE);
}
// Overload selected (via enable_if) when KEY_SIZE == 32.
// Returns String::memeq(m1, m2, KEY_SIZE) for the two KEY_SIZE-byte keys.
// The SIMD experiments kept below are commented out, so this overload
// currently does the same work as the generic one.
template <size_t KEY_SIZE, typename std::enable_if<KEY_SIZE == 32>::type* = nullptr>
static inline bool fixedKeySizeMemEq(u8* m1, u8* m2) {
    TYPES_H_FTAG;
    //sse4.2:
    // NOTE(review): this loop runs 4 iterations of 16 bytes, which is 64 bytes —
    // more than this overload's 32-byte keys; verify before re-enabling it.
    //int result = 0;
    //for (u32 i = 0; i < 4; i++) {
    //    __m128i s1 = *((__m128i*)(m1+(i*16)));
    //    __m128i s2 = *((__m128i*)(m2+(i*16)));
    //    result = _mm_cmpistro(s1, s2, _SIDD_UBYTE_OPS);
    //}

    // MMX: (this one is barely nanoseconds (~1-10ns) faster than String::memeq)
    //for (u32 i = 0; i < 2; i++) {
    //    u32 ii = i*16;
    //    __m64 s1 = *((__m64*)(m1+ii));
    //    __m64 s2 = *((__m64*)(m2+ii));
    //    __m64 result = _mm_cmpeq_pi32(s1, s2);
    //    if (((u64)result) != ~0ULL) {
    //        return false;
    //    }
    //}
    //return true;

    return String::memeq(m1, m2, KEY_SIZE);
}
// Overload selected (via enable_if) when KEY_SIZE == 16.
// Returns String::memeq(m1, m2, KEY_SIZE) for the two KEY_SIZE-byte keys.
// The MMX experiment kept below is commented out, so this overload currently
// does the same work as the generic one.
template <size_t KEY_SIZE, typename std::enable_if<KEY_SIZE == 16>::type* = nullptr>
static inline bool fixedKeySizeMemEq(u8* m1, u8* m2) {
    TYPES_H_FTAG;
    // MMX: (this one is barely nanoseconds (~1-10ns) faster than String::memeq)
    //__m64 result = _mm_cmpeq_pi32(*((__m64*)m1), *((__m64*)m2));
    //return ((u64)result) == ~0ULL;

    return String::memeq(m1, m2, KEY_SIZE);
}
// Generic overload, selected (via enable_if) for every KEY_SIZE other than
// 64, 32 and 16. Returns String::memeq(m1, m2, KEY_SIZE) for the two
// KEY_SIZE-byte keys.
template <size_t KEY_SIZE, typename std::enable_if<KEY_SIZE != 64 && KEY_SIZE != 32 && KEY_SIZE != 16>::type* = nullptr>
static inline bool fixedKeySizeMemEq(u8* m1, u8* m2) {
    TYPES_H_FTAG;
    return String::memeq(m1, m2, KEY_SIZE);
}

// One node in a FixedKeySizeTable lane. The key is stored inline, so there is
// no separate key allocation and no key length field.
template <size_t KEY_SIZE, typename V>
struct FixedKeySizeTableEntry {
    // next node in the same lane, or null at the end of the chain.
    FixedKeySizeTableEntry<KEY_SIZE, V>* next;

    // exactly KEY_SIZE key bytes. Declared const, but FixedKeySizeTable::insert
    // fills it through a (char*) cast right after allocating the node.
    const char key[KEY_SIZE];
    // payload stored under |key|.
    V value;
};

template <size_t KEY_SIZE, typename V>
struct FixedKeySizeTable {
    u32 lanes;
    u32 length;
    FixedKeySizeTableEntry<KEY_SIZE, V>** entries;

    FixedKeySizeTable<KEY_SIZE, V>(u32 _lanes = 16) {
        TYPES_H_FTAG;
        this->lanes = _lanes;
        this->length = 0;
        this->entries = (FixedKeySizeTableEntry<KEY_SIZE, V>**) pCalloc(sizeof(FixedKeySizeTableEntry<KEY_SIZE, V>*), this->lanes);
    }
    void* operator new(size_t size) {
        TYPES_H_FTAG;
        return (FixedKeySizeTable<KEY_SIZE, V>*) pMalloc(sizeof(FixedKeySizeTable<KEY_SIZE, V>));
    }

    V insert(const char* key, V value) {
        TYPES_H_FTAG;
        FixedKeySizeTableEntry<KEY_SIZE, V>* entry = this->lookup(key);

        if (!entry) { // no entry with that key exists
            entry = (FixedKeySizeTableEntry<KEY_SIZE, V>*) pCalloc(sizeof(FixedKeySizeTableEntry<KEY_SIZE, V>), 1);
            String::write((char*)entry->key, key, KEY_SIZE);
            entry->value = value;

            u32 hashValue = hash(key, KEY_SIZE, lanes);
            entry->next = entries[hashValue];
            entries[hashValue] = entry;
            this->length++;

            return (V) 0;

        } else { // entry already exists, replace its value
                 // pFree(entry->value); // @NOTE how to cleanup if overwriting an owned pointer?

            V oldValue = entry->value;
            entry->value = value;
            return oldValue;
        }
    }

    FixedKeySizeTableEntry<KEY_SIZE, V>* lookup(const char* key) {
        TYPES_H_FTAG;
        FixedKeySizeTableEntry<KEY_SIZE, V>* entry = this->entries[hash(key, KEY_SIZE, lanes)];

        for (; entry != null; entry = entry->next) {
            if (fixedKeySizeMemEq<KEY_SIZE>((unsigned char*)key, (unsigned char*)entry->key)) {
                return entry;
            }
        }

        return null;
    }

    V lookupWithDefault(const char* key, V defaultValue) {
        TYPES_H_FTAG;
        auto entry = this->lookup(key);

        if (entry == null) return defaultValue;

        return entry->value;
    }

    // do not set |freeValues| to true unless the template parameter 'T' is a pointer,
    // and the table is responsible for freeing the memory.
    void clear(bool freeValues = false) {
        TYPES_H_FTAG;
        for (u32 i = 0; i < this->lanes; i++) {
            FixedKeySizeTableEntry<KEY_SIZE, V>** lane = &this->entries[i];
            FixedKeySizeTableEntry<KEY_SIZE, V>* entry = *lane;

            while (entry != null) {
                auto next = entry->next;

                if (freeValues) {
                    // @HACK - it's only relevant to free the value if it's an owned pointer
                    // (the table is effectively 'responsible' for that memory)
                    // but you may have 'V' be a non-pointer value entirely, causing this cast to 
                    // be nonsensical/a bug in other cases.
                    //
                    // make sure you know what you're doing when you set |freeValues| to |true|.
                    //
                    // there's probably a 'better' way to do this using C++ template voodoo,
                    // but I have no interest in digging myself a deeper grave there.
#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wint-to-void-pointer-cast"
#endif
                    pFree((void*) entry->value);
#ifdef __clang__
#pragma clang diagnostic pop
#endif
                }
                pFree(entry);

                entry = next;
            }

            *lane = null;
        }

        this->length = 0;
    }

    void traverse(const std::function <void (FixedKeySizeTableEntry<KEY_SIZE, V>*)>& entryCallback) {
        TYPES_H_FTAG;
        for (u32 i = 0; i < this->lanes; i++) {
            FixedKeySizeTableEntry<KEY_SIZE, V>* entry = this->entries[i];

            while (entry != null) {
                entryCallback(entry);
                entry = entry->next;
            }
        }
    }
};

//================================================================================ 
// a better explanation of cache tables than I could possibly do in a comment:
// https://fgiesen.wordpress.com/2019/02/11/cache-tables/
// One slot of a CacheTable.
struct CacheTableEntry {
    // copy of the key made by CacheTable::insert (String::cpy); null while the
    // slot has never been filled.
    char* key;
    // number of key bytes; CacheTable::lookup passes it to String::memeq.
    u32 keyLength;
    // caller-supplied payload; only freed by CacheTable::clear when freeValues is true.
    void* value;
};

struct CacheTable {
    u32 n, p;
    CacheTableEntry* entries; // n and p are the dimensions of the array. n is first.

    CacheTable(u32 _n = 8, u32 _p = 8) {
        TYPES_H_FTAG;
        this->n       = _n;
        this->p       = _p;
        this->entries = (CacheTableEntry*) pCalloc(this->n*this->p, sizeof(CacheTableEntry));
    }

    void* insert(const char* key, u32 keyLength, void* value) {
        TYPES_H_FTAG;

        CacheTableEntry* row = this->entries + hash(key, keyLength, this->n) * this->n;
        // We're going to insert in 'row'. We need some policy to decide which column to evict.
        // The (stupid) choice for now, is to just start at column 0, and increment every time we insert,
        // regardless of which row we chose.
        static u32 seed = 0;
        // increment, then modulo the number of columns.
        seed = ((u64) (seed+1) * (u64) this->p) >> 32;

        CacheTableEntry* entry = row + seed;
        if (entry->key != null) {
            // the entry was already populated. we have to free the memory for the key 
            // (because we always copy keys on insertion)
            // as well as return the address of the old value we overwrite, so the caller can free it
            // if necessary.
            pFree(entry->key);
            entry->key = String::cpy(key, keyLength);
            entry->keyLength = keyLength;
            void* oldValue = entry->value;
            entry->value = value;
            return oldValue;

        } else {
            entry->key = String::cpy(key, keyLength);
            entry->keyLength = keyLength;
            entry->value = value;
            return null;
        }
    }

    CacheTableEntry* lookup(const char* key, u32 keyLength) {
        TYPES_H_FTAG;

        CacheTableEntry* row = this->entries + hash(key, keyLength, this->n) * this->n;

        for (u32 i = 0; i < this->p; i++) {
            CacheTableEntry* entry = row + i;

            if (String::memeq((unsigned char*)key, keyLength, (unsigned char*)entry->key, entry->keyLength)) {
                return entry;
            }
        }

        return null;
    }

    void clear(bool freeValues = false) {
        TYPES_H_FTAG;

        for (u32 i = 0; i < this->n; i++) {
            CacheTableEntry* row = this->entries + i * this->n;
            for (u32 j = 0; j < this->p; j++) {
                CacheTableEntry* entry = row + j;

                if (entry->key != null) {
                    pFree(entry->key);
                }

                if (freeValues && entry->value != null) {
                    pFree(entry->value);
                }
            }
        }
    }
};

#endif