From 2159ae399a1fe0f9ff813f005b649a010b944bd0 Mon Sep 17 00:00:00 2001
From: gingerBill <ginger.bill.22@gmail.com>
Date: Fri, 27 Nov 2015 00:20:44 +0000
Subject: [PATCH] gb.hpp v0.25 - gb_math.hpp v0.03

0.25  - Faster Heap_Allocator for Windows using HeapAlloc
0.03  - Remove templated min/max/clamp
---
 README.md   |   2 +-
 gb.hpp      | 149 ++++++++++++++++++++++++++++++++++------------------
 gb_math.hpp | 120 ++++++++++++++++++++++++++++++------------
 3 files changed, 186 insertions(+), 85 deletions(-)
diff --git a/README.md b/README.md
index 00b9d05..a05ec2e 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@ gb single-file public domain libraries for C &amp; C++
 library         | latest version | category | languages | description
 ----------------|----------------|----------|-----------|-------------
 **gb_string.h** | 0.93           | strings  | C, C++    | A better string library for C & C++
-**gb.hpp**      | 0.24b          | misc     | C++11     | (Experimental) A C++11 helper library without STL geared towards game development
+**gb.hpp**      | 0.25           | misc     | C++11     | (Experimental) A C++11 helper library without STL geared towards game development
 **gb_math.hpp** | 0.02b          | math     | C++11     | A C++11 math library geared towards game development
 **gb_ini.h**    | 0.91a          | misc     | C, C++    | A simple ini file loader library for C & C++
 
diff --git a/gb.hpp b/gb.hpp
index 6c52e17..56237cf 100644
--- a/gb.hpp
+++ b/gb.hpp
@@ -1,4 +1,4 @@
-// gb.hpp - v0.24b - public domain C++11 helper library - no warranty implied; use at your own risk
+// gb.hpp - v0.25 - public domain C++11 helper library - no warranty implied; use at your own risk
 // (Experimental) A C++11 helper library without STL geared towards game development
 
 /*
@@ -218,11 +218,6 @@ CONTENTS:
 	#include <sys/time.h>
 #endif
 
-#ifndef GB_ARRAY_BOUND_CHECKING
-#define GB_ARRAY_BOUND_CHECKING 1
-#endif
-
-
 #ifndef GB_DISABLE_COPY
 #define GB_DISABLE_COPY(Type)   \
 	Type(const Type&) = delete; \
@@ -838,7 +833,7 @@ namespace thread
 Thread make();
 void destroy(Thread* t);
 void start(Thread* t, Thread_Function* func, void* data = nullptr, usize stack_size = 0);
-void stop(Thread* t);
+void join(Thread* t);
 bool is_running(const Thread& t);
 u32  current_id();
 } // namespace thread
@@ -948,10 +943,18 @@ struct Allocator
 /// Allocations are padded with to align them to the desired alignment
 struct Heap_Allocator : Allocator
 {
-	Mutex mutex                 = mutex::make();
+	struct Header
+	{
+		usize size;
+	};
+
 	s64   total_allocated_count = 0;
 	s64   allocation_count      = 0;
 
+#if defined(GB_SYSTEM_WINDOWS)
+	HANDLE heap_handle = HeapCreate(0, 0, 0);
+#endif
+
 	Heap_Allocator() = default;
 	virtual ~Heap_Allocator();
 
@@ -1042,6 +1045,7 @@ void* copy(const void* src, usize bytes, void* dest);
 void* move(const void* src, usize bytes, void* dest);
 bool equals(const void* a, const void* b, usize bytes);
 
+// TODO(bill): Should this be just zero(T*) ???
 template <typename T>
 T* zero_struct(T* ptr);
 
@@ -1049,6 +1053,12 @@ template <typename T>
 T* copy_array(const T* src_array, usize count, T* dest_array);
 
 // TODO(bill): Should I implement something like std::copy, std::fill, std::fill_n ???
+
+template <typename T>
+void swap(T* a, T* b);
+
+template <typename T, usize N>
+void swap(T (& a)[N], T (& b)[N]);
 } // namespace memory
 
 void* alloc(Allocator* a, usize size, usize align = GB_DEFAULT_ALIGNMENT);
@@ -1148,6 +1158,10 @@ void format_uint(u64 value, char* buffer, usize len);
 ///                          ///
 ////////////////////////////////
 
+#ifndef GB_ARRAY_BOUND_CHECKING
+#define GB_ARRAY_BOUND_CHECKING 1
+#endif
+
 /// Dynamic resizable array for POD types only
 template <typename T>
 struct Array
@@ -1281,7 +1295,6 @@ template <typename T> void remove_entry(Hash_Table<T>* h, typename const Hash_Ta
 template <typename T> void remove_all(Hash_Table<T>* h, u64 key);
 } // namespace multi_hash_table
 
-
 ////////////////////////////////
 ///                          ///
 /// Hash                     ///
@@ -1519,9 +1532,9 @@ Array<T>::Array(const Array<T>& other)
 , capacity(0)
 , data(nullptr)
 {
 	const auto n = other.count;
 	array::set_capacity(this, n);
-	memory::copy(other.data, n * sizeof(T), data);
+	memory::copy_array(other.data, n, data); // copy_array takes an element count, not a byte count
 	count = n;
 }
 
@@ -1552,9 +1565,9 @@ Array<T>::operator=(const Array<T>& other)
 {
 	if (allocator == nullptr)
 		allocator = other.allocator;
 	const auto n = other.count;
 	array::resize(this, n);
-	memory::copy(other.data, n * sizeof(T), data);
+	memory::copy_array(other.data, n, data); // copy_array takes an element count, not a byte count
 	return *this;
 }
 
@@ -1660,7 +1673,7 @@ append(Array<T>* a, const T* items, usize count)
 	if (a->capacity <= a->count + static_cast<s64>(count))
 		array::grow(a, a->count + count);
 
-	memory::copy(items, count * sizeof(T), &a->data[a->count]);
+	memory::copy_array(items, count, &a->data[a->count]);
 	a->count += count;
 }
 
@@ -1711,7 +1724,7 @@ set_capacity(Array<T>* a, usize capacity)
 	if (capacity > 0)
 	{
 		data = alloc_array<T>(a->allocator, capacity);
-		memory::copy(a->data, a->count * sizeof(T), data);
+		memory::copy_array(a->data, a->count, data);
 	}
 	dealloc(a->allocator, a->data);
 	a->data = data;
@@ -2200,6 +2213,23 @@ copy_array(const T* src_array, usize count, T* dest_array)
 {
 	return static_cast<T*>(memory::copy(src_array, count * sizeof(T), dest_array));
 }
+
+template <typename T>
+inline void
+swap(T* a, T* b)
+{
+	T c = __GB_NAMESPACE_PREFIX::move(*a); // stash *a, then pass the values through the temporary
+	*a  = __GB_NAMESPACE_PREFIX::move(*b);
+	*b  = __GB_NAMESPACE_PREFIX::move(c);
+}
+
+template <typename T, usize N>
+inline void
+swap(T (& a)[N], T (& b)[N])
+{
+	for (usize i = 0; i < N; i++)
+		memory::swap(&a[i], &b[i]); // element-wise swap via the pointer overload in this namespace
+}
 } // namespace memory
 
 
@@ -2594,11 +2624,11 @@ make()
 #else
 	t.posix_handle = 0;
 #endif
-	t.function = nullptr;
-	t.data = nullptr;
+	t.function   = nullptr;
+	t.data       = nullptr;
 	t.stack_size = 0;
 	t.is_running = false;
-	t.semaphore = semaphore::make();
+	t.semaphore  = semaphore::make();
 
 	return t;
 }
@@ -2607,7 +2637,7 @@ void
 destroy(Thread* t)
 {
 	if (t->is_running)
-		thread::stop(t);
+		thread::join(t);
 
 	semaphore::destroy(&t->semaphore);
 }
@@ -2677,7 +2707,7 @@ start(Thread* t, Thread_Function* func, void* data, usize stack_size)
 }
 
 void
-stop(Thread* t)
+join(Thread* t)
 {
 	if (!t->is_running)
 		return;
@@ -2705,13 +2735,11 @@ current_id()
 {
 	u32 thread_id;
 
-
 #if defined(GB_SYSTEM_WINDOWS)
 	u8* thread_local_storage = reinterpret_cast<u8*>(__readgsqword(0x30));
 	thread_id = *reinterpret_cast<u32*>(thread_local_storage + 0x48);
 
 #elif defined(GB_SYSTEM_OSX) && defined(GB_ARCH_64_BIT)
-	u32 thread_id;
 	asm("mov %%gs:0x00,%0" : "=r"(thread_id));
 #elif defined(GB_ARCH_32_BIT)
 	asm("mov %%gs:0x08,%0" : "=r"(thread_id));
@@ -2721,7 +2749,6 @@ current_id()
 	#error Unsupported architecture for thread::current_id()
 #endif
 
-
 	return thread_id;
 }
 
@@ -2886,24 +2913,32 @@ free(Data* tmp)
 
 #else
 
-//#define GB_HEAP_ALLOCATOR_HEADER_PAD_VALUE (usize)(-1)
 Heap_Allocator::~Heap_Allocator()
 {
-#if 0
-	GB_ASSERT(allocation_count == 0 && total_allocated() == 0,
-			  "Heap Allocator: allocation count = %lld; total allocated = %lld",
-			  allocation_count, total_allocated());
+#if defined (GB_SYSTEM_WINDOWS)
+	HeapDestroy(heap_handle);
+#else
+
 #endif
 }
 
 void*
 Heap_Allocator::alloc(usize size, usize align)
 {
-	mutex::lock(&mutex);
-	defer (mutex::unlock(&mutex));
-
 	usize total = size + align - (size % align);
+
+#if defined (GB_SYSTEM_WINDOWS)
+	total += sizeof(Header);
+
+	void* data = HeapAlloc(heap_handle, 0, total);
+	Header* h = static_cast<Header*>(data);
+	h->size = total;
+	data = (h + 1);
+
+#else
+	// TODO(bill): Find a better malloc alternative for this platform
 	void* data = malloc(total);
+#endif
 
 	total_allocated_count += total;
 	allocation_count++;
@@ -2917,27 +2952,34 @@ Heap_Allocator::dealloc(const void* ptr)
 	if (!ptr)
 		return;
 
-	mutex::lock(&mutex);
-	defer (mutex::unlock(&mutex));
-
 	total_allocated_count -= this->allocated_size(ptr);
 	allocation_count--;
 
+#if defined (GB_SYSTEM_WINDOWS)
+	ptr = static_cast<const Header*>(ptr) - 1; // step back to the Header that alloc() placed in front of the returned pointer
+	HeapFree(heap_handle, 0, const_cast<void*>(ptr));
+
+#else
 	::free(const_cast<void*>(ptr));
+
+#endif
 }
 
 inline s64
 Heap_Allocator::allocated_size(const void* ptr)
 {
-	mutex::lock(&mutex);
-	defer (mutex::unlock(&mutex));
-
 #if defined(GB_SYSTEM_WINDOWS)
-	return static_cast<usize>(_msize(const_cast<void*>(ptr)));
+	const Header* h = static_cast<const Header*>(ptr) - 1;
+	return static_cast<usize>(h->size);
+
 #elif defined(GB_SYSTEM_OSX)
 	return static_cast<usize>(malloc_size(ptr));
-#else
+
+#elif defined(GB_SYSTEM_LINUX)
 	return static_cast<usize>(malloc_usable_size(const_cast<void*>(ptr)));
+
+#else
+	#error Implement Heap_Allocator::allocated_size
 #endif
 }
 
@@ -2971,8 +3013,10 @@ Arena_Allocator::~Arena_Allocator()
 	if (backing)
 		backing->dealloc(physical_start);
 
-	GB_ASSERT(total_allocated_count == 0,
-			  "Memory leak of %ld bytes, maybe you forgot to call arena_allocator::clear()?", total_allocated_count);
+	GB_ASSERT(temp_count == 0,
+			  "%ld Temporary_Arena_Memory have not be cleared", temp_count);
+
+	total_allocated_count = 0;
 }
 
 void*
@@ -3035,7 +3079,6 @@ free(Temporary_Arena_Memory* tmp)
 
 #endif
 
-
 ////////////////////////////////
 ///                          ///
 /// Memory                   ///
@@ -3197,17 +3240,19 @@ make(Allocator* a, const void* init_str, Size len)
 {
 	usize header_size = sizeof(string::Header);
 	void* ptr = alloc(a, header_size + len + 1);
+	if (!ptr)
+		return nullptr;
+
 	if (!init_str)
 		memory::zero(ptr, header_size + len + 1);
 
-	if (ptr == nullptr)
-		return nullptr;
-
 	String str = static_cast<char*>(ptr) + header_size;
+
 	string::Header* header = string::header(str);
 	header->allocator = a;
 	header->len = len;
 	header->cap = len;
+
 	if (len && init_str)
 		memory::copy(init_str, len, str);
 	str[len] = '\0';
@@ -3220,10 +3265,11 @@ free(String str)
 {
 	if (str == nullptr)
 		return;
+
 	string::Header* h = string::header(str);
-	Allocator* a = h->allocator;
-	if (a)
-		dealloc(a, h);
+
+	if (h->allocator)
+		dealloc(h->allocator, h);
 }
 
 inline String
@@ -3337,7 +3383,7 @@ make_space_for(String* str, Size add_len)
 	if (available >= add_len) // Return if there is enough space left
 		return;
 
-	void* ptr = reinterpret_cast<string::Header*>(*str) - 1;
+	void* ptr = reinterpret_cast<string::Header*>(*str) - 1; // header precedes the characters *str points at; str itself is only the handle's address
 	usize old_size = sizeof(string::Header) + string::length(*str) + 1;
 	usize new_size = sizeof(string::Header) + new_len + 1;
 
@@ -4302,6 +4348,7 @@ __GB_NAMESPACE_END
 
 /*
 Version History:
+	0.25  - Faster Heap_Allocator for Windows using HeapAlloc
 	0.24b - Even More Hash_Table Bug Fixes
 	0.24a - Hash_Table Bug Fixes
 	0.24  - More documentation and bug fixes
diff --git a/gb_math.hpp b/gb_math.hpp
index 664c8ec..0f4cd8a 100644
--- a/gb_math.hpp
+++ b/gb_math.hpp
@@ -1,9 +1,10 @@
-// gb_math.hpp - v0.02b - public domain C++11 math library - no warranty implied; use at your own risk
+// gb_math.hpp - v0.03 - public domain C++11 math library - no warranty implied; use at your own risk
 // A C++11 math library geared towards game development
 // This is meant to be used the gb.hpp library but it doesn't have to be
 
 /*
 Version History:
+	0.03  - Remove templated min/max/clamp
 	0.02b - Typo fixes
 	0.02a - Better `static` keywords
 	0.02  - More Angle Units and templated min/max/clamp/lerp
@@ -95,6 +96,21 @@ CONTENTS:
 	#error This operating system is not supported by gb.hpp
 #endif
 
+
+#if defined(_MSC_VER)
+	// Microsoft Visual Studio
+	#define GB_COMPILER_MSVC 1
+#elif defined(__clang__)
+	// Clang
+	#define GB_COMPILER_CLANG 1
+#elif (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__) || defined(__INTEL_COMPILER))
+	// GNU GCC/G++ Compiler (parenthesised so the Clang/Intel exclusion applies to both GNU macros)
+	#define GB_COMPILER_GNU_GCC 1
+#elif defined(__INTEL_COMPILER)
+	// Intel C++ Compiler
+	#define GB_COMPILER_INTEL 1
+#endif
+
 ////////////////////////////////
 ///                          ///
 /// Environment Bit Size     ///
@@ -125,6 +141,7 @@ CONTENTS:
 	#endif
 #endif
 
+
 // TODO(bill): Get this to work
 // #if !defined(GB_LITTLE_EDIAN) && !defined(GB_BIG_EDIAN)
 
@@ -911,26 +928,24 @@ f32 kronecker_delta(f32 i, f32 j);
 #undef min
 #undef max
 
-template <typename T>
-const T& min(const T& a, const T& b);
+f32 min(f32 x, f32 y);
+s32 min(s32 x, s32 y);
+s64 min(s64 x, s64 y);
 
-template <typename T>
-const T& max(const T& a, const T& b);
+f32 max(f32 x, f32 y);
+s32 max(s32 x, s32 y);
+s64 max(s64 x, s64 y);
 
-template <typename T>
-T clamp(const T& x, const T& min, const T& max);
+f32 clamp(f32 x, f32 min, f32 max);
+s32 clamp(s32 x, s32 min, s32 max);
+s64 clamp(s64 x, s64 min, s64 max);
 
+// TODO(bill): Should this be a template or just normal function overloading?
 template <typename T>
 T lerp(const T& x, const T& y, f32 t);
 
 bool equals(f32 a, f32 b, f32 precision = F32_PRECISION);
 
-template <typename T>
-void swap(T* a, T* b);
-
-template <typename T, usize N>
-void swap(T (& a)[N], T (& b)[N]);
-
 // Vector2 functions
 f32 dot(const Vector2& a, const Vector2& b);
 f32 cross(const Vector2& a, const Vector2& b);
@@ -1138,9 +1153,6 @@ f32 perlin_3d(f32 x, f32 y, f32 z, s32 x_wrap = 0, s32 y_wrap = 0, s32 z_wrap =
 
 namespace math
 {
-template <typename T> inline const T& min(const T& a, const T& b) { return a < b ? a : b; }
-template <typename T> inline const T& max(const T& a, const T& b) { return a > b ? a : b; }
-
 template <typename T>
 inline T
 clamp(const T& x, const T& min, const T& max)
@@ -2361,29 +2373,71 @@ kronecker_delta(f32 i, f32 j)
 	return static_cast<f32>(i == j);
 }
 
+inline f32
+min(f32 x, f32 y)
+{
+	// TODO(bill): Check if this is even good
+	return x < y ? x : y;
+}
+
+inline s32
+min(s32 x, s32 y)
+{
+	return x < y ? x : y; // plain compare: the (x-y)>>31 trick is undefined when x-y overflows s32
+}
+
+inline s64
+min(s64 x, s64 y)
+{
+	return x < y ? x : y; // plain compare: the (x-y)>>63 trick is undefined when x-y overflows s64
+}
+
+inline f32
+max(f32 x, f32 y)
+{
+	// TODO(bill): Check if this is even good
+	return x > y ? x : y;
+}
+
+inline s32
+max(s32 x, s32 y)
+{
+	return x > y ? x : y; // plain compare: the (x-y)>>31 trick is undefined when x-y overflows s32
+}
+
+inline s64
+max(s64 x, s64 y)
+{
+	return x > y ? x : y; // plain compare: the (x-y)>>63 trick is undefined when x-y overflows s64
+}
+
+inline f32
+clamp(f32 x, f32 min, f32 max)
+{
+	const f32 t = x < min ? min : x;
+	return t > max ? max : t;
+}
+
+inline s32
+clamp(s32 x, s32 min, s32 max)
+{
+	const s32 t = x < min ? min : x;
+	return t > max ? max : t;
+}
+
+inline s64
+clamp(s64 x, s64 min, s64 max)
+{
+	const s64 t = x < min ? min : x;
+	return t > max ? max : t;
+}
+
 inline bool
 equals(f32 a, f32 b, f32 precision)
 {
 	return ((b <= (a + precision)) && (b >= (a - precision)));
 }
 
-template <typename T>
-inline void
-swap(T* a, T* b)
-{
-	T c = __GB_NAMESPACE_PREFIX::move(*a);
-	*a  = __GB_NAMESPACE_PREFIX::move(*b);
-	*b  = __GB_NAMESPACE_PREFIX::move(c);
-}
-
-template <typename T, usize N>
-inline void
-swap(T (& a)[N], T (& b)[N])
-{
-	for (usize i = 0; i < N; i++)
-		math::swap(&a[i], &b[i]);
-}
-
 // Vector2 functions
 inline f32
 dot(const Vector2& a, const Vector2& b)