13 files changed, 24898 insertions, 0 deletions
diff --git a/vendors/cjson/cJSON.c b/vendors/cjson/cJSON.c
new file mode 100644
index 0000000..35452cb
--- /dev/null
+++ b/vendors/cjson/cJSON.c
@@ -0,0 +1,596 @@
+/*
+  Copyright (c) 2009 Dave Gamble
+
+  Permission is hereby granted, free of charge, to any person obtaining a copy
+  of this software and associated documentation files (the "Software"), to deal
+  in the Software without restriction, including without limitation the rights
+  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+  copies of the Software, and to permit persons to whom the Software is
+  furnished to do so, subject to the following conditions:
+
+  The above copyright notice and this permission notice shall be included in
+  all copies or substantial portions of the Software.
+
+  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+  THE SOFTWARE.
+*/
+
+/* cJSON */
+/* JSON parser in C. */
+
+#include <string.h>
+#include <stdio.h>
+#include <math.h>
+#include <stdlib.h>
+#include <float.h>
+#include <limits.h>
+#include <ctype.h>
+#include "cJSON.h"
+
+static const char *ep;	/* Input position recorded by the most recent failed parse; reset to 0 at the start of cJSON_ParseWithOpts. One file-scope variable serves every parse. */
+
+const char *cJSON_GetErrorPtr(void) {return ep;}	/* Returns ep: 0 after a successful parse, or when a failure (e.g. out of memory) recorded no position. */
+
+static int cJSON_strcasecmp(const char *s1,const char *s2)	/* Case-insensitive strcmp(): 0 when equal, else the difference of the first mismatching lowercased bytes. NULL equals only NULL. */
+{	const unsigned char *a=(const unsigned char *)s1,*b=(const unsigned char *)s2;	/* tolower() is only defined for unsigned char values (or EOF); plain char may be negative */
+	if (!a) return (a==b)?0:1;if (!b) return 1;
+	for(; tolower(*a) == tolower(*b); ++a, ++b)	if(*a == 0)	return 0;
+	return tolower(*a) - tolower(*b);
+}
+
+static void *(*cJSON_malloc)(size_t sz) = malloc;	/* Allocator used for every node and string in this file; replaced through cJSON_InitHooks. */
+static void (*cJSON_free)(void *ptr) = free;	/* Release routine paired with cJSON_malloc; replaced through cJSON_InitHooks. */
+
+static char* cJSON_strdup(const char* str)
+{	/* Copy str (terminator included) into memory from cJSON_malloc. Returns 0 when str is NULL or the allocation fails. */
+      size_t len;
+      char* copy;
+      if (!str) return 0;	/* strlen(NULL) is undefined; a missing name or value simply yields no copy */
+      len = strlen(str) + 1;
+      if (!(copy = (char*)cJSON_malloc(len))) return 0;
+      memcpy(copy,str,len);
+      return copy;
+}
+
+void cJSON_InitHooks(cJSON_Hooks* hooks)
+{
+	/* Select the allocator pair: each hook that is supplied wins, anything missing (or a NULL hooks) falls back to malloc/free. */
+	void *(*alloc_fn)(size_t sz) = malloc;
+	void (*release_fn)(void *ptr) = free;
+
+	if (hooks && hooks->malloc_fn) alloc_fn = hooks->malloc_fn;
+	if (hooks && hooks->free_fn) release_fn = hooks->free_fn;
+
+	cJSON_malloc = alloc_fn;cJSON_free = release_fn;
+}
+
+/* Internal constructor: returns a zero-filled node from cJSON_malloc, or 0 when the allocation fails. */
+static cJSON *cJSON_New_Item(void)
+{
+	cJSON *item=(cJSON*)cJSON_malloc(sizeof *item);
+	if (!item) return 0;
+	return (cJSON*)memset(item,0,sizeof *item);
+}
+
+/* Delete a cJSON structure: frees c and every sibling reachable through next, recursing into children. A NULL c is a no-op. */
+void cJSON_Delete(cJSON *c)
+{
+	cJSON *following;
+	for (;c;c=following)
+	{
+		int owns_data=!(c->type&cJSON_IsReference);	/* a reference borrows child/valuestring and must not free them */
+		following=c->next;	/* read before the node itself is released */
+		if (owns_data && c->child) cJSON_Delete(c->child);
+		if (owns_data && c->valuestring) cJSON_free(c->valuestring);
+		if (c->string) cJSON_free(c->string);
+		cJSON_free(c);
+	}
+}
+
+/* Parse the input text to generate a number, and populate the result into item. Returns the first character after the number. */
+static const char *parse_number(cJSON *item,const char *num)
+{
+	double n=0,sign=1,scale=0;int subscale=0,signsubscale=1;
+
+	if (*num=='-') sign=-1,num++;	/* Has sign? */
+	if (*num=='0') num++;			/* is zero */
+	if (*num>='1' && *num<='9')	do	n=(n*10.0)+(*num++ -'0');	while (*num>='0' && *num<='9');	/* Number? */
+	if (*num=='.' && num[1]>='0' && num[1]<='9') {num++;		do	n=(n*10.0)+(*num++ -'0'),scale--; while (*num>='0' && *num<='9');}	/* Fractional part? */
+	if (*num=='e' || *num=='E')		/* Exponent? */
+	{	num++;if (*num=='+') num++;	else if (*num=='-') signsubscale=-1,num++;		/* With sign? */
+		while (*num>='0' && *num<='9') {if (subscale<100000) subscale=(subscale*10)+(*num - '0');num++;}	/* Number? Digits are still consumed, but accumulation stops at six so a long exponent cannot overflow the int. */
+	}
+
+	n=sign*n*pow(10.0,(scale+subscale*signsubscale));	/* number = +/- number.fraction * 10^+/- exponent */
+	/* Converting an out-of-range or NaN double to int is undefined, so valueint saturates at the int limits (NaN becomes 0). */
+	item->valuedouble=n;
+	item->valueint=(n>=INT_MAX)?INT_MAX:(n<=INT_MIN)?INT_MIN:(n==n)?(int)n:0;
+	item->type=cJSON_Number;
+	return num;
+}
+
+/* Render the number nicely from the given item into a string. Returns a buffer from cJSON_malloc (0 on allocation failure); NaN and infinities are written as null. */
+static char *print_number(cJSON *item)
+{
+	char *str;double d=item->valuedouble;
+	if (fabs(((double)item->valueint)-d)<=DBL_EPSILON && d<=INT_MAX && d>=INT_MIN)
+	{
+		str=(char*)cJSON_malloc(21);	/* 2^64+1 can be represented in 21 chars. */
+		if (str) sprintf(str,"%d",item->valueint);
+	}
+	else
+	{
+		str=(char*)cJSON_malloc(64);	/* This is a nice tradeoff. */
+		if (str)
+		{
+			if (d!=d || fabs(d)>DBL_MAX)						strcpy(str,"null");	/* NaN/Inf have no JSON spelling; printf would emit "nan"/"inf" */
+			else if (fabs(floor(d)-d)<=DBL_EPSILON && fabs(d)<1.0e60)sprintf(str,"%.0f",d);
+			else if (fabs(d)<1.0e-6 || fabs(d)>1.0e9)			sprintf(str,"%e",d);
+			else												sprintf(str,"%f",d);
+		}
+	}
+	return str;
+}
+
+static unsigned parse_hex4(const char *str)	/* Decode the four hex digits at str into a 16-bit value; returns 0 as soon as a non-hex character is met. */
+{
+	unsigned h=0;int i;
+	for (i=0;i<4;i++)
+	{	const char c=str[i];
+		if (c>='0' && c<='9') h=(h<<4)+(unsigned)(c-'0');
+		else if (c>='A' && c<='F') h=(h<<4)+(unsigned)(10+c-'A');
+		else if (c>='a' && c<='f') h=(h<<4)+(unsigned)(10+c-'a');
+		else return 0;	/* this includes the terminator, so nothing beyond it is read */
+	}
+	return h;
+}
+
+/* Parse the input text into an unescaped cstring, and populate item. Returns the position after the string, or 0 on failure (ep is set only when str does not open with a quote). */
+static const unsigned char firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
+static const char *parse_string(cJSON *item,const char *str)
+{
+	const char *ptr=str+1;char *ptr2;char *out;int len=0;unsigned uc,uc2;
+	if (*str!='\"') {ep=str;return 0;}	/* not a string! */
+	
+	while (*ptr!='\"' && *ptr && ++len) if (*ptr++ == '\\' && *ptr) ptr++;	/* Skip escaped quotes, without stepping over the terminator. */
+	
+	out=(char*)cJSON_malloc(len+1);	/* This is how long we need for the string, roughly. */
+	if (!out) return 0;
+	
+	ptr=str+1;ptr2=out;
+	while (*ptr!='\"' && *ptr)
+	{
+		if (*ptr!='\\') *ptr2++=*ptr++;
+		else
+		{
+			ptr++;if (!*ptr) break;	/* input ends right after a backslash: stop here rather than read past the terminator */
+			switch (*ptr)
+			{
+				case 'b': *ptr2++='\b';	break;
+				case 'f': *ptr2++='\f';	break;
+				case 'n': *ptr2++='\n';	break;
+				case 'r': *ptr2++='\r';	break;
+				case 't': *ptr2++='\t';	break;
+				case 'u':	 /* transcode utf16 to utf8. */
+					uc=parse_hex4(ptr+1);if (uc || !strncmp(ptr+1,"0000",4)) ptr+=4;	/* get the unicode char; the digits are consumed only when all four are present, so a bad escape cannot swallow the closing quote. */
+
+					if ((uc>=0xDC00 && uc<=0xDFFF) || uc==0)	break;	/* check for invalid.	*/
+
+					if (uc>=0xD800 && uc<=0xDBFF)	/* UTF16 surrogate pairs.	*/
+					{
+						if (ptr[1]!='\\' || ptr[2]!='u')	break;	/* missing second-half of surrogate.	*/
+						uc2=parse_hex4(ptr+3);
+						if (uc2<0xDC00 || uc2>0xDFFF)		break;	/* invalid second-half of surrogate: left unconsumed for the next pass of the loop.	*/
+						ptr+=6;uc=0x10000 + (((uc&0x3FF)<<10) | (uc2&0x3FF));
+					}
+
+					len=4;if (uc<0x80) len=1;else if (uc<0x800) len=2;else if (uc<0x10000) len=3; ptr2+=len;
+					
+					switch (len) {	/* bytes are written back to front; every case falls through on purpose */
+						case 4: *--ptr2 =((uc | 0x80) & 0xBF); uc >>= 6;
+						case 3: *--ptr2 =((uc | 0x80) & 0xBF); uc >>= 6;
+						case 2: *--ptr2 =((uc | 0x80) & 0xBF); uc >>= 6;
+						case 1: *--ptr2 =(uc | firstByteMark[len]);
+					}
+					ptr2+=len;
+					break;
+				default:  *ptr2++=*ptr; break;
+			}
+			ptr++;
+		}
+	}
+	*ptr2=0;
+	if (*ptr=='\"') ptr++;
+	item->valuestring=out;
+	item->type=cJSON_String;
+	return ptr;
+}
+
+/* Render the cstring provided to an escaped version that can be printed. Returns a newly allocated double-quoted string, or 0 if allocation fails; a NULL str yields a copy of the empty string (no quotes). */
+static char *print_string_ptr(const char *str)
+{
+	const char *ptr;char *ptr2,*out;int len=0;unsigned char token;
+	
+	if (!str) return cJSON_strdup("");
+	ptr=str;while ((token=*ptr) && ++len) {if (strchr("\"\\\b\f\n\r\t",token)) len++; else if (token<32) len+=5;ptr++;}	/* Measure: 1 byte per char, 2 for the short escapes, 6 for any other control char. */
+	
+	out=(char*)cJSON_malloc(len+3);	/* +3: opening quote, closing quote, terminator. */
+	if (!out) return 0;
+
+	ptr2=out;ptr=str;
+	*ptr2++='\"';
+	while (*ptr)
+	{
+		if ((unsigned char)*ptr>31 && *ptr!='\"' && *ptr!='\\') *ptr2++=*ptr++;	/* bytes above 31 other than quote and backslash are copied verbatim */
+		else
+		{
+			*ptr2++='\\';
+			switch (token=*ptr++)
+			{
+				case '\\':	*ptr2++='\\';	break;
+				case '\"':	*ptr2++='\"';	break;
+				case '\b':	*ptr2++='b';	break;
+				case '\f':	*ptr2++='f';	break;
+				case '\n':	*ptr2++='n';	break;
+				case '\r':	*ptr2++='r';	break;
+				case '\t':	*ptr2++='t';	break;
+				default: sprintf(ptr2,"u%04x",token);ptr2+=5;	break;	/* escape and print; sprintf's terminator is overwritten by whatever is written next */
+			}
+		}
+	}
+	*ptr2++='\"';*ptr2++=0;
+	return out;
+}
+/* Invoke print_string_ptr (which is useful) on an item's valuestring. */
+static char *print_string(cJSON *item)	{return print_string_ptr(item->valuestring);}
+
+/* Predeclare these prototypes. */
+static const char *parse_value(cJSON *item,const char *value);
+static char *print_value(cJSON *item,int depth,int fmt);
+static const char *parse_array(cJSON *item,const char *value);
+static char *print_array(cJSON *item,int depth,int fmt);
+static const char *parse_object(cJSON *item,const char *value);
+static char *print_object(cJSON *item,int depth,int fmt);
+
+/* Utility to jump whitespace and cr/lf: advances past every byte <= 32 and stops at the terminator. A NULL pointer is handed back unchanged. */
+static const char *skip(const char *in) {if (in) {while (*in && (unsigned char)*in<=32) in++;} return in;}
+
+/* Parse an object - create a new root, and populate. Returns 0 on failure. On success *return_parse_end (if given) receives where parsing stopped. */
+cJSON *cJSON_ParseWithOpts(const char *value,const char **return_parse_end,int require_null_terminated)
+{
+	const char *tail;
+	cJSON *root=cJSON_New_Item();
+	ep=0;	/* forget the error position of any earlier parse */
+	if (!root) return 0;       /* memory fail */
+
+	tail=parse_value(root,skip(value));
+	if (tail && require_null_terminated)	/* only whitespace may follow the value; anything else is reported through ep */
+	{	tail=skip(tail);if (*tail) {ep=tail;tail=0;}}
+	if (!tail)	{cJSON_Delete(root);return 0;}	/* parse failure. ep is set. */
+
+	if (return_parse_end) *return_parse_end=tail;
+	return root;
+}
+/* Default options for cJSON_Parse: no end pointer is reported and text after the value is not checked. */
+cJSON *cJSON_Parse(const char *value) {return cJSON_ParseWithOpts(value,0,0);}
+
+/* Render a cJSON item/entity/structure to text. Both start print_value at depth 0; the last argument selects formatted (1) or compact (0) output. */
+char *cJSON_Print(cJSON *item)				{return print_value(item,0,1);}
+char *cJSON_PrintUnformatted(cJSON *item)	{return print_value(item,0,0);}
+
+/* Parser core - when encountering text, process appropriately. Dispatches on the leading characters of value and returns the position after the parsed value, or 0 on failure. */
+static const char *parse_value(cJSON *item,const char *value)
+{
+	if (!value)						return 0;	/* Fail on null. */
+	if (!strncmp(value,"null",4))	{ item->type=cJSON_NULL;  return value+4; }	/* literals are matched by prefix only */
+	if (!strncmp(value,"false",5))	{ item->type=cJSON_False; return value+5; }
+	if (!strncmp(value,"true",4))	{ item->type=cJSON_True; item->valueint=1;	return value+4; }
+	if (*value=='\"')				{ return parse_string(item,value); }
+	if (*value=='-' || (*value>='0' && *value<='9'))	{ return parse_number(item,value); }
+	if (*value=='[')				{ return parse_array(item,value); }
+	if (*value=='{')				{ return parse_object(item,value); }
+
+	ep=value;return 0;	/* failure: nothing recognised starts here, so remember the spot. */
+}
+
+/* Render a value to text. Returns a newly allocated string, or 0 for a NULL item, an unknown type or a failed allocation. */
+static char *print_value(cJSON *item,int depth,int fmt)
+{
+	if (!item) return 0;
+	switch ((item->type)&255)	/* the low byte is the type; flags such as cJSON_IsReference sit above it */
+	{
+		case cJSON_NULL:	return cJSON_strdup("null");
+		case cJSON_False:	return cJSON_strdup("false");
+		case cJSON_True:	return cJSON_strdup("true");
+		case cJSON_Number:	return print_number(item);
+		case cJSON_String:	return print_string(item);
+		case cJSON_Array:	return print_array(item,depth,fmt);
+		case cJSON_Object:	return print_object(item,depth,fmt);
+		default:			break;
+	}
+	return 0;
+}
+
+/* Build an array from input text. value must point at '['; returns the position after the closing ']', or 0 on failure. Elements parsed before a failure stay linked under item. */
+static const char *parse_array(cJSON *item,const char *value)
+{
+	cJSON *child;
+	if (*value!='[')	{ep=value;return 0;}	/* not an array! */
+
+	item->type=cJSON_Array;
+	value=skip(value+1);
+	if (*value==']') return value+1;	/* empty array. */
+
+	item->child=child=cJSON_New_Item();
+	if (!item->child) return 0;		 /* memory fail */
+	value=skip(parse_value(child,skip(value)));	/* skip any spacing, get the value. */
+	if (!value) return 0;	/* the first element failed to parse */
+
+	while (*value==',')
+	{
+		cJSON *new_item;
+		if (!(new_item=cJSON_New_Item())) return 0; 	/* memory fail */
+		child->next=new_item;new_item->prev=child;child=new_item;	/* link before parsing, so a failed element is still reachable from item */
+		value=skip(parse_value(child,skip(value+1)));
+		if (!value) return 0;	/* the element failed to parse (bad input or memory fail) */
+	}
+
+	if (*value==']') return value+1;	/* end of array */
+	ep=value;return 0;	/* malformed. */
+}
+
+/* Render an array to text. Each child is rendered on its own first, then the pieces are joined into one buffer. Returns 0 if any rendering or allocation fails. */
+static char *print_array(cJSON *item,int depth,int fmt)
+{
+	char **entries;
+	char *out=0,*ptr,*ret;int len=5;	/* len starts with room for the brackets and the terminator */
+	cJSON *child=item->child;
+	int numentries=0,i=0,fail=0;
+	
+	/* How many entries in the array? */
+	while (child) numentries++,child=child->next;
+	/* Explicitly handle numentries==0 */
+	if (!numentries)
+	{
+		out=(char*)cJSON_malloc(3);
+		if (out) strcpy(out,"[]");
+		return out;
+	}
+	/* Allocate an array to hold the values for each */
+	entries=(char**)cJSON_malloc(numentries*sizeof(char*));
+	if (!entries) return 0;
+	memset(entries,0,numentries*sizeof(char*));	/* zeroed so the failure path can tell which slots were filled */
+	/* Retrieve all the results: */
+	child=item->child;
+	while (child && !fail)
+	{
+		ret=print_value(child,depth+1,fmt);
+		entries[i++]=ret;
+		if (ret) len+=strlen(ret)+2+(fmt?1:0); else fail=1;	/* text plus separator, plus a space when formatted */
+		child=child->next;
+	}
+	
+	/* If we didn't fail, try to malloc the output string */
+	if (!fail) out=(char*)cJSON_malloc(len);
+	/* If that fails, we fail. */
+	if (!out) fail=1;
+
+	/* Handle failure: release whatever was rendered so far. */
+	if (fail)
+	{
+		for (i=0;i<numentries;i++) if (entries[i]) cJSON_free(entries[i]);
+		cJSON_free(entries);
+		return 0;
+	}
+	
+	/* Compose the output array; each entry is freed as soon as it has been copied. */
+	*out='[';
+	ptr=out+1;*ptr=0;
+	for (i=0;i<numentries;i++)
+	{
+		strcpy(ptr,entries[i]);ptr+=strlen(entries[i]);
+		if (i!=numentries-1) {*ptr++=',';if(fmt)*ptr++=' ';*ptr=0;}
+		cJSON_free(entries[i]);
+	}
+	cJSON_free(entries);
+	*ptr++=']';*ptr++=0;
+	return out;	
+}
+
+/* Build an object from the text. value must point at '{'; returns the position after the closing '}', or 0 on failure. Members parsed before a failure stay linked under item. */
+static const char *parse_object(cJSON *item,const char *value)
+{
+	cJSON *child;
+	if (*value!='{')	{ep=value;return 0;}	/* not an object! */
+	
+	item->type=cJSON_Object;
+	value=skip(value+1);
+	if (*value=='}') return value+1;	/* empty object. */
+	
+	item->child=child=cJSON_New_Item();
+	if (!item->child) return 0;
+	value=skip(parse_string(child,skip(value)));	/* the member name is parsed as an ordinary string first */
+	if (!value) return 0;
+	child->string=child->valuestring;child->valuestring=0;	/* move the parsed text from the value slot to the name slot */
+	if (*value!=':') {ep=value;return 0;}	/* fail! */
+	value=skip(parse_value(child,skip(value+1)));	/* skip any spacing, get the value. */
+	if (!value) return 0;
+	
+	while (*value==',')
+	{
+		cJSON *new_item;
+		if (!(new_item=cJSON_New_Item()))	return 0; /* memory fail */
+		child->next=new_item;new_item->prev=child;child=new_item;
+		value=skip(parse_string(child,skip(value+1)));
+		if (!value) return 0;
+		child->string=child->valuestring;child->valuestring=0;
+		if (*value!=':') {ep=value;return 0;}	/* fail! */
+		value=skip(parse_value(child,skip(value+1)));	/* skip any spacing, get the value. */
+		if (!value) return 0;
+	}
+	
+	if (*value=='}') return value+1;	/* end of object */
+	ep=value;return 0;	/* malformed. */
+}
+
+/* Render an object to text. Names and values are rendered separately, then joined; formatted output puts one member per line, indented with depth tabs. Returns 0 on any failure. */
+static char *print_object(cJSON *item,int depth,int fmt)
+{
+	char **entries=0,**names=0;
+	char *out=0,*ptr,*ret,*str;int len=7,i=0,j;	/* len starts with room for the braces, a newline and the terminator */
+	cJSON *child=item->child;
+	int numentries=0,fail=0;
+	/* Count the number of entries. */
+	while (child) numentries++,child=child->next;
+	/* Explicitly handle empty object case */
+	if (!numentries)
+	{
+		out=(char*)cJSON_malloc(fmt?depth+4:3);	/* formatted: brace, newline, depth-1 tabs, brace, terminator */
+		if (!out)	return 0;
+		ptr=out;*ptr++='{';
+		if (fmt) {*ptr++='\n';for (i=0;i<depth-1;i++) *ptr++='\t';}
+		*ptr++='}';*ptr++=0;
+		return out;
+	}
+	/* Allocate space for the names and the objects */
+	entries=(char**)cJSON_malloc(numentries*sizeof(char*));
+	if (!entries) return 0;
+	names=(char**)cJSON_malloc(numentries*sizeof(char*));
+	if (!names) {cJSON_free(entries);return 0;}
+	memset(entries,0,sizeof(char*)*numentries);
+	memset(names,0,sizeof(char*)*numentries);
+
+	/* Collect all the results into our arrays; members are printed one level deeper than the object itself. */
+	child=item->child;depth++;if (fmt) len+=depth;
+	while (child)
+	{
+		names[i]=str=print_string_ptr(child->string);
+		entries[i++]=ret=print_value(child,depth,fmt);
+		if (str && ret) len+=strlen(ret)+strlen(str)+2+(fmt?2+depth:0); else fail=1;	/* name, value, ':' and ','; formatted adds a tab, a newline and the indent */
+		child=child->next;
+	}
+	
+	/* Try to allocate the output string */
+	if (!fail) out=(char*)cJSON_malloc(len);
+	if (!out) fail=1;
+
+	/* Handle failure: release whatever was rendered. */
+	if (fail)
+	{
+		for (i=0;i<numentries;i++) {if (names[i]) cJSON_free(names[i]);if (entries[i]) cJSON_free(entries[i]);}
+		cJSON_free(names);cJSON_free(entries);
+		return 0;
+	}
+	
+	/* Compose the output: */
+	*out='{';ptr=out+1;if (fmt)*ptr++='\n';*ptr=0;
+	for (i=0;i<numentries;i++)
+	{
+		if (fmt) for (j=0;j<depth;j++) *ptr++='\t';
+		strcpy(ptr,names[i]);ptr+=strlen(names[i]);
+		*ptr++=':';if (fmt) *ptr++='\t';
+		strcpy(ptr,entries[i]);ptr+=strlen(entries[i]);
+		if (i!=numentries-1) *ptr++=',';
+		if (fmt) *ptr++='\n';*ptr=0;	/* the terminator is written whether or not fmt is set */
+		cJSON_free(names[i]);cJSON_free(entries[i]);
+	}
+	
+	cJSON_free(names);cJSON_free(entries);
+	if (fmt) for (i=0;i<depth-1;i++) *ptr++='\t';	/* the closing brace lines up with the enclosing level */
+	*ptr++='}';*ptr++=0;
+	return out;	
+}
+
+/* Get Array size/item / object item. A NULL array/object is treated as empty (size 0, no item); GetArrayItem returns the first item for an index <= 0. */
+int    cJSON_GetArraySize(cJSON *array)							{cJSON *c=array?array->child:0;int i=0;while(c)i++,c=c->next;return i;}
+cJSON *cJSON_GetArrayItem(cJSON *array,int item)				{cJSON *c=array?array->child:0;  while (c && item>0) item--,c=c->next; return c;}
+cJSON *cJSON_GetObjectItem(cJSON *object,const char *string)	{cJSON *c=object?object->child:0; while (c && cJSON_strcasecmp(c->string,string)) c=c->next; return c;}
+
+/* Utility for array list handling: links item directly after prev. */
+static void suffix_object(cJSON *prev,cJSON *item) {prev->next=item;item->prev=prev;}
+/* Utility for handling references: a shallow copy of item flagged cJSON_IsReference, with no name and no siblings. Returns 0 for a NULL item or when allocation fails. */
+static cJSON *create_reference(cJSON *item) {cJSON *ref;if (!item) return 0;ref=cJSON_New_Item();if (!ref) return 0;memcpy(ref,item,sizeof(cJSON));ref->string=0;ref->type|=cJSON_IsReference;ref->next=ref->prev=0;return ref;}
+
+/* Add item to array/object. The item is linked at the tail of the container's child chain; a NULL item is ignored. The Reference variants link a reference copy made by create_reference instead of item itself. */
+void   cJSON_AddItemToArray(cJSON *array, cJSON *item)						{cJSON *c=array->child;if (!item) return; if (!c) {array->child=item;} else {while (c && c->next) c=c->next; suffix_object(c,item);}}
+void   cJSON_AddItemToObject(cJSON *object,const char *string,cJSON *item)	{if (!item) return; if (item->string) cJSON_free(item->string);item->string=cJSON_strdup(string);cJSON_AddItemToArray(object,item);}	/* any previous name is freed and replaced by a copy of string */
+void	cJSON_AddItemReferenceToArray(cJSON *array, cJSON *item)						{cJSON_AddItemToArray(array,create_reference(item));}
+void	cJSON_AddItemReferenceToObject(cJSON *object,const char *string,cJSON *item)	{cJSON_AddItemToObject(object,string,create_reference(item));}
+
+cJSON *cJSON_DetachItemFromArray(cJSON *array,int which)			{cJSON *c=array->child;while (c && which>0) c=c->next,which--;if (!c) return 0;	/* Unlinks and returns the item at index which (0 if there is none); the caller now holds it. */
+	if (c->prev) c->prev->next=c->next;if (c->next) c->next->prev=c->prev;if (c==array->child) array->child=c->next;c->prev=c->next=0;return c;}
+void   cJSON_DeleteItemFromArray(cJSON *array,int which)			{cJSON_Delete(cJSON_DetachItemFromArray(array,which));}	/* Detach, then free; a missing index is a no-op. */
+cJSON *cJSON_DetachItemFromObject(cJSON *object,const char *string) {int i=0;cJSON *c=object->child;while (c && cJSON_strcasecmp(c->string,string)) i++,c=c->next;if (c) return cJSON_DetachItemFromArray(object,i);return 0;}	/* Finds the first member whose name matches string case-insensitively, then detaches it by index. */
+void   cJSON_DeleteItemFromObject(cJSON *object,const char *string) {cJSON_Delete(cJSON_DetachItemFromObject(object,string));}	/* Detach, then free; a missing name is a no-op. */
+
+/* Replace array/object items with new ones. newitem takes the old item's place and the old item is deleted; nothing happens when the index or name is not found. */
+void   cJSON_ReplaceItemInArray(cJSON *array,int which,cJSON *newitem)		{cJSON *c=array->child;while (c && which>0) c=c->next,which--;if (!c) return;
+	newitem->next=c->next;newitem->prev=c->prev;if (newitem->next) newitem->next->prev=newitem;
+	if (c==array->child) array->child=newitem; else newitem->prev->next=newitem;c->next=c->prev=0;cJSON_Delete(c);}
+void   cJSON_ReplaceItemInObject(cJSON *object,const char *string,cJSON *newitem){int i=0;cJSON *c=object->child;while(c && cJSON_strcasecmp(c->string,string))i++,c=c->next;if(c){char *name=cJSON_strdup(string);if (newitem->string) cJSON_free(newitem->string);newitem->string=name;cJSON_ReplaceItemInArray(object,i,newitem);}}	/* the name is copied before the old one is freed, so string may alias it and no earlier name leaks */
+
+/* Create basic types: each returns a new item, or 0 when allocation fails. CreateNumber saturates valueint at the int limits; CreateString also fails when the text cannot be copied. */
+cJSON *cJSON_CreateNull(void)					{cJSON *item=cJSON_New_Item();if(item)item->type=cJSON_NULL;return item;}
+cJSON *cJSON_CreateTrue(void)					{cJSON *item=cJSON_New_Item();if(item)item->type=cJSON_True;return item;}
+cJSON *cJSON_CreateFalse(void)					{cJSON *item=cJSON_New_Item();if(item)item->type=cJSON_False;return item;}
+cJSON *cJSON_CreateBool(int b)					{cJSON *item=cJSON_New_Item();if(item)item->type=b?cJSON_True:cJSON_False;return item;}
+cJSON *cJSON_CreateNumber(double num)			{cJSON *item=cJSON_New_Item();if(item){item->type=cJSON_Number;item->valuedouble=num;item->valueint=(num>=INT_MAX)?INT_MAX:(num<=INT_MIN)?INT_MIN:(num==num)?(int)num:0;}return item;}
+cJSON *cJSON_CreateString(const char *string)	{cJSON *item=cJSON_New_Item();if(item){item->type=cJSON_String;item->valuestring=cJSON_strdup(string);if(!item->valuestring){cJSON_Delete(item);return 0;}}return item;}
+cJSON *cJSON_CreateArray(void)					{cJSON *item=cJSON_New_Item();if(item)item->type=cJSON_Array;return item;}
+cJSON *cJSON_CreateObject(void)					{cJSON *item=cJSON_New_Item();if(item)item->type=cJSON_Object;return item;}
+
+/* Create Arrays: build an array item holding count elements copied from the input. If any element cannot be created the partial array is deleted and 0 is returned. */
+cJSON *cJSON_CreateIntArray(const int *numbers,int count)		{int i;cJSON *n=0,*p=0,*a=cJSON_CreateArray();for(i=0;a && i<count;i++){n=cJSON_CreateNumber(numbers[i]);if(!n){cJSON_Delete(a);return 0;}if(!i)a->child=n;else suffix_object(p,n);p=n;}return a;}
+cJSON *cJSON_CreateFloatArray(const float *numbers,int count)	{int i;cJSON *n=0,*p=0,*a=cJSON_CreateArray();for(i=0;a && i<count;i++){n=cJSON_CreateNumber(numbers[i]);if(!n){cJSON_Delete(a);return 0;}if(!i)a->child=n;else suffix_object(p,n);p=n;}return a;}
+cJSON *cJSON_CreateDoubleArray(const double *numbers,int count)	{int i;cJSON *n=0,*p=0,*a=cJSON_CreateArray();for(i=0;a && i<count;i++){n=cJSON_CreateNumber(numbers[i]);if(!n){cJSON_Delete(a);return 0;}if(!i)a->child=n;else suffix_object(p,n);p=n;}return a;}
+cJSON *cJSON_CreateStringArray(const char **strings,int count)	{int i;cJSON *n=0,*p=0,*a=cJSON_CreateArray();for(i=0;a && i<count;i++){n=cJSON_CreateString(strings[i]);if(!n){cJSON_Delete(a);return 0;}if(!i)a->child=n;else suffix_object(p,n);p=n;}return a;}
+
+/* Duplication: returns a newly allocated copy of item (with its children when recurse is non-zero), or 0 on a NULL item or allocation failure. */
+cJSON *cJSON_Duplicate(cJSON *item,int recurse)
+{
+	cJSON *copy,*src,*tail=0,*clone;
+	if (!item) return 0;	/* Bail on bad ptr */
+	/* Start from a zeroed node, so next/prev/child stay 0 unless set below. */
+	copy=cJSON_New_Item();
+	if (!copy) return 0;
+	/* Scalars first; the copy owns its data, so the reference flag is dropped. */
+	copy->type=item->type&(~cJSON_IsReference);
+	copy->valueint=item->valueint;
+	copy->valuedouble=item->valuedouble;
+	/* Strings are deep-copied; a failed copy abandons the whole duplicate. */
+	if (item->valuestring && !(copy->valuestring=cJSON_strdup(item->valuestring)))	{cJSON_Delete(copy);return 0;}
+	if (item->string && !(copy->string=cJSON_strdup(item->string)))				{cJSON_Delete(copy);return 0;}
+	if (!recurse) return copy;	/* If non-recursive, then we're done! */
+	/* Duplicate every child in order and chain the copies through next/prev. */
+	for (src=item->child;src;src=src->next)
+	{
+		clone=cJSON_Duplicate(src,1);
+		if (!clone) {cJSON_Delete(copy);return 0;}
+		if (tail)	{tail->next=clone;clone->prev=tail;}
+		else		copy->child=clone;
+		tail=clone;
+	}
+	return copy;
+}
+
+void cJSON_Minify(char *json)
+{
+	/* Strip whitespace plus line and block comments from json in place; string literals are copied untouched. */
+	/* Every scan stops at the terminator, so unterminated strings or comments are tolerated; NULL is a no-op. */
+	char *into=json;
+	if (!json) return;
+	while (*json)
+	{
+		if (*json==' ' || *json=='\t' || *json=='\r' || *json=='\n') json++;	/* Whitespace characters. */
+		else if (*json=='/' && json[1]=='/')  while (*json && *json!='\n') json++;	/* double-slash comments, to end of line. */
+		else if (*json=='/' && json[1]=='*') {json+=2;while (*json && !(*json=='*' && json[1]=='/')) json++;if (*json) json+=2;}	/* multiline comments; the opener is skipped first so its own star cannot close it. */
+		else if (*json=='\"'){*into++=*json++;while (*json && *json!='\"'){if (*json=='\\' && json[1]) *into++=*json++;*into++=*json++;}if (*json) *into++=*json++;}	/* string literals, which are \" sensitive. */
+		else *into++=*json++;			/* All other characters. */
+	}
+	*into=0;	/* and null-terminate. */
+}
diff --git a/vendors/cjson/cJSON.h b/vendors/cjson/cJSON.h
new file mode 100644
index 0000000..1ce513d
--- /dev/null
+++ b/vendors/cjson/cJSON.h
@@ -0,0 +1,146 @@
+/*
+  Copyright (c) 2009 Dave Gamble
+ 
+  Permission is hereby granted, free of charge, to any person obtaining a copy
+  of this software and associated documentation files (the "Software"), to deal
+  in the Software without restriction, including without limitation the rights
+  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+  copies of the Software, and to permit persons to whom the Software is
+  furnished to do so, subject to the following conditions:
+ 
+  The above copyright notice and this permission notice shall be included in
+  all copies or substantial portions of the Software.
+ 
+  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+  THE SOFTWARE.
+*/
+
+#ifndef cJSON__h
+#define cJSON__h
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/* cJSON Types: */
+#define cJSON_False 0
+#define cJSON_True 1
+#define cJSON_NULL 2
+#define cJSON_Number 3
+#define cJSON_String 4
+#define cJSON_Array 5
+#define cJSON_Object 6
+	
+#define cJSON_IsReference 256
+
+#define cJSON_ArrayForEach(pos, head) /* Visit every direct child of head in order; pos is parenthesised so any lvalue expression works. */ \
+                for((pos) = (head)->child; (pos) != 0; (pos) = (pos)->next)
+
+/* The cJSON structure: one node of a parsed or constructed JSON tree. */
+typedef struct cJSON {
+	struct cJSON *next,*prev;	/* next/prev allow you to walk array/object chains. Alternatively, use GetArraySize/GetArrayItem/GetObjectItem */
+	struct cJSON *child;		/* An array or object item will have a child pointer pointing to a chain of the items in the array/object. */
+
+	int type;					/* The type of the item, as above; cJSON_IsReference may be OR-ed in on top of it. */
+
+	char *valuestring;			/* The item's string, if type==cJSON_String */
+	int valueint;				/* The item's number, if type==cJSON_Number */
+	double valuedouble;			/* The item's number, if type==cJSON_Number */
+
+	char *string;				/* The item's name string, if this item is the child of, or is in the list of subitems of an object. */
+} cJSON;
+
+typedef struct cJSON_Hooks {	/* Allocator pair handed to cJSON_InitHooks. */
+      void *(*malloc_fn)(size_t sz);	/* allocation routine, called with the byte count needed */
+      void (*free_fn)(void *ptr);	/* release routine for blocks obtained from malloc_fn */
+} cJSON_Hooks;
+
+/* Supply malloc and free functions to cJSON. Pass NULL, or leave a member NULL, to use the C library default for it. */
+extern void cJSON_InitHooks(cJSON_Hooks* hooks);
+
+
+/* Supply a block of JSON, and this returns a cJSON object you can interrogate. Call cJSON_Delete when finished. */
+extern cJSON *cJSON_Parse(const char *value);
+/* Render a cJSON entity to text for transfer/storage. Free the char* when finished. */
+extern char  *cJSON_Print(cJSON *item);
+/* Render a cJSON entity to text for transfer/storage without any formatting. Free the char* when finished. */
+extern char  *cJSON_PrintUnformatted(cJSON *item);
+/* Delete a cJSON entity and all subentities. */
+extern void   cJSON_Delete(cJSON *c);
+
+/* Returns the number of items in an array (or object). */
+extern int	  cJSON_GetArraySize(cJSON *array);
+/* Retrieve item number "item" from array "array". Returns NULL if unsuccessful. */
+extern cJSON *cJSON_GetArrayItem(cJSON *array,int item);
+/* Get item "string" from object. Case insensitive. */
+extern cJSON *cJSON_GetObjectItem(cJSON *object,const char *string);
+
+/* For analysing failed parses. This returns a pointer to the parse error. You'll probably need to look a few chars back to make sense of it. Defined when cJSON_Parse() returns 0. 0 when cJSON_Parse() succeeds. */
+extern const char *cJSON_GetErrorPtr(void);
+	
+/* These calls create a cJSON item of the appropriate type. */
+extern cJSON *cJSON_CreateNull(void);
+extern cJSON *cJSON_CreateTrue(void);
+extern cJSON *cJSON_CreateFalse(void);
+extern cJSON *cJSON_CreateBool(int b);
+extern cJSON *cJSON_CreateNumber(double num);
+extern cJSON *cJSON_CreateString(const char *string);
+extern cJSON *cJSON_CreateArray(void);
+extern cJSON *cJSON_CreateObject(void);
+
+/* These utilities create an Array of count items. */
+extern cJSON *cJSON_CreateIntArray(const int *numbers,int count);
+extern cJSON *cJSON_CreateFloatArray(const float *numbers,int count);
+extern cJSON *cJSON_CreateDoubleArray(const double *numbers,int count);
+extern cJSON *cJSON_CreateStringArray(const char **strings,int count);
+
+/* Append item to the specified array/object. */
+extern void cJSON_AddItemToArray(cJSON *array, cJSON *item);
+extern void	cJSON_AddItemToObject(cJSON *object,const char *string,cJSON *item);
+/* Append reference to item to the specified array/object. Use this when you want to add an existing cJSON to a new cJSON, but don't want to corrupt your existing cJSON. */
+extern void cJSON_AddItemReferenceToArray(cJSON *array, cJSON *item);
+extern void	cJSON_AddItemReferenceToObject(cJSON *object,const char *string,cJSON *item);
+
+/* Remove/Detach items from Arrays/Objects. */
+extern cJSON *cJSON_DetachItemFromArray(cJSON *array,int which);
+extern void   cJSON_DeleteItemFromArray(cJSON *array,int which);
+extern cJSON *cJSON_DetachItemFromObject(cJSON *object,const char *string);
+extern void   cJSON_DeleteItemFromObject(cJSON *object,const char *string);
+	
+/* Update array items. */
+extern void cJSON_ReplaceItemInArray(cJSON *array,int which,cJSON *newitem);
+extern void cJSON_ReplaceItemInObject(cJSON *object,const char *string,cJSON *newitem);
+
+/* Duplicate a cJSON item */
+extern cJSON *cJSON_Duplicate(cJSON *item,int recurse);
+/* Duplicate will create a new, identical cJSON item to the one you pass, in new memory that will
+need to be released. With recurse!=0, it will duplicate any children connected to the item.
+The item->next and ->prev pointers are always zero on return from Duplicate. */
+
+/* ParseWithOpts allows you to require (and check) that the JSON is null terminated, and to retrieve a pointer to the first byte after the parsed text. */
+extern cJSON *cJSON_ParseWithOpts(const char *value,const char **return_parse_end,int require_null_terminated);
+
+extern void cJSON_Minify(char *json);
+
+/* Macros for creating things quickly. */
+#define cJSON_AddNullToObject(object,name)		cJSON_AddItemToObject(object, name, cJSON_CreateNull())
+#define cJSON_AddTrueToObject(object,name)		cJSON_AddItemToObject(object, name, cJSON_CreateTrue())
+#define cJSON_AddFalseToObject(object,name)		cJSON_AddItemToObject(object, name, cJSON_CreateFalse())
+#define cJSON_AddBoolToObject(object,name,b)	cJSON_AddItemToObject(object, name, cJSON_CreateBool(b))
+#define cJSON_AddNumberToObject(object,name,n)	cJSON_AddItemToObject(object, name, cJSON_CreateNumber(n))
+#define cJSON_AddStringToObject(object,name,s)	cJSON_AddItemToObject(object, name, cJSON_CreateString(s))
+
+/* When assigning an integer value, it needs to be propagated to valuedouble too. object appears several times in the expansion, so pass an expression without side effects. */
+#define cJSON_SetIntValue(object,val)			((object)?(object)->valueint=(object)->valuedouble=(val):(val))
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/vendors/hdr_histogram/hdr_histogram.c b/vendors/hdr_histogram/hdr_histogram.c
new file mode 100644
index 0000000..e1caaf2
--- /dev/null
+++ b/vendors/hdr_histogram/hdr_histogram.c
@@ -0,0 +1,1016 @@
+/**
+ * hdr_histogram.c
+ * Written by Michael Barker and released to the public domain,
+ * as explained at http://creativecommons.org/publicdomain/zero/1.0/
+ */
+
+#include <stdlib.h>
+#include <stdbool.h>
+#include <math.h>
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <errno.h>
+#include <inttypes.h>
+
+#include "hdr_histogram.h"
+
+//  ######   #######  ##     ## ##    ## ########  ######
+// ##    ## ##     ## ##     ## ###   ##    ##    ##    ##
+// ##       ##     ## ##     ## ####  ##    ##    ##
+// ##       ##     ## ##     ## ## ## ##    ##     ######
+// ##       ##     ## ##     ## ##  ####    ##          ##
+// ##    ## ##     ## ##     ## ##   ###    ##    ##    ##
+//  ######   #######   #######  ##    ##    ##     ######
+
+// Maps a logical counts index onto its physical slot in h->counts.
+// With a zero normalizing offset the index is returned untouched; otherwise the
+// offset is subtracted and the result is wrapped once by counts_len when it
+// falls below zero or reaches counts_len.
+static int32_t normalize_index(const struct hdr_histogram* h, int32_t index)
+{
+    const int32_t offset = h->normalizing_index_offset;
+    const int32_t len = h->counts_len;
+
+    if (offset == 0)
+    {
+        return index;
+    }
+
+    const int32_t shifted = index - offset;
+    if (shifted < 0)
+    {
+        return shifted + len;
+    }
+    return (shifted >= len) ? shifted - len : shifted;
+}
+
+// Reads the count stored in slot `index` of h->counts, with no index normalisation.
+static int64_t counts_get_direct(const struct hdr_histogram* h, int32_t index) {
+    return *(h->counts + index);
+}
+
+static int64_t counts_get_normalised(const struct hdr_histogram* h, int32_t index) {
+    const int32_t slot = normalize_index(h, index);  // logical index -> physical slot
+    return counts_get_direct(h, slot);
+}
+
+// Adds `value` to the (normalised) slot for `index` and to the running total_count.
+static void counts_inc_normalised(struct hdr_histogram* h, int32_t index, int64_t value)
+{
+    int64_t* slot = &h->counts[normalize_index(h, index)];
+    *slot += value;
+    h->total_count += value;
+}
+
+// Folds `value` into the tracked extremes; a zero never becomes min_value.
+static void update_min_max(struct hdr_histogram* h, int64_t value) {
+    if (value != 0 && value < h->min_value) { h->min_value = value; }
+    if (value > h->max_value) { h->max_value = value; }
+}
+
+// ##     ## ######## #### ##       #### ######## ##    ##
+// ##     ##    ##     ##  ##        ##     ##     ##  ##
+// ##     ##    ##     ##  ##        ##     ##      ####
+// ##     ##    ##     ##  ##        ##     ##       ##
+// ##     ##    ##     ##  ##        ##     ##       ##
+// ##     ##    ##     ##  ##        ##     ##       ##
+//  #######     ##    #### ######## ####    ##       ##
+
+// Integer exponentiation by repeated multiplication: returns base^exp.
+// A zero or negative exponent yields 1; the previous `while (exp)` test never
+// terminated sensibly for a negative exponent. Overflow is not detected.
+static int64_t power(int64_t base, int64_t exp)
+{
+    int64_t result = 1;
+    for (; exp > 0; exp--) { result *= base; }
+    return result;
+}
+
+#if defined(_MSC_VER)
+#pragma intrinsic(_BitScanReverse64)
+#endif
+
+static int32_t get_bucket_index(const struct hdr_histogram* h, int64_t value) // index of the bucket that holds `value`
+{
+#if defined(_MSC_VER)
+    uint32_t leading_zero = 0; // receives the bit index reported by _BitScanReverse64 (presumably the highest set bit - confirm against the MSVC docs)
+    _BitScanReverse64(&leading_zero, value | h->sub_bucket_mask); // OR-ing in the mask means values inside the mask all scan the same
+    int32_t pow2ceiling = 64 - (63 - leading_zero); // simplifies to leading_zero + 1
+#else
+    int32_t pow2ceiling = 64 - __builtin_clzll(value | h->sub_bucket_mask); // 64 minus the leading-zero count of (value | mask)
+#endif
+    return pow2ceiling - h->unit_magnitude - (h->sub_bucket_half_count_magnitude + 1); // rebased so values inside the mask land in bucket 0
+}
+
+static int32_t get_sub_bucket_index(int64_t value, int32_t bucket_index, int32_t unit_magnitude) {
+    const int32_t shift = bucket_index + unit_magnitude;  // combined shift for this bucket
+    return (int32_t)(value >> shift);
+}
+
+// Flattens (bucket_index, sub_bucket_index) into a position in the counts array.
+// The bucket's first entry sits at (bucket_index + 1) * sub_bucket_half_count,
+// computed here as a shift by sub_bucket_half_count_magnitude.
+static int32_t counts_index(const struct hdr_histogram* h, int32_t bucket_index, int32_t sub_bucket_index)
+{
+    const int32_t base = (bucket_index + 1) << h->sub_bucket_half_count_magnitude;
+    // Position relative to the base; negative for sub-bucket indices below the half count.
+    const int32_t offset = sub_bucket_index - h->sub_bucket_half_count;
+    return base + offset;
+}
+
+static int64_t value_from_index(int32_t bucket_index, int32_t sub_bucket_index, int32_t unit_magnitude) {
+    const int64_t widened = (int64_t) sub_bucket_index;  // widen before shifting
+    return widened << (bucket_index + unit_magnitude);
+}
+
+// Returns the counts-array index at which `value` is recorded.
+int32_t counts_index_for(const struct hdr_histogram* h, int64_t value)
+{
+    const int32_t bucket = get_bucket_index(h, value);
+    const int32_t sub_bucket = get_sub_bucket_index(value, bucket, h->unit_magnitude);
+    return counts_index(h, bucket, sub_bucket);
+}
+
+// Inverse of the counts index computation: returns the value that slot
+// `index` stands for. Slots below sub_bucket_half_count belong to bucket 0 and
+// keep their low bits as-is; all others get sub_bucket_half_count added back.
+int64_t hdr_value_at_index(const struct hdr_histogram *h, int32_t index)
+{
+    const int32_t half = h->sub_bucket_half_count;
+    const int32_t low_bits = index & (half - 1);
+    const int32_t bucket = (index >> h->sub_bucket_half_count_magnitude) - 1;
+
+    return (bucket < 0)
+        ? value_from_index(0, low_bits, h->unit_magnitude)
+        : value_from_index(bucket, low_bits + half, h->unit_magnitude);
+}
+
+// Returns the width of the range of values that are treated as equivalent to `value`.
+int64_t hdr_size_of_equivalent_value_range(const struct hdr_histogram* h, int64_t value)
+{
+    int32_t bucket = get_bucket_index(h, value);
+    if (get_sub_bucket_index(value, bucket, h->unit_magnitude) >= h->sub_bucket_count) { bucket++; }
+    return INT64_C(1) << (h->unit_magnitude + bucket);
+}
+
+// Rounds `value` down to the smallest value that falls in the same sub-bucket.
+static int64_t lowest_equivalent_value(const struct hdr_histogram* h, int64_t value)
+{
+    const int32_t bucket = get_bucket_index(h, value);
+    return value_from_index(bucket, get_sub_bucket_index(value, bucket, h->unit_magnitude), h->unit_magnitude);
+}
+
+int64_t hdr_next_non_equivalent_value(const struct hdr_histogram *h, int64_t value) {
+    const int64_t range_start = lowest_equivalent_value(h, value);
+    return range_start + hdr_size_of_equivalent_value_range(h, value);  // first value past the range
+}
+
+static int64_t highest_equivalent_value(const struct hdr_histogram* h, int64_t value) {
+    const int64_t next_range_start = hdr_next_non_equivalent_value(h, value);
+    return next_range_start - 1;  // last value still equivalent to `value`
+}
+
+int64_t hdr_median_equivalent_value(const struct hdr_histogram *h, int64_t value) {
+    const int64_t half_range = hdr_size_of_equivalent_value_range(h, value) >> 1;
+    return lowest_equivalent_value(h, value) + half_range;  // midpoint of the equivalent range
+}
+
+// Returns the lowest equivalent of h->min_value, or INT64_MAX while min_value
+// still holds that sentinel.
+static int64_t non_zero_min(const struct hdr_histogram* h)
+{
+    const int64_t recorded_min = h->min_value;
+
+    return (recorded_min == INT64_MAX) ? INT64_MAX
+                                       : lowest_equivalent_value(h, recorded_min);
+}
+
+// Rebuilds the derived fields total_count, min_value and max_value from the
+// contents of h->counts.
+//
+// - total_count: sum of every positive slot.
+// - max_value:   highest equivalent value of the last populated slot, or 0
+//                when no slot is populated.
+// - min_value:   value of the first populated slot other than slot 0, or
+//                INT64_MAX when there is none.
+void hdr_reset_internal_counters(struct hdr_histogram* h)
+{
+    int first_non_zero_slot = -1;
+    int last_populated_slot = -1;
+    int64_t running_total = 0;
+
+    // Single pass: accumulate the total and remember the boundary slots.
+    for (int slot = 0; slot < h->counts_len; slot++)
+    {
+        const int64_t slot_count = counts_get_direct(h, slot);
+        if (slot_count <= 0)
+        {
+            continue;
+        }
+
+        running_total += slot_count;
+        last_populated_slot = slot;
+        if (first_non_zero_slot == -1 && slot != 0)
+        {
+            first_non_zero_slot = slot;
+        }
+    }
+
+    // Report the top of the last populated slot's equivalent range.
+    h->max_value = (last_populated_slot == -1)
+        ? 0
+        : highest_equivalent_value(h, hdr_value_at_index(h, last_populated_slot));
+
+    // Slot 0 is skipped above, so min_value never comes from it.
+    h->min_value = (first_non_zero_slot == -1)
+        ? INT64_MAX
+        : hdr_value_at_index(h, first_non_zero_slot);
+
+    h->total_count = running_total;
+}
+
+// Counts how many buckets are required before the tracked range exceeds `value`.
+// The first bucket covers values below sub_bucket_count << unit_magnitude and
+// every further bucket doubles that limit. Doubling stops early, counting one
+// final bucket, once the limit exceeds INT64_MAX / 2.
+static int32_t buckets_needed_to_cover_value(int64_t value, int32_t sub_bucket_count, int32_t unit_magnitude)
+{
+    int32_t buckets = 1;
+    int64_t limit = ((int64_t) sub_bucket_count) << unit_magnitude;
+
+    for (; limit <= value; limit <<= 1, buckets++)
+    {
+        if (limit > INT64_MAX / 2) { return buckets + 1; }
+    }
+
+    return buckets;
+}
+
+// ##     ## ######## ##     ##  #######  ########  ##    ##
+// ###   ### ##       ###   ### ##     ## ##     ##  ##  ##
+// #### #### ##       #### #### ##     ## ##     ##   ####
+// ## ### ## ######   ## ### ## ##     ## ########     ##
+// ##     ## ##       ##     ## ##     ## ##   ##      ##
+// ##     ## ##       ##     ## ##     ## ##    ##     ##
+// ##     ## ######## ##     ##  #######  ##     ##    ##
+
+// Validates the requested range and precision and derives the bucket layout
+// into *cfg. Returns 0 on success, or EINVAL when lowest_trackable_value < 1,
+// significant_figures is outside 1..5, highest_trackable_value is below twice
+// lowest_trackable_value, or the derived magnitudes sum to more than 61 (in
+// which case *cfg has already been partially written).
+int hdr_calculate_bucket_config(
+        int64_t lowest_trackable_value, int64_t highest_trackable_value,
+        int significant_figures, struct hdr_histogram_bucket_config* cfg)
+{
+    if (lowest_trackable_value < 1 || significant_figures < 1 || 5 < significant_figures)
+    {
+        return EINVAL;
+    }
+    // Same test as `lowest * 2 > highest`, but phrased as a division so it
+    // cannot overflow int64_t when lowest_trackable_value > INT64_MAX / 2.
+    if (lowest_trackable_value > highest_trackable_value / 2)
+    {
+        return EINVAL;
+    }
+
+    cfg->lowest_trackable_value = lowest_trackable_value;
+    cfg->significant_figures = significant_figures;
+    cfg->highest_trackable_value = highest_trackable_value;
+    int64_t largest_value_with_single_unit_resolution = 2 * power(10, significant_figures);
+    int32_t sub_bucket_count_magnitude = (int32_t) ceil(log((double)largest_value_with_single_unit_resolution) / log(2));
+    cfg->sub_bucket_half_count_magnitude = ((sub_bucket_count_magnitude > 1) ? sub_bucket_count_magnitude : 1) - 1;
+    cfg->unit_magnitude = (int32_t) floor(log((double)lowest_trackable_value) / log(2));
+
+    // Reject oversized magnitudes before the mask shift below can overflow.
+    if (cfg->unit_magnitude + cfg->sub_bucket_half_count_magnitude > 61)
+    {
+        return EINVAL;
+    }
+    cfg->sub_bucket_count      = (int32_t) pow(2, (cfg->sub_bucket_half_count_magnitude + 1));
+    cfg->sub_bucket_half_count = cfg->sub_bucket_count / 2;
+    cfg->sub_bucket_mask       = ((int64_t) cfg->sub_bucket_count - 1) << cfg->unit_magnitude;
+    cfg->bucket_count = buckets_needed_to_cover_value(highest_trackable_value, cfg->sub_bucket_count, (int32_t)cfg->unit_magnitude);
+    cfg->counts_len = (cfg->bucket_count + 1) * (cfg->sub_bucket_count / 2);
+    return 0;
+}
+
+// Copies the bucket layout from *cfg into *h and resets the recording state; h->counts is left alone.
+void hdr_init_preallocated(struct hdr_histogram* h, struct hdr_histogram_bucket_config* cfg) {
+    h->total_count = 0;
+    h->min_value = INT64_MAX;
+    h->max_value = 0;
+    h->normalizing_index_offset = 0;
+    h->conversion_ratio = 1.0;
+    h->lowest_trackable_value = cfg->lowest_trackable_value;
+    h->highest_trackable_value = cfg->highest_trackable_value;
+    h->significant_figures = (int32_t)cfg->significant_figures;
+    h->unit_magnitude = (int32_t)cfg->unit_magnitude;
+    h->sub_bucket_count = cfg->sub_bucket_count;
+    h->sub_bucket_half_count = cfg->sub_bucket_half_count;
+    h->sub_bucket_half_count_magnitude = cfg->sub_bucket_half_count_magnitude;
+    h->sub_bucket_mask = cfg->sub_bucket_mask;
+    h->bucket_count = cfg->bucket_count;
+    h->counts_len = cfg->counts_len;
+}
+
+// Allocates a histogram and its zeroed counts array and stores it in *result.
+// Returns 0 on success, the hdr_calculate_bucket_config() error for rejected
+// parameters, or ENOMEM (with nothing leaked and *result untouched).
+int hdr_init(
+        int64_t lowest_trackable_value, int64_t highest_trackable_value,
+        int significant_figures, struct hdr_histogram** result)
+{
+    struct hdr_histogram_bucket_config cfg;
+    int r = hdr_calculate_bucket_config(lowest_trackable_value, highest_trackable_value, significant_figures, &cfg);
+    if (r)
+    {
+        return r;
+    }
+
+    int64_t* counts = calloc((size_t) cfg.counts_len, sizeof(int64_t));
+    struct hdr_histogram* histogram = calloc(1, sizeof(struct hdr_histogram));
+    if (!counts || !histogram)
+    {
+        // Release whichever allocation succeeded; free(NULL) is a no-op.
+        free(counts);
+        free(histogram);
+        return ENOMEM;
+    }
+
+    histogram->counts = counts;
+    hdr_init_preallocated(histogram, &cfg);
+    *result = histogram;
+    return 0;
+}
+
+void hdr_close(struct hdr_histogram* h)
+{   // Frees the counts array, then the histogram; a NULL `h` is ignored, mirroring free(NULL).
+    if (h) { free(h->counts); }
+    free(h);
+}
+
+int hdr_alloc(int64_t highest_trackable_value, int significant_figures, struct hdr_histogram** result) {
+    const int64_t lowest_trackable_value = 1;  // hdr_alloc always passes 1 as the lower bound
+    return hdr_init(lowest_trackable_value, highest_trackable_value, significant_figures, result);
+}
+
+// Zeroes all counts and restores total_count/min_value/max_value to their initial values.
+void hdr_reset(struct hdr_histogram *h) {
+    const size_t counts_bytes = sizeof(int64_t) * h->counts_len;
+    memset(h->counts, 0, counts_bytes);
+    h->max_value = 0;
+    h->min_value = INT64_MAX;
+    h->total_count = 0;
+}
+
+size_t hdr_get_memory_size(struct hdr_histogram *h) {
+    const size_t counts_bytes = h->counts_len * sizeof(int64_t);  // storage behind h->counts
+    return sizeof(struct hdr_histogram) + counts_bytes;
+}
+
+// ##     ## ########  ########     ###    ######## ########  ######
+// ##     ## ##     ## ##     ##   ## ##      ##    ##       ##    ##
+// ##     ## ##     ## ##     ##  ##   ##     ##    ##       ##
+// ##     ## ########  ##     ## ##     ##    ##    ######    ######
+// ##     ## ##        ##     ## #########    ##    ##             ##
+// ##     ## ##        ##     ## ##     ##    ##    ##       ##    ##
+//  #######  ##        ########  ##     ##    ##    ########  ######
+
+
+bool hdr_record_value(struct hdr_histogram* h, int64_t value) {
+    const int64_t single_occurrence = 1;  // record exactly one sample of `value`
+    return hdr_record_values(h, value, single_occurrence);
+}
+
+// Records `count` occurrences of `value`.
+// Returns false, leaving the histogram unchanged, when `value` is negative or
+// maps to a slot outside [0, counts_len); returns true once it is recorded.
+bool hdr_record_values(struct hdr_histogram* h, int64_t value, int64_t count)
+{
+    if (value < 0)
+    {
+        return false;
+    }
+
+    const int32_t slot = counts_index_for(h, value);
+    const bool slot_in_range = (0 <= slot) && (slot < h->counts_len);
+    if (slot_in_range)
+    {
+        counts_inc_normalised(h, slot, count);
+        update_min_max(h, value);
+    }
+    return slot_in_range;
+}
+
+bool hdr_record_corrected_value(struct hdr_histogram* h, int64_t value, int64_t expected_interval) {
+    const int64_t single_occurrence = 1;  // one sample, plus any back-filled ones
+    return hdr_record_corrected_values(h, value, single_occurrence, expected_interval);
+}
+
+
+// Records `count` occurrences of `value` and, when `value` exceeds a positive
+// `expected_interval`, also records value - k * expected_interval for each
+// k >= 1 that keeps the result at or above expected_interval.
+// Returns false as soon as any individual recording is rejected.
+bool hdr_record_corrected_values(struct hdr_histogram* h, int64_t value, int64_t count, int64_t expected_interval)
+{
+    if (!hdr_record_values(h, value, count))
+    {
+        return false;
+    }
+
+    if (expected_interval > 0 && value > expected_interval)
+    {
+        int64_t backfill = value - expected_interval;
+        while (backfill >= expected_interval)
+        {
+            if (!hdr_record_values(h, backfill, count)) { return false; }
+            backfill -= expected_interval;
+        }
+    }
+
+    return true;
+}
+
+// Replays every recorded (value, count) pair of `from` into `h`.
+// Pairs come from the recorded-values iterator, so empty slots are skipped.
+// Returns the total count of the pairs that `h` refused to record.
+int64_t hdr_add(struct hdr_histogram* h, const struct hdr_histogram* from)
+{
+    int64_t rejected = 0;
+    struct hdr_iter it;
+
+    for (hdr_iter_recorded_init(&it, from); hdr_iter_next(&it); )
+    {
+        const bool accepted = hdr_record_values(h, it.value, it.count);
+        if (!accepted)
+        {
+            rejected += it.count;
+        }
+    }
+
+    return rejected;
+}
+
+// Like hdr_add(), but every pair from `from` is recorded through
+// hdr_record_corrected_values() with the given `expected_interval`.
+// Returns the total count of the pairs whose recording reported failure.
+int64_t hdr_add_while_correcting_for_coordinated_omission(
+        struct hdr_histogram* h, struct hdr_histogram* from, int64_t expected_interval)
+{
+    int64_t rejected = 0;
+    struct hdr_iter it;
+
+    for (hdr_iter_recorded_init(&it, from); hdr_iter_next(&it); )
+    {
+        const bool accepted = hdr_record_corrected_values(h, it.value, it.count, expected_interval);
+        if (!accepted)
+        {
+            rejected += it.count;
+        }
+    }
+
+    return rejected;
+}
+
+
+
+// ##     ##    ###    ##       ##     ## ########  ######
+// ##     ##   ## ##   ##       ##     ## ##       ##    ##
+// ##     ##  ##   ##  ##       ##     ## ##       ##
+// ##     ## ##     ## ##       ##     ## ######    ######
+//  ##   ##  ######### ##       ##     ## ##             ##
+//   ## ##   ##     ## ##       ##     ## ##       ##    ##
+//    ###    ##     ## ########  #######  ########  ######
+
+
+// Returns the highest equivalent value of h->max_value, or 0 when max_value
+// is still 0.
+int64_t hdr_max(const struct hdr_histogram* h)
+{
+    const int64_t recorded_max = h->max_value;
+
+    return (recorded_max == 0) ? 0
+                               : highest_equivalent_value(h, recorded_max);
+}
+
+// Returns 0 when slot 0 holds at least one count; otherwise the result of
+// non_zero_min().
+int64_t hdr_min(const struct hdr_histogram* h)
+{
+    const bool slot_zero_populated = hdr_count_at_index(h, 0) > 0;
+
+    return slot_zero_populated ? 0
+                               : non_zero_min(h);
+}
+
+// Returns the highest equivalent value of the first slot at which the
+// cumulative count reaches the requested percentile of total_count.
+// `percentile` is capped at 100 and the target count is rounded to nearest
+// with a floor of 1. Returns 0 if the walk ends before the target is reached.
+int64_t hdr_value_at_percentile(const struct hdr_histogram* h, double percentile)
+{
+    const double capped = (percentile < 100.0) ? percentile : 100.0;
+    const int64_t rounded = (int64_t) (((capped / 100) * h->total_count) + 0.5);
+    const int64_t target = (rounded > 1) ? rounded : 1;
+
+    // Walk every slot in index order, accumulating counts.
+    int64_t seen = 0;
+    struct hdr_iter it;
+    for (hdr_iter_init(&it, h); hdr_iter_next(&it); )
+    {
+        seen += it.count;
+        if (seen >= target)
+        {
+            return highest_equivalent_value(h, it.value);
+        }
+    }
+
+    return 0;
+}
+
+// Returns the count-weighted mean of the recorded values, using the median
+// equivalent value of each populated slot. The sum is divided by
+// h->total_count without a zero check.
+// NOTE(review): an empty histogram therefore evaluates 0.0 / 0 - confirm callers expect that.
+double hdr_mean(const struct hdr_histogram* h)
+{
+    int64_t weighted_sum = 0;
+    struct hdr_iter it;
+
+    for (hdr_iter_init(&it, h); hdr_iter_next(&it); )
+    {
+        if (it.count == 0) { continue; }
+        weighted_sum += it.count * hdr_median_equivalent_value(h, it.value);
+    }
+
+    return ((double) weighted_sum) / h->total_count;
+}
+
+// Returns the square root of the count-weighted mean squared distance between
+// each populated slot's median equivalent value and hdr_mean(). The division
+// by h->total_count is made without a zero check.
+double hdr_stddev(const struct hdr_histogram* h)
+{
+    const double mean = hdr_mean(h);
+    double squared_dev_sum = 0.0;
+    struct hdr_iter it;
+
+    for (hdr_iter_init(&it, h); hdr_iter_next(&it); )
+    {
+        if (it.count == 0) { continue; }
+
+        const double dev = ((double) hdr_median_equivalent_value(h, it.value)) - mean;
+        squared_dev_sum += (dev * dev) * it.count;
+    }
+
+    return sqrt(squared_dev_sum / h->total_count);
+}
+
+bool hdr_values_are_equivalent(const struct hdr_histogram* h, int64_t a, int64_t b) {
+    const int64_t range_start_a = lowest_equivalent_value(h, a);  // equal range starts mean equivalent values
+    return range_start_a == lowest_equivalent_value(h, b);
+}
+
+// Public entry point for the file-local lowest_equivalent_value() helper.
+int64_t hdr_lowest_equivalent_value(const struct hdr_histogram* h, int64_t value) {
+    return lowest_equivalent_value(h, value);
+}
+
+int64_t hdr_count_at_value(const struct hdr_histogram* h, int64_t value) {
+    const int32_t slot = counts_index_for(h, value);  // no range check is made on the slot
+    return counts_get_normalised(h, slot);
+}
+
+// Returns the count stored for logical slot `index`; the index is not range-checked here.
+int64_t hdr_count_at_index(const struct hdr_histogram* h, int32_t index) {
+    return counts_get_normalised(h, index);
+}
+
+
+// #### ######## ######## ########     ###    ########  #######  ########   ######
+//  ##     ##    ##       ##     ##   ## ##      ##    ##     ## ##     ## ##    ##
+//  ##     ##    ##       ##     ##  ##   ##     ##    ##     ## ##     ## ##
+//  ##     ##    ######   ########  ##     ##    ##    ##     ## ########   ######
+//  ##     ##    ##       ##   ##   #########    ##    ##     ## ##   ##         ##
+//  ##     ##    ##       ##    ##  ##     ##    ##    ##     ## ##    ##  ##    ##
+// ####    ##    ######## ##     ## ##     ##    ##     #######  ##     ##  ######
+
+
+static bool has_buckets(struct hdr_iter* iter) {
+    const int32_t slot_count = iter->h->counts_len;
+    return iter->counts_index < slot_count;  // cursor still addresses a slot
+}
+
+// True while the counts visited so far add up to less than the iterator's total_count snapshot.
+static bool has_next(struct hdr_iter* iter) {
+    return iter->total_count > iter->cumulative_count;
+}
+
+// Advances the cursor by one slot and refreshes count, cumulative_count, value
+// and the lowest/median/highest equivalents of that value. Returns false, with
+// only counts_index changed, once the cursor has moved past the last slot.
+static bool move_next(struct hdr_iter* iter)
+{
+    const struct hdr_histogram* hist = iter->h;
+
+    iter->counts_index++;
+    if (!has_buckets(iter)) { return false; }
+
+    const int64_t slot_value = hdr_value_at_index(hist, iter->counts_index);
+    iter->count = counts_get_normalised(hist, iter->counts_index);
+    iter->cumulative_count += iter->count;
+    iter->value = slot_value;
+    iter->lowest_equivalent_value = lowest_equivalent_value(hist, slot_value);
+    iter->median_equivalent_value = hdr_median_equivalent_value(hist, slot_value);
+    iter->highest_equivalent_value = highest_equivalent_value(hist, slot_value);
+    return true;
+}
+
+static int64_t peek_next_value_from_index(struct hdr_iter* iter) {
+    const int32_t following_slot = iter->counts_index + 1;  // the cursor itself is not moved
+    return hdr_value_at_index(iter->h, following_slot);
+}
+
+// True when the cursor is still inside the counts array and the value of the
+// slot after it lies above `reporting_level_upper_bound`.
+static bool next_value_greater_than_reporting_level_upper_bound(
+    struct hdr_iter *iter, int64_t reporting_level_upper_bound)
+{
+    const bool cursor_in_range = iter->counts_index < iter->h->counts_len;
+
+    return cursor_in_range
+        && peek_next_value_from_index(iter) > reporting_level_upper_bound;
+}
+
+// Steps to the next slot while counts remain unvisited and the cursor is
+// inside the counts array. The result of move_next() is not propagated: true
+// is returned whenever a step was attempted.
+static bool _basic_iter_next(struct hdr_iter *iter)
+{
+    const bool can_step = has_next(iter) && iter->counts_index < iter->h->counts_len;
+
+    if (can_step) { move_next(iter); }
+
+    return can_step;
+}
+
+static void _update_iterated_values(struct hdr_iter* iter, int64_t new_value_iterated_to) {
+    const int64_t previous_to = iter->value_iterated_to;  // becomes the new "from"
+    iter->value_iterated_to = new_value_iterated_to;
+    iter->value_iterated_from = previous_to;
+}
+
+// Iterator step that visits every slot, populated or not. Returns false once
+// move_next() reports that the cursor has run off the end.
+static bool _all_values_iter_next(struct hdr_iter* iter)
+{
+    if (!move_next(iter))
+    {
+        return false;
+    }
+    _update_iterated_values(iter, iter->value);
+    return true;
+}
+
+// Points `iter` at `h` for an all-slots walk: cursor before slot 0, a snapshot
+// of h->total_count, zeroed progress fields, and the all-values step function.
+void hdr_iter_init(struct hdr_iter* iter, const struct hdr_histogram* h)
+{
+    iter->h = h;
+    iter->_next_fp = _all_values_iter_next;
+    iter->total_count = h->total_count;
+    iter->counts_index = -1;
+    iter->cumulative_count = 0;
+    iter->count = 0;
+    iter->value = 0;
+    iter->highest_equivalent_value = 0;
+    iter->value_iterated_to = 0;
+    iter->value_iterated_from = 0;
+}
+
+// Advances `iter` by calling the step function stored in iter->_next_fp.
+bool hdr_iter_next(struct hdr_iter* iter) {
+    return (*iter->_next_fp)(iter);
+}
+
+// ########  ######## ########   ######  ######## ##    ## ######## #### ##       ########  ######
+// ##     ## ##       ##     ## ##    ## ##       ###   ##    ##     ##  ##       ##       ##    ##
+// ##     ## ##       ##     ## ##       ##       ####  ##    ##     ##  ##       ##       ##
+// ########  ######   ########  ##       ######   ## ## ##    ##     ##  ##       ######    ######
+// ##        ##       ##   ##   ##       ##       ##  ####    ##     ##  ##       ##             ##
+// ##        ##       ##    ##  ##    ## ##       ##   ###    ##     ##  ##       ##       ##    ##
+// ##        ######## ##     ##  ######  ######## ##    ##    ##    #### ######## ########  ######
+
+static bool _percentile_iter_next(struct hdr_iter* iter) // step function installed by hdr_iter_percentile_init()
+{
+    struct hdr_iter_percentiles* percentiles = &iter->specifics.percentiles;
+
+    if (!has_next(iter)) // every recorded count has already been visited
+    {
+        if (percentiles->seen_last_value)
+        {
+            return false;
+        }
+
+        percentiles->seen_last_value = true; // emit exactly one closing step...
+        percentiles->percentile = 100.0; // ...reported as the 100th percentile
+
+        return true;
+    }
+
+    if (iter->counts_index == -1 && !_basic_iter_next(iter)) // first call: step off the initial -1 cursor
+    {
+        return false;
+    }
+
+    do
+    {
+        double current_percentile = (100.0 * (double) iter->cumulative_count) / iter->h->total_count;
+        if (iter->count != 0 &&
+                percentiles->percentile_to_iterate_to <= current_percentile) // populated slot that reaches the current target
+        {
+            _update_iterated_values(iter, highest_equivalent_value(iter->h, iter->value));
+
+            percentiles->percentile = percentiles->percentile_to_iterate_to;
+            int64_t temp = (int64_t)(log(100 / (100.0 - (percentiles->percentile_to_iterate_to))) / log(2)) + 1; // truncated log2(100 / (100 - target)), plus one
+            int64_t half_distance = (int64_t) pow(2, (double) temp); // 2^temp
+            int64_t percentile_reporting_ticks = percentiles->ticks_per_half_distance * half_distance;
+            percentiles->percentile_to_iterate_to += 100.0 / percentile_reporting_ticks; // next target; the increment shrinks as half_distance grows
+
+            return true;
+        }
+    }
+    while (_basic_iter_next(iter));
+
+    return true; // no further step was possible before reaching the target; a step is still reported
+}
+
+// Prepares `iter` to walk `h` in percentile steps, starting from the 0th
+// percentile with `ticks_per_half_distance` reporting ticks per half distance.
+void hdr_iter_percentile_init(struct hdr_iter* iter, const struct hdr_histogram* h, int32_t ticks_per_half_distance)
+{
+    struct hdr_iter_percentiles* state = &iter->specifics.percentiles;
+    iter->h = h;
+    hdr_iter_init(iter, h);
+    iter->_next_fp = _percentile_iter_next;
+    state->percentile = 0.0;
+    state->percentile_to_iterate_to = 0.0;
+    state->ticks_per_half_distance = ticks_per_half_distance;
+    state->seen_last_value = false;
+}
+
+// Builds, into `str`, the printf format for one percentile line. The precision
+// of the first (value) column is `significant_figures`; CSV output is
+// comma-separated, every other format uses 12-wide space-separated columns.
+static void format_line_string(char* str, size_t len, int significant_figures, format_type format)
+{
+#if defined(_MSC_VER)
+#define snprintf _snprintf
+#pragma warning(push)
+#pragma warning(disable: 4996)
+#endif
+    const char* value_prefix = "%12.";
+    const char* remaining_columns = "f %12f %12d %12.2f\n";
+
+    if (format == CSV)
+    {
+        value_prefix = "%.";
+        remaining_columns = "f,%f,%d,%.2f\n";
+    }
+
+    snprintf(str, len, "%s%d%s", value_prefix, significant_figures, remaining_columns);
+#if defined(_MSC_VER)
+#undef snprintf
+#pragma warning(pop)
+#endif
+}
+
+
+// ########  ########  ######   #######  ########  ########  ######## ########
+// ##     ## ##       ##    ## ##     ## ##     ## ##     ## ##       ##     ##
+// ##     ## ##       ##       ##     ## ##     ## ##     ## ##       ##     ##
+// ########  ######   ##       ##     ## ########  ##     ## ######   ##     ##
+// ##   ##   ##       ##       ##     ## ##   ##   ##     ## ##       ##     ##
+// ##    ##  ##       ##    ## ##     ## ##    ##  ##     ## ##       ##     ##
+// ##     ## ########  ######   #######  ##     ## ########  ######## ########
+
+
+// Iterator step that skips empty slots: advances until a slot with a non-zero
+// count is found, publishes it, and returns true. Returns false when
+// _basic_iter_next() stops first.
+static bool _recorded_iter_next(struct hdr_iter* iter)
+{
+    do
+    {
+        if (!_basic_iter_next(iter)) { return false; }
+    }
+    while (iter->count == 0);
+
+    _update_iterated_values(iter, iter->value);
+    iter->specifics.recorded.count_added_in_this_iteration_step = iter->count;
+    return true;
+}
+
+// Prepares `iter` to visit only the slots of `h` that hold a non-zero count.
+void hdr_iter_recorded_init(struct hdr_iter* iter, const struct hdr_histogram* h)
+{
+    hdr_iter_init(iter, h);
+
+    iter->_next_fp = _recorded_iter_next;
+    iter->specifics.recorded.count_added_in_this_iteration_step = 0;
+}
+
+// ##       #### ##    ## ########    ###    ########
+// ##        ##  ###   ## ##         ## ##   ##     ##
+// ##        ##  ####  ## ##        ##   ##  ##     ##
+// ##        ##  ## ## ## ######   ##     ## ########
+// ##        ##  ##  #### ##       ######### ##   ##
+// ##        ##  ##   ### ##       ##     ## ##    ##
+// ######## #### ##    ## ######## ##     ## ##     ##
+
+
+// Iterator step for fixed-width reporting levels. Slots are consumed, their
+// counts summed into count_added_in_this_iteration_step, until the current
+// slot's value reaches the lowest equivalent of the next reporting level;
+// that level is then published and moved up by value_units_per_bucket.
+// Returns false only when no counts remain and the slot after the cursor does
+// not lie above the pending level.
+static bool _iter_linear_next(struct hdr_iter* iter)
+{
+    struct hdr_iter_linear* state = &iter->specifics.linear;
+
+    state->count_added_in_this_iteration_step = 0;
+
+    const bool more_to_report = has_next(iter) ||
+        next_value_greater_than_reporting_level_upper_bound(
+            iter, state->next_value_reporting_level_lowest_equivalent);
+    if (!more_to_report)
+    {
+        return false;
+    }
+
+    while (iter->value < state->next_value_reporting_level_lowest_equivalent)
+    {
+        // Running off the end of the slots still counts as a reported step.
+        if (!move_next(iter)) { return true; }
+
+        state->count_added_in_this_iteration_step += iter->count;
+    }
+
+    _update_iterated_values(iter, state->next_value_reporting_level);
+    state->next_value_reporting_level += state->value_units_per_bucket;
+    state->next_value_reporting_level_lowest_equivalent =
+        lowest_equivalent_value(iter->h, state->next_value_reporting_level);
+
+    return true;
+}
+
+
+// Prepares `iter` to walk `h` in fixed steps, the first level being value_units_per_bucket.
+void hdr_iter_linear_init(struct hdr_iter* iter, const struct hdr_histogram* h, int64_t value_units_per_bucket)
+{
+    hdr_iter_init(iter, h);
+    iter->_next_fp = _iter_linear_next;
+    struct hdr_iter_linear* state = &iter->specifics.linear;
+    state->next_value_reporting_level_lowest_equivalent = lowest_equivalent_value(h, value_units_per_bucket);
+    state->next_value_reporting_level = value_units_per_bucket;
+    state->value_units_per_bucket = value_units_per_bucket;
+    state->count_added_in_this_iteration_step = 0;
+}
+
+// ##        #######   ######      ###    ########  #### ######## ##     ## ##     ## ####  ######
+// ##       ##     ## ##    ##    ## ##   ##     ##  ##     ##    ##     ## ###   ###  ##  ##    ##
+// ##       ##     ## ##         ##   ##  ##     ##  ##     ##    ##     ## #### ####  ##  ##
+// ##       ##     ## ##   #### ##     ## ########   ##     ##    ######### ## ### ##  ##  ##
+// ##       ##     ## ##    ##  ######### ##   ##    ##     ##    ##     ## ##     ##  ##  ##
+// ##       ##     ## ##    ##  ##     ## ##    ##   ##     ##    ##     ## ##     ##  ##  ##    ##
+// ########  #######   ######   ##     ## ##     ## ####    ##    ##     ## ##     ## ####  ######
+
+// Iterator step for geometrically growing reporting levels. Slots are
+// consumed, their counts summed into count_added_in_this_iteration_step,
+// until the current slot's value reaches the lowest equivalent of the next
+// reporting level; that level is then published and multiplied by log_base
+// (truncated to an integer). Returns false only when no counts remain and the
+// slot after the cursor does not lie above the pending level.
+static bool _log_iter_next(struct hdr_iter *iter)
+{
+    struct hdr_iter_log* state = &iter->specifics.log;
+
+    state->count_added_in_this_iteration_step = 0;
+
+    const bool more_to_report = has_next(iter) ||
+        next_value_greater_than_reporting_level_upper_bound(
+            iter, state->next_value_reporting_level_lowest_equivalent);
+    if (!more_to_report)
+    {
+        return false;
+    }
+
+    while (iter->value < state->next_value_reporting_level_lowest_equivalent)
+    {
+        // Running off the end of the slots still counts as a reported step.
+        if (!move_next(iter)) { return true; }
+
+        state->count_added_in_this_iteration_step += iter->count;
+    }
+
+    _update_iterated_values(iter, state->next_value_reporting_level);
+    state->next_value_reporting_level *= (int64_t)state->log_base;
+    state->next_value_reporting_level_lowest_equivalent =
+        lowest_equivalent_value(iter->h, state->next_value_reporting_level);
+    return true;
+}
+
+// Prepares `iter` to walk `h` in geometric steps: the first reporting level is
+// `value_units_first_bucket` and each later one is scaled by `log_base`.
+void hdr_iter_log_init(
+        struct hdr_iter* iter, const struct hdr_histogram* h,
+        int64_t value_units_first_bucket, double log_base)
+{
+    struct hdr_iter_log* state = &iter->specifics.log;
+    hdr_iter_init(iter, h);
+    iter->_next_fp = _log_iter_next;
+    state->log_base = log_base;
+    state->next_value_reporting_level = value_units_first_bucket;
+    state->next_value_reporting_level_lowest_equivalent = lowest_equivalent_value(h, value_units_first_bucket);
+    state->count_added_in_this_iteration_step = 0;
+}
+
+// Printing.
+
+// Returns the printf format used for the four-column header row: comma
+// separated for CSV, 12-wide columns followed by a blank line otherwise.
+static const char* format_head_string(format_type format)
+{
+    if (format == CSV)
+    {
+        return "%s,%s,%s,%s\n";
+    }
+
+    // CLASSIC and any unrecognised format share the fixed-width layout.
+    return "%12s %12s %12s %12s\n\n";
+}
+
+static const char CLASSIC_FOOTER[] = // summary block printed after the percentile rows when the format is CLASSIC
+    "#[Mean    = %12.3f, StdDeviation   = %12.3f]\n" // two doubles
+    "#[Max     = %12.3f, Total count    = %12" PRIu64 "]\n" // a double, then a 64-bit count
+    "#[Buckets = %12d, SubBuckets     = %12d]\n"; // two ints
+
+// Writes a percentile table for `h` to `stream`: a header row, one row per
+// percentile iteration step (values divided by `value_scale`) and, for the
+// CLASSIC format, a summary footer. Returns 0 on success, or EIO as soon as a
+// write to `stream` fails.
+int hdr_percentiles_print(
+        struct hdr_histogram* h, FILE* stream, int32_t ticks_per_half_distance,
+        double value_scale, format_type format)
+{
+    char line_format[25];
+    format_line_string(line_format, 25, h->significant_figures, format);
+    const char* head_format = format_head_string(format);
+
+    struct hdr_iter iter;
+    hdr_iter_percentile_init(&iter, h, ticks_per_half_distance);
+
+    if (fprintf(stream, head_format,
+            "Value", "Percentile", "TotalCount", "1/(1-Percentile)") < 0)
+    {
+        return EIO;
+    }
+
+    struct hdr_iter_percentiles * percentiles = &iter.specifics.percentiles;
+    while (hdr_iter_next(&iter))
+    {
+        double value               = iter.highest_equivalent_value / value_scale;
+        double percentile          = percentiles->percentile / 100.0;
+        double inverted_percentile = (1.0 / (1.0 - percentile));
+        // line_format prints the count with %d, so it must receive an int:
+        // passing the int64_t directly mismatches the conversion (undefined
+        // behaviour). Counts above INT_MAX cannot be represented by %d.
+        int    total_count         = (int) iter.cumulative_count;
+
+        if (fprintf(
+                stream, line_format, value, percentile, total_count, inverted_percentile) < 0)
+        {
+            return EIO;
+        }
+    }
+
+    if (CLASSIC == format)
+    {
+        double mean   = hdr_mean(h)   / value_scale;
+        double stddev = hdr_stddev(h) / value_scale;
+        double max    = hdr_max(h)    / value_scale;
+
+        if (fprintf(stream, CLASSIC_FOOTER, mean, stddev, max,
+                h->total_count, h->bucket_count, h->sub_bucket_count) < 0)
+        {
+            return EIO;
+        }
+    }
+
+    return 0;
+}
diff --git a/vendors/hdr_histogram/hdr_histogram.h b/vendors/hdr_histogram/hdr_histogram.h
new file mode 100644
index 0000000..14eebdd
--- /dev/null
+++ b/vendors/hdr_histogram/hdr_histogram.h
@@ -0,0 +1,435 @@
+/**
+ * hdr_histogram.h
+ * Written by Michael Barker and released to the public domain,
+ * as explained at http://creativecommons.org/publicdomain/zero/1.0/
+ *
+ * The source for the hdr_histogram utilises a few C99 constructs, specifically
+ * the use of stdint/stdbool and inline variable declaration.
+ */
+
+#ifndef HDR_HISTOGRAM_H
+#define HDR_HISTOGRAM_H 1
+
+#include <stdint.h>
+#include <stdbool.h>
+#include <stdio.h>
+
+struct hdr_histogram
+{
+    int64_t lowest_trackable_value;
+    int64_t highest_trackable_value;
+    int32_t unit_magnitude;
+    int32_t significant_figures;
+    int32_t sub_bucket_half_count_magnitude;
+    int32_t sub_bucket_half_count;
+    int64_t sub_bucket_mask;
+    int32_t sub_bucket_count;
+    int32_t bucket_count;
+    int64_t min_value;
+    int64_t max_value;
+    int32_t normalizing_index_offset;
+    double conversion_ratio;
+    int32_t counts_len;
+    int64_t total_count;
+    int64_t* counts;
+};
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Allocate the memory and initialise the hdr_histogram.
+ *
+ * Due to the size of the histogram being the result of some reasonably
+ * involved math on the input parameters, it is tricky to stack allocate.
+ * The histogram is allocated in a single contiguous block so it can be deleted via free,
+ * without any structure-specific destructor.
+ *
+ * @param lowest_trackable_value The smallest possible value to be put into the
+ * histogram.
+ * @param highest_trackable_value The largest possible value to be put into the
+ * histogram.
+ * @param significant_figures The level of precision for this histogram, i.e. the number
+ * of figures in a decimal number that will be maintained.  E.g. a value of 3 will mean
+ * the results from the histogram will be accurate up to the first three digits.  Must
+ * be a value between 1 and 5 (inclusive).
+ * @param result Output parameter to capture allocated histogram.
+ * @return 0 on success, EINVAL if lowest_trackable_value is < 1 or the
+ * significant_figure value is outside of the allowed range, ENOMEM if malloc
+ * failed.
+ */
+int hdr_init(
+    int64_t lowest_trackable_value,
+    int64_t highest_trackable_value,
+    int significant_figures,
+    struct hdr_histogram** result);
+
+/**
+ * Free the memory and close the hdr_histogram.
+ *
+ * @param h The histogram you want to close.
+ */
+void hdr_close(struct hdr_histogram* h);
+
+/**
+ * Allocate the memory and initialise the hdr_histogram.  This is the equivalent of calling
+ * hdr_init(1, highest_trackable_value, significant_figures, result);
+ *
+ * @deprecated use hdr_init.
+ */
+int hdr_alloc(int64_t highest_trackable_value, int significant_figures, struct hdr_histogram** result);
+
+
+/**
+ * Reset a histogram to zero - empty out a histogram and re-initialise it
+ *
+ * If you want to re-use an existing histogram, but reset everything back to zero, this
+ * is the routine to use.
+ *
+ * @param h The histogram you want to reset to empty.
+ *
+ */
+void hdr_reset(struct hdr_histogram* h);
+
+/**
+ * Get the memory size of the hdr_histogram.
+ *
+ * @param h "This" pointer
+ * @return The amount of memory used by the hdr_histogram in bytes
+ */
+size_t hdr_get_memory_size(struct hdr_histogram* h);
+
+/**
+ * Records a value in the histogram, will round this value off to a precision at or better
+ * than the significant_figure specified at construction time.
+ *
+ * @param h "This" pointer
+ * @param value Value to add to the histogram
+ * @return false if the value is larger than the highest_trackable_value and can't be recorded,
+ * true otherwise.
+ */
+bool hdr_record_value(struct hdr_histogram* h, int64_t value);
+
+/**
+ * Records count values in the histogram, will round this value off to a
+ * precision at or better than the significant_figure specified at construction
+ * time.
+ *
+ * @param h "This" pointer
+ * @param value Value to add to the histogram
+ * @param count Number of 'value's to add to the histogram
+ * @return false if any value is larger than the highest_trackable_value and can't be recorded,
+ * true otherwise.
+ */
+bool hdr_record_values(struct hdr_histogram* h, int64_t value, int64_t count);
+
+
+/**
+ * Record a value in the histogram and backfill based on an expected interval.
+ *
+ * Records a value in the histogram, will round this value off to a precision at or better
+ * than the significant_figure specified at construction time.  This is specifically used
+ * for recording latency.  If the value is larger than the expected_interval then the
+ * latency recording system has experienced co-ordinated omission.  This method fills in the
+ * values that would have occurred had the client providing the load not been blocked.
+ *
+ * @param h "This" pointer
+ * @param value Value to add to the histogram
+ * @param expected_interval The delay between recording values.
+ * @return false if the value is larger than the highest_trackable_value and can't be recorded,
+ * true otherwise.
+ */
+bool hdr_record_corrected_value(struct hdr_histogram* h, int64_t value, int64_t expected_interval);
+/**
+ * Record a value in the histogram 'count' times.  Applies the same correcting logic
+ * as 'hdr_record_corrected_value'.
+ *
+ * @param h "This" pointer
+ * @param value Value to add to the histogram
+ * @param count Number of 'value's to add to the histogram
+ * @param expected_interval The delay between recording values.
+ * @return false if the value is larger than the highest_trackable_value and can't be recorded,
+ * true otherwise.
+ */
+bool hdr_record_corrected_values(struct hdr_histogram* h, int64_t value, int64_t count, int64_t expected_interval);
+
+/**
+ * Adds all of the values from 'from' to 'this' histogram.  Will return the
+ * number of values that are dropped when copying.  Values will be dropped
+ * if they are outside of h.lowest_trackable_value and
+ * h.highest_trackable_value.
+ *
+ * @param h "This" pointer
+ * @param from Histogram to copy values from.
+ * @return The number of values dropped when copying.
+ */
+int64_t hdr_add(struct hdr_histogram* h, const struct hdr_histogram* from);
+
+/**
+ * Adds all of the values from 'from' to 'this' histogram.  Will return the
+ * number of values that are dropped when copying.  Values will be dropped
+ * if they are outside of h.lowest_trackable_value and
+ * h.highest_trackable_value.
+ *
+ * @param h "This" pointer
+ * @param from Histogram to copy values from.
+ * @return The number of values dropped when copying.
+ */
+int64_t hdr_add_while_correcting_for_coordinated_omission(
+    struct hdr_histogram* h, struct hdr_histogram* from, int64_t expected_interval);
+
+/**
+ * Get minimum value from the histogram.  Will return 2^63-1 if the histogram
+ * is empty.
+ *
+ * @param h "This" pointer
+ */
+int64_t hdr_min(const struct hdr_histogram* h);
+
+/**
+ * Get maximum value from the histogram.  Will return 0 if the histogram
+ * is empty.
+ *
+ * @param h "This" pointer
+ */
+int64_t hdr_max(const struct hdr_histogram* h);
+
+/**
+ * Get the value at a specific percentile.
+ *
+ * @param h "This" pointer.
+ * @param percentile The percentile to get the value for
+ */
+int64_t hdr_value_at_percentile(const struct hdr_histogram* h, double percentile);
+
+/**
+ * Gets the standard deviation for the values in the histogram.
+ *
+ * @param h "This" pointer
+ * @return The standard deviation
+ */
+double hdr_stddev(const struct hdr_histogram* h);
+
+/**
+ * Gets the mean for the values in the histogram.
+ *
+ * @param h "This" pointer
+ * @return The mean
+ */
+double hdr_mean(const struct hdr_histogram* h);
+
+/**
+ * Determine if two values are equivalent with the histogram's resolution.
+ * Where "equivalent" means that value samples recorded for any two
+ * equivalent values are counted in a common total count.
+ *
+ * @param h "This" pointer
+ * @param a first value to compare
+ * @param b second value to compare
+ * @return 'true' if values are equivalent with the histogram's resolution.
+ */
+bool hdr_values_are_equivalent(const struct hdr_histogram* h, int64_t a, int64_t b);
+
+/**
+ * Get the lowest value that is equivalent to the given value within the histogram's resolution.
+ * Where "equivalent" means that value samples recorded for any two
+ * equivalent values are counted in a common total count.
+ *
+ * @param h "This" pointer
+ * @param value The given value
+ * @return The lowest value that is equivalent to the given value within the histogram's resolution.
+ */
+int64_t hdr_lowest_equivalent_value(const struct hdr_histogram* h, int64_t value);
+
+/**
+ * Get the count of recorded values at a specific value
+ * (to within the histogram resolution at the value level).
+ *
+ * @param h "This" pointer
+ * @param value The value for which to provide the recorded count
+ * @return The total count of values recorded in the histogram within the value range that is
+ * {@literal >=} lowestEquivalentValue(<i>value</i>) and {@literal <=} highestEquivalentValue(<i>value</i>)
+ */
+int64_t hdr_count_at_value(const struct hdr_histogram* h, int64_t value);
+
+int64_t hdr_count_at_index(const struct hdr_histogram* h, int32_t index);
+
+int64_t hdr_value_at_index(const struct hdr_histogram* h, int32_t index);
+
+struct hdr_iter_percentiles
+{
+    bool seen_last_value;
+    int32_t ticks_per_half_distance;
+    double percentile_to_iterate_to;
+    double percentile;
+};
+
+struct hdr_iter_recorded
+{
+    int64_t count_added_in_this_iteration_step;
+};
+
+struct hdr_iter_linear
+{
+    int64_t value_units_per_bucket;
+    int64_t count_added_in_this_iteration_step;
+    int64_t next_value_reporting_level;
+    int64_t next_value_reporting_level_lowest_equivalent;
+};
+
+struct hdr_iter_log
+{
+    double log_base;
+    int64_t count_added_in_this_iteration_step;
+    int64_t next_value_reporting_level;
+    int64_t next_value_reporting_level_lowest_equivalent;
+};
+
+/**
+ * The basic iterator.  This is a generic structure
+ * that supports all of the types of iteration.  Use
+ * the appropriate initialiser to get the desired
+ * iteration.
+ *
+ * total_count is 64-bit to match hdr_histogram.total_count and cumulative_count.
+ */
+struct hdr_iter
+{
+    const struct hdr_histogram* h;
+    /** raw index into the counts array */
+    int32_t counts_index;
+    /** snapshot taken when the iterator is created; 64-bit so a copy of hdr_histogram.total_count cannot truncate */
+    int64_t total_count;
+    /** value directly from array for the current counts_index */
+    int64_t count;
+    /** sum of all of the counts up to and including the count at this index */
+    int64_t cumulative_count;
+    /** The current value based on counts_index */
+    int64_t value;
+    int64_t highest_equivalent_value;
+    int64_t lowest_equivalent_value;
+    int64_t median_equivalent_value;
+    int64_t value_iterated_from;
+    int64_t value_iterated_to;
+
+    union
+    {
+        struct hdr_iter_percentiles percentiles;
+        struct hdr_iter_recorded recorded;
+        struct hdr_iter_linear linear;
+        struct hdr_iter_log log;
+    } specifics;
+
+    bool (* _next_fp)(struct hdr_iter* iter);
+
+};
+
+/**
+ * Initialises the basic iterator.
+ *
+ * @param iter 'This' pointer
+ * @param h The histogram to iterate over
+ */
+void hdr_iter_init(struct hdr_iter* iter, const struct hdr_histogram* h);
+
+/**
+ * Initialise the iterator for use with percentiles.
+ */
+void hdr_iter_percentile_init(struct hdr_iter* iter, const struct hdr_histogram* h, int32_t ticks_per_half_distance);
+
+/**
+ * Initialise the iterator for use with recorded values.
+ */
+void hdr_iter_recorded_init(struct hdr_iter* iter, const struct hdr_histogram* h);
+
+/**
+ * Initialise the iterator for use with linear values.
+ */
+void hdr_iter_linear_init(
+    struct hdr_iter* iter,
+    const struct hdr_histogram* h,
+    int64_t value_units_per_bucket);
+
+/**
+ * Initialise the iterator for use with logarithmic values
+ */
+void hdr_iter_log_init(
+    struct hdr_iter* iter,
+    const struct hdr_histogram* h,
+    int64_t value_units_first_bucket,
+    double log_base);
+
+/**
+ * Iterate to the next value for the iterator.  If there are no more values
+ * available return false.
+ *
+ * @param iter 'This' pointer
+ * @return 'false' if there are no values remaining for this iterator.
+ */
+bool hdr_iter_next(struct hdr_iter* iter);
+
+typedef enum
+{
+    CLASSIC,
+    CSV
+} format_type;
+
+/**
+ * Print out a percentile based histogram to the supplied stream.  Note that
+ * this call will not flush the FILE, this is left up to the user.
+ *
+ * @param h 'This' pointer
+ * @param stream The FILE to write the output to
+ * @param ticks_per_half_distance The number of iteration steps per half-distance to 100%
+ * @param value_scale Scale the output values by this amount
+ * @param format Format to use, e.g. CSV.
+ * @return 0 on success, error code on failure.  EIO if an error occurs writing
+ * the output.
+ */
+int hdr_percentiles_print(
+    struct hdr_histogram* h, FILE* stream, int32_t ticks_per_half_distance,
+    double value_scale, format_type format);
+
+/**
+* Internal allocation methods, used by hdr_dbl_histogram.
+*/
+struct hdr_histogram_bucket_config
+{
+    int64_t lowest_trackable_value;
+    int64_t highest_trackable_value;
+    int64_t unit_magnitude;
+    int64_t significant_figures;
+    int32_t sub_bucket_half_count_magnitude;
+    int32_t sub_bucket_half_count;
+    int64_t sub_bucket_mask;
+    int32_t sub_bucket_count;
+    int32_t bucket_count;
+    int32_t counts_len;
+};
+
+int hdr_calculate_bucket_config(
+    int64_t lowest_trackable_value,
+    int64_t highest_trackable_value,
+    int significant_figures,
+    struct hdr_histogram_bucket_config* cfg);
+
+void hdr_init_preallocated(struct hdr_histogram* h, struct hdr_histogram_bucket_config* cfg);
+
+int64_t hdr_size_of_equivalent_value_range(const struct hdr_histogram* h, int64_t value);
+
+int64_t hdr_next_non_equivalent_value(const struct hdr_histogram* h, int64_t value);
+
+int64_t hdr_median_equivalent_value(const struct hdr_histogram* h, int64_t value);
+
+/**
+ * Used to reset counters after importing data manually into the histogram, used by the logging code
+ * and other custom serialisation tools.
+ */
+void hdr_reset_internal_counters(struct hdr_histogram* h);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/vendors/minheap/heap.c b/vendors/minheap/heap.c
new file mode 100644
index 0000000..9fb17c6
--- /dev/null
+++ b/vendors/minheap/heap.c
@@ -0,0 +1,134 @@
+#include "heap.h"
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <stdio.h>
+
+static void min_heap_shift_down_(min_heap_t* s, unsigned hole_index, void* node);
+static void min_heap_shift_up_(min_heap_t* s, unsigned hole_index, void *node);
+static void min_heap_shift_up_unconditional_(min_heap_t* s, unsigned hole_index, void* node);
+
+struct entry_data {
+    char *str;
+    size_t len;
+};
+/* Set up an empty heap for up to max_size nodes. Returns 0, or -1 on a bad size or OOM. */
+int init_heap(struct minheap *heap, int max_size, minheap_cmp cmp, minheap_getindex get, minheap_setindex set)
+{
+	assert(cmp);
+	if (max_size < 0)	/* would otherwise wrap to a huge size_t in the allocation */
+		return (-1);
+	heap->max_size = max_size;
+	heap->cur_size = 0;
+	heap->cmp = cmp;
+	heap->get = get;
+	heap->set = set;
+	heap->nodes = calloc((size_t)max_size, sizeof(*heap->nodes));	/* overflow-checked */
+	return heap->nodes ? (0) : (-1);
+}
+/* Return node's slot as reported by heap->get, asserting it is a live slot that holds node. */
+int get_node_index(struct minheap *heap, void *node)
+{
+	assert(heap->get);
+	int index = heap->get(node);
+    if (index < 0 || heap->cur_size <= index) { /* debug aid; assumes node is a heap_entry whose value is a struct entry_data */
+        printf("heap->cur_size: %d, index: %d\n", heap->cur_size, index);
+        printf("content: %s\n", ((struct entry_data*)((heap_entry *)node)->value)->str);
+    }
+	assert(index >= 0 && heap->cur_size > index);	/* a negative slot would index nodes[] out of bounds */
+	assert(heap->nodes[index] == node);
+	return index;
+}
+/* Insert node; returns 0 on success or -1 when the heap already holds max_size nodes. */
+int push_heap(struct minheap *heap, void *node)
+{
+	const int full = !(heap->cur_size < heap->max_size);
+	if (!full)
+		min_heap_shift_up_(heap, heap->cur_size++, node);
+	return full ? (-1) : (0);
+}
+void *pop_heap(struct minheap *heap)
+{
+    /* Remove and return the node at the root, or NULL if the heap is empty. */
+    void *root;
+    if (heap->cur_size <= 0)
+        return NULL;
+
+    /* Save the minimum value, then sift the former last node down from the root. */
+    root = heap->nodes[0];
+    min_heap_shift_down_(heap, 0u, heap->nodes[--heap->cur_size]);
+    return root;
+}
+int adjust_heap_node(struct minheap *heap, void *node)
+{
+	/* Re-seat node upward or downward from its current slot; always returns 0. */
+	const int pos = get_node_index(heap, node);
+	const int moves_up = pos > 0 && heap->cmp(node, heap->nodes[(pos - 1) / 2]);
+	if (moves_up)
+		min_heap_shift_up_unconditional_(heap, pos, node);
+	else
+		min_heap_shift_down_(heap, pos, node);
+	return 0;
+}
+/* Remove node from the heap; the former last element is re-seated in its slot. Returns 0. */
+int erase_heap_node(struct minheap* heap, void *node)
+{
+	int index = get_node_index(heap, node);
+	void *last = heap->nodes[--heap->cur_size];
+	if (last == node) return (0);	/* node was the tail: sifting it could put it back in the heap */
+	if (index > 0 && heap->cmp(last, heap->nodes[(index - 1) / 2]))
+		min_heap_shift_up_unconditional_(heap, index, last);
+	else
+		min_heap_shift_down_(heap, index, last);
+	return (0);
+}
+/* Sift node up from hole_index; the first parent is moved down without a compare, so hole_index must be > 0. */
+static void min_heap_shift_up_unconditional_(min_heap_t* s, unsigned hole_index, void* node)
+{
+	assert(s->set);
+    unsigned parent = (hole_index - 1) / 2;
+    do
+    {
+		s->nodes[hole_index] = s->nodes[parent]; /* pull the parent down into the hole */
+		s->set(hole_index, s->nodes[hole_index]);
+		hole_index = parent;
+		parent = (hole_index - 1) / 2;
+    } while (hole_index && s->cmp(node, s->nodes[parent]));
+    s->nodes[hole_index] = node; /* final resting slot for node */
+	s->set(hole_index, s->nodes[hole_index]);	
+}
+/* Sift node up from hole_index while cmp(node, parent) holds, then store it and report its slot via set. */
+static void min_heap_shift_up_(min_heap_t* s, unsigned hole_index, void *node)
+{
+	assert(s->set);	
+    unsigned parent = (hole_index - 1) / 2;
+    while (hole_index && s->cmp(node, s->nodes[parent])) /* hole_index is tested first, so parent is unused at the root */
+	{
+		s->nodes[hole_index] = s->nodes[parent];
+		s->set(hole_index, s->nodes[hole_index]);				
+		hole_index = parent;
+		parent = (hole_index - 1) / 2;
+	}
+	s->nodes[hole_index] = node;
+	s->set(hole_index, s->nodes[hole_index]);		
+}
+/* Sift node down from hole_index, promoting the cmp-preferred child until cmp(node, child) holds or no child remains. */
+static void min_heap_shift_down_(min_heap_t* s, unsigned hole_index, void* node)
+{
+	assert(s->set);	
+    unsigned min_child = 2 * (hole_index + 1); /* starts as the right child's slot */
+    while (min_child <= s->cur_size)
+	{
+		if (min_child == s->cur_size /* no right child: only the left one exists */
+			|| s->cmp(s->nodes[min_child - 1], s->nodes[min_child]))
+			--min_child;
+		if (s->cmp(node, s->nodes[min_child]))
+			break;
+		s->nodes[hole_index] = s->nodes[min_child];
+		s->set(hole_index, s->nodes[hole_index]);				
+		hole_index = min_child;
+		min_child = 2 * (hole_index + 1);
+	}
+	s->nodes[hole_index] = node;
+	s->set(hole_index, s->nodes[hole_index]);			
+}
diff --git a/vendors/minheap/heap.h b/vendors/minheap/heap.h
new file mode 100644
index 0000000..87c3159
--- /dev/null
+++ b/vendors/minheap/heap.h
@@ -0,0 +1,35 @@
+#ifndef MINHEAP_H
+#define MINHEAP_H
+
+#include <stdbool.h>
+
+typedef bool (*minheap_cmp)(void *a, void *b);
+typedef int (*minheap_getindex)(void *a);
+typedef void (*minheap_setindex)(int index, void *a);	
+
+typedef struct minheap	/* array-backed heap of caller-owned node pointers */
+{
+	int max_size;	/* capacity of nodes[], fixed by init_heap */
+	int cur_size;	/* number of nodes currently stored */
+	void **nodes;	/* node pointers; pop_heap returns nodes[0] */
+	minheap_cmp cmp;	/* cmp(a, b) true => a is sifted above b */
+	minheap_getindex get;	/* returns the slot index recorded for a node */
+	minheap_setindex set;	/* called with (slot, node) whenever a node is placed */
+} min_heap_t;
+
+typedef struct heap_entry {
+	int index;
+    void* key;   // Key for this entry
+    void* value; // Value for this entry
+} heap_entry;
+
+typedef min_heap_t heap;
+
+int init_heap(struct minheap *heap, int max_size, minheap_cmp cmp, minheap_getindex get, minheap_setindex set);
+int get_node_index(struct minheap *heap, void *node);
+
+int push_heap(struct minheap *heap, void *node);
+void *pop_heap(struct minheap *heap);
+int adjust_heap_node(struct minheap *heap, void *node);
+int erase_heap_node(struct minheap* heap, void *node);
+#endif /* MINHEAP_H */
diff --git a/vendors/mpack/mpack.c b/vendors/mpack/mpack.c
new file mode 100644
index 0000000..4f0dab4
--- /dev/null
+++ b/vendors/mpack/mpack.c
@@ -0,0 +1,7304 @@
+/**
+ * The MIT License (MIT)
+ * 
+ * Copyright (c) 2015-2021 Nicholas Fraser and the MPack authors
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ * 
+ */
+
+/*
+ * This is the MPack 1.1.1 amalgamation package.
+ *
+ * http://github.com/ludocode/mpack
+ */
+
+#define MPACK_INTERNAL 1
+#define MPACK_EMIT_INLINE_DEFS 1
+
+#include "mpack.h"
+
+
+/* mpack/mpack-platform.c.c */
+
+
+// We define MPACK_EMIT_INLINE_DEFS and include mpack.h to emit
+// standalone definitions of all (non-static) inline functions in MPack.
+
+#define MPACK_INTERNAL 1
+#define MPACK_EMIT_INLINE_DEFS 1
+
+/* #include "mpack-platform.h" */
+/* #include "mpack.h" */
+
+MPACK_SILENCE_WARNINGS_BEGIN
+
+#if MPACK_DEBUG
+
+#if MPACK_STDIO
+void mpack_assert_fail_format(const char* format, ...) {
+    char buffer[512];
+    va_list args;
+    va_start(args, format);
+    vsnprintf(buffer, sizeof(buffer), format, args);
+    va_end(args);
+    buffer[sizeof(buffer) - 1] = 0;
+    mpack_assert_fail_wrapper(buffer);
+}
+
+void mpack_break_hit_format(const char* format, ...) {
+    char buffer[512];
+    va_list args;
+    va_start(args, format);
+    vsnprintf(buffer, sizeof(buffer), format, args);
+    va_end(args);
+    buffer[sizeof(buffer) - 1] = 0;
+    mpack_break_hit(buffer);
+}
+#endif
+
+#if !MPACK_CUSTOM_ASSERT
+void mpack_assert_fail(const char* message) {
+    MPACK_UNUSED(message);
+
+    #if MPACK_STDIO
+    fprintf(stderr, "%s\n", message);
+    #endif
+}
+#endif
+
+// We split the assert failure from the wrapper so that a
+// custom assert function can return.
+void mpack_assert_fail_wrapper(const char* message) {
+
+    #ifdef MPACK_GCOV
+    // gcov marks even __builtin_unreachable() as an uncovered line. this
+    // silences it.
+    (mpack_assert_fail(message), __builtin_unreachable());
+
+    #else
+    mpack_assert_fail(message);
+
+    // mpack_assert_fail() is not supposed to return. in case it does, we
+    // abort.
+
+    #if !MPACK_NO_BUILTINS
+    #if defined(__GNUC__) || defined(__clang__)
+    __builtin_trap();
+    #elif defined(WIN32)
+    __debugbreak();
+    #endif
+    #endif
+
+    #if (defined(__GNUC__) || defined(__clang__)) && !MPACK_NO_BUILTINS
+    __builtin_abort();
+    #elif MPACK_STDLIB
+    abort();
+    #endif
+
+    MPACK_UNREACHABLE;
+    #endif
+}
+
+#if !MPACK_CUSTOM_BREAK
+
+// If we have a custom assert handler, break wraps it by default.
+// This allows users of MPack to only implement mpack_assert_fail() without
+// having to worry about the difference between assert and break.
+//
+// MPACK_CUSTOM_BREAK is available to define a separate break handler
+// (which is needed by the unit test suite), but this is not offered in
+// mpack-config.h for simplicity.
+
+#if MPACK_CUSTOM_ASSERT
+void mpack_break_hit(const char* message) {
+    mpack_assert_fail_wrapper(message);
+}
+#else
+void mpack_break_hit(const char* message) {
+    // Default break handler: print the message (if stdio is enabled), then trap or abort.
+    MPACK_UNUSED(message);
+    #if MPACK_STDIO
+    fprintf(stderr, "%s\n", message);
+    #endif
+
+    // Parenthesized so that MPACK_NO_BUILTINS also disables the builtin on GCC/Clang.
+    #if (defined(__GNUC__) || defined(__clang__)) && !MPACK_NO_BUILTINS
+    __builtin_trap();
+    #elif defined(WIN32) && !MPACK_NO_BUILTINS
+    __debugbreak();
+    #elif MPACK_STDLIB
+    abort();
+    #endif
+}
+#endif
+
+#endif
+
+#endif
+
+
+
+// The below are adapted from the C wikibook:
+//     https://en.wikibooks.org/wiki/C_Programming/Strings
+
+#ifndef mpack_memcmp
+int mpack_memcmp(const void* s1, const void* s2, size_t n) {
+    // Compare n bytes as unsigned char; returns -1, 0 or +1 at the first difference.
+    const unsigned char* lhs = (const unsigned char*)s1;
+    const unsigned char* rhs = (const unsigned char*)s2;
+    size_t i;
+    for (i = 0; i < n; ++i) {
+        if (lhs[i] != rhs[i])
+            return (lhs[i] < rhs[i]) ? -1 : +1;
+    }
+    return 0;
+}
+#endif
+
+#ifndef mpack_memcpy
+void* mpack_memcpy(void* MPACK_RESTRICT s1, const void* MPACK_RESTRICT s2, size_t n) {
+    char* MPACK_RESTRICT dst = (char *)s1;
+    const char* MPACK_RESTRICT src = (const char *)s2;
+    while (n-- != 0)
+        *dst++ = *src++;
+    return s1;
+}
+#endif
+
+#ifndef mpack_memmove
+void* mpack_memmove(void* s1, const void* s2, size_t n) {
+    // Overlap-safe copy: go backwards only when dst starts inside [src, src + n).
+    char* dst = (char*)s1;
+    const char* src = (const char*)s2;
+    size_t i;
+    if (src < dst && dst < src + n)
+        for (i = n; i != 0; --i)
+            dst[i - 1] = src[i - 1];
+    else
+        for (i = 0; i != n; ++i)
+            dst[i] = src[i];
+    return s1;
+}
+#endif
+
+#ifndef mpack_memset
+void* mpack_memset(void* s, int c, size_t n) {
+    unsigned char* const bytes = (unsigned char*)s; // first n bytes become (unsigned char)c
+    size_t i;
+    for (i = 0; i < n; ++i)
+        bytes[i] = (unsigned char)c;
+    return s;
+}
+#endif
+
+#ifndef mpack_strlen
+size_t mpack_strlen(const char* s) {
+    size_t len = 0; // bytes seen before the terminating NUL
+    while (s[len] != '\0')
+        ++len;
+    return len;
+}
+#endif
+
+
+
+#if defined(MPACK_MALLOC) && !defined(MPACK_REALLOC)
+void* mpack_realloc(void* old_ptr, size_t used_size, size_t new_size) {
+    // Fallback for allocators without realloc. On failure returns NULL, old_ptr stays valid.
+    void* new_ptr = NULL;
+    if (new_size != 0) {
+        new_ptr = MPACK_MALLOC(new_size);
+        if (new_ptr == NULL)
+            return NULL;
+        // Never read from NULL or write past the end of a smaller new block.
+        if (old_ptr != NULL && used_size != 0)
+            mpack_memcpy(new_ptr, old_ptr, (used_size < new_size) ? used_size : new_size);
+    }
+    if (old_ptr != NULL)
+        MPACK_FREE(old_ptr);
+    return new_ptr;
+}
+#endif
+
+MPACK_SILENCE_WARNINGS_END
+
+/* mpack/mpack-common.c.c */
+
+#define MPACK_INTERNAL 1
+
+/* #include "mpack-common.h" */
+
+MPACK_SILENCE_WARNINGS_BEGIN
+
+const char* mpack_error_to_string(mpack_error_t error) {
+    #if MPACK_STRINGS
+    switch (error) {
+        #define MPACK_ERROR_STRING_CASE(e) case e: return #e
+        MPACK_ERROR_STRING_CASE(mpack_ok);
+        MPACK_ERROR_STRING_CASE(mpack_error_io);
+        MPACK_ERROR_STRING_CASE(mpack_error_invalid);
+        MPACK_ERROR_STRING_CASE(mpack_error_unsupported);
+        MPACK_ERROR_STRING_CASE(mpack_error_type);
+        MPACK_ERROR_STRING_CASE(mpack_error_too_big);
+        MPACK_ERROR_STRING_CASE(mpack_error_memory);
+        MPACK_ERROR_STRING_CASE(mpack_error_bug);
+        MPACK_ERROR_STRING_CASE(mpack_error_data);
+        MPACK_ERROR_STRING_CASE(mpack_error_eof);
+        #undef MPACK_ERROR_STRING_CASE
+    }
+    mpack_assert(0, "unrecognized error %i", (int)error);
+    return "(unknown mpack_error_t)";
+    #else
+    MPACK_UNUSED(error);
+    return "";
+    #endif
+}
+
+const char* mpack_type_to_string(mpack_type_t type) {
+    #if MPACK_STRINGS
+    switch (type) {
+        #define MPACK_TYPE_STRING_CASE(e) case e: return #e
+        MPACK_TYPE_STRING_CASE(mpack_type_missing);
+        MPACK_TYPE_STRING_CASE(mpack_type_nil);
+        MPACK_TYPE_STRING_CASE(mpack_type_bool);
+        MPACK_TYPE_STRING_CASE(mpack_type_float);
+        MPACK_TYPE_STRING_CASE(mpack_type_double);
+        MPACK_TYPE_STRING_CASE(mpack_type_int);
+        MPACK_TYPE_STRING_CASE(mpack_type_uint);
+        MPACK_TYPE_STRING_CASE(mpack_type_str);
+        MPACK_TYPE_STRING_CASE(mpack_type_bin);
+        MPACK_TYPE_STRING_CASE(mpack_type_array);
+        MPACK_TYPE_STRING_CASE(mpack_type_map);
+        #if MPACK_EXTENSIONS
+        MPACK_TYPE_STRING_CASE(mpack_type_ext);
+        #endif
+        #undef MPACK_TYPE_STRING_CASE
+    }
+    mpack_assert(0, "unrecognized type %i", (int)type);
+    return "(unknown mpack_type_t)";
+    #else
+    MPACK_UNUSED(type);
+    return "";
+    #endif
+}
+
+int mpack_tag_cmp(mpack_tag_t left, mpack_tag_t right) {
+
+    // positive numbers may be stored as int; convert to uint
+    if (left.type == mpack_type_int && left.v.i >= 0) {
+        left.type = mpack_type_uint;
+        left.v.u = (uint64_t)left.v.i;
+    }
+    if (right.type == mpack_type_int && right.v.i >= 0) {
+        right.type = mpack_type_uint;
+        right.v.u = (uint64_t)right.v.i;
+    }
+
+    if (left.type != right.type)
+        return ((int)left.type < (int)right.type) ? -1 : 1;
+
+    switch (left.type) {
+        case mpack_type_missing: // fallthrough
+        case mpack_type_nil:
+            return 0;
+
+        case mpack_type_bool:
+            return (int)left.v.b - (int)right.v.b;
+
+        case mpack_type_int:
+            if (left.v.i == right.v.i)
+                return 0;
+            return (left.v.i < right.v.i) ? -1 : 1;
+
+        case mpack_type_uint:
+            if (left.v.u == right.v.u)
+                return 0;
+            return (left.v.u < right.v.u) ? -1 : 1;
+
+        case mpack_type_array:
+        case mpack_type_map:
+            if (left.v.n == right.v.n)
+                return 0;
+            return (left.v.n < right.v.n) ? -1 : 1;
+
+        case mpack_type_str:
+        case mpack_type_bin:
+            if (left.v.l == right.v.l)
+                return 0;
+            return (left.v.l < right.v.l) ? -1 : 1;
+
+        #if MPACK_EXTENSIONS
+        case mpack_type_ext:
+            if (left.exttype == right.exttype) {
+                if (left.v.l == right.v.l)
+                    return 0;
+                return (left.v.l < right.v.l) ? -1 : 1;
+            }
+            return (int)left.exttype - (int)right.exttype;
+        #endif
+
+        // floats should not normally be compared for equality. we compare
+        // with memcmp() to silence compiler warnings, but this will return
+        // equal if both are NaNs with the same representation (though we may
+        // want this, for instance if you are for some bizarre reason using
+        // floats as map keys.) i'm not sure what the right thing to
+        // do is here. check for NaN first? always return false if the type
+        // is float? use operator== and pragmas to silence compiler warning?
+        // please send me your suggestions.
+        // note also that we don't convert floats to doubles, so when this is
+        // used for ordering purposes, all floats are ordered before all
+        // doubles.
+        case mpack_type_float:
+            return mpack_memcmp(&left.v.f, &right.v.f, sizeof(left.v.f));
+        case mpack_type_double:
+            return mpack_memcmp(&left.v.d, &right.v.d, sizeof(left.v.d));
+    }
+
+    mpack_assert(0, "unrecognized type %i", (int)left.type);
+    return false;
+}
+
+#if MPACK_DEBUG && MPACK_STDIO
+static char mpack_hex_char(uint8_t hex_value) {
+    // Older compilers (e.g. GCC 4.4.7) promote the result of this ternary to
+    // int and warn under -Wconversion, so we have to cast it back to char.
+    return (char)((hex_value < 10) ? (char)('0' + hex_value) : (char)('a' + (hex_value - 10)));
+}
+
+// Finishes a bin/ext debug description that the caller has already started.
+//
+// tag:           the tag being described; its byte count decides whether any
+//                data preview is printed at all.
+// string_length: number of characters the caller already wrote at the start
+//                of buffer (the callers pass their snprintf result).
+// buffer:        start of the output buffer.
+// buffer_size:   total size of the output buffer.
+// prefix:        leading bytes of the tag's data to preview as hex.
+// prefix_size:   number of bytes available at prefix.
+//
+// Appends ">" when the tag has no bytes; otherwise appends ": ", up to
+// MPACK_PRINT_BYTE_COUNT bytes of prefix as lowercase hex, and then ">" or
+// "...>" depending on whether the tag holds more bytes than were printed.
+static void mpack_tag_debug_complete_bin_ext(mpack_tag_t tag, size_t string_length, char* buffer, size_t buffer_size,
+        const char* prefix, size_t prefix_size)
+{
+    // If at any point in this function we run out of space in the buffer, we
+    // bail out. The outer tag print wrapper will make sure we have a
+    // null-terminator.
+
+    // Nothing was written, or the caller's text already filled the buffer.
+    if (string_length == 0 || string_length >= buffer_size)
+        return;
+    // Skip past the text the caller wrote; from here on buffer/buffer_size
+    // describe only the remaining free space.
+    buffer += string_length;
+    buffer_size -= string_length;
+
+    size_t total = mpack_tag_bytes(&tag);
+    if (total == 0) {
+        // No data to preview, so just close the description.
+        strncpy(buffer, ">", buffer_size);
+        return;
+    }
+
+    // The separator is copied first (bounded by the remaining space) and the
+    // space check happens afterwards, before advancing past it.
+    strncpy(buffer, ": ", buffer_size);
+    if (buffer_size < 2)
+        return;
+    buffer += 2;
+    buffer_size -= 2;
+
+    size_t hex_bytes = 0;
+    size_t i;
+    // Each byte takes two hex characters; requiring more than two bytes of
+    // space leaves at least one byte free after every pair.
+    for (i = 0; i < MPACK_PRINT_BYTE_COUNT && i < prefix_size && buffer_size > 2; ++i) {
+        uint8_t byte = (uint8_t)prefix[i];
+        buffer[0] = mpack_hex_char((uint8_t)(byte >> 4));
+        buffer[1] = mpack_hex_char((uint8_t)(byte & 0xfu));
+        buffer += 2;
+        buffer_size -= 2;
+        ++hex_bytes;
+    }
+
+    // Close the description, adding an ellipsis if the tag has more bytes
+    // than were printed as hex.
+    if (buffer_size != 0)
+        mpack_snprintf(buffer, buffer_size, "%s>", (total > hex_bytes) ? "..." : "");
+}
+
+// Writes the opening "<binary data of length N" text for a bin tag and then
+// lets mpack_tag_debug_complete_bin_ext() append the data preview and the
+// closing '>'.
+static void mpack_tag_debug_pseudo_json_bin(mpack_tag_t tag, char* buffer, size_t buffer_size,
+        const char* prefix, size_t prefix_size)
+{
+    mpack_assert(mpack_tag_type(&tag) == mpack_type_bin);
+
+    // The snprintf result is passed on as the amount of text already present
+    // in the buffer.
+    size_t header_length = (size_t)mpack_snprintf(buffer, buffer_size,
+            "<binary data of length %" PRIu32 "", tag.v.l);
+
+    mpack_tag_debug_complete_bin_ext(tag, header_length, buffer, buffer_size, prefix, prefix_size);
+}
+
+#if MPACK_EXTENSIONS
+// Writes the opening "<ext data of type T and length N" text for an ext tag
+// and then lets mpack_tag_debug_complete_bin_ext() append the data preview
+// and the closing '>'.
+static void mpack_tag_debug_pseudo_json_ext(mpack_tag_t tag, char* buffer, size_t buffer_size,
+        const char* prefix, size_t prefix_size)
+{
+    mpack_assert(mpack_tag_type(&tag) == mpack_type_ext);
+
+    // The snprintf result is passed on as the amount of text already present
+    // in the buffer.
+    size_t header_length = (size_t)mpack_snprintf(buffer, buffer_size,
+            "<ext data of type %i and length %" PRIu32 "",
+            mpack_tag_ext_exttype(&tag), mpack_tag_ext_length(&tag));
+
+    mpack_tag_debug_complete_bin_ext(tag, header_length, buffer, buffer_size, prefix, prefix_size);
+}
+#endif
+
+// Formats a pseudo-JSON rendering of tag into buffer, dispatching on the tag
+// type. Scalars are printed as values; str, array and map are printed as
+// angle-bracketed summaries of their size; bin and ext are delegated to
+// helpers that can also print a hex preview taken from prefix.
+//
+// This function does not guarantee null-termination on its own; the public
+// wrapper mpack_tag_debug_pseudo_json() takes care of that.
+static void mpack_tag_debug_pseudo_json_impl(mpack_tag_t tag, char* buffer, size_t buffer_size,
+        const char* prefix, size_t prefix_size)
+{
+    switch (tag.type) {
+        case mpack_type_missing:
+            mpack_snprintf(buffer, buffer_size, "<missing!>");
+            return;
+        case mpack_type_nil:
+            mpack_snprintf(buffer, buffer_size, "null");
+            return;
+        case mpack_type_bool:
+            mpack_snprintf(buffer, buffer_size, tag.v.b ? "true" : "false");
+            return;
+        case mpack_type_int:
+            mpack_snprintf(buffer, buffer_size, "%" PRIi64, tag.v.i);
+            return;
+        case mpack_type_uint:
+            mpack_snprintf(buffer, buffer_size, "%" PRIu64, tag.v.u);
+            return;
+        case mpack_type_float:
+            // Without MPACK_FLOAT only a placeholder is printed.
+            #if MPACK_FLOAT
+            mpack_snprintf(buffer, buffer_size, "%f", tag.v.f);
+            #else
+            mpack_snprintf(buffer, buffer_size, "<float>");
+            #endif
+            return;
+        case mpack_type_double:
+            // Without MPACK_DOUBLE only a placeholder is printed.
+            #if MPACK_DOUBLE
+            mpack_snprintf(buffer, buffer_size, "%f", tag.v.d);
+            #else
+            mpack_snprintf(buffer, buffer_size, "<double>");
+            #endif
+            return;
+
+        case mpack_type_str:
+            mpack_snprintf(buffer, buffer_size, "<string of %" PRIu32 " bytes>", tag.v.l);
+            return;
+        case mpack_type_bin:
+            mpack_tag_debug_pseudo_json_bin(tag, buffer, buffer_size, prefix, prefix_size);
+            return;
+        #if MPACK_EXTENSIONS
+        case mpack_type_ext:
+            mpack_tag_debug_pseudo_json_ext(tag, buffer, buffer_size, prefix, prefix_size);
+            return;
+        #endif
+
+        case mpack_type_array:
+            mpack_snprintf(buffer, buffer_size, "<array of %" PRIu32 " elements>", tag.v.n);
+            return;
+        case mpack_type_map:
+            mpack_snprintf(buffer, buffer_size, "<map of %" PRIu32 " key-value pairs>", tag.v.n);
+            return;
+    }
+
+    // Only reached when tag.type matched none of the cases above.
+    mpack_snprintf(buffer, buffer_size, "<unknown!>");
+}
+
+// Writes a pseudo-JSON rendering of tag into buffer. The result is always
+// null-terminated; buffer_size must be at least 1.
+//
+// prefix/prefix_size supply leading data bytes that the bin and ext
+// formatters may preview as hex.
+void mpack_tag_debug_pseudo_json(mpack_tag_t tag, char* buffer, size_t buffer_size,
+        const char* prefix, size_t prefix_size)
+{
+    mpack_assert(buffer_size > 0, "buffer size cannot be zero!");
+
+    // Begin with an empty string in case the formatter writes nothing.
+    *buffer = 0;
+    mpack_tag_debug_pseudo_json_impl(tag, buffer, buffer_size, prefix, prefix_size);
+
+    // Terminate by hand as well: the snprintf() in use may not null-terminate
+    // when the text does not fit.
+    size_t last = buffer_size - 1;
+    buffer[last] = 0;
+}
+
+// Formats a short human-readable description of tag into buffer, dispatching
+// on the tag type: the type name, plus the value for scalars or the
+// byte/element count for str, bin, ext, array and map.
+//
+// This function does not guarantee null-termination on its own; the public
+// wrapper mpack_tag_debug_describe() takes care of that.
+static void mpack_tag_debug_describe_impl(mpack_tag_t tag, char* buffer, size_t buffer_size) {
+    switch (tag.type) {
+        case mpack_type_missing:
+            mpack_snprintf(buffer, buffer_size, "missing");
+            return;
+        case mpack_type_nil:
+            mpack_snprintf(buffer, buffer_size, "nil");
+            return;
+        case mpack_type_bool:
+            mpack_snprintf(buffer, buffer_size, tag.v.b ? "true" : "false");
+            return;
+        case mpack_type_int:
+            mpack_snprintf(buffer, buffer_size, "int %" PRIi64, tag.v.i);
+            return;
+        case mpack_type_uint:
+            mpack_snprintf(buffer, buffer_size, "uint %" PRIu64, tag.v.u);
+            return;
+        case mpack_type_float:
+            // Without MPACK_FLOAT only the type name is printed.
+            #if MPACK_FLOAT
+            mpack_snprintf(buffer, buffer_size, "float %f", tag.v.f);
+            #else
+            mpack_snprintf(buffer, buffer_size, "float");
+            #endif
+            return;
+        case mpack_type_double:
+            // Without MPACK_DOUBLE only the type name is printed.
+            #if MPACK_DOUBLE
+            mpack_snprintf(buffer, buffer_size, "double %f", tag.v.d);
+            #else
+            mpack_snprintf(buffer, buffer_size, "double");
+            #endif
+            return;
+        case mpack_type_str:
+            mpack_snprintf(buffer, buffer_size, "str of %" PRIu32 " bytes", tag.v.l);
+            return;
+        case mpack_type_bin:
+            mpack_snprintf(buffer, buffer_size, "bin of %" PRIu32 " bytes", tag.v.l);
+            return;
+        #if MPACK_EXTENSIONS
+        case mpack_type_ext:
+            mpack_snprintf(buffer, buffer_size, "ext of type %i, %" PRIu32 " bytes",
+                    mpack_tag_ext_exttype(&tag), mpack_tag_ext_length(&tag));
+            return;
+        #endif
+        case mpack_type_array:
+            mpack_snprintf(buffer, buffer_size, "array of %" PRIu32 " elements", tag.v.n);
+            return;
+        case mpack_type_map:
+            mpack_snprintf(buffer, buffer_size, "map of %" PRIu32 " key-value pairs", tag.v.n);
+            return;
+    }
+
+    // Only reached when tag.type matched none of the cases above.
+    mpack_snprintf(buffer, buffer_size, "unknown!");
+}
+
+// Writes a short human-readable description of tag into buffer. The result
+// is always null-terminated; buffer_size must be at least 1.
+void mpack_tag_debug_describe(mpack_tag_t tag, char* buffer, size_t buffer_size) {
+    mpack_assert(buffer_size > 0, "buffer size cannot be zero!");
+
+    // Begin with an empty string in case the formatter writes nothing.
+    *buffer = 0;
+    mpack_tag_debug_describe_impl(tag, buffer, buffer_size);
+
+    // Terminate by hand as well: the snprintf() in use may not null-terminate
+    // when the text does not fit.
+    size_t last = buffer_size - 1;
+    buffer[last] = 0;
+}
+#endif
+
+
+
+#if MPACK_READ_TRACKING || MPACK_WRITE_TRACKING
+
+#ifndef MPACK_TRACKING_INITIAL_CAPACITY
+// seems like a reasonable number. we grow by doubling, and it only
+// needs to be as long as the maximum depth of the message.
+#define MPACK_TRACKING_INITIAL_CAPACITY 8
+#endif
+
+// Prepares an empty tracking stack with room for
+// MPACK_TRACKING_INITIAL_CAPACITY elements.
+//
+// Returns mpack_ok, or mpack_error_memory if the allocation fails.
+mpack_error_t mpack_track_init(mpack_track_t* track) {
+    track->count = 0;
+    track->capacity = MPACK_TRACKING_INITIAL_CAPACITY;
+    track->elements = (mpack_track_element_t*)MPACK_MALLOC(sizeof(mpack_track_element_t) * track->capacity);
+
+    return (track->elements != NULL) ? mpack_ok : mpack_error_memory;
+}
+
+// Doubles the capacity of a full tracking stack.
+//
+// Returns mpack_ok, or mpack_error_memory if the reallocation fails (in which
+// case the track is left unchanged).
+mpack_error_t mpack_track_grow(mpack_track_t* track) {
+    mpack_assert(track->elements, "null track elements!");
+    mpack_assert(track->count == track->capacity, "incorrect growing?");
+
+    size_t doubled = track->capacity * 2;
+    size_t used_bytes = sizeof(mpack_track_element_t) * track->count;
+    size_t wanted_bytes = sizeof(mpack_track_element_t) * doubled;
+
+    mpack_track_element_t* grown =
+            (mpack_track_element_t*)mpack_realloc(track->elements, used_bytes, wanted_bytes);
+    if (grown == NULL)
+        return mpack_error_memory;
+
+    track->capacity = doubled;
+    track->elements = grown;
+    return mpack_ok;
+}
+
+// Opens a new tracked element of the given type that expects count children
+// (or bytes) before it may be closed.
+//
+// Returns mpack_ok, or the error from mpack_track_grow() if the stack was
+// full and could not be enlarged.
+mpack_error_t mpack_track_push(mpack_track_t* track, mpack_type_t type, uint32_t count) {
+    mpack_assert(track->elements, "null track elements!");
+    mpack_log("track pushing %s count %i\n", mpack_type_to_string(type), (int)count);
+
+    // make room first; growing may move the element array
+    if (track->count == track->capacity) {
+        mpack_error_t grow_error = mpack_track_grow(track);
+        if (grow_error != mpack_ok)
+            return grow_error;
+    }
+
+    // fill in the new top of the stack
+    mpack_track_element_t* slot = &track->elements[track->count];
+    slot->type = type;
+    slot->left = count;
+    slot->builder = false;
+    slot->key_needs_value = false;
+
+    ++track->count;
+    return mpack_ok;
+}
+
+// Opens a new tracked builder element of the given type. Builder elements
+// start with nothing left to count and are flagged so that only a builder
+// pop may close them.
+//
+// Returns mpack_ok, or the error from mpack_track_grow() if the stack was
+// full and could not be enlarged.
+//
+// TODO dedupe this
+mpack_error_t mpack_track_push_builder(mpack_track_t* track, mpack_type_t type) {
+    mpack_assert(track->elements, "null track elements!");
+    mpack_log("track pushing %s builder\n", mpack_type_to_string(type));
+
+    // make room first; growing may move the element array
+    if (track->count == track->capacity) {
+        mpack_error_t grow_error = mpack_track_grow(track);
+        if (grow_error != mpack_ok)
+            return grow_error;
+    }
+
+    // fill in the new top of the stack
+    mpack_track_element_t* slot = &track->elements[track->count];
+    slot->type = type;
+    slot->left = 0;
+    slot->builder = true;
+    slot->key_needs_value = false;
+
+    ++track->count;
+    return mpack_ok;
+}
+
+// Closes the innermost open tracked element after checking that doing so is
+// legal.
+//
+// track:   the tracking stack.
+// type:    the type the caller believes it is closing.
+// builder: whether the caller is closing a builder element.
+//
+// Returns mpack_ok and removes the element on success. Returns
+// mpack_error_bug (after calling mpack_break) and leaves the stack untouched
+// if nothing is open, the open element has a different type, a map key is
+// still waiting for its value, elements/bytes remain, or the builder flag
+// does not match.
+static mpack_error_t mpack_track_pop_impl(mpack_track_t* track, mpack_type_t type, bool builder) {
+    mpack_assert(track->elements, "null track elements!");
+    mpack_log("track popping %s\n", mpack_type_to_string(type));
+
+    if (track->count == 0) {
+        mpack_break("attempting to close a %s but nothing was opened!", mpack_type_to_string(type));
+        return mpack_error_bug;
+    }
+
+    mpack_track_element_t* element = &track->elements[track->count - 1];
+
+    if (element->type != type) {
+        mpack_break("attempting to close a %s but the open element is a %s!",
+                mpack_type_to_string(type), mpack_type_to_string(element->type));
+        return mpack_error_bug;
+    }
+
+    if (element->key_needs_value) {
+        mpack_assert(type == mpack_type_map, "key_needs_value can only be true for maps!");
+        mpack_break("attempting to close a %s but an odd number of elements were written",
+                mpack_type_to_string(type));
+        return mpack_error_bug;
+    }
+
+    if (element->left != 0) {
+        // The remaining count is an unsigned 32-bit quantity, so it is printed
+        // with PRIu32; a signed "%i" would not match the argument type and
+        // would show large counts as negative numbers.
+        mpack_break("attempting to close a %s but there are %" PRIu32 " %s left",
+                mpack_type_to_string(type), (uint32_t)element->left,
+                (type == mpack_type_map || type == mpack_type_array) ? "elements" : "bytes");
+        return mpack_error_bug;
+    }
+
+    if (element->builder != builder) {
+        mpack_break("attempting to pop a %sbuilder but the open element is %sa builder",
+                builder ? "" : "non-",
+                element->builder ? "" : "not ");
+        return mpack_error_bug;
+    }
+
+    --track->count;
+    return mpack_ok;
+}
+
+// Closes the innermost open non-builder element of the given type.
+// Returns whatever mpack_track_pop_impl() reports.
+mpack_error_t mpack_track_pop(mpack_track_t* track, mpack_type_t type) {
+    const bool closing_builder = false;
+    return mpack_track_pop_impl(track, type, closing_builder);
+}
+
+// Closes the innermost open builder element of the given type.
+// Returns whatever mpack_track_pop_impl() reports.
+mpack_error_t mpack_track_pop_builder(mpack_track_t* track, mpack_type_t type) {
+    const bool closing_builder = true;
+    return mpack_track_pop_impl(track, type, closing_builder);
+}
+
+// Checks whether one more element may be read or written in the current
+// context, without recording it.
+//
+// read only selects the wording of the diagnostic ("read" vs "written").
+//
+// Returns mpack_ok if nothing is open or the open map/array can take another
+// element; returns mpack_error_bug (after mpack_break) if the open element is
+// not a map or array, or if a non-builder compound is already complete.
+mpack_error_t mpack_track_peek_element(mpack_track_t* track, bool read) {
+    MPACK_UNUSED(read);
+    mpack_assert(track->elements, "null track elements!");
+
+    // with nothing open, elements can be read/written freely
+    if (track->count == 0)
+        return mpack_ok;
+
+    mpack_track_element_t* top = &track->elements[track->count - 1];
+
+    bool is_compound = (top->type == mpack_type_map) || (top->type == mpack_type_array);
+    if (!is_compound) {
+        mpack_break("elements cannot be %s within an %s", read ? "read" : "written",
+                mpack_type_to_string(top->type));
+        return mpack_error_bug;
+    }
+
+    // A builder has no fixed size, and a map key that still needs its value
+    // is allowed one more element even though the count has reached zero.
+    bool is_full = !top->builder && top->left == 0 && !top->key_needs_value;
+    if (is_full) {
+        mpack_break("too many elements %s for %s", read ? "read" : "written",
+                mpack_type_to_string(top->type));
+        return mpack_error_bug;
+    }
+
+    return mpack_ok;
+}
+
+// Records that one element was read or written inside the innermost open
+// map or array.
+//
+// For maps, a key only sets key_needs_value; the remaining count is reduced
+// when the matching value arrives. Builder elements never have their count
+// reduced.
+//
+// Returns the result of mpack_track_peek_element(); nothing is recorded
+// when that check fails or when no element is open.
+mpack_error_t mpack_track_element(mpack_track_t* track, bool read) {
+    mpack_error_t error = mpack_track_peek_element(track, read);
+    if (error != mpack_ok || track->count == 0)
+        return error;
+
+    mpack_track_element_t* top = &track->elements[track->count - 1];
+
+    if (top->type == mpack_type_map) {
+        bool was_waiting_for_value = top->key_needs_value;
+        top->key_needs_value = !was_waiting_for_value;
+
+        // this element was a key: wait for its value before decrementing
+        if (!was_waiting_for_value)
+            return mpack_ok;
+    }
+
+    if (!top->builder)
+        --top->left;
+    return mpack_ok;
+}
+
+// Records that count bytes were read or written inside the innermost open
+// str, bin or ext.
+//
+// read only selects the wording of the diagnostics.
+//
+// Returns mpack_ok after reducing the element's remaining byte count.
+// Returns mpack_error_bug (after mpack_break) if count exceeds
+// MPACK_UINT32_MAX, nothing is open, the open element is a map or array, or
+// fewer than count bytes remain.
+mpack_error_t mpack_track_bytes(mpack_track_t* track, bool read, size_t count) {
+    MPACK_UNUSED(read);
+    mpack_assert(track->elements, "null track elements!");
+
+    if (count > MPACK_UINT32_MAX) {
+        mpack_break("%s more bytes than could possibly fit in a str/bin/ext!",
+                read ? "reading" : "writing");
+        return mpack_error_bug;
+    }
+
+    if (track->count == 0) {
+        mpack_break("bytes cannot be %s with no open bin, str or ext", read ? "read" : "written");
+        return mpack_error_bug;
+    }
+
+    mpack_track_element_t* top = &track->elements[track->count - 1];
+
+    bool is_compound = (top->type == mpack_type_map) || (top->type == mpack_type_array);
+    if (is_compound) {
+        mpack_break("bytes cannot be %s within an %s", read ? "read" : "written",
+                mpack_type_to_string(top->type));
+        return mpack_error_bug;
+    }
+
+    if (count > top->left) {
+        mpack_break("too many bytes %s for %s", read ? "read" : "written",
+                mpack_type_to_string(top->type));
+        return mpack_error_bug;
+    }
+
+    // count was checked against MPACK_UINT32_MAX above, so the cast is lossless
+    top->left -= (uint32_t)count;
+    return mpack_ok;
+}
+
+// Records count bytes for the innermost open element and additionally
+// requires that the element is a str and that those bytes complete it.
+//
+// Returns the error from mpack_track_bytes() if that fails; otherwise
+// mpack_error_bug (after mpack_break) if the open element is not a string or
+// still has bytes left, and mpack_ok if the string is now fully consumed.
+mpack_error_t mpack_track_str_bytes_all(mpack_track_t* track, bool read, size_t count) {
+    mpack_error_t bytes_error = mpack_track_bytes(track, read, count);
+    if (bytes_error != mpack_ok)
+        return bytes_error;
+
+    mpack_track_element_t* top = &track->elements[track->count - 1];
+
+    if (top->type != mpack_type_str) {
+        mpack_break("the open type must be a string, not a %s", mpack_type_to_string(top->type));
+        return mpack_error_bug;
+    }
+
+    if (top->left != 0) {
+        mpack_break("not all bytes were read; the wrong byte count was requested for a string read.");
+        return mpack_error_bug;
+    }
+
+    return mpack_ok;
+}
+
+// Verifies that no tracked element is still open.
+//
+// Returns mpack_ok when the stack is empty; otherwise reports the type of
+// the outermost open element through mpack_break and returns mpack_error_bug.
+mpack_error_t mpack_track_check_empty(mpack_track_t* track) {
+    if (track->count == 0)
+        return mpack_ok;
+
+    mpack_break("unclosed %s", mpack_type_to_string(track->elements[0].type));
+    return mpack_error_bug;
+}
+
+// Releases the tracking stack.
+//
+// When cancel is false the stack is first checked for unclosed elements and
+// the result of that check is returned; when cancel is true the check is
+// skipped and mpack_ok is returned. The element array is freed either way.
+mpack_error_t mpack_track_destroy(mpack_track_t* track, bool cancel) {
+    mpack_error_t error = mpack_ok;
+    if (!cancel)
+        error = mpack_track_check_empty(track);
+
+    if (track->elements != NULL) {
+        MPACK_FREE(track->elements);
+        track->elements = NULL;
+    }
+
+    return error;
+}
+#endif
+
+
+
+// Validates that the count bytes at str form well-formed UTF-8.
+//
+// Rejects truncated sequences, bad continuation bytes, stray continuation
+// bytes, lead bytes for sequences longer than four bytes, overlong
+// encodings, surrogates (U+D800..U+DFFF) and codepoints above U+10FFFF.
+// NUL bytes are rejected only when allow_null is false.
+//
+// Returns true if the whole range is valid (an empty range is valid).
+static bool mpack_utf8_check_impl(const uint8_t* str, size_t count, bool allow_null) {
+    const uint8_t* cursor = str;
+    size_t remaining = count;
+
+    while (remaining != 0) {
+        const uint8_t first = cursor[0];
+
+        // single-byte (ASCII) case, including the optional NUL rejection
+        if (first <= 0x7F) {
+            if (first == '\0' && !allow_null) // we don't allow NUL bytes in MPack C-strings
+                return false;
+            ++cursor;
+            --remaining;
+            continue;
+        }
+
+        // Classify the lead byte: sequence length, payload bits carried by
+        // the lead, and the smallest codepoint that legitimately needs this
+        // many bytes (anything smaller is an overlong encoding).
+        size_t length;
+        uint32_t codepoint;
+        uint32_t minimum;
+        if ((first & 0xE0) == 0xC0) {
+            length = 2;
+            codepoint = (uint32_t)(first & 0x1F);
+            minimum = 0x80;
+        } else if ((first & 0xF0) == 0xE0) {
+            length = 3;
+            codepoint = (uint32_t)(first & 0x0F);
+            minimum = 0x800;
+        } else if ((first & 0xF8) == 0xF0) {
+            length = 4;
+            codepoint = (uint32_t)(first & 0x07);
+            minimum = 0x10000;
+        } else {
+            return false; // continuation byte without a lead, or lead for a 5-byte sequence or longer
+        }
+
+        if (remaining < length) // truncated sequence
+            return false;
+
+        // fold in six payload bits from each continuation byte
+        size_t i;
+        for (i = 1; i < length; ++i) {
+            const uint8_t next = cursor[i];
+            if ((next & 0xC0) != 0x80) // not a continuation byte
+                return false;
+            codepoint = (codepoint << 6) | (uint32_t)(next & 0x3F);
+        }
+
+        if (codepoint < minimum) // overlong sequence
+            return false;
+        // Only three-byte sequences can land in the surrogate range and only
+        // four-byte sequences can exceed the limit, so applying both checks
+        // to every length rejects exactly the same inputs.
+        if (codepoint >= 0xD800 && codepoint <= 0xDFFF) // surrogate
+            return false;
+        if (codepoint > 0x10FFFF) // codepoint limit
+            return false;
+
+        cursor += length;
+        remaining -= length;
+    }
+
+    return true;
+}
+
+// Returns true if the bytes at str are valid UTF-8. NUL bytes are accepted.
+bool mpack_utf8_check(const char* str, size_t bytes) {
+    const uint8_t* data = (const uint8_t*)str;
+    return mpack_utf8_check_impl(data, bytes, true);
+}
+
+// Returns true if the bytes at str are valid UTF-8 and contain no NUL byte.
+bool mpack_utf8_check_no_null(const char* str, size_t bytes) {
+    const uint8_t* data = (const uint8_t*)str;
+    return mpack_utf8_check_impl(data, bytes, false);
+}
+
+// Returns true if none of the first bytes bytes of str is a NUL byte.
+// No encoding validation is performed.
+bool mpack_str_check_no_null(const char* str, size_t bytes) {
+    const char* cursor = str;
+    size_t remaining = bytes;
+
+    while (remaining != 0) {
+        if (*cursor == '\0')
+            return false;
+        ++cursor;
+        --remaining;
+    }
+    return true;
+}
+
+#if MPACK_DEBUG && MPACK_STDIO
+// Appends count bytes of data to the print buffer.
+//
+// As much as fits is copied into the buffer. If data is left over and a
+// callback is set, the buffer is flushed through the callback; the leftover
+// is then either handed straight to the callback (when it is larger than
+// half the buffer) or stored at the start of the buffer. Without a callback
+// the leftover is dropped.
+void mpack_print_append(mpack_print_t* print, const char* data, size_t count) {
+
+    // copy whatever fits into the buffer
+    size_t space = print->size - print->count;
+    size_t fitting = (count < space) ? count : space;
+    mpack_memcpy(print->buffer + print->count, data, fitting);
+    print->count += fitting;
+
+    const char* leftover = data + fitting;
+    size_t leftover_count = count - fitting;
+
+    // if we don't need to flush or can't flush there's nothing else to do
+    if (leftover_count == 0 || print->callback == NULL)
+        return;
+
+    // flush the buffer
+    print->callback(print->context, print->buffer, print->count);
+
+    if (leftover_count <= print->size / 2) {
+        // small leftover: keep it buffered for a later flush
+        mpack_memcpy(print->buffer, leftover, leftover_count);
+        print->count = leftover_count;
+    } else {
+        // large leftover: send it directly and leave the buffer empty
+        print->count = 0;
+        print->callback(print->context, leftover, leftover_count);
+    }
+
+}
+
+// Sends any buffered text to the callback and empties the buffer. Does
+// nothing when the buffer is empty or no callback is set.
+void mpack_print_flush(mpack_print_t* print) {
+    if (print->count == 0 || print->callback == NULL)
+        return;
+
+    print->callback(print->context, print->buffer, print->count);
+    print->count = 0;
+}
+
+// Print callback that writes count bytes of data to the FILE* passed as
+// context. The result of the write is not checked.
+void mpack_print_file_callback(void* context, const char* data, size_t count) {
+    FILE* stream = (FILE*)context;
+    (void)fwrite(data, 1, count, stream);
+}
+#endif
+
+MPACK_SILENCE_WARNINGS_END
+
+/* mpack/mpack-writer.c.c */
+
+#define MPACK_INTERNAL 1
+
+/* #include "mpack-writer.h" */
+
+MPACK_SILENCE_WARNINGS_BEGIN
+
+#if MPACK_WRITER
+
+#if MPACK_BUILDER
+static void mpack_builder_flush(mpack_writer_t* writer);
+#endif
+
+#if MPACK_WRITE_TRACKING
+// Flags error on the writer unless it is mpack_ok.
+static void mpack_writer_flag_if_error(mpack_writer_t* writer, mpack_error_t error) {
+    if (error == mpack_ok)
+        return;
+    mpack_writer_flag_error(writer, error);
+}
+
+// Opens a tracked element of the given type and count on the writer's track.
+// Does nothing if the writer is already in an error state; a tracking
+// failure is flagged on the writer.
+void mpack_writer_track_push(mpack_writer_t* writer, mpack_type_t type, uint32_t count) {
+    if (writer->error != mpack_ok)
+        return;
+    mpack_writer_flag_if_error(writer, mpack_track_push(&writer->track, type, count));
+}
+
+// Opens a tracked builder element of the given type on the writer's track.
+// Does nothing if the writer is already in an error state; a tracking
+// failure is flagged on the writer.
+void mpack_writer_track_push_builder(mpack_writer_t* writer, mpack_type_t type) {
+    if (writer->error != mpack_ok)
+        return;
+    mpack_writer_flag_if_error(writer, mpack_track_push_builder(&writer->track, type));
+}
+
+// Closes the innermost tracked element of the given type on the writer's
+// track. Does nothing if the writer is already in an error state; a
+// tracking failure is flagged on the writer.
+void mpack_writer_track_pop(mpack_writer_t* writer, mpack_type_t type) {
+    if (writer->error != mpack_ok)
+        return;
+    mpack_writer_flag_if_error(writer, mpack_track_pop(&writer->track, type));
+}
+
+// Closes the innermost tracked builder element of the given type on the
+// writer's track. Does nothing if the writer is already in an error state;
+// a tracking failure is flagged on the writer.
+void mpack_writer_track_pop_builder(mpack_writer_t* writer, mpack_type_t type) {
+    if (writer->error != mpack_ok)
+        return;
+    mpack_writer_flag_if_error(writer, mpack_track_pop_builder(&writer->track, type));
+}
+
+// Records count written bytes against the innermost tracked element. Does
+// nothing if the writer is already in an error state; a tracking failure is
+// flagged on the writer.
+void mpack_writer_track_bytes(mpack_writer_t* writer, size_t count) {
+    if (writer->error != mpack_ok)
+        return;
+    mpack_writer_flag_if_error(writer, mpack_track_bytes(&writer->track, false, count));
+}
+#endif
+
+// Records that one element is being written.
+//
+// With write tracking enabled, the element is registered on the writer's
+// track (unless the writer is already in an error state). With the builder
+// enabled, the element is also counted towards the current build.
+//
+// This should probably be renamed. It's not solely used for tracking.
+static inline void mpack_writer_track_element(mpack_writer_t* writer) {
+    (void)writer;
+
+    #if MPACK_WRITE_TRACKING
+    if (writer->error == mpack_ok)
+        mpack_writer_flag_if_error(writer, mpack_track_element(&writer->track, false));
+    #endif
+
+    #if MPACK_BUILDER
+    mpack_build_t* build = writer->builder.current_build;
+
+    // We only track this write if there is a build open and the write is not
+    // nested within another non-build map or array.
+    if (build != NULL && build->nested_compound_elements == 0) {
+        if (build->type == mpack_type_map) {
+            // Map entries are counted per pair: a key only arms the flag,
+            // and the value that follows increments the count.
+            if (build->key_needs_value) {
+                build->key_needs_value = false;
+                ++build->count;
+            } else {
+                build->key_needs_value = true;
+            }
+        } else {
+            ++build->count;
+            mpack_log("adding element to build %p, now %" PRIu32 " elements\n", (void*)build, build->count);
+        }
+    }
+    #endif
+}
+
+// Resets every writer field touched here to its empty state: no error, no
+// buffer, no callbacks or context, zeroed tracking state and no builder
+// state. Used by the init functions before they fill in their own values.
+static void mpack_writer_clear(mpack_writer_t* writer) {
+    // error state and buffer pointers
+    writer->error = mpack_ok;
+    writer->buffer = NULL;
+    writer->position = NULL;
+    writer->end = NULL;
+
+    // callbacks and their context
+    writer->context = NULL;
+    writer->flush = NULL;
+    writer->error_fn = NULL;
+    writer->teardown = NULL;
+
+    #if MPACK_COMPATIBILITY
+    writer->version = mpack_version_current;
+    #endif
+
+    #if MPACK_WRITE_TRACKING
+    mpack_memset(&writer->track, 0, sizeof(writer->track));
+    #endif
+
+    #if MPACK_BUILDER
+    writer->builder.pages = NULL;
+    writer->builder.current_page = NULL;
+    writer->builder.current_build = NULL;
+    writer->builder.latest_build = NULL;
+    writer->builder.stash_buffer = NULL;
+    writer->builder.stash_position = NULL;
+    writer->builder.stash_end = NULL;
+    #endif
+}
+
+// Initializes writer to write into the caller-supplied buffer of size bytes.
+// buffer must not be NULL. With write tracking enabled, a failure to set up
+// the track is flagged on the writer.
+void mpack_writer_init(mpack_writer_t* writer, char* buffer, size_t size) {
+    mpack_assert(buffer != NULL, "cannot initialize writer with empty buffer");
+
+    mpack_writer_clear(writer);
+
+    // the writer starts empty: position sits at the start of the buffer
+    writer->buffer = buffer;
+    writer->position = buffer;
+    writer->end = buffer + size;
+
+    #if MPACK_WRITE_TRACKING
+    mpack_writer_flag_if_error(writer, mpack_track_init(&writer->track));
+    #endif
+
+    mpack_log("===========================\n");
+    mpack_log("initializing writer with buffer size %i\n", (int)size);
+}
+
+// Initializes writer with no buffer and with its error field already set to
+// error.
+void mpack_writer_init_error(mpack_writer_t* writer, mpack_error_t error) {
+    mpack_writer_clear(writer);
+
+    // set the field directly rather than flagging; no error callback exists yet
+    writer->error = error;
+
+    mpack_log("===========================\n");
+    mpack_log("initializing writer in error state %i\n", (int)error);
+}
+
+// Installs flush as the writer's flush callback.
+//
+// The writer's buffer must be at least MPACK_WRITER_MINIMUM_BUFFER_SIZE
+// bytes; otherwise mpack_break is called, mpack_error_bug is flagged and the
+// callback is not installed.
+void mpack_writer_set_flush(mpack_writer_t* writer, mpack_writer_flush_t flush) {
+    MPACK_STATIC_ASSERT(MPACK_WRITER_MINIMUM_BUFFER_SIZE >= MPACK_MAXIMUM_TAG_SIZE,
+            "minimum buffer size must fit any tag!");
+    MPACK_STATIC_ASSERT(31 + MPACK_TAG_SIZE_FIXSTR >= MPACK_WRITER_MINIMUM_BUFFER_SIZE,
+            "minimum buffer size must fit the largest possible fixstr!");
+
+    size_t buffer_size = mpack_writer_buffer_size(writer);
+
+    if (buffer_size >= MPACK_WRITER_MINIMUM_BUFFER_SIZE) {
+        writer->flush = flush;
+        return;
+    }
+
+    mpack_break("buffer size is %i, but minimum buffer size for flush is %i",
+            (int)buffer_size, MPACK_WRITER_MINIMUM_BUFFER_SIZE);
+    mpack_writer_flag_error(writer, mpack_error_bug);
+}
+
+#ifdef MPACK_MALLOC
+// State of a growable writer, stored in the writer's reserved space. It
+// remembers where the caller wants the finished data and its size published
+// when the writer is torn down.
+typedef struct mpack_growable_writer_t {
+    char** target_data;   // receives the heap buffer on successful teardown
+    size_t* target_size;  // receives the number of bytes used in that buffer
+} mpack_growable_writer_t;
+
+// Returns the writer's reserved space as a char pointer.
+//
+// This lives in its own function to avoid false strict aliasing warnings.
+// Strict aliasing is not actually violated: the reserved space is only ever
+// dereferenced as an mpack_growable_writer_t.
+static char* mpack_writer_get_reserved(mpack_writer_t* writer) {
+    char* reserved = (char*)writer->reserved;
+    return reserved;
+}
+
+// Flush callback for the growable writer. Instead of emptying the buffer it
+// enlarges it so that writing can continue.
+//
+// writer: the growable writer being flushed.
+// data:   either the writer's own buffer, or extra data that did not fit.
+// count:  number of bytes at data.
+//
+// Flags mpack_error_memory if the required size does not fit in a size_t or
+// if the reallocation fails; the existing buffer is kept in both cases.
+static void mpack_growable_writer_flush(mpack_writer_t* writer, const char* data, size_t count) {
+
+    // This is an intrusive flush function which modifies the writer's buffer
+    // in response to a flush instead of emptying it in order to add more
+    // capacity for data. This removes the need to copy data from a fixed buffer
+    // into a growable one, improving performance.
+    //
+    // There are three ways flush can be called:
+    //   - flushing the buffer during writing (used is zero, count is all data, data is buffer)
+    //   - flushing extra data during writing (used is all flushed data, count is extra data, data is not buffer)
+    //   - flushing during teardown (used and count are both all flushed data, data is buffer)
+    //
+    // In the first two cases, we grow the buffer by at least double, enough
+    // to ensure that new data will fit. We ignore the teardown flush.
+
+    if (data == writer->buffer) {
+
+        // teardown, do nothing
+        if (mpack_writer_buffer_used(writer) == count)
+            return;
+
+        // otherwise leave the data in the buffer and just grow
+        writer->position = writer->buffer + count;
+        count = 0;
+    }
+
+    size_t used = mpack_writer_buffer_used(writer);
+    size_t size = mpack_writer_buffer_size(writer);
+
+    mpack_log("flush size %i used %i data %p buffer %p\n",
+            (int)count, (int)used, (const void*)data, (void*)writer->buffer);
+
+    // The total number of bytes to hold must itself be representable,
+    // otherwise used + count wraps around and the buffer would end up too
+    // small for the copy below.
+    const size_t size_max = ~(size_t)0;
+    if (count > size_max - used) {
+        mpack_writer_flag_error(writer, mpack_error_memory);
+        return;
+    }
+    size_t needed = used + count;
+
+    mpack_assert(data == writer->buffer || needed > size,
+            "extra flush for %i but there is %i space left in the buffer! (%i/%i)",
+            (int)count, (int)mpack_writer_buffer_left(writer), (int)used, (int)size);
+
+    // Grow to fit the data by doubling at least once. Doubling is refused
+    // when it would wrap around size_t: a wrapped size could never reach
+    // needed (looping forever) or would request a far too small buffer.
+    size_t new_size = size;
+    do {
+        if (new_size > size_max / 2) {
+            mpack_writer_flag_error(writer, mpack_error_memory);
+            return;
+        }
+        new_size *= 2;
+    } while (new_size < needed);
+
+    mpack_log("flush growing buffer size from %i to %i\n", (int)size, (int)new_size);
+
+    // grow the buffer
+    char* new_buffer = (char*)mpack_realloc(writer->buffer, used, new_size);
+    if (new_buffer == NULL) {
+        mpack_writer_flag_error(writer, mpack_error_memory);
+        return;
+    }
+    writer->position = new_buffer + used;
+    writer->buffer = new_buffer;
+    writer->end = writer->buffer + new_size;
+
+    // append the extra data
+    if (count > 0) {
+        mpack_memcpy(writer->position, data, count);
+        writer->position += count;
+    }
+
+    mpack_log("new buffer %p, used %i\n", (void*)new_buffer, (int)mpack_writer_buffer_used(writer));
+}
+
+// Teardown callback for the growable writer.
+//
+// If the writer is error-free, the buffer (shrunk first when less than half
+// of it is used) and the number of bytes used are published through the
+// target pointers stored in the writer's reserved space, and ownership of
+// the buffer passes to the caller. If the writer is in an error state, or
+// shrinking fails, the buffer is freed and nothing is published.
+static void mpack_growable_writer_teardown(mpack_writer_t* writer) {
+    mpack_growable_writer_t* growable_writer = (mpack_growable_writer_t*)mpack_writer_get_reserved(writer);
+
+    if (mpack_writer_error(writer) == mpack_ok) {
+
+        // shrink the buffer to an appropriate size if the data is
+        // much smaller than the buffer
+        if (mpack_writer_buffer_used(writer) < mpack_writer_buffer_size(writer) / 2) {
+            size_t used = mpack_writer_buffer_used(writer);
+
+            // We always return a non-null pointer that must be freed, even if
+            // nothing was written. malloc() and realloc() do not necessarily
+            // do this so we enforce it ourselves.
+            size_t size = (used != 0) ? used : 1;
+
+            char* buffer = (char*)mpack_realloc(writer->buffer, used, size);
+            if (!buffer) {
+                MPACK_FREE(writer->buffer);
+
+                // Clear the pointer before flagging the error: flagging may
+                // invoke the writer's error callback, which receives the
+                // writer and must not find a pointer to freed memory in it.
+                writer->buffer = NULL;
+                writer->context = NULL;
+
+                mpack_writer_flag_error(writer, mpack_error_memory);
+                return;
+            }
+            writer->buffer = buffer;
+            writer->end = (writer->position = writer->buffer + used);
+        }
+
+        // hand the buffer over to the caller
+        *growable_writer->target_data = writer->buffer;
+        *growable_writer->target_size = mpack_writer_buffer_used(writer);
+        writer->buffer = NULL;
+
+    } else if (writer->buffer) {
+        MPACK_FREE(writer->buffer);
+        writer->buffer = NULL;
+    }
+
+    writer->context = NULL;
+}
+
+// Initializes writer to write into a heap buffer that grows as needed.
+//
+// target_data and target_size must not be NULL. They are reset to NULL and 0
+// here and are filled in by the teardown callback on success. If the initial
+// MPACK_BUFFER_SIZE allocation fails, the writer is initialized in the
+// mpack_error_memory state instead.
+void mpack_writer_init_growable(mpack_writer_t* writer, char** target_data, size_t* target_size) {
+    mpack_assert(target_data != NULL, "cannot initialize writer without a destination for the data");
+    mpack_assert(target_size != NULL, "cannot initialize writer without a destination for the size");
+
+    *target_data = NULL;
+    *target_size = 0;
+
+    MPACK_STATIC_ASSERT(sizeof(mpack_growable_writer_t) <= sizeof(writer->reserved),
+            "not enough reserved space for growable writer!");
+
+    // remember the destinations in the writer's reserved space
+    mpack_growable_writer_t* state = (mpack_growable_writer_t*)mpack_writer_get_reserved(writer);
+    state->target_data = target_data;
+    state->target_size = target_size;
+
+    size_t initial_size = MPACK_BUFFER_SIZE;
+    char* initial_buffer = (char*)MPACK_MALLOC(initial_size);
+    if (initial_buffer == NULL) {
+        mpack_writer_init_error(writer, mpack_error_memory);
+        return;
+    }
+
+    mpack_writer_init(writer, initial_buffer, initial_size);
+    mpack_writer_set_flush(writer, mpack_growable_writer_flush);
+    mpack_writer_set_teardown(writer, mpack_growable_writer_teardown);
+}
+#endif
+
+#if MPACK_STDIO
+// Flush callback for file writers: writes count bytes from buffer to the
+// FILE* stored as the writer's context, flagging mpack_error_io on a short
+// write.
+static void mpack_file_writer_flush(mpack_writer_t* writer, const char* buffer, size_t count) {
+    FILE* stream = (FILE*)writer->context;
+
+    if (fwrite((const void*)buffer, 1, count, stream) != count)
+        mpack_writer_flag_error(writer, mpack_error_io);
+}
+
+// Teardown callback for file writers that do not own their file: frees the
+// writer's buffer and clears the buffer and context pointers. The file
+// itself is left open.
+static void mpack_file_writer_teardown(mpack_writer_t* writer) {
+    MPACK_FREE(writer->buffer);
+
+    writer->context = NULL;
+    writer->buffer = NULL;
+}
+
+// Teardown callback for file writers that own their file: closes the FILE*
+// stored as the writer's context (flagging mpack_error_io if closing fails)
+// and then performs the regular file writer teardown.
+static void mpack_file_writer_teardown_close(mpack_writer_t* writer) {
+    FILE* stream = (FILE*)writer->context;
+
+    if (stream != NULL && fclose(stream) != 0)
+        mpack_writer_flag_error(writer, mpack_error_io);
+
+    mpack_file_writer_teardown(writer);
+}
+
+// Initializes writer to write to an already open FILE*, buffering through a
+// heap buffer of MPACK_BUFFER_SIZE bytes.
+//
+// file must not be NULL. When close_when_done is true the file is closed at
+// teardown. If the buffer cannot be allocated, the writer is initialized in
+// the mpack_error_memory state and the file is closed right away when
+// close_when_done is true.
+void mpack_writer_init_stdfile(mpack_writer_t* writer, FILE* file, bool close_when_done) {
+    mpack_assert(file != NULL, "file is NULL");
+
+    size_t buffer_size = MPACK_BUFFER_SIZE;
+    char* file_buffer = (char*)MPACK_MALLOC(buffer_size);
+
+    if (file_buffer == NULL) {
+        mpack_writer_init_error(writer, mpack_error_memory);
+        // no teardown will run for this writer, so release the file here
+        if (close_when_done)
+            fclose(file);
+        return;
+    }
+
+    mpack_writer_init(writer, file_buffer, buffer_size);
+    mpack_writer_set_context(writer, file);
+    mpack_writer_set_flush(writer, mpack_file_writer_flush);
+
+    if (close_when_done)
+        mpack_writer_set_teardown(writer, mpack_file_writer_teardown_close);
+    else
+        mpack_writer_set_teardown(writer, mpack_file_writer_teardown);
+}
+
+// Initializes writer to write to the file at filename, opened in "wb" mode.
+// The file is closed when the writer is torn down. If the file cannot be
+// opened, the writer is initialized in the mpack_error_io state.
+void mpack_writer_init_filename(mpack_writer_t* writer, const char* filename) {
+    mpack_assert(filename != NULL, "filename is NULL");
+
+    FILE* stream = fopen(filename, "wb");
+    if (stream != NULL) {
+        mpack_writer_init_stdfile(writer, stream, true);
+        return;
+    }
+
+    mpack_writer_init_error(writer, mpack_error_io);
+}
+#endif
+
+void mpack_writer_flag_error(mpack_writer_t* writer, mpack_error_t error) {
+    mpack_log("writer %p setting error %i: %s\n", (void*)writer, (int)error, mpack_error_to_string(error));
+
+    if (writer->error == mpack_ok) {
+        writer->error = error;
+        if (writer->error_fn)
+            writer->error_fn(writer, writer->error);
+    }
+}
+
+// Passes everything currently buffered to the flush callback. Performs no
+// error-state check and does not verify that a flush callback is set.
+MPACK_STATIC_INLINE void mpack_writer_flush_unchecked(mpack_writer_t* writer) {
+    // The fill level is captured and the write position rewound *before* the
+    // callback runs. This ordering lets a flush function tell a flush of the
+    // writer's own buffer apart from a flush of external data; see
+    // mpack_growable_writer_flush().
+    const size_t pending = mpack_writer_buffer_used(writer);
+    writer->position = writer->buffer;
+    writer->flush(writer, writer->buffer, pending);
+}
+
+// Flushes any buffered bytes through the writer's flush callback.
+//
+// Does nothing if the writer is already in an error state. Flags
+// mpack_error_bug if a build is in progress (MPACK_BUILDER) or if the writer
+// has no flush callback; with MPACK_WRITE_TRACKING, open elements also put
+// the writer in an error state.
+void mpack_writer_flush_message(mpack_writer_t* writer) {
+    if (writer->error != mpack_ok)
+        return;
+
+    #if MPACK_WRITE_TRACKING
+    // Flushing is not permitted while any element is still open.
+    mpack_writer_flag_if_error(writer, mpack_track_check_empty(&writer->track));
+    if (writer->error != mpack_ok)
+        return;
+    #endif
+
+    #if MPACK_BUILDER
+    if (writer->builder.current_build != NULL) {
+        mpack_break("cannot call mpack_writer_flush_message() while there are elements open!");
+        mpack_writer_flag_error(writer, mpack_error_bug);
+        return;
+    }
+    #endif
+
+    if (writer->flush == NULL) {
+        mpack_break("cannot call mpack_writer_flush_message() without a flush function!");
+        mpack_writer_flag_error(writer, mpack_error_bug);
+        return;
+    }
+
+    // an empty buffer is not handed to the callback
+    if (mpack_writer_buffer_used(writer) == 0)
+        return;
+
+    mpack_writer_flush_unchecked(writer);
+}
+
+// Makes room for at least count free bytes in the buffer, which must
+// currently hold fewer than that. Returns true on success. On failure an
+// error is flagged on the writer (unless it was already in an error state)
+// and false is returned.
+MPACK_NOINLINE static bool mpack_writer_ensure(mpack_writer_t* writer, size_t count) {
+    mpack_assert(count != 0, "cannot ensure zero bytes!");
+    mpack_assert(count <= MPACK_WRITER_MINIMUM_BUFFER_SIZE,
+            "cannot ensure %i bytes, this is more than the minimum buffer size %i!",
+            (int)count, (int)MPACK_WRITER_MINIMUM_BUFFER_SIZE);
+    mpack_assert(count > mpack_writer_buffer_left(writer),
+            "request to ensure %i bytes but there are already %i left in the buffer!",
+            (int)count, (int)mpack_writer_buffer_left(writer));
+
+    mpack_log("ensuring %i bytes, %i left\n", (int)count, (int)mpack_writer_buffer_left(writer));
+
+    if (mpack_writer_error(writer) != mpack_ok)
+        return false;
+
+    #if MPACK_BUILDER
+    // While a build is in progress the builder supplies the space: it either
+    // provides a page with room for a tag or flags a memory error.
+    if (writer->builder.current_build != NULL) {
+        mpack_builder_flush(writer);
+        return mpack_writer_error(writer) == mpack_ok;
+    }
+    #endif
+
+    // without a flush function the buffer can never be emptied
+    if (writer->flush == NULL) {
+        mpack_writer_flag_error(writer, mpack_error_too_big);
+        return false;
+    }
+
+    mpack_writer_flush_unchecked(writer);
+    if (mpack_writer_error(writer) != mpack_ok)
+        return false;
+
+    // the flush function ran without error but did not free enough space
+    if (mpack_writer_buffer_left(writer) < count) {
+        mpack_writer_flag_error(writer, mpack_error_io);
+        return false;
+    }
+
+    return true;
+}
+
+// Writes encoded bytes to the buffer when we already know the data
+// does not fit in the buffer (i.e. it straddles the edge of the
+// buffer.) If there is a flush function, it is guaranteed to be
+// called; otherwise mpack_error_too_big is raised.
+//
+// Two exceptions, both visible below: if the writer is already in an error
+// state nothing is written, and while a build is in progress (MPACK_BUILDER)
+// the bytes are copied into builder pages instead of being flushed.
+//
+// p points to count bytes of already-encoded data; count must be larger
+// than the space currently left in the buffer.
+MPACK_NOINLINE static void mpack_write_native_straddle(mpack_writer_t* writer, const char* p, size_t count) {
+    mpack_assert(count == 0 || p != NULL, "data pointer for %i bytes is NULL", (int)count);
+
+    if (mpack_writer_error(writer) != mpack_ok)
+        return;
+    mpack_log("big write for %i bytes from %p, %i space left in buffer\n",
+            (int)count, p, (int)mpack_writer_buffer_left(writer));
+    mpack_assert(count > mpack_writer_buffer_left(writer),
+            "big write requested for %i bytes, but there is %i available "
+            "space in buffer. should have called mpack_write_native() instead",
+            (int)count, (int)(mpack_writer_buffer_left(writer)));
+
+    #if MPACK_BUILDER
+    // if we have a build in progress, we can't flush. we need to copy all
+    // bytes into as many build buffer pages as it takes.
+    if (writer->builder.current_build != NULL) {
+        while (true) {
+            // copy as much as fits in the remainder of the current page
+            size_t step = (size_t)(writer->end - writer->position);
+            if (step > count)
+                step = count;
+            mpack_memcpy(writer->position, p, step);
+            writer->position += step;
+            p += step;
+            count -= step;
+
+            if (count == 0)
+                return;
+
+            // page is full and data remains: ask the builder for more space
+            mpack_builder_flush(writer);
+            if (mpack_writer_error(writer) != mpack_ok)
+                return;
+            mpack_assert(writer->position != writer->end);
+        }
+    }
+    #endif
+
+    // we'll need a flush function
+    if (!writer->flush) {
+        mpack_writer_flag_error(writer, mpack_error_too_big);
+        return;
+    }
+
+    // flush the buffer
+    mpack_writer_flush_unchecked(writer);
+    if (mpack_writer_error(writer) != mpack_ok)
+        return;
+
+    // note that an intrusive flush function (such as mpack_growable_writer_flush())
+    // may have changed size and/or reset used to a non-zero value. we treat both as
+    // though they may have changed, and there may still be data in the buffer.
+
+    // flush the extra data directly if it doesn't fit in the buffer
+    if (count > mpack_writer_buffer_left(writer)) {
+        // the caller's data goes straight to the flush function, bypassing the buffer
+        writer->flush(writer, p, count);
+        if (mpack_writer_error(writer) != mpack_ok)
+            return;
+    } else {
+        // after the flush the data fits, so buffer it as usual
+        mpack_memcpy(writer->position, p, count);
+        writer->position += count;
+    }
+}
+
+// Appends count already-encoded bytes from p to the writer. Data that fits in
+// the remaining buffer space is copied directly; anything larger is handed to
+// mpack_write_native_straddle().
+MPACK_STATIC_INLINE void mpack_write_native(mpack_writer_t* writer, const char* p, size_t count) {
+    mpack_assert(count == 0 || p != NULL, "data pointer for %i bytes is NULL", (int)count);
+
+    if (count <= mpack_writer_buffer_left(writer)) {
+        mpack_memcpy(writer->position, p, count);
+        writer->position += count;
+        return;
+    }
+
+    mpack_write_native_straddle(writer, p, count);
+}
+
+// Destroys the writer and returns its final error state.
+//
+// In order: cleans up write tracking (if enabled), releases any pages left
+// over from an unfinished build (if the builder is enabled), flushes the
+// remaining buffered bytes when no error has been flagged and a flush
+// function is set, and finally runs the teardown function if one is set.
+mpack_error_t mpack_writer_destroy(mpack_writer_t* writer) {
+
+    // clean up tracking, asserting if we're not already in an error state
+    #if MPACK_WRITE_TRACKING
+    mpack_track_destroy(&writer->track, writer->error != mpack_ok);
+    #endif
+
+    #if MPACK_BUILDER
+    mpack_builder_t* builder = &writer->builder;
+    if (builder->current_build != NULL) {
+        // A builder is open!
+
+        // Flag an error, if there's not already an error. You can only skip
+        // closing any open compound types if a write error occurred. If there
+        // wasn't already an error, it's a bug, which will assert in debug.
+        if (mpack_writer_error(writer) == mpack_ok) {
+            mpack_break("writer cannot be destroyed with an incomplete builder unless "
+                    "an error was flagged!");
+            mpack_writer_flag_error(writer, mpack_error_bug);
+        }
+
+        // Free any remaining builder pages
+        mpack_builder_page_t* page = builder->pages;
+        #if MPACK_BUILDER_INTERNAL_STORAGE
+        // the first page is the builder's internal storage, so it is skipped
+        // rather than freed
+        mpack_assert(page == (mpack_builder_page_t*)builder->internal);
+        page = page->next;
+        #endif
+        while (page != NULL) {
+            mpack_builder_page_t* next = page->next;
+            MPACK_FREE(page);
+            page = next;
+        }
+
+        // Restore the stashed pointers. The teardown function may need to free
+        // them (e.g. mpack_growable_writer_teardown().)
+        writer->buffer = builder->stash_buffer;
+        writer->position = builder->stash_position;
+        writer->end = builder->stash_end;
+
+        // Note: It's not necessary to clean up the current_build or other
+        // pointers at this point because we're guaranteed to be in an error
+        // state already so a user error callback can't longjmp out. This
+        // destroy function will complete no matter what so it doesn't matter
+        // what junk is left in the writer.
+    }
+    #endif
+
+    // flush any outstanding data
+    // (skipped when an error is flagged, the buffer is empty, or there is no
+    // flush function; the flush pointer is cleared after this final flush)
+    if (mpack_writer_error(writer) == mpack_ok && mpack_writer_buffer_used(writer) != 0 && writer->flush != NULL) {
+        writer->flush(writer, writer->buffer, mpack_writer_buffer_used(writer));
+        writer->flush = NULL;
+    }
+
+    // run the teardown function once, then clear it
+    if (writer->teardown) {
+        writer->teardown(writer);
+        writer->teardown = NULL;
+    }
+
+    // the flush and teardown calls above may themselves have flagged an error
+    return writer->error;
+}
+
+// Writes the value or compound-type header described by a tag, dispatching on
+// the tag's type to the matching write/start function. A missing or
+// unrecognized type flags mpack_error_bug on the writer.
+void mpack_write_tag(mpack_writer_t* writer, mpack_tag_t value) {
+    switch (value.type) {
+        case mpack_type_missing:
+            mpack_break("cannot write a missing value!");
+            mpack_writer_flag_error(writer, mpack_error_bug);
+            return;
+
+        case mpack_type_nil:
+            mpack_write_nil(writer);
+            return;
+        case mpack_type_bool:
+            mpack_write_bool(writer, value.v.b);
+            return;
+        case mpack_type_int:
+            mpack_write_int(writer, value.v.i);
+            return;
+        case mpack_type_uint:
+            mpack_write_uint(writer, value.v.u);
+            return;
+
+        case mpack_type_float:
+            #if MPACK_FLOAT
+            mpack_write_float(writer, value.v.f);
+            #else
+            mpack_write_raw_float(writer, value.v.f);
+            #endif
+            return;
+        case mpack_type_double:
+            #if MPACK_DOUBLE
+            mpack_write_double(writer, value.v.d);
+            #else
+            mpack_write_raw_double(writer, value.v.d);
+            #endif
+            return;
+
+        case mpack_type_str:
+            mpack_start_str(writer, value.v.l);
+            return;
+        case mpack_type_bin:
+            mpack_start_bin(writer, value.v.l);
+            return;
+
+        #if MPACK_EXTENSIONS
+        case mpack_type_ext:
+            mpack_start_ext(writer, mpack_tag_ext_exttype(&value), mpack_tag_ext_length(&value));
+            return;
+        #endif
+
+        case mpack_type_array:
+            mpack_start_array(writer, value.v.n);
+            return;
+        case mpack_type_map:
+            mpack_start_map(writer, value.v.n);
+            return;
+    }
+
+    // only reached when no case above matched
+    mpack_break("unrecognized type %i", (int)value.type);
+    mpack_writer_flag_error(writer, mpack_error_bug);
+}
+
+// Writes a complete one-byte element: records it with write tracking, makes
+// sure one byte of buffer space is available, and stores value. Nothing is
+// stored if space cannot be obtained.
+MPACK_STATIC_INLINE void mpack_write_byte_element(mpack_writer_t* writer, char value) {
+    mpack_writer_track_element(writer);
+
+    if (!MPACK_LIKELY(mpack_writer_buffer_left(writer) >= 1) && !mpack_writer_ensure(writer, 1))
+        return;
+
+    *writer->position = value;
+    ++writer->position;
+}
+
+// Writes a nil element, encoded as the single byte 0xc0.
+void mpack_write_nil(mpack_writer_t* writer) {
+    mpack_write_byte_element(writer, (char)0xc0);
+}
+
+// Writes a boolean element as a single byte: 0xc3 for true, 0xc2 for false.
+void mpack_write_bool(mpack_writer_t* writer, bool value) {
+    mpack_write_byte_element(writer, value ? (char)0xc3 : (char)0xc2);
+}
+
+// Writes a boolean true element, encoded as the single byte 0xc3.
+void mpack_write_true(mpack_writer_t* writer) {
+    mpack_write_byte_element(writer, (char)0xc3);
+}
+
+// Writes a boolean false element, encoded as the single byte 0xc2.
+void mpack_write_false(mpack_writer_t* writer) {
+    mpack_write_byte_element(writer, (char)0xc2);
+}
+
+// Writes bytes bytes from data to the writer verbatim, counting them as a
+// single element for write tracking. No tag or header is added here.
+// NOTE(review): presumably data must already be one complete encoded
+// MessagePack object; nothing in this function validates that.
+void mpack_write_object_bytes(mpack_writer_t* writer, const char* data, size_t bytes) {
+    mpack_writer_track_element(writer);
+    mpack_write_native(writer, data, bytes);
+}
+
+/*
+ * Encode functions
+ */
+
+// Encodes a positive fixint: value (at most 127) is stored as the single
+// byte at p.
+MPACK_STATIC_INLINE void mpack_encode_fixuint(char* p, uint8_t value) {
+    mpack_assert(value <= 127);
+    mpack_store_u8(p, value);
+}
+
+// Encodes an 8-bit unsigned integer: type byte 0xcc at p, then value at
+// p + 1. Values up to 127 must use mpack_encode_fixuint() instead.
+MPACK_STATIC_INLINE void mpack_encode_u8(char* p, uint8_t value) {
+    mpack_assert(value > 127);
+    mpack_store_u8(p, 0xcc);
+    mpack_store_u8(p + 1, value);
+}
+
+// Encodes a 16-bit unsigned integer: type byte 0xcd at p, then value stored
+// with mpack_store_u16() at p + 1. value must not fit in 8 bits.
+MPACK_STATIC_INLINE void mpack_encode_u16(char* p, uint16_t value) {
+    mpack_assert(value > MPACK_UINT8_MAX);
+    mpack_store_u8(p, 0xcd);
+    mpack_store_u16(p + 1, value);
+}
+
+// Encodes a 32-bit unsigned integer: type byte 0xce at p, then value stored
+// with mpack_store_u32() at p + 1. value must not fit in 16 bits.
+MPACK_STATIC_INLINE void mpack_encode_u32(char* p, uint32_t value) {
+    mpack_assert(value > MPACK_UINT16_MAX);
+    mpack_store_u8(p, 0xce);
+    mpack_store_u32(p + 1, value);
+}
+
+// Encodes a 64-bit unsigned integer: type byte 0xcf at p, then value stored
+// with mpack_store_u64() at p + 1. value must not fit in 32 bits.
+MPACK_STATIC_INLINE void mpack_encode_u64(char* p, uint64_t value) {
+    mpack_assert(value > MPACK_UINT32_MAX);
+    mpack_store_u8(p, 0xcf);
+    mpack_store_u64(p + 1, value);
+}
+
+// Encodes a fixint: value (which must be at least -32) is stored as the
+// single byte at p.
+MPACK_STATIC_INLINE void mpack_encode_fixint(char* p, int8_t value) {
+    // this can encode positive or negative fixints
+    mpack_assert(value >= -32);
+    mpack_store_i8(p, value);
+}
+
+// Encodes an 8-bit signed integer: type byte 0xd0 at p, then value at p + 1.
+// Only values below -32 belong here; the rest are fixints.
+MPACK_STATIC_INLINE void mpack_encode_i8(char* p, int8_t value) {
+    mpack_assert(value < -32);
+    mpack_store_u8(p, 0xd0);
+    mpack_store_i8(p + 1, value);
+}
+
+// Encodes a 16-bit signed integer: type byte 0xd1 at p, then value stored
+// with mpack_store_i16() at p + 1. value must be below MPACK_INT8_MIN.
+MPACK_STATIC_INLINE void mpack_encode_i16(char* p, int16_t value) {
+    mpack_assert(value < MPACK_INT8_MIN);
+    mpack_store_u8(p, 0xd1);
+    mpack_store_i16(p + 1, value);
+}
+
+// Encodes a 32-bit signed integer: type byte 0xd2 at p, then value stored
+// with mpack_store_i32() at p + 1. value must be below MPACK_INT16_MIN.
+MPACK_STATIC_INLINE void mpack_encode_i32(char* p, int32_t value) {
+    mpack_assert(value < MPACK_INT16_MIN);
+    mpack_store_u8(p, 0xd2);
+    mpack_store_i32(p + 1, value);
+}
+
+// Encodes a 64-bit signed integer: type byte 0xd3 at p, then value stored
+// with mpack_store_i64() at p + 1. value must be below MPACK_INT32_MIN.
+MPACK_STATIC_INLINE void mpack_encode_i64(char* p, int64_t value) {
+    mpack_assert(value < MPACK_INT32_MIN);
+    mpack_store_u8(p, 0xd3);
+    mpack_store_i64(p + 1, value);
+}
+
+#if MPACK_FLOAT
+// Encodes a float: type byte 0xca at p, then value stored with
+// mpack_store_float() at p + 1. Compiled only when MPACK_FLOAT is enabled.
+MPACK_STATIC_INLINE void mpack_encode_float(char* p, float value) {
+    mpack_store_u8(p, 0xca);
+    mpack_store_float(p + 1, value);
+}
+#else
+// Variant used when MPACK_FLOAT is disabled: writes type byte 0xca at p, then
+// the 32-bit value stored with mpack_store_u32() at p + 1.
+// NOTE(review): value is presumably the raw bit pattern of a float; confirm
+// against callers.
+MPACK_STATIC_INLINE void mpack_encode_raw_float(char* p, uint32_t value) {
+    mpack_store_u8(p, 0xca);
+    mpack_store_u32(p + 1, value);
+}
+#endif
+
+#if MPACK_DOUBLE
+// Encodes a double: type byte 0xcb at p, then value stored with
+// mpack_store_double() at p + 1. Compiled only when MPACK_DOUBLE is enabled.
+MPACK_STATIC_INLINE void mpack_encode_double(char* p, double value) {
+    mpack_store_u8(p, 0xcb);
+    mpack_store_double(p + 1, value);
+}
+#else
+// Variant used when MPACK_DOUBLE is disabled: writes type byte 0xcb at p,
+// then the 64-bit value stored with mpack_store_u64() at p + 1.
+// NOTE(review): value is presumably the raw bit pattern of a double; confirm
+// against callers.
+MPACK_STATIC_INLINE void mpack_encode_raw_double(char* p, uint64_t value) {
+    mpack_store_u8(p, 0xcb);
+    mpack_store_u64(p + 1, value);
+}
+#endif
+
+// Encodes a fixarray header: a single byte at p combining 0x90 with the
+// element count (at most 15) in the low bits.
+MPACK_STATIC_INLINE void mpack_encode_fixarray(char* p, uint8_t count) {
+    mpack_assert(count <= 15);
+    mpack_store_u8(p, (uint8_t)(0x90 | count));
+}
+
+// Encodes an array16 header: type byte 0xdc at p, then the 16-bit element
+// count at p + 1. Counts up to 15 must use mpack_encode_fixarray().
+MPACK_STATIC_INLINE void mpack_encode_array16(char* p, uint16_t count) {
+    mpack_assert(count > 15);
+    mpack_store_u8(p, 0xdc);
+    mpack_store_u16(p + 1, count);
+}
+
+// Encodes an array32 header: type byte 0xdd at p, then the 32-bit element
+// count at p + 1. count must not fit in 16 bits.
+MPACK_STATIC_INLINE void mpack_encode_array32(char* p, uint32_t count) {
+    mpack_assert(count > MPACK_UINT16_MAX);
+    mpack_store_u8(p, 0xdd);
+    mpack_store_u32(p + 1, count);
+}
+
+// Encodes a fixmap header: a single byte at p combining 0x80 with count
+// (at most 15) in the low bits.
+MPACK_STATIC_INLINE void mpack_encode_fixmap(char* p, uint8_t count) {
+    mpack_assert(count <= 15);
+    mpack_store_u8(p, (uint8_t)(0x80 | count));
+}
+
+// Encodes a map16 header: type byte 0xde at p, then the 16-bit count at
+// p + 1. Counts up to 15 must use mpack_encode_fixmap().
+MPACK_STATIC_INLINE void mpack_encode_map16(char* p, uint16_t count) {
+    mpack_assert(count > 15);
+    mpack_store_u8(p, 0xde);
+    mpack_store_u16(p + 1, count);
+}
+
+// Encodes a map32 header: type byte 0xdf at p, then the 32-bit count at
+// p + 1. count must not fit in 16 bits.
+MPACK_STATIC_INLINE void mpack_encode_map32(char* p, uint32_t count) {
+    mpack_assert(count > MPACK_UINT16_MAX);
+    mpack_store_u8(p, 0xdf);
+    mpack_store_u32(p + 1, count);
+}
+
+// Encodes a fixstr header: a single byte at p combining 0xa0 with the byte
+// length (at most 31) in the low bits.
+MPACK_STATIC_INLINE void mpack_encode_fixstr(char* p, uint8_t count) {
+    mpack_assert(count <= 31);
+    mpack_store_u8(p, (uint8_t)(0xa0 | count));
+}
+
+// Encodes a str8 header: type byte 0xd9 at p, then the 8-bit length at
+// p + 1. Lengths up to 31 must use mpack_encode_fixstr().
+MPACK_STATIC_INLINE void mpack_encode_str8(char* p, uint8_t count) {
+    mpack_assert(count > 31);
+    mpack_store_u8(p, 0xd9);
+    mpack_store_u8(p + 1, count);
+}
+
+// Encodes a str16 header: type byte 0xda at p, then the 16-bit length at
+// p + 1.
+MPACK_STATIC_INLINE void mpack_encode_str16(char* p, uint16_t count) {
+    // we might be encoding a raw in compatibility mode, so we
+    // allow count to be in the range [32, MPACK_UINT8_MAX].
+    mpack_assert(count > 31);
+    mpack_store_u8(p, 0xda);
+    mpack_store_u16(p + 1, count);
+}
+
+// Encodes a str32 header: type byte 0xdb at p, then the 32-bit length at
+// p + 1. count must not fit in 16 bits.
+MPACK_STATIC_INLINE void mpack_encode_str32(char* p, uint32_t count) {
+    mpack_assert(count > MPACK_UINT16_MAX);
+    mpack_store_u8(p, 0xdb);
+    mpack_store_u32(p + 1, count);
+}
+
+// Encodes a bin8 header: type byte 0xc4 at p, then the 8-bit length at
+// p + 1. Any 8-bit length is accepted (there is no lower-bound assert).
+MPACK_STATIC_INLINE void mpack_encode_bin8(char* p, uint8_t count) {
+    mpack_store_u8(p, 0xc4);
+    mpack_store_u8(p + 1, count);
+}
+
+// Encodes a bin16 header: type byte 0xc5 at p, then the 16-bit length at
+// p + 1. count must not fit in 8 bits.
+MPACK_STATIC_INLINE void mpack_encode_bin16(char* p, uint16_t count) {
+    mpack_assert(count > MPACK_UINT8_MAX);
+    mpack_store_u8(p, 0xc5);
+    mpack_store_u16(p + 1, count);
+}
+
+// Encodes a bin32 header: type byte 0xc6 at p, then the 32-bit length at
+// p + 1. count must not fit in 16 bits.
+MPACK_STATIC_INLINE void mpack_encode_bin32(char* p, uint32_t count) {
+    mpack_assert(count > MPACK_UINT16_MAX);
+    mpack_store_u8(p, 0xc6);
+    mpack_store_u32(p + 1, count);
+}
+
+#if MPACK_EXTENSIONS
+// Encodes a fixext1 header: type byte 0xd4 at p, then the extension type at
+// p + 1. mpack_start_ext() uses it for a payload of exactly 1 byte.
+MPACK_STATIC_INLINE void mpack_encode_fixext1(char* p, int8_t exttype) {
+    mpack_store_u8(p, 0xd4);
+    mpack_store_i8(p + 1, exttype);
+}
+
+// Encodes a fixext2 header: type byte 0xd5 at p, then the extension type at
+// p + 1. mpack_start_ext() uses it for a payload of exactly 2 bytes.
+MPACK_STATIC_INLINE void mpack_encode_fixext2(char* p, int8_t exttype) {
+    mpack_store_u8(p, 0xd5);
+    mpack_store_i8(p + 1, exttype);
+}
+
+// Encodes a fixext4 header: type byte 0xd6 at p, then the extension type at
+// p + 1. mpack_start_ext() uses it for a payload of exactly 4 bytes.
+MPACK_STATIC_INLINE void mpack_encode_fixext4(char* p, int8_t exttype) {
+    mpack_store_u8(p, 0xd6);
+    mpack_store_i8(p + 1, exttype);
+}
+
+// Encodes a fixext8 header: type byte 0xd7 at p, then the extension type at
+// p + 1. mpack_start_ext() uses it for a payload of exactly 8 bytes.
+MPACK_STATIC_INLINE void mpack_encode_fixext8(char* p, int8_t exttype) {
+    mpack_store_u8(p, 0xd7);
+    mpack_store_i8(p + 1, exttype);
+}
+
+// Encodes a fixext16 header: type byte 0xd8 at p, then the extension type at
+// p + 1. mpack_start_ext() uses it for a payload of exactly 16 bytes.
+MPACK_STATIC_INLINE void mpack_encode_fixext16(char* p, int8_t exttype) {
+    mpack_store_u8(p, 0xd8);
+    mpack_store_i8(p + 1, exttype);
+}
+
+// Encodes an ext8 header: type byte 0xc7 at p, the 8-bit payload length at
+// p + 1, and the extension type at p + 2. Lengths 1, 2, 4, 8 and 16 are
+// rejected by the assert because they have dedicated fixext encodings.
+MPACK_STATIC_INLINE void mpack_encode_ext8(char* p, int8_t exttype, uint8_t count) {
+    mpack_assert(count != 1 && count != 2 && count != 4 && count != 8 && count != 16);
+    mpack_store_u8(p, 0xc7);
+    mpack_store_u8(p + 1, count);
+    mpack_store_i8(p + 2, exttype);
+}
+
+// Encodes an ext16 header: type byte 0xc8 at p, the 16-bit payload length at
+// p + 1, and the extension type at p + 3. count must not fit in 8 bits.
+MPACK_STATIC_INLINE void mpack_encode_ext16(char* p, int8_t exttype, uint16_t count) {
+    mpack_assert(count > MPACK_UINT8_MAX);
+    mpack_store_u8(p, 0xc8);
+    mpack_store_u16(p + 1, count);
+    mpack_store_i8(p + 3, exttype);
+}
+
+// Encodes an ext32 header: type byte 0xc9 at p, the 32-bit payload length at
+// p + 1, and the extension type at p + 5. count must not fit in 16 bits.
+MPACK_STATIC_INLINE void mpack_encode_ext32(char* p, int8_t exttype, uint32_t count) {
+    mpack_assert(count > MPACK_UINT16_MAX);
+    mpack_store_u8(p, 0xc9);
+    mpack_store_u32(p + 1, count);
+    mpack_store_i8(p + 5, exttype);
+}
+
+// Encodes the 4-byte-payload timestamp form: a fixext4 header with extension
+// type MPACK_EXTTYPE_TIMESTAMP, followed by the 32-bit seconds value. There
+// is no nanoseconds field in this form.
+MPACK_STATIC_INLINE void mpack_encode_timestamp_4(char* p, uint32_t seconds) {
+    mpack_encode_fixext4(p, MPACK_EXTTYPE_TIMESTAMP);
+    mpack_store_u32(p + MPACK_TAG_SIZE_FIXEXT4, seconds);
+}
+
+// Encodes the 8-byte-payload timestamp form: a fixext8 header with extension
+// type MPACK_EXTTYPE_TIMESTAMP, followed by one 64-bit word holding both
+// fields.
+MPACK_STATIC_INLINE void mpack_encode_timestamp_8(char* p, int64_t seconds, uint32_t nanoseconds) {
+    mpack_assert(nanoseconds <= MPACK_TIMESTAMP_NANOSECONDS_MAX);
+    mpack_encode_fixext8(p, MPACK_EXTTYPE_TIMESTAMP);
+    // nanoseconds go above bit 34 and seconds in the low 34 bits; the caller
+    // (mpack_write_timestamp()) only picks this form when
+    // 0 <= seconds < 2^34, so the two fields cannot overlap
+    uint64_t encoded = ((uint64_t)nanoseconds << 34) | (uint64_t)seconds;
+    mpack_store_u64(p + MPACK_TAG_SIZE_FIXEXT8, encoded);
+}
+
+// Encodes the 12-byte-payload timestamp form: an ext8 header with length 12
+// and extension type MPACK_EXTTYPE_TIMESTAMP, followed by the 32-bit
+// nanoseconds and then the signed 64-bit seconds.
+MPACK_STATIC_INLINE void mpack_encode_timestamp_12(char* p, int64_t seconds, uint32_t nanoseconds) {
+    mpack_assert(nanoseconds <= MPACK_TIMESTAMP_NANOSECONDS_MAX);
+    mpack_encode_ext8(p, MPACK_EXTTYPE_TIMESTAMP, 12);
+    mpack_store_u32(p + MPACK_TAG_SIZE_EXT8, nanoseconds);
+    mpack_store_i64(p + MPACK_TAG_SIZE_EXT8 + 4, seconds);
+}
+#endif
+
+
+
+/*
+ * Write functions
+ */
+
+// This is a macro wrapper to the encode functions to encode
+// directly into the buffer. If mpack_writer_ensure() fails
+// it will flag an error so we don't have to do anything.
+//
+// Usage notes:
+//  - A variable named `writer` must be in scope at the point of use; the
+//    macro refers to it directly.
+//  - `size` is expanded several times, so it must be free of side effects.
+//  - encode_fn is called as encode_fn(writer->position, <extra args>) and
+//    the write position is then advanced by `size` bytes.
+#define MPACK_WRITE_ENCODED(encode_fn, size, ...) do {                                                 \
+    if (MPACK_LIKELY(mpack_writer_buffer_left(writer) >= size) || mpack_writer_ensure(writer, size)) { \
+        MPACK_EXPAND(encode_fn(writer->position, __VA_ARGS__));                                        \
+        writer->position += size;                                                                      \
+    }                                                                                                  \
+} while (0)
+
+// Writes an 8-bit unsigned integer using the smallest encoding that holds it
+// (positive fixint or uint 8). When optimizing for size this forwards to
+// mpack_write_u64().
+void mpack_write_u8(mpack_writer_t* writer, uint8_t value) {
+    #if MPACK_OPTIMIZE_FOR_SIZE
+    mpack_write_u64(writer, value);
+    #else
+    mpack_writer_track_element(writer);
+    if (value > 127) {
+        MPACK_WRITE_ENCODED(mpack_encode_u8, MPACK_TAG_SIZE_U8, value);
+    } else {
+        MPACK_WRITE_ENCODED(mpack_encode_fixuint, MPACK_TAG_SIZE_FIXUINT, value);
+    }
+    #endif
+}
+
+// Writes a 16-bit unsigned integer using the smallest encoding that holds it.
+// When optimizing for size this forwards to mpack_write_u64().
+void mpack_write_u16(mpack_writer_t* writer, uint16_t value) {
+    #if MPACK_OPTIMIZE_FOR_SIZE
+    mpack_write_u64(writer, value);
+    #else
+    mpack_writer_track_element(writer);
+    // widest encoding first; each branch narrows the remaining range
+    if (value > MPACK_UINT8_MAX) {
+        MPACK_WRITE_ENCODED(mpack_encode_u16, MPACK_TAG_SIZE_U16, value);
+    } else if (value > 127) {
+        MPACK_WRITE_ENCODED(mpack_encode_u8, MPACK_TAG_SIZE_U8, (uint8_t)value);
+    } else {
+        MPACK_WRITE_ENCODED(mpack_encode_fixuint, MPACK_TAG_SIZE_FIXUINT, (uint8_t)value);
+    }
+    #endif
+}
+
+// Writes a 32-bit unsigned integer using the smallest encoding that holds it.
+// When optimizing for size this forwards to mpack_write_u64().
+void mpack_write_u32(mpack_writer_t* writer, uint32_t value) {
+    #if MPACK_OPTIMIZE_FOR_SIZE
+    mpack_write_u64(writer, value);
+    #else
+    mpack_writer_track_element(writer);
+    // widest encoding first; each branch narrows the remaining range
+    if (value > MPACK_UINT16_MAX) {
+        MPACK_WRITE_ENCODED(mpack_encode_u32, MPACK_TAG_SIZE_U32, value);
+    } else if (value > MPACK_UINT8_MAX) {
+        MPACK_WRITE_ENCODED(mpack_encode_u16, MPACK_TAG_SIZE_U16, (uint16_t)value);
+    } else if (value > 127) {
+        MPACK_WRITE_ENCODED(mpack_encode_u8, MPACK_TAG_SIZE_U8, (uint8_t)value);
+    } else {
+        MPACK_WRITE_ENCODED(mpack_encode_fixuint, MPACK_TAG_SIZE_FIXUINT, (uint8_t)value);
+    }
+    #endif
+}
+
+// Writes a 64-bit unsigned integer using the smallest encoding that holds it.
+void mpack_write_u64(mpack_writer_t* writer, uint64_t value) {
+    mpack_writer_track_element(writer);
+
+    // widest encoding first; each branch narrows the remaining range
+    if (value > MPACK_UINT32_MAX) {
+        MPACK_WRITE_ENCODED(mpack_encode_u64, MPACK_TAG_SIZE_U64, value);
+    } else if (value > MPACK_UINT16_MAX) {
+        MPACK_WRITE_ENCODED(mpack_encode_u32, MPACK_TAG_SIZE_U32, (uint32_t)value);
+    } else if (value > MPACK_UINT8_MAX) {
+        MPACK_WRITE_ENCODED(mpack_encode_u16, MPACK_TAG_SIZE_U16, (uint16_t)value);
+    } else if (value > 127) {
+        MPACK_WRITE_ENCODED(mpack_encode_u8, MPACK_TAG_SIZE_U8, (uint8_t)value);
+    } else {
+        MPACK_WRITE_ENCODED(mpack_encode_fixuint, MPACK_TAG_SIZE_FIXUINT, (uint8_t)value);
+    }
+}
+
+// Writes an 8-bit signed integer: values from -32 upward become a fixint,
+// anything lower an int 8. When optimizing for size this forwards to
+// mpack_write_i64().
+void mpack_write_i8(mpack_writer_t* writer, int8_t value) {
+    #if MPACK_OPTIMIZE_FOR_SIZE
+    mpack_write_i64(writer, value);
+    #else
+    mpack_writer_track_element(writer);
+    if (value < -32) {
+        MPACK_WRITE_ENCODED(mpack_encode_i8, MPACK_TAG_SIZE_I8, (int8_t)value);
+    } else {
+        // positive and negative fixints share one encoder
+        MPACK_WRITE_ENCODED(mpack_encode_fixint, MPACK_TAG_SIZE_FIXINT, (int8_t)value);
+    }
+    #endif
+}
+
+// Writes a 16-bit signed integer using the smallest encoding that holds it.
+// Non-negative values beyond the fixint range use the unsigned encodings.
+// When optimizing for size this forwards to mpack_write_i64().
+void mpack_write_i16(mpack_writer_t* writer, int16_t value) {
+    #if MPACK_OPTIMIZE_FOR_SIZE
+    mpack_write_i64(writer, value);
+    #else
+    mpack_writer_track_element(writer);
+
+    // values below the fixint range need a signed encoding
+    if (value < -32) {
+        if (value < MPACK_INT8_MIN) {
+            MPACK_WRITE_ENCODED(mpack_encode_i16, MPACK_TAG_SIZE_I16, (int16_t)value);
+        } else {
+            MPACK_WRITE_ENCODED(mpack_encode_i8, MPACK_TAG_SIZE_I8, (int8_t)value);
+        }
+        return;
+    }
+
+    if (value > MPACK_UINT8_MAX) {
+        MPACK_WRITE_ENCODED(mpack_encode_u16, MPACK_TAG_SIZE_U16, (uint16_t)value);
+    } else if (value > 127) {
+        MPACK_WRITE_ENCODED(mpack_encode_u8, MPACK_TAG_SIZE_U8, (uint8_t)value);
+    } else {
+        // positive and negative fixints share one encoder
+        MPACK_WRITE_ENCODED(mpack_encode_fixint, MPACK_TAG_SIZE_FIXINT, (int8_t)value);
+    }
+    #endif
+}
+
+// Writes a 32-bit signed integer using the smallest encoding that holds it.
+// Non-negative values beyond the fixint range use the unsigned encodings.
+// When optimizing for size this forwards to mpack_write_i64().
+void mpack_write_i32(mpack_writer_t* writer, int32_t value) {
+    #if MPACK_OPTIMIZE_FOR_SIZE
+    mpack_write_i64(writer, value);
+    #else
+    mpack_writer_track_element(writer);
+
+    // values below the fixint range need a signed encoding
+    if (value < -32) {
+        if (value < MPACK_INT16_MIN) {
+            MPACK_WRITE_ENCODED(mpack_encode_i32, MPACK_TAG_SIZE_I32, value);
+        } else if (value < MPACK_INT8_MIN) {
+            MPACK_WRITE_ENCODED(mpack_encode_i16, MPACK_TAG_SIZE_I16, (int16_t)value);
+        } else {
+            MPACK_WRITE_ENCODED(mpack_encode_i8, MPACK_TAG_SIZE_I8, (int8_t)value);
+        }
+        return;
+    }
+
+    if (value > MPACK_UINT16_MAX) {
+        MPACK_WRITE_ENCODED(mpack_encode_u32, MPACK_TAG_SIZE_U32, (uint32_t)value);
+    } else if (value > MPACK_UINT8_MAX) {
+        MPACK_WRITE_ENCODED(mpack_encode_u16, MPACK_TAG_SIZE_U16, (uint16_t)value);
+    } else if (value > 127) {
+        MPACK_WRITE_ENCODED(mpack_encode_u8, MPACK_TAG_SIZE_U8, (uint8_t)value);
+    } else {
+        // positive and negative fixints share one encoder
+        MPACK_WRITE_ENCODED(mpack_encode_fixint, MPACK_TAG_SIZE_FIXINT, (int8_t)value);
+    }
+    #endif
+}
+
+// Writes a 64-bit signed integer using the smallest encoding that holds it.
+// Non-negative values beyond the fixint range use the unsigned encodings;
+// when optimizing for size those are delegated to mpack_write_u64().
+void mpack_write_i64(mpack_writer_t* writer, int64_t value) {
+    #if MPACK_OPTIMIZE_FOR_SIZE
+    if (value > 127) {
+        // non-fix positive ints go through the u64 writer to save space
+        mpack_write_u64(writer, (uint64_t)value);
+        return;
+    }
+    #endif
+
+    mpack_writer_track_element(writer);
+
+    // values below the fixint range need a signed encoding
+    if (value < -32) {
+        if (value < MPACK_INT32_MIN) {
+            MPACK_WRITE_ENCODED(mpack_encode_i64, MPACK_TAG_SIZE_I64, value);
+        } else if (value < MPACK_INT16_MIN) {
+            MPACK_WRITE_ENCODED(mpack_encode_i32, MPACK_TAG_SIZE_I32, (int32_t)value);
+        } else if (value < MPACK_INT8_MIN) {
+            MPACK_WRITE_ENCODED(mpack_encode_i16, MPACK_TAG_SIZE_I16, (int16_t)value);
+        } else {
+            MPACK_WRITE_ENCODED(mpack_encode_i8, MPACK_TAG_SIZE_I8, (int8_t)value);
+        }
+        return;
+    }
+
+    #if MPACK_OPTIMIZE_FOR_SIZE
+    // anything above 127 was already handed off above, so this is a fixint
+    MPACK_WRITE_ENCODED(mpack_encode_fixint, MPACK_TAG_SIZE_FIXINT, (int8_t)value);
+    #else
+    if (value > MPACK_UINT32_MAX) {
+        MPACK_WRITE_ENCODED(mpack_encode_u64, MPACK_TAG_SIZE_U64, (uint64_t)value);
+    } else if (value > MPACK_UINT16_MAX) {
+        MPACK_WRITE_ENCODED(mpack_encode_u32, MPACK_TAG_SIZE_U32, (uint32_t)value);
+    } else if (value > MPACK_UINT8_MAX) {
+        MPACK_WRITE_ENCODED(mpack_encode_u16, MPACK_TAG_SIZE_U16, (uint16_t)value);
+    } else if (value > 127) {
+        MPACK_WRITE_ENCODED(mpack_encode_u8, MPACK_TAG_SIZE_U8, (uint8_t)value);
+    } else {
+        MPACK_WRITE_ENCODED(mpack_encode_fixint, MPACK_TAG_SIZE_FIXINT, (int8_t)value);
+    }
+    #endif
+}
+
+#if MPACK_FLOAT
+// Writes a float as one tracked element (type byte plus payload,
+// MPACK_TAG_SIZE_FLOAT bytes in total). Compiled only when MPACK_FLOAT is
+// enabled.
+void mpack_write_float(mpack_writer_t* writer, float value) {
+    mpack_writer_track_element(writer);
+    MPACK_WRITE_ENCODED(mpack_encode_float, MPACK_TAG_SIZE_FLOAT, value);
+}
+#else
+// Variant of the float writer used when MPACK_FLOAT is disabled: writes the
+// float type byte followed by the given 32-bit value as one tracked element.
+// NOTE(review): value is presumably the raw bit pattern of a float; confirm
+// against callers.
+void mpack_write_raw_float(mpack_writer_t* writer, uint32_t value) {
+    mpack_writer_track_element(writer);
+    MPACK_WRITE_ENCODED(mpack_encode_raw_float, MPACK_TAG_SIZE_FLOAT, value);
+}
+#endif
+
+#if MPACK_DOUBLE
+// Writes a double as one tracked element (type byte plus payload,
+// MPACK_TAG_SIZE_DOUBLE bytes in total). Compiled only when MPACK_DOUBLE is
+// enabled.
+void mpack_write_double(mpack_writer_t* writer, double value) {
+    mpack_writer_track_element(writer);
+    MPACK_WRITE_ENCODED(mpack_encode_double, MPACK_TAG_SIZE_DOUBLE, value);
+}
+#else
+// Variant of the double writer used when MPACK_DOUBLE is disabled: writes the
+// double type byte followed by the given 64-bit value as one tracked element.
+// NOTE(review): value is presumably the raw bit pattern of a double; confirm
+// against callers.
+void mpack_write_raw_double(mpack_writer_t* writer, uint64_t value) {
+    mpack_writer_track_element(writer);
+    MPACK_WRITE_ENCODED(mpack_encode_raw_double, MPACK_TAG_SIZE_DOUBLE, value);
+}
+#endif
+
+#if MPACK_EXTENSIONS
+// Writes a timestamp extension element, choosing the shortest of the three
+// timestamp forms that can represent the given seconds/nanoseconds pair.
+//
+// Flags mpack_error_bug (and writes nothing) if nanoseconds exceeds
+// MPACK_TIMESTAMP_NANOSECONDS_MAX or, in compatibility builds, if the writer
+// targets spec version v4 or earlier.
+void mpack_write_timestamp(mpack_writer_t* writer, int64_t seconds, uint32_t nanoseconds) {
+    #if MPACK_COMPATIBILITY
+    if (writer->version <= mpack_version_v4) {
+        mpack_break("Timestamps require spec version v5 or later. This writer is in v%i mode.", (int)writer->version);
+        mpack_writer_flag_error(writer, mpack_error_bug);
+        return;
+    }
+    #endif
+
+    if (nanoseconds > MPACK_TIMESTAMP_NANOSECONDS_MAX) {
+        mpack_break("timestamp nanoseconds out of bounds: %" PRIu32 , nanoseconds);
+        mpack_writer_flag_error(writer, mpack_error_bug);
+        return;
+    }
+
+    mpack_writer_track_element(writer);
+
+    if (seconds >= 0 && seconds < (MPACK_INT64_C(1) << 34)) {
+        // seconds fit in 34 unsigned bits: use the 4- or 8-byte form
+        if (seconds <= MPACK_UINT32_MAX && nanoseconds == 0) {
+            MPACK_WRITE_ENCODED(mpack_encode_timestamp_4, MPACK_EXT_SIZE_TIMESTAMP4, (uint32_t)seconds);
+        } else {
+            MPACK_WRITE_ENCODED(mpack_encode_timestamp_8, MPACK_EXT_SIZE_TIMESTAMP8, seconds, nanoseconds);
+        }
+    } else {
+        // negative or very large seconds need the full 12-byte form
+        MPACK_WRITE_ENCODED(mpack_encode_timestamp_12, MPACK_EXT_SIZE_TIMESTAMP12, seconds, nanoseconds);
+    }
+}
+#endif
+
+// Writes an array header for count elements using the smallest of the
+// fixarray / array16 / array32 encodings. Does no write tracking.
+static void mpack_write_array_notrack(mpack_writer_t* writer, uint32_t count) {
+    if (count > MPACK_UINT16_MAX) {
+        MPACK_WRITE_ENCODED(mpack_encode_array32, MPACK_TAG_SIZE_ARRAY32, (uint32_t)count);
+    } else if (count > 15) {
+        MPACK_WRITE_ENCODED(mpack_encode_array16, MPACK_TAG_SIZE_ARRAY16, (uint16_t)count);
+    } else {
+        MPACK_WRITE_ENCODED(mpack_encode_fixarray, MPACK_TAG_SIZE_FIXARRAY, (uint8_t)count);
+    }
+}
+
+// Writes a map header with the given count using the smallest of the
+// fixmap / map16 / map32 encodings. Does no write tracking.
+static void mpack_write_map_notrack(mpack_writer_t* writer, uint32_t count) {
+    if (count > MPACK_UINT16_MAX) {
+        MPACK_WRITE_ENCODED(mpack_encode_map32, MPACK_TAG_SIZE_MAP32, (uint32_t)count);
+    } else if (count > 15) {
+        MPACK_WRITE_ENCODED(mpack_encode_map16, MPACK_TAG_SIZE_MAP16, (uint16_t)count);
+    } else {
+        MPACK_WRITE_ENCODED(mpack_encode_fixmap, MPACK_TAG_SIZE_FIXMAP, (uint8_t)count);
+    }
+}
+
+// Opens an array of count elements: counts the array as an element of its
+// parent, writes the array header, then registers the new compound with
+// write tracking and with the builder.
+void mpack_start_array(mpack_writer_t* writer, uint32_t count) {
+    mpack_writer_track_element(writer);
+    mpack_write_array_notrack(writer, count);
+    mpack_writer_track_push(writer, mpack_type_array, count);
+    mpack_builder_compound_push(writer);
+}
+
+// Opens a map with the given count: counts the map as an element of its
+// parent, writes the map header, then registers the new compound with write
+// tracking and with the builder.
+void mpack_start_map(mpack_writer_t* writer, uint32_t count) {
+    mpack_writer_track_element(writer);
+    mpack_write_map_notrack(writer, count);
+    mpack_writer_track_push(writer, mpack_type_map, count);
+    mpack_builder_compound_push(writer);
+}
+
+// Writes a string header for count bytes using the smallest available
+// encoding (fixstr, str8, str16 or str32). Does no write tracking.
+static void mpack_start_str_notrack(mpack_writer_t* writer, uint32_t count) {
+    if (count <= 31) {
+        MPACK_WRITE_ENCODED(mpack_encode_fixstr, MPACK_TAG_SIZE_FIXSTR, (uint8_t)count);
+        return;
+    }
+
+    // str8 is only supported in v5 or later.
+    #if MPACK_COMPATIBILITY
+    const bool str8_allowed = writer->version >= mpack_version_v5;
+    #else
+    const bool str8_allowed = true;
+    #endif
+
+    if (str8_allowed && count <= MPACK_UINT8_MAX) {
+        MPACK_WRITE_ENCODED(mpack_encode_str8, MPACK_TAG_SIZE_STR8, (uint8_t)count);
+        return;
+    }
+
+    if (count <= MPACK_UINT16_MAX) {
+        MPACK_WRITE_ENCODED(mpack_encode_str16, MPACK_TAG_SIZE_STR16, (uint16_t)count);
+        return;
+    }
+
+    MPACK_WRITE_ENCODED(mpack_encode_str32, MPACK_TAG_SIZE_STR32, (uint32_t)count);
+}
+
+// Writes a binary blob header for count bytes using the smallest of the
+// bin8 / bin16 / bin32 encodings. Does no write tracking.
+static void mpack_start_bin_notrack(mpack_writer_t* writer, uint32_t count) {
+    #if MPACK_COMPATIBILITY
+    // The v4 spec has a single raw type for all variable-length data, so in
+    // v4 mode the bin functions still work but emit an old-style raw.
+    if (writer->version <= mpack_version_v4) {
+        mpack_start_str_notrack(writer, count);
+        return;
+    }
+    #endif
+
+    if (count > MPACK_UINT16_MAX) {
+        MPACK_WRITE_ENCODED(mpack_encode_bin32, MPACK_TAG_SIZE_BIN32, (uint32_t)count);
+    } else if (count > MPACK_UINT8_MAX) {
+        MPACK_WRITE_ENCODED(mpack_encode_bin16, MPACK_TAG_SIZE_BIN16, (uint16_t)count);
+    } else {
+        MPACK_WRITE_ENCODED(mpack_encode_bin8, MPACK_TAG_SIZE_BIN8, (uint8_t)count);
+    }
+}
+
+// Opens a string of count bytes: counts it as an element of its parent,
+// writes the string header, and registers the open string with write
+// tracking. The string bytes themselves are not written here.
+void mpack_start_str(mpack_writer_t* writer, uint32_t count) {
+    mpack_writer_track_element(writer);
+    mpack_start_str_notrack(writer, count);
+    mpack_writer_track_push(writer, mpack_type_str, count);
+}
+
+// Opens a binary blob of count bytes: counts it as an element of its parent,
+// writes the bin header, and registers the open blob with write tracking.
+// The blob bytes themselves are not written here.
+void mpack_start_bin(mpack_writer_t* writer, uint32_t count) {
+    mpack_writer_track_element(writer);
+    mpack_start_bin_notrack(writer, count);
+    mpack_writer_track_push(writer, mpack_type_bin, count);
+}
+
+#if MPACK_EXTENSIONS
+// Opens an extension element of the given type with a count-byte payload.
+// Payload sizes 1, 2, 4, 8 and 16 use the dedicated fixext headers; all other
+// sizes use the smallest of ext8 / ext16 / ext32. The payload bytes
+// themselves are not written here.
+//
+// In compatibility builds, a writer targeting spec v4 or earlier flags
+// mpack_error_bug and writes nothing.
+void mpack_start_ext(mpack_writer_t* writer, int8_t exttype, uint32_t count) {
+    #if MPACK_COMPATIBILITY
+    if (writer->version <= mpack_version_v4) {
+        mpack_break("Ext types require spec version v5 or later. This writer is in v%i mode.", (int)writer->version);
+        mpack_writer_flag_error(writer, mpack_error_bug);
+        return;
+    }
+    #endif
+
+    mpack_writer_track_element(writer);
+
+    switch (count) {
+        case 1:
+            MPACK_WRITE_ENCODED(mpack_encode_fixext1, MPACK_TAG_SIZE_FIXEXT1, exttype);
+            break;
+        case 2:
+            MPACK_WRITE_ENCODED(mpack_encode_fixext2, MPACK_TAG_SIZE_FIXEXT2, exttype);
+            break;
+        case 4:
+            MPACK_WRITE_ENCODED(mpack_encode_fixext4, MPACK_TAG_SIZE_FIXEXT4, exttype);
+            break;
+        case 8:
+            MPACK_WRITE_ENCODED(mpack_encode_fixext8, MPACK_TAG_SIZE_FIXEXT8, exttype);
+            break;
+        case 16:
+            MPACK_WRITE_ENCODED(mpack_encode_fixext16, MPACK_TAG_SIZE_FIXEXT16, exttype);
+            break;
+        default:
+            // no fixext form for this size: pick by the width of the length
+            if (count <= MPACK_UINT8_MAX) {
+                MPACK_WRITE_ENCODED(mpack_encode_ext8, MPACK_TAG_SIZE_EXT8, exttype, (uint8_t)count);
+            } else if (count <= MPACK_UINT16_MAX) {
+                MPACK_WRITE_ENCODED(mpack_encode_ext16, MPACK_TAG_SIZE_EXT16, exttype, (uint16_t)count);
+            } else {
+                MPACK_WRITE_ENCODED(mpack_encode_ext32, MPACK_TAG_SIZE_EXT32, exttype, (uint32_t)count);
+            }
+            break;
+    }
+
+    mpack_writer_track_push(writer, mpack_type_ext, count);
+}
+#endif
+
+
+
+/*
+ * Compound helpers and other functions
+ */
+
+// Writes a complete str element: the tag followed by `count` bytes of `data`.
+// `data` may be NULL only when `count` is zero (asserted).
+//
+// With MPACK_OPTIMIZE_FOR_SIZE the tag and payload are written through the
+// generic helpers. Otherwise the tag width is selected inline: fixstr for up
+// to 31 bytes, str8 up to MPACK_UINT8_MAX (only for v5+ writers when
+// MPACK_COMPATIBILITY is on), then str16, then str32. For fixstr and str8 the
+// tag and payload are copied together when the buffer has room for both.
+void mpack_write_str(mpack_writer_t* writer, const char* data, uint32_t count) {
+    mpack_assert(count == 0 || data != NULL, "data for string of length %i is NULL", (int)count);
+
+    #if MPACK_OPTIMIZE_FOR_SIZE
+    mpack_writer_track_element(writer);
+    mpack_start_str_notrack(writer, count);
+    mpack_write_native(writer, data, count);
+    #else
+
+    mpack_writer_track_element(writer);
+
+    if (count <= 31) {
+        // The minimum buffer size when using a flush function is guaranteed to
+        // fit the largest possible fixstr.
+        size_t size = count + MPACK_TAG_SIZE_FIXSTR;
+        if (MPACK_LIKELY(mpack_writer_buffer_left(writer) >= size) || mpack_writer_ensure(writer, size)) {
+            char* MPACK_RESTRICT p = writer->position;
+            mpack_encode_fixstr(p, (uint8_t)count);
+            mpack_memcpy(p + MPACK_TAG_SIZE_FIXSTR, data, count);
+            writer->position += count + MPACK_TAG_SIZE_FIXSTR;
+        }
+        return;
+    }
+
+    if (count <= MPACK_UINT8_MAX
+            #if MPACK_COMPATIBILITY
+            && writer->version >= mpack_version_v5
+            #endif
+            ) {
+        // Fast path: tag and payload both fit in what is left of the buffer.
+        if (count + MPACK_TAG_SIZE_STR8 <= mpack_writer_buffer_left(writer)) {
+            char* MPACK_RESTRICT p = writer->position;
+            mpack_encode_str8(p, (uint8_t)count);
+            mpack_memcpy(p + MPACK_TAG_SIZE_STR8, data, count);
+            writer->position += count + MPACK_TAG_SIZE_STR8;
+        } else {
+            // Otherwise write the tag and the payload as two separate steps.
+            MPACK_WRITE_ENCODED(mpack_encode_str8, MPACK_TAG_SIZE_STR8, (uint8_t)count);
+            mpack_write_native(writer, data, count);
+        }
+        return;
+    }
+
+    // str16 and str32 are likely to be a significant fraction of the buffer
+    // size, so we don't bother with a combined space check in order to
+    // minimize code size.
+    if (count <= MPACK_UINT16_MAX) {
+        MPACK_WRITE_ENCODED(mpack_encode_str16, MPACK_TAG_SIZE_STR16, (uint16_t)count);
+        mpack_write_native(writer, data, count);
+    } else {
+        MPACK_WRITE_ENCODED(mpack_encode_str32, MPACK_TAG_SIZE_STR32, (uint32_t)count);
+        mpack_write_native(writer, data, count);
+    }
+
+    #endif
+}
+
+// Writes a complete bin element: opens it with mpack_start_bin(), writes
+// `count` bytes of `data` with mpack_write_bytes(), and closes it with
+// mpack_finish_bin(). `data` may be NULL only when `count` is zero (asserted).
+void mpack_write_bin(mpack_writer_t* writer, const char* data, uint32_t count) {
+    mpack_assert(count == 0 || data != NULL, "data pointer for bin of %i bytes is NULL", (int)count);
+    mpack_start_bin(writer, count);
+    mpack_write_bytes(writer, data, count);
+    mpack_finish_bin(writer);
+}
+
+#if MPACK_EXTENSIONS
+// Writes a complete ext element of type `exttype`: opens it with
+// mpack_start_ext(), writes `count` bytes of `data` with mpack_write_bytes(),
+// and closes it with mpack_finish_ext(). `data` may be NULL only when `count`
+// is zero (asserted).
+void mpack_write_ext(mpack_writer_t* writer, int8_t exttype, const char* data, uint32_t count) {
+    mpack_assert(count == 0 || data != NULL, "data pointer for ext of type %i and %i bytes is NULL", exttype, (int)count);
+    mpack_start_ext(writer, exttype, count);
+    mpack_write_bytes(writer, data, count);
+    mpack_finish_ext(writer);
+}
+#endif
+
+// Writes `count` raw bytes from `data`: passes the count to
+// mpack_writer_track_bytes() and then copies the bytes out with
+// mpack_write_native(). `data` may be NULL only when `count` is zero
+// (asserted).
+void mpack_write_bytes(mpack_writer_t* writer, const char* data, size_t count) {
+    mpack_assert(count == 0 || data != NULL, "data pointer for %i bytes is NULL", (int)count);
+    mpack_writer_track_bytes(writer, count);
+    mpack_write_native(writer, data, count);
+}
+
+// Writes a NUL-terminated string as a str element (the terminator is not
+// written). `cstr` must not be NULL (asserted).
+//
+// If the string is longer than MPACK_UINT32_MAX bytes it cannot be encoded;
+// mpack_error_invalid is flagged and nothing is written.
+void mpack_write_cstr(mpack_writer_t* writer, const char* cstr) {
+    mpack_assert(cstr != NULL, "cstr pointer is NULL");
+    size_t length = mpack_strlen(cstr);
+    if (length > MPACK_UINT32_MAX) {
+        mpack_writer_flag_error(writer, mpack_error_invalid);
+        // Stop here, as mpack_write_utf8_cstr() does: falling through would
+        // pass a length truncated to 32 bits on to mpack_write_str().
+        return;
+    }
+    mpack_write_str(writer, cstr, (uint32_t)length);
+}
+
+// Writes `cstr` with mpack_write_cstr(), or writes nil when `cstr` is NULL.
+void mpack_write_cstr_or_nil(mpack_writer_t* writer, const char* cstr) {
+    if (cstr == NULL) {
+        mpack_write_nil(writer);
+        return;
+    }
+    mpack_write_cstr(writer, cstr);
+}
+
+// Writes `length` bytes of `str` as a str element after checking them with
+// mpack_utf8_check(). If the check fails, mpack_error_invalid is flagged and
+// nothing is written. `str` may be NULL only when `length` is zero (asserted).
+void mpack_write_utf8(mpack_writer_t* writer, const char* str, uint32_t length) {
+    mpack_assert(length == 0 || str != NULL, "data for string of length %i is NULL", (int)length);
+    if (mpack_utf8_check(str, length)) {
+        mpack_write_str(writer, str, length);
+    } else {
+        mpack_writer_flag_error(writer, mpack_error_invalid);
+    }
+}
+
+// Writes a NUL-terminated string through mpack_write_utf8(). Strings longer
+// than MPACK_UINT32_MAX bytes flag mpack_error_invalid and are not written.
+// `cstr` must not be NULL (asserted).
+void mpack_write_utf8_cstr(mpack_writer_t* writer, const char* cstr) {
+    mpack_assert(cstr != NULL, "cstr pointer is NULL");
+    size_t byte_count = mpack_strlen(cstr);
+    if (byte_count <= MPACK_UINT32_MAX) {
+        mpack_write_utf8(writer, cstr, (uint32_t)byte_count);
+    } else {
+        mpack_writer_flag_error(writer, mpack_error_invalid);
+    }
+}
+
+// Writes `cstr` with mpack_write_utf8_cstr(), or writes nil when `cstr` is
+// NULL.
+void mpack_write_utf8_cstr_or_nil(mpack_writer_t* writer, const char* cstr) {
+    if (cstr == NULL) {
+        mpack_write_nil(writer);
+        return;
+    }
+    mpack_write_utf8_cstr(writer, cstr);
+}
+
+/*
+ * Builder implementation
+ *
+ * When a writer is in build mode, it diverts writes to an internal growable
+ * buffer. All elements other than builder start tags are encoded as normal
+ * into the builder buffer (even nested maps and arrays of known size, e.g.
+ * `mpack_start_array()`.) But for compound elements of unknown size, an
+ * mpack_build_t is written to the buffer instead.
+ *
+ * The mpack_build_t tracks everything needed to re-constitute the final
+ * message once all sizes are known. When the last build element is completed,
+ * the builder resolves the build by walking through the builds, outputting the
+ * final encoded tag, and copying everything in between to the writer's true
+ * buffer.
+ *
+ * To make things extra complicated, the builder buffer is not contiguous. It's
+ * allocated in pages, where the first page may be an internal page in the
+ * writer. But, each mpack_build_t must itself be contiguous and aligned
+ * properly within the buffer. This means bytes can be skipped (and wasted)
+ * before the builds or at the end of pages.
+ *
+ * To keep track of this, builds store both their element count and the number
+ * of encoded bytes that follow, and pages store the number of bytes used. As
+ * elements are written, each element adds to the count in the current open
+ * build, and the number of bytes written adds to the current page and the byte
+ * count in the last started build (whether or not it is completed.)
+ */
+
+#if MPACK_BUILDER
+
+#ifdef MPACK_ALIGNOF
+    #define MPACK_BUILD_ALIGNMENT MPACK_ALIGNOF(mpack_build_t)
+#else
+    // without alignof, we just align to the greater of size_t, void* and uint64_t.
+    // (we do this even though we don't have uint64_t in it in case we add it later.)
+    #define MPACK_BUILD_ALIGNMENT_MAX(x, y) ((x) > (y) ? (x) : (y))
+    #define MPACK_BUILD_ALIGNMENT (MPACK_BUILD_ALIGNMENT_MAX(sizeof(void*), \
+                MPACK_BUILD_ALIGNMENT_MAX(sizeof(size_t), sizeof(uint64_t))))
+#endif
+
+// Verifies that the configured builder storage sizes can hold a page header,
+// one mpack_build_t and MPACK_WRITER_MINIMUM_BUFFER_SIZE bytes. Each size is
+// checked with a static assertion and again at runtime; a runtime failure
+// breaks and flags mpack_error_bug on the writer.
+static inline void mpack_builder_check_sizes(mpack_writer_t* writer) {
+
+    // We check internal and page sizes here so that we don't have to check
+    // them again. A new page with a build in it will have a page header,
+    // build, and minimum space for a tag. This will perform horribly and waste
+    // tons of memory if the page size is small, so you're best off just
+    // sticking with the defaults.
+    //
+    // These are all known at compile time, so if they are large
+    // enough this function should trivially optimize to a no-op.
+
+    #if MPACK_BUILDER_INTERNAL_STORAGE
+    // make sure the internal storage is big enough to be useful
+    MPACK_STATIC_ASSERT(MPACK_BUILDER_INTERNAL_STORAGE_SIZE >= (sizeof(mpack_builder_page_t) +
+            sizeof(mpack_build_t) + MPACK_WRITER_MINIMUM_BUFFER_SIZE),
+            "MPACK_BUILDER_INTERNAL_STORAGE_SIZE is too small to be useful!");
+    if (MPACK_BUILDER_INTERNAL_STORAGE_SIZE < (sizeof(mpack_builder_page_t) +
+            sizeof(mpack_build_t) + MPACK_WRITER_MINIMUM_BUFFER_SIZE))
+    {
+        mpack_break("MPACK_BUILDER_INTERNAL_STORAGE_SIZE is too small to be useful!");
+        mpack_writer_flag_error(writer, mpack_error_bug);
+    }
+    #endif
+
+    // make sure the builder page size is big enough to be useful
+    MPACK_STATIC_ASSERT(MPACK_BUILDER_PAGE_SIZE >= (sizeof(mpack_builder_page_t) +
+            sizeof(mpack_build_t) + MPACK_WRITER_MINIMUM_BUFFER_SIZE),
+            "MPACK_BUILDER_PAGE_SIZE is too small to be useful!");
+    if (MPACK_BUILDER_PAGE_SIZE < (sizeof(mpack_builder_page_t) +
+            sizeof(mpack_build_t) + MPACK_WRITER_MINIMUM_BUFFER_SIZE))
+    {
+        mpack_break("MPACK_BUILDER_PAGE_SIZE is too small to be useful!");
+        mpack_writer_flag_error(writer, mpack_error_bug);
+    }
+}
+
+// Returns the total size in bytes of `page`: the size of the writer's internal
+// storage when `page` is that storage (only with
+// MPACK_BUILDER_INTERNAL_STORAGE), otherwise MPACK_BUILDER_PAGE_SIZE.
+static inline size_t mpack_builder_page_size(mpack_writer_t* writer, mpack_builder_page_t* page) {
+    #if MPACK_BUILDER_INTERNAL_STORAGE
+    if ((char*)page == writer->builder.internal)
+        return sizeof(writer->builder.internal);
+    #else
+    // both parameters are unused in this configuration
+    (void)writer;
+    (void)page;
+    #endif
+    return MPACK_BUILDER_PAGE_SIZE;
+}
+
+// Rounds `bytes_used` up to the next multiple of MPACK_BUILD_ALIGNMENT and
+// returns it. A value that is already a multiple is returned unchanged.
+static inline size_t mpack_builder_align_build(size_t bytes_used) {
+    // Bump past the next boundary, then drop the remainder to land on it.
+    size_t bumped = bytes_used + (MPACK_BUILD_ALIGNMENT - 1);
+    size_t aligned = bumped - (bumped % MPACK_BUILD_ALIGNMENT);
+    mpack_log("aligned %zi to %zi\n", bytes_used, aligned);
+    return aligned;
+}
+
+// Releases `page` with MPACK_FREE(), unless it is the writer's internal
+// storage (only with MPACK_BUILDER_INTERNAL_STORAGE), in which case nothing is
+// freed.
+static inline void mpack_builder_free_page(mpack_writer_t* writer, mpack_builder_page_t* page) {
+    mpack_log("freeing page %p\n", (void*)page);
+    #if MPACK_BUILDER_INTERNAL_STORAGE
+    if ((char*)page == writer->builder.internal)
+        return;
+    #else
+    (void)writer;
+    #endif
+    MPACK_FREE(page);
+}
+
+// Returns how many bytes of `page` are still unused: its total size minus its
+// bytes_used.
+static inline size_t mpack_builder_page_remaining(mpack_writer_t* writer, mpack_builder_page_t* page) {
+    size_t capacity = mpack_builder_page_size(writer, page);
+    return capacity - page->bytes_used;
+}
+
+// Points the writer's buffer, position and end at the unused tail of the
+// builder's current page. Does nothing if the writer is in an error state.
+static void mpack_builder_configure_buffer(mpack_writer_t* writer) {
+    if (mpack_writer_error(writer) != mpack_ok)
+        return;
+
+    mpack_builder_page_t* page = writer->builder.current_page;
+    mpack_assert(page != NULL, "page is null??");
+
+    // Both buffer and position start at the first unused byte of the page;
+    // end is one past the last byte of the page.
+    char* page_start = (char*)page;
+    char* first_free = page_start + page->bytes_used;
+    writer->buffer = first_free;
+    writer->position = first_free;
+    writer->end = page_start + mpack_builder_page_size(writer, page);
+    mpack_log("configuring buffer from %p to %p\n", (void*)writer->position, (void*)writer->end);
+}
+
+// Allocates a new page of MPACK_BUILDER_PAGE_SIZE bytes, links it after the
+// builder's current page and makes it the current page. If the allocation
+// fails, mpack_error_memory is flagged and the builder is left unchanged.
+static void mpack_builder_add_page(mpack_writer_t* writer) {
+    mpack_assert(writer->error == mpack_ok);
+
+    mpack_log("adding a page.\n");
+    mpack_builder_page_t* fresh = (mpack_builder_page_t*)MPACK_MALLOC(MPACK_BUILDER_PAGE_SIZE);
+    if (fresh == NULL) {
+        mpack_writer_flag_error(writer, mpack_error_memory);
+        return;
+    }
+
+    // A new page starts with only its header counted as used.
+    fresh->bytes_used = sizeof(mpack_builder_page_t);
+    fresh->next = NULL;
+
+    mpack_builder_t* builder = &writer->builder;
+    builder->current_page->next = fresh;
+    builder->current_page = fresh;
+}
+
+// Measures how many bytes the writer has put into the current page since its
+// buffer was last configured, and adds that amount to both the page's
+// bytes_used and the latest build's byte count. The writer's buffer pointers
+// are not touched; callers follow up with mpack_builder_configure_buffer()
+// (after adding a page, build, etc.) to reset them.
+static void mpack_builder_apply_writes(mpack_writer_t* writer) {
+    mpack_assert(writer->error == mpack_ok);
+    mpack_builder_t* builder = &writer->builder;
+    mpack_log("latest build is %p\n", (void*)builder->latest_build);
+
+    // The distance from buffer to position is what was written to the page.
+    size_t delta = (size_t)(writer->position - writer->buffer);
+    mpack_log("applying write of %zi bytes to build %p\n", delta, (void*)builder->latest_build);
+
+    mpack_assert(builder->current_page != NULL);
+    mpack_assert(builder->latest_build != NULL);
+    builder->current_page->bytes_used += delta;
+    builder->latest_build->bytes += delta;
+    mpack_log("latest build %p now has %zi bytes\n", (void*)builder->latest_build, builder->latest_build->bytes);
+}
+
+// Handles a full builder page: records the bytes written so far, appends a new
+// page, and points the writer at it. If adding the page flags an error,
+// mpack_builder_configure_buffer() returns without changing the writer.
+static void mpack_builder_flush(mpack_writer_t* writer) {
+    mpack_assert(writer->error == mpack_ok);
+    mpack_builder_apply_writes(writer);
+    mpack_builder_add_page(writer);
+    mpack_builder_configure_buffer(writer);
+}
+
+// Starts a builder session: stashes the writer's real buffer pointers and sets
+// up the first builder page. That page is the writer's internal storage when
+// MPACK_BUILDER_INTERNAL_STORAGE is enabled, otherwise a freshly allocated
+// page; on allocation failure mpack_error_memory is flagged and the function
+// returns before any page is installed.
+MPACK_NOINLINE static void mpack_builder_begin(mpack_writer_t* writer) {
+    mpack_builder_t* builder = &writer->builder;
+    mpack_assert(writer->error == mpack_ok);
+    mpack_assert(builder->current_build == NULL);
+    mpack_assert(builder->latest_build == NULL);
+    mpack_assert(builder->pages == NULL);
+
+    // If this is the first build, we need to stash the real buffer backing our
+    // writer. We'll be diverting the writer to our build buffer.
+    builder->stash_buffer = writer->buffer;
+    builder->stash_position = writer->position;
+    builder->stash_end = writer->end;
+
+    mpack_builder_page_t* page;
+
+    // we've checked that both these sizes are large enough above.
+    #if MPACK_BUILDER_INTERNAL_STORAGE
+    page = (mpack_builder_page_t*)builder->internal;
+    mpack_log("beginning builder with internal storage %p\n", (void*)page);
+    #else
+    page = (mpack_builder_page_t*)MPACK_MALLOC(MPACK_BUILDER_PAGE_SIZE);
+    if (page == NULL) {
+        mpack_writer_flag_error(writer, mpack_error_memory);
+        return;
+    }
+    mpack_log("beginning builder with allocated page %p\n", (void*)page);
+    #endif
+
+    // the first page starts with only its header counted as used
+    page->next = NULL;
+    page->bytes_used = sizeof(mpack_builder_page_t);
+    builder->pages = page;
+    builder->current_page = page;
+}
+
+// Opens a new build (a map or array whose element count is not yet known) of
+// the given `type`.
+//
+// The first build of a session diverts the writer into the builder's page
+// buffer; a nested build first records the bytes written so far. An
+// mpack_build_t is then placed at the next aligned offset in the current page
+// (on a new page if it does not fit), made the current and latest build, and
+// the writer is pointed at the space after it.
+//
+// Returns early, leaving the writer in its error state, if the writer already
+// has an error or if a page allocation fails.
+static void mpack_builder_build(mpack_writer_t* writer, mpack_type_t type) {
+    mpack_builder_check_sizes(writer);
+    if (mpack_writer_error(writer) != mpack_ok)
+        return;
+
+    mpack_writer_track_element(writer);
+    mpack_writer_track_push_builder(writer, type);
+
+    mpack_builder_t* builder = &writer->builder;
+
+    if (builder->current_build == NULL) {
+        mpack_builder_begin(writer);
+    } else {
+        mpack_builder_apply_writes(writer);
+    }
+    if (mpack_writer_error(writer) != mpack_ok)
+        return;
+
+    // find aligned space for a new build. if there isn't enough space in the
+    // current page, we discard the remaining space in it and allocate a new
+    // page.
+    size_t offset = mpack_builder_align_build(builder->current_page->bytes_used);
+    if (offset + sizeof(mpack_build_t) > mpack_builder_page_size(writer, builder->current_page)) {
+        mpack_log("not enough space for a build. %zi bytes used of %zi in this page\n",
+                builder->current_page->bytes_used, mpack_builder_page_size(writer, builder->current_page));
+        mpack_builder_add_page(writer);
+        // if the allocation failed, current_page is still the full page. we
+        // must stop here, otherwise the build would be placed past its end.
+        if (mpack_writer_error(writer) != mpack_ok)
+            return;
+        // there is always enough space in a fresh page.
+        offset = mpack_builder_align_build(builder->current_page->bytes_used);
+    }
+
+    // allocate the build within the page. note that we don't keep track of the
+    // space wasted due to the offset. instead the previous build has stored
+    // how many bytes follow it, and we'll redo this offset calculation to find
+    // this build after it.
+    mpack_builder_page_t* page = builder->current_page;
+    page->bytes_used = offset + sizeof(mpack_build_t);
+    mpack_assert(page->bytes_used <= mpack_builder_page_size(writer, page));
+    mpack_build_t* build = (mpack_build_t*)((char*)page + offset);
+    mpack_log("created new build %p within page %p, which now has %zi bytes used\n",
+            (void*)build, (void*)page, page->bytes_used);
+
+    // configure the new build
+    build->parent = builder->current_build;
+    build->bytes = 0;
+    build->count = 0;
+    build->type = type;
+    build->key_needs_value = false;
+    build->nested_compound_elements = 0;
+
+    mpack_log("setting current and latest build to new build %p\n", (void*)build);
+    builder->current_build = build;
+    builder->latest_build = build;
+
+    // we always need to provide a buffer that meets the minimum buffer size.
+    // if there isn't enough space, we discard the remaining space in the
+    // current page and allocate a new one.
+    if (mpack_builder_page_remaining(writer, page) < MPACK_WRITER_MINIMUM_BUFFER_SIZE) {
+        mpack_log("less than minimum buffer size in current page. %zi bytes used of %zi in this page\n",
+                builder->current_page->bytes_used, mpack_builder_page_size(writer, builder->current_page));
+        mpack_builder_add_page(writer);
+        if (mpack_writer_error(writer) != mpack_ok)
+            return;
+    }
+    mpack_assert(mpack_builder_page_remaining(writer, builder->current_page) >= MPACK_WRITER_MINIMUM_BUFFER_SIZE);
+    mpack_builder_configure_buffer(writer);
+}
+
+// Finishes a builder session once the outermost build is complete.
+//
+// Restores the writer's stashed real buffer, clears the builder state, then
+// walks the page list from the first page: for each build it writes the map or
+// array tag with the final element count, followed by the encoded bytes that
+// were recorded after it. Pages are freed as the walk leaves them.
+//
+// The writer's error callback is suspended for the duration of the walk and
+// invoked afterwards if an error was flagged. This also holds for the
+// invalid-build-type path, which frees the remaining pages before reporting.
+MPACK_NOINLINE
+static void mpack_builder_resolve(mpack_writer_t* writer) {
+    mpack_builder_t* builder = &writer->builder;
+
+    // We should not have gotten here if we are in an error state. If an error
+    // occurs with an open builder, the writer will free the open builder pages
+    // when destroyed.
+    mpack_assert(mpack_writer_error(writer) == mpack_ok, "can't resolve in error state!");
+
+    // We don't want the user to longjmp out of any I/O errors while we are
+    // walking the page list, so defer error callbacks to after we're done.
+    mpack_writer_error_t error_fn = writer->error_fn;
+    writer->error_fn = NULL;
+
+    // The starting page is the internal storage (if we have it), otherwise
+    // it's the first page in the array
+    mpack_builder_page_t* page =
+        #if MPACK_BUILDER_INTERNAL_STORAGE
+        (mpack_builder_page_t*)builder->internal
+        #else
+        builder->pages
+        #endif
+        ;
+
+    // We start by restoring the writer's original buffer so we can write the
+    // data for real.
+    writer->buffer = builder->stash_buffer;
+    writer->position = builder->stash_position;
+    writer->end = builder->stash_end;
+
+    // We can also close out the build now.
+    builder->current_build = NULL;
+    builder->latest_build = NULL;
+    builder->current_page = NULL;
+    builder->pages = NULL;
+
+    // the starting page always starts with the first build
+    size_t offset = mpack_builder_align_build(sizeof(mpack_builder_page_t));
+    mpack_build_t* build = (mpack_build_t*)((char*)page + offset);
+    mpack_log("starting resolve with build %p in page %p\n", (void*)build, (void*)page);
+
+    // encoded data immediately follows the build
+    offset += sizeof(mpack_build_t);
+
+    // Walk the list of builds, writing everything out in the buffer. Note that
+    // we don't check for errors anywhere. The lower-level write functions will
+    // all check for errors and do nothing after an error occurs. We need to
+    // walk all pages anyway to free them, so there's not much point in
+    // optimizing an error path at the expense of the normal path.
+    while (true) {
+
+        // write out the container tag
+        mpack_log("writing out an %s with count %" PRIu32 " followed by %zi bytes\n",
+                mpack_type_to_string(build->type), build->count, build->bytes);
+        switch (build->type) {
+            case mpack_type_map:
+                mpack_write_map_notrack(writer, build->count);
+                break;
+            case mpack_type_array:
+                mpack_write_array_notrack(writer, build->count);
+                break;
+            default:
+                mpack_break("invalid type in builder?");
+                mpack_writer_flag_error(writer, mpack_error_bug);
+
+                // The builder no longer references the page list, so nothing
+                // else can free it. Release this page and all that follow.
+                while (page != NULL) {
+                    mpack_builder_page_t* next_page = page->next;
+                    mpack_builder_free_page(writer, page);
+                    page = next_page;
+                }
+
+                // Restore the error handler and report, exactly as the normal
+                // exit path below does.
+                writer->error_fn = error_fn;
+                if (writer->error_fn && mpack_writer_error(writer) != mpack_ok)
+                    writer->error_fn(writer, writer->error);
+                return;
+        }
+
+        // figure out how many bytes follow this container. we're going to be
+        // freeing pages as we write, so we need to be done with this build.
+        size_t left = build->bytes;
+        build = NULL;
+
+        // write out all bytes following this container
+        while (left > 0) {
+            size_t bytes_used = page->bytes_used;
+            if (offset < bytes_used) {
+                size_t step = bytes_used - offset;
+                if (step > left)
+                    step = left;
+                mpack_log("writing out %zi bytes starting at %p in page %p\n",
+                        step, (void*)((char*)page + offset), (void*)page);
+                mpack_write_native(writer, (char*)page + offset, step);
+                offset += step;
+                left -= step;
+            }
+
+            if (left == 0) {
+                mpack_log("done writing bytes for this build\n");
+                break;
+            }
+
+            // still need to write more bytes. free this page and jump to the
+            // next one.
+            mpack_builder_page_t* next_page = page->next;
+            mpack_builder_free_page(writer, page);
+            page = next_page;
+            // bytes on the next page immediately follow the header.
+            offset = sizeof(mpack_builder_page_t);
+        }
+
+        // now see if we can find another build.
+        offset = mpack_builder_align_build(offset);
+        if (offset + sizeof(mpack_build_t) > mpack_builder_page_size(writer, page)) {
+            mpack_log("not enough room in this page for another build\n");
+            mpack_builder_page_t* next_page = page->next;
+            mpack_builder_free_page(writer, page);
+            page = next_page;
+            if (page == NULL) {
+                mpack_log("no more pages\n");
+                // there are no more pages. we're done.
+                break;
+            }
+            offset = mpack_builder_align_build(sizeof(mpack_builder_page_t));
+        }
+        if (offset + sizeof(mpack_build_t) > page->bytes_used) {
+            // there is no more data. we're done.
+            mpack_log("no more data\n");
+            mpack_builder_free_page(writer, page);
+            break;
+        }
+
+        // we've found another build. loop around!
+        build = (mpack_build_t*)((char*)page + offset);
+        offset += sizeof(mpack_build_t);
+        mpack_log("found build %p\n", (void*)build);
+    }
+
+    mpack_log("done resolve.\n");
+
+    // We can now restore the error handler and call it if an error occurred.
+    writer->error_fn = error_fn;
+    if (writer->error_fn && mpack_writer_error(writer) != mpack_ok)
+        writer->error_fn(writer, writer->error);
+}
+
+// Closes the current build, which must be of the given `type`.
+//
+// Flags mpack_error_bug if the build has a key still waiting for its value or
+// a nested compound element that was never finished. Otherwise the pending
+// writes are recorded, and either the parent build becomes current again or,
+// for the outermost build, the whole session is resolved into the writer's
+// real buffer.
+static void mpack_builder_complete(mpack_writer_t* writer, mpack_type_t type) {
+    mpack_writer_track_pop_builder(writer, type);
+    if (mpack_writer_error(writer) != mpack_ok)
+        return;
+
+    mpack_builder_t* builder = &writer->builder;
+    mpack_assert(builder->current_build != NULL, "no build in progress!");
+    mpack_assert(builder->latest_build != NULL, "missing latest build!");
+    mpack_assert(builder->current_build->type == type, "completing wrong type!");
+
+    mpack_build_t* closing = builder->current_build;
+    mpack_log("completing build %p\n", (void*)closing);
+
+    if (closing->key_needs_value) {
+        mpack_break("an odd number of elements were written in a map!");
+        mpack_writer_flag_error(writer, mpack_error_bug);
+        return;
+    }
+
+    if (closing->nested_compound_elements != 0) {
+        mpack_break("there is a nested unfinished non-build map or array in this build.");
+        mpack_writer_flag_error(writer, mpack_error_bug);
+        return;
+    }
+
+    // Whatever was written into this build has to be recorded before it is
+    // popped.
+    mpack_builder_apply_writes(writer);
+
+    // The outermost build has no parent: completing it ends the session.
+    if (closing->parent == NULL) {
+        mpack_builder_resolve(writer);
+        return;
+    }
+
+    // A nested build simply hands control back to its parent.
+    mpack_log("setting current build to parent build %p. latest is still %p.\n",
+            (void*)closing->parent, (void*)builder->latest_build);
+    builder->current_build = closing->parent;
+    mpack_builder_configure_buffer(writer);
+}
+
+// Opens a map build; forwards to mpack_builder_build() with mpack_type_map.
+void mpack_build_map(mpack_writer_t* writer) {
+    mpack_builder_build(writer, mpack_type_map);
+}
+
+// Opens an array build; forwards to mpack_builder_build() with
+// mpack_type_array.
+void mpack_build_array(mpack_writer_t* writer) {
+    mpack_builder_build(writer, mpack_type_array);
+}
+
+// Closes a map build; forwards to mpack_builder_complete() with
+// mpack_type_map.
+void mpack_complete_map(mpack_writer_t* writer) {
+    mpack_builder_complete(writer, mpack_type_map);
+}
+
+// Closes an array build; forwards to mpack_builder_complete() with
+// mpack_type_array.
+void mpack_complete_array(mpack_writer_t* writer) {
+    mpack_builder_complete(writer, mpack_type_array);
+}
+
+#endif // MPACK_BUILDER
+#endif // MPACK_WRITER
+
+MPACK_SILENCE_WARNINGS_END
+
+/* mpack/mpack-reader.c.c */
+
+#define MPACK_INTERNAL 1
+
+/* #include "mpack-reader.h" */
+
+MPACK_SILENCE_WARNINGS_BEGIN
+
+#if MPACK_READER
+
+static void mpack_reader_skip_using_fill(mpack_reader_t* reader, size_t count);
+
+// Initializes `reader` over a caller-supplied buffer of `size` bytes, of which
+// the first `count` bytes already hold data. The reader struct is zeroed
+// first. `buffer` must not be NULL (asserted).
+void mpack_reader_init(mpack_reader_t* reader, char* buffer, size_t size, size_t count) {
+    mpack_assert(buffer != NULL, "buffer is NULL");
+
+    mpack_memset(reader, 0, sizeof(*reader));
+
+    // readable window: [buffer, buffer + count)
+    reader->data = buffer;
+    reader->end = buffer + count;
+
+    // backing storage
+    reader->buffer = buffer;
+    reader->size = size;
+
+    #if MPACK_READ_TRACKING
+    mpack_reader_flag_if_error(reader, mpack_track_init(&reader->track));
+    #endif
+
+    mpack_log("===========================\n");
+    mpack_log("initializing reader with buffer size %i\n", (int)size);
+}
+
+// Initializes `reader` directly into an error state: the struct is zeroed and
+// its error field is set to `error`.
+void mpack_reader_init_error(mpack_reader_t* reader, mpack_error_t error) {
+    mpack_memset(reader, 0, sizeof(*reader));
+    reader->error = error;
+
+    mpack_log("===========================\n");
+    mpack_log("initializing reader error state %i\n", (int)error);
+}
+
+// Initializes `reader` over `count` bytes of read-only `data`. The reader
+// struct is zeroed first, so buffer and size stay unset. `data` must not be
+// NULL (asserted).
+void mpack_reader_init_data(mpack_reader_t* reader, const char* data, size_t count) {
+    mpack_assert(data != NULL, "data is NULL");
+
+    mpack_memset(reader, 0, sizeof(*reader));
+
+    // readable window: [data, data + count)
+    const char* limit = data + count;
+    reader->end = limit;
+    reader->data = data;
+
+    #if MPACK_READ_TRACKING
+    mpack_reader_flag_if_error(reader, mpack_track_init(&reader->track));
+    #endif
+
+    mpack_log("===========================\n");
+    mpack_log("initializing reader with data size %i\n", (int)count);
+}
+
+// Installs `fill` as the reader's fill function. The reader must have a
+// buffer of at least MPACK_READER_MINIMUM_BUFFER_SIZE bytes; otherwise
+// mpack_error_bug is flagged and the fill function is not installed.
+void mpack_reader_set_fill(mpack_reader_t* reader, mpack_reader_fill_t fill) {
+    MPACK_STATIC_ASSERT(MPACK_READER_MINIMUM_BUFFER_SIZE >= MPACK_MAXIMUM_TAG_SIZE,
+            "minimum buffer size must fit any tag!");
+
+    if (reader->size == 0) {
+        // no buffer at all
+        mpack_break("cannot use fill function without a writeable buffer!");
+        mpack_reader_flag_error(reader, mpack_error_bug);
+    } else if (reader->size < MPACK_READER_MINIMUM_BUFFER_SIZE) {
+        // a buffer, but too small
+        mpack_break("buffer size is %i, but minimum buffer size for fill is %i",
+                (int)reader->size, MPACK_READER_MINIMUM_BUFFER_SIZE);
+        mpack_reader_flag_error(reader, mpack_error_bug);
+    } else {
+        reader->fill = fill;
+    }
+}
+
+// Installs `skip` as the reader's skip function. The reader is expected to
+// have a buffer (non-zero size); this is only asserted, not flagged.
+void mpack_reader_set_skip(mpack_reader_t* reader, mpack_reader_skip_t skip) {
+    mpack_assert(reader->size != 0, "cannot use skip function without a writeable buffer!");
+    reader->skip = skip;
+}
+
+#if MPACK_STDIO
+// Fill function for FILE-backed readers. Reads up to `count` bytes from the
+// FILE stored in the reader's context into `buffer` and returns fread()'s
+// result. If the stream is already at end-of-file, mpack_error_eof is flagged
+// and 0 is returned.
+static size_t mpack_file_reader_fill(mpack_reader_t* reader, char* buffer, size_t count) {
+    FILE* file = (FILE*)reader->context;
+    if (!feof(file))
+        return fread((void*)buffer, 1, count, file);
+
+    mpack_reader_flag_error(reader, mpack_error_eof);
+    return 0;
+}
+
+// Skip function for FILE-backed readers. Advances the FILE stored in the
+// reader's context by `count` bytes.
+//
+// A seekable stream is advanced with fseek(), provided `count` fits in a long.
+// If the stream is not seekable, the count does not fit, or fseek() fails
+// without setting the stream's error indicator, the bytes are skipped by
+// reading them through the fill function. An fseek() failure that sets the
+// error indicator flags mpack_error_io. Does nothing if the reader is already
+// in an error state.
+static void mpack_file_reader_skip(mpack_reader_t* reader, size_t count) {
+    if (mpack_reader_error(reader) != mpack_ok)
+        return;
+    FILE* file = (FILE*)reader->context;
+
+    // fseek() takes a long, which can be narrower than size_t. A count that
+    // does not survive the round trip would wrap (possibly to a negative
+    // offset and seek backwards), so such counts go through the fill fallback.
+    long int seek_offset = (long int)count;
+    bool fits_in_long = seek_offset >= 0 && (size_t)seek_offset == count;
+
+    // We call ftell() to test whether the stream is seekable
+    // without causing a file error.
+    if (fits_in_long && ftell(file) >= 0) {
+        mpack_log("seeking forward %i bytes\n", (int)count);
+        if (fseek(file, seek_offset, SEEK_CUR) == 0)
+            return;
+        mpack_log("fseek() didn't return zero!\n");
+        if (ferror(file)) {
+            mpack_reader_flag_error(reader, mpack_error_io);
+            return;
+        }
+    }
+
+    // If the stream is not seekable, fall back to the fill function.
+    mpack_reader_skip_using_fill(reader, count);
+}
+
+// Teardown for FILE-backed readers that do not own the FILE: frees the
+// reader's buffer and clears its buffer, context, size and callback fields.
+// The FILE itself is not closed here.
+static void mpack_file_reader_teardown(mpack_reader_t* reader) {
+    MPACK_FREE(reader->buffer);
+    reader->buffer = NULL;
+    reader->context = NULL;
+    reader->size = 0;
+    reader->fill = NULL;
+    reader->skip = NULL;
+    reader->teardown = NULL;
+}
+
+// Teardown for FILE-backed readers that own the FILE: closes the FILE stored
+// in the reader's context (flagging mpack_error_io if fclose() fails) and then
+// runs the common file teardown.
+static void mpack_file_reader_teardown_close(mpack_reader_t* reader) {
+    FILE* file = (FILE*)reader->context;
+
+    // fclose() is only attempted when there is a file to close.
+    if (file != NULL && fclose(file) != 0)
+        mpack_reader_flag_error(reader, mpack_error_io);
+
+    mpack_file_reader_teardown(reader);
+}
+
+// Initializes `reader` to read from an already-open `file`, using a freshly
+// allocated buffer of MPACK_BUFFER_SIZE bytes. When `close_when_done` is true
+// the installed teardown also closes the file.
+//
+// If the buffer cannot be allocated, the reader is initialized in the
+// mpack_error_memory state and the file is closed immediately when
+// `close_when_done` is true. `file` must not be NULL (asserted).
+void mpack_reader_init_stdfile(mpack_reader_t* reader, FILE* file, bool close_when_done) {
+    mpack_assert(file != NULL, "file is NULL");
+
+    size_t capacity = MPACK_BUFFER_SIZE;
+    char* buffer = (char*)MPACK_MALLOC(capacity);
+
+    if (buffer != NULL) {
+        // The buffer starts out empty (count of 0); data arrives via fill.
+        mpack_reader_init(reader, buffer, capacity, 0);
+        mpack_reader_set_context(reader, file);
+        mpack_reader_set_fill(reader, mpack_file_reader_fill);
+        mpack_reader_set_skip(reader, mpack_file_reader_skip);
+        if (close_when_done) {
+            mpack_reader_set_teardown(reader, mpack_file_reader_teardown_close);
+        } else {
+            mpack_reader_set_teardown(reader, mpack_file_reader_teardown);
+        }
+        return;
+    }
+
+    // Allocation failed: no teardown will be installed, so honour the
+    // ownership request right away.
+    mpack_reader_init_error(reader, mpack_error_memory);
+    if (close_when_done) {
+        fclose(file);
+    }
+}
+
+// Opens `filename` for binary reading and initializes `reader` on it; the
+// reader closes the file when done. If the file cannot be opened, the reader
+// is initialized in the mpack_error_io state. `filename` must not be NULL
+// (asserted).
+void mpack_reader_init_filename(mpack_reader_t* reader, const char* filename) {
+    mpack_assert(filename != NULL, "filename is NULL");
+
+    FILE* file = fopen(filename, "rb");
+    if (file != NULL) {
+        mpack_reader_init_stdfile(reader, file, true);
+    } else {
+        mpack_reader_init_error(reader, mpack_error_io);
+    }
+}
+#endif
+
+// Destroys `reader`: tears down read tracking (when MPACK_READ_TRACKING is
+// enabled), runs the reader's teardown function if one is set, and returns the
+// reader's final error state.
+mpack_error_t mpack_reader_destroy(mpack_reader_t* reader) {
+
+    // clean up tracking, asserting if we're not already in an error state
+    #if MPACK_READ_TRACKING
+    mpack_reader_flag_if_error(reader, mpack_track_destroy(&reader->track, mpack_reader_error(reader) != mpack_ok));
+    #endif
+
+    if (reader->teardown)
+        reader->teardown(reader);
+    reader->teardown = NULL;
+
+    return reader->error;
+}
+
+// Returns the number of unread bytes currently held by the reader (end minus
+// data) and, when `data` is non-NULL, stores a pointer to them in `*data`.
+// Returns 0 without touching `*data` if the reader is in an error state or,
+// with MPACK_READ_TRACKING, if the tracking empty-check reports an error.
+size_t mpack_reader_remaining(mpack_reader_t* reader, const char** data) {
+    if (mpack_reader_error(reader) != mpack_ok)
+        return 0;
+
+    #if MPACK_READ_TRACKING
+    if (mpack_reader_flag_if_error(reader, mpack_track_check_empty(&reader->track)) != mpack_ok)
+        return 0;
+    #endif
+
+    const char* cursor = reader->data;
+    if (data != NULL)
+        *data = cursor;
+    return (size_t)(reader->end - cursor);
+}
+
+void mpack_reader_flag_error(mpack_reader_t* reader, mpack_error_t error) {
+    mpack_log("reader %p setting error %i: %s\n", (void*)reader, (int)error, mpack_error_to_string(error));
+
+    if (reader->error == mpack_ok) {
+        reader->error = error;
+        reader->end = reader->data;
+        if (reader->error_fn)
+            reader->error_fn(reader, error);
+    }
+}
+
+// Calls the reader's fill function repeatedly until at least `min_bytes` have
+// been read into `p`, never asking for more than `max_bytes` in total.
+// Returns the number of bytes read, or 0 if the fill function flagged an error
+// or made no progress (in which case mpack_error_io is flagged).
+MPACK_NOINLINE static size_t mpack_fill_range(mpack_reader_t* reader, char* p, size_t min_bytes, size_t max_bytes) {
+    mpack_assert(reader->fill != NULL, "mpack_fill_range() called with no fill function?");
+    mpack_assert(min_bytes > 0, "cannot fill zero bytes!");
+    mpack_assert(max_bytes >= min_bytes, "min_bytes %i cannot be larger than max_bytes %i!",
+            (int)min_bytes, (int)max_bytes);
+
+    size_t total = 0;
+    while (total < min_bytes) {
+        size_t got = reader->fill(reader, p + total, max_bytes - total);
+
+        // A fill function may report failure by flagging an error itself...
+        if (mpack_reader_error(reader) != mpack_ok)
+            return 0;
+
+        // ...or by returning 0. A return of -1 (as size_t) is treated the
+        // same way, just in case.
+        if (got == 0 || got == ((size_t)(-1))) {
+            mpack_reader_flag_error(reader, mpack_error_io);
+            return 0;
+        }
+
+        total += got;
+    }
+    return total;
+}
+
+// Makes at least `count` contiguous bytes available at reader->data when the
+// buffer currently holds fewer than that. The unread bytes are shifted to the
+// front of the buffer and the fill function is used to append more. Returns
+// true on success; on failure an error is flagged and false is returned.
+MPACK_NOINLINE bool mpack_reader_ensure_straddle(mpack_reader_t* reader, size_t count) {
+    mpack_assert(count != 0, "cannot ensure zero bytes!");
+    mpack_assert(reader->error == mpack_ok, "reader cannot be in an error state!");
+
+    mpack_assert(count > (size_t)(reader->end - reader->data),
+            "straddling ensure requested for %i bytes, but there are %i bytes "
+            "left in buffer. call mpack_reader_ensure() instead",
+            (int)count, (int)(reader->end - reader->data));
+
+    // without a fill function the buffer is all the data there is, so
+    // running short means the data is truncated: that is reported as
+    // mpack_error_invalid rather than mpack_error_io.
+    if (reader->fill == NULL) {
+        mpack_reader_flag_error(reader, mpack_error_invalid);
+        return false;
+    }
+
+    // an in-place request larger than the whole buffer can never be
+    // satisfied; report it as mpack_error_too_big.
+    if (count > reader->size) {
+        mpack_reader_flag_error(reader, mpack_error_too_big);
+        return false;
+    }
+
+    // slide the unread bytes down to the start of the buffer
+    const size_t kept = (size_t)(reader->end - reader->data);
+    mpack_memmove(reader->buffer, reader->data, kept);
+    reader->data = reader->buffer;
+    reader->end = reader->buffer + kept;
+
+    // append at least the missing bytes, accepting as many as will fit
+    const size_t added = mpack_fill_range(reader, reader->buffer + kept,
+            count - kept, reader->size - kept);
+    if (mpack_reader_error(reader) != mpack_ok)
+        return false;
+
+    reader->end += added;
+    return true;
+}
+
+// Copies `count` bytes into p when the buffer alone cannot satisfy the read.
+// Whatever is buffered is copied first; the rest comes from the fill
+// function, either via the buffer (small remainder) or straight into p.
+// On the early failure paths below, p is zero-filled after the error is
+// flagged.
+MPACK_NOINLINE void mpack_read_native_straddle(mpack_reader_t* reader, char* p, size_t count) {
+    mpack_assert(count == 0 || p != NULL, "data pointer for %i bytes is NULL", (int)count);
+
+    if (mpack_reader_error(reader) != mpack_ok) {
+        mpack_memset(p, 0, count);
+        return;
+    }
+
+    size_t buffered = (size_t)(reader->end - reader->data);
+    mpack_log("big read for %i bytes into %p, %i left in buffer, buffer size %i\n",
+            (int)count, p, (int)buffered, (int)reader->size);
+
+    // work out whether the request can proceed at all
+    mpack_error_t failure = mpack_ok;
+    if (count <= buffered) {
+        // the caller should have used the non-straddling path
+        mpack_assert(0,
+                "big read requested for %i bytes, but there are %i bytes "
+                "left in buffer. call mpack_read_native() instead",
+                (int)count, (int)buffered);
+        failure = mpack_error_bug;
+    } else if (reader->fill == NULL) {
+        // no fill function: the buffer was supposed to hold a complete
+        // object, so truncation is mpack_error_invalid rather than
+        // mpack_error_io.
+        failure = mpack_error_invalid;
+    } else if (reader->size == 0) {
+        // debatable which error fits best here; an empty buffer is treated
+        // the same as truncated data.
+        failure = mpack_error_io;
+    }
+
+    if (failure != mpack_ok) {
+        mpack_reader_flag_error(reader, failure);
+        mpack_memset(p, 0, count);
+        return;
+    }
+
+    // drain the bytes that are already buffered
+    if (buffered > 0) {
+        mpack_log("flushing %i bytes remaining in buffer\n", (int)buffered);
+        mpack_memcpy(p, reader->data, buffered);
+        reader->data += buffered;
+        p += buffered;
+        count -= buffered;
+    }
+
+    if (count > reader->size / MPACK_READER_SMALL_FRACTION_DENOMINATOR) {
+        // large remainder: have the fill function write directly into the
+        // destination.
+        mpack_log("reading %i additional bytes\n", (int)count);
+        mpack_fill_range(reader, p, count, count);
+        return;
+    }
+
+    // small remainder: refill the buffer as far as possible, then copy out
+    // just the bytes that were asked for.
+    size_t filled = mpack_fill_range(reader, reader->buffer, count, reader->size);
+    if (mpack_reader_error(reader) != mpack_ok)
+        return;
+    mpack_memcpy(p, reader->buffer, count);
+    reader->data = reader->buffer + count;
+    reader->end = reader->buffer + filled;
+}
+
+// Skips `count` bytes when the buffer holds fewer than that. The buffered
+// bytes are dropped, and the remainder is skipped either through the
+// reader's skip callback or by filling and discarding.
+MPACK_NOINLINE static void mpack_skip_bytes_straddle(mpack_reader_t* reader, size_t count) {
+
+    // skipping past the buffer needs a fill function. without one the buffer
+    // was meant to hold a whole object, so this is mpack_error_invalid
+    // rather than mpack_error_io (same reasoning as in
+    // mpack_read_native_straddle()).
+    if (reader->fill == NULL) {
+        mpack_log("reader has no fill function!\n");
+        mpack_reader_flag_error(reader, mpack_error_invalid);
+        return;
+    }
+
+    // drop everything currently buffered
+    size_t buffered = (size_t)(reader->end - reader->data);
+    mpack_log("discarding %i bytes still in buffer\n", (int)buffered);
+    reader->data = reader->end;
+    count -= buffered;
+
+    // the skip callback is only worth using for a large remainder; for a
+    // small one (or when there is no skip callback) we fill the buffer and
+    // skip within it.
+    if (reader->skip == NULL || count <= reader->size / 16) {
+        mpack_reader_skip_using_fill(reader, count);
+        return;
+    }
+
+    mpack_log("calling skip function for %i bytes\n", (int)count);
+    reader->skip(reader, count);
+}
+
+// Skips `count` bytes of input. Does nothing if the reader is already in an
+// error state. The skip is satisfied from the buffer when possible and
+// otherwise handed to mpack_skip_bytes_straddle().
+void mpack_skip_bytes(mpack_reader_t* reader, size_t count) {
+    if (mpack_reader_error(reader) != mpack_ok)
+        return;
+    mpack_log("skip requested for %i bytes\n", (int)count);
+
+    mpack_reader_track_bytes(reader, count);
+
+    // not enough buffered: take the slow path
+    const size_t buffered = (size_t)(reader->end - reader->data);
+    if (count > buffered) {
+        mpack_skip_bytes_straddle(reader, count);
+        return;
+    }
+
+    // fast path: just advance within the buffer
+    mpack_log("skipping %" PRIu32 " bytes still in buffer\n", (uint32_t)count);
+    reader->data += count;
+}
+
+// Skips `count` bytes by reading them through the fill function and throwing
+// them away. Expects an empty buffer and a reader that is not in an error
+// state. On return, any surplus bytes read past the skipped region are left
+// in the buffer for subsequent reads.
+MPACK_NOINLINE static void mpack_reader_skip_using_fill(mpack_reader_t* reader, size_t count) {
+    mpack_assert(reader->fill != NULL, "missing fill function!");
+    mpack_assert(reader->data == reader->end, "there are bytes left in the buffer!");
+    mpack_assert(reader->error == mpack_ok, "should not have called this in an error state (%i)", reader->error);
+    mpack_log("skip using fill for %i bytes\n", (int)count);
+
+    // consume whole buffers' worth of data while more than one buffer remains
+    for (; count > reader->size; count -= reader->size) {
+        mpack_log("filling and discarding buffer of %i bytes\n", (int)reader->size);
+        size_t got = mpack_fill_range(reader, reader->buffer, reader->size, reader->size);
+        if (got < reader->size) {
+            mpack_reader_flag_error(reader, mpack_error_io);
+            return;
+        }
+    }
+
+    // final fill: need at least `count` bytes, but take as many as fit
+    reader->data = reader->buffer;
+    size_t filled = mpack_fill_range(reader, reader->buffer, count, reader->size);
+    if (filled < count) {
+        mpack_reader_flag_error(reader, mpack_error_io);
+        return;
+    }
+    reader->end = reader->data + filled;
+    mpack_log("filled %i bytes into buffer; discarding %i bytes\n", (int)filled, (int)count);
+
+    // step over the part we were asked to skip
+    reader->data += count;
+}
+
+// Reads `count` bytes from the reader into p. The bytes are first recorded
+// via mpack_reader_track_bytes() and then read with mpack_read_native().
+// p must not be NULL (asserted).
+void mpack_read_bytes(mpack_reader_t* reader, char* p, size_t count) {
+    mpack_assert(p != NULL, "destination for read of %i bytes is NULL", (int)count);
+    mpack_reader_track_bytes(reader, count);
+    mpack_read_native(reader, p, count);
+}
+
+// Reads `byte_count` bytes into p and checks them with mpack_utf8_check().
+// If the read succeeded but the check fails, mpack_error_type is flagged.
+// p must not be NULL (asserted).
+void mpack_read_utf8(mpack_reader_t* reader, char* p, size_t byte_count) {
+    mpack_assert(p != NULL, "destination for read of %i bytes is NULL", (int)byte_count);
+    mpack_reader_track_str_bytes_all(reader, byte_count);
+    mpack_read_native(reader, p, byte_count);
+
+    // only validate data that was actually read
+    if (mpack_reader_error(reader) != mpack_ok)
+        return;
+    if (mpack_utf8_check(p, byte_count))
+        return;
+
+    mpack_reader_flag_error(reader, mpack_error_type);
+}
+
+// Reads `byte_count` bytes into buf and appends a null-terminator, without
+// inspecting the content. buf is set to the empty string if the reader is
+// already in an error state, or if the bytes plus terminator do not fit in
+// `buffer_size` (which flags mpack_error_too_big).
+static void mpack_read_cstr_unchecked(mpack_reader_t* reader, char* buf, size_t buffer_size, size_t byte_count) {
+    mpack_assert(buf != NULL, "destination for read of %i bytes is NULL", (int)byte_count);
+    mpack_assert(buffer_size >= 1, "buffer size is zero; you must have room for at least a null-terminator");
+
+    if (mpack_reader_error(reader) != mpack_ok) {
+        buf[0] = 0;
+        return;
+    }
+
+    // one byte of the buffer is reserved for the terminator
+    const size_t capacity = buffer_size - 1;
+    if (byte_count > capacity) {
+        mpack_reader_flag_error(reader, mpack_error_too_big);
+        buf[0] = 0;
+        return;
+    }
+
+    mpack_reader_track_str_bytes_all(reader, byte_count);
+    mpack_read_native(reader, buf, byte_count);
+    buf[byte_count] = 0;
+}
+
+// Reads `byte_count` bytes into buf as a null-terminated string. If the read
+// succeeds but mpack_str_check_no_null() rejects the data, buf is set to the
+// empty string and mpack_error_type is flagged.
+void mpack_read_cstr(mpack_reader_t* reader, char* buf, size_t buffer_size, size_t byte_count) {
+    mpack_read_cstr_unchecked(reader, buf, buffer_size, byte_count);
+
+    if (mpack_reader_error(reader) != mpack_ok)
+        return;
+
+    // embedded null bytes are not allowed in a C string
+    if (mpack_str_check_no_null(buf, byte_count))
+        return;
+
+    buf[0] = 0;
+    mpack_reader_flag_error(reader, mpack_error_type);
+}
+
+// Reads `byte_count` bytes into buf as a null-terminated string. If the read
+// succeeds but mpack_utf8_check_no_null() rejects the data, buf is set to
+// the empty string and mpack_error_type is flagged.
+void mpack_read_utf8_cstr(mpack_reader_t* reader, char* buf, size_t buffer_size, size_t byte_count) {
+    mpack_read_cstr_unchecked(reader, buf, buffer_size, byte_count);
+
+    if (mpack_reader_error(reader) != mpack_ok)
+        return;
+
+    // validate the encoding of what was read
+    if (mpack_utf8_check_no_null(buf, byte_count))
+        return;
+
+    buf[0] = 0;
+    mpack_reader_flag_error(reader, mpack_error_type);
+}
+
+#ifdef MPACK_MALLOC
+// Reads native bytes with error callback disabled. This allows MPack reader functions
+// to hold an allocated buffer and read native data into it without leaking it in
+// case of a non-local jump (longjmp, throw) out of an error handler.
+static void mpack_read_native_noerrorfn(mpack_reader_t* reader, char* p, size_t count) {
+    mpack_assert(reader->error == mpack_ok, "cannot call if an error is already flagged!");
+    mpack_reader_error_t error_fn = reader->error_fn;
+    reader->error_fn = NULL;
+    mpack_read_native(reader, p, count);
+    reader->error_fn = error_fn;
+}
+
+// Allocates a buffer with MPACK_MALLOC and reads `count` bytes into it,
+// appending a '\0' when `null_terminated` is true.
+//
+// Returns the allocated buffer (to be released with MPACK_FREE), or NULL
+// when:
+//  - the reader is in an error state after tracking the bytes,
+//  - count is 0 and no terminator was requested (not an error),
+//  - the allocation size would overflow or the allocation fails
+//    (mpack_error_memory is flagged), or
+//  - the read itself fails (the buffer is freed and the error callback, if
+//    any, is invoked with the flagged error).
+char* mpack_read_bytes_alloc_impl(mpack_reader_t* reader, size_t count, bool null_terminated) {
+
+    // track the bytes first in case it jumps
+    mpack_reader_track_bytes(reader, count);
+    if (mpack_reader_error(reader) != mpack_ok)
+        return NULL;
+
+    // cannot allocate zero bytes. this is not an error.
+    if (count == 0 && null_terminated == false)
+        return NULL;
+
+    // compute the allocation size, refusing a request where adding the
+    // terminator wraps around. without this check a wrapped size of zero
+    // could yield a tiny allocation that the read below would overrun.
+    size_t alloc_size = count + (null_terminated ? 1 : 0);
+    if (alloc_size < count) {
+        mpack_reader_flag_error(reader, mpack_error_memory);
+        return NULL;
+    }
+
+    // allocate data
+    char* data = (char*)MPACK_MALLOC(alloc_size);
+    if (data == NULL) {
+        mpack_reader_flag_error(reader, mpack_error_memory);
+        return NULL;
+    }
+
+    // read with error callback disabled so we don't leak our buffer
+    mpack_read_native_noerrorfn(reader, data, count);
+
+    // report flagged errors. the buffer is released before the callback
+    // runs, since the callback may not return.
+    if (mpack_reader_error(reader) != mpack_ok) {
+        MPACK_FREE(data);
+        if (reader->error_fn)
+            reader->error_fn(reader, mpack_reader_error(reader));
+        return NULL;
+    }
+
+    if (null_terminated)
+        data[count] = '\0';
+    return data;
+}
+#endif
+
+// Returns a pointer to the next `count` bytes inside the reader's buffer and
+// advances past them, or NULL on error. No tracking is done here because
+// the different in-place readers track in different ways.
+static const char* mpack_read_bytes_inplace_notrack(mpack_reader_t* reader, size_t count) {
+    if (mpack_reader_error(reader) != mpack_ok)
+        return NULL;
+
+    // only call mpack_reader_ensure() when the buffer does not already hold
+    // enough bytes
+    const bool enough_buffered = ((size_t)(reader->end - reader->data) >= count);
+    if (!enough_buffered && !mpack_reader_ensure(reader, count))
+        return NULL;
+
+    const char* start = reader->data;
+    reader->data += count;
+    return start;
+}
+
+// Tracks `count` bytes via mpack_reader_track_bytes() and then returns the
+// result of mpack_read_bytes_inplace_notrack(): a pointer to the bytes
+// within the reader's buffer, or NULL on error.
+const char* mpack_read_bytes_inplace(mpack_reader_t* reader, size_t count) {
+    mpack_reader_track_bytes(reader, count);
+    return mpack_read_bytes_inplace_notrack(reader, count);
+}
+
+// In-place read of `count` string bytes, checked with mpack_utf8_check().
+// Returns the in-place pointer; if the read succeeded but the check fails,
+// mpack_error_type is flagged and NULL is returned.
+const char* mpack_read_utf8_inplace(mpack_reader_t* reader, size_t count) {
+    mpack_reader_track_str_bytes_all(reader, count);
+    const char* bytes = mpack_read_bytes_inplace_notrack(reader, count);
+
+    // on a read error, hand back whatever the in-place read returned
+    if (mpack_reader_error(reader) != mpack_ok)
+        return bytes;
+
+    if (mpack_utf8_check(bytes, count))
+        return bytes;
+
+    mpack_reader_flag_error(reader, mpack_error_type);
+    return NULL;
+}
+
+// Decodes the tag located at reader->data into *tag without advancing
+// reader->data itself (the caller decides whether to consume it).
+//
+// Returns the number of bytes the encoded tag occupies, or 0 on failure.
+// Failure happens when mpack_reader_ensure() cannot make the tag's bytes
+// available, when the reserved byte 0xc1 is encountered
+// (mpack_error_invalid), or when an ext/fixext tag is encountered in a build
+// without MPACK_EXTENSIONS (mpack_error_unsupported).
+//
+// The reader must not be in an error state when this is called (asserted).
+static size_t mpack_parse_tag(mpack_reader_t* reader, mpack_tag_t* tag) {
+    mpack_assert(reader->error == mpack_ok, "reader cannot be in an error state!");
+
+    if (!mpack_reader_ensure(reader, 1))
+        return 0;
+    uint8_t type = mpack_load_u8(reader->data);
+
+    // unfortunately, by far the fastest way to parse a tag is to switch
+    // on the first byte, and to explicitly list every possible byte. so for
+    // infix types, the list of cases is quite large.
+    //
+    // in size-optimized builds, we switch on the top four bits first to
+    // handle most infix types with a smaller jump table to save space.
+
+    #if MPACK_OPTIMIZE_FOR_SIZE
+    switch (type >> 4) {
+
+        // positive fixnum
+        case 0x0: case 0x1: case 0x2: case 0x3:
+        case 0x4: case 0x5: case 0x6: case 0x7:
+            *tag = mpack_tag_make_uint(type);
+            return 1;
+
+        // negative fixnum
+        case 0xe: case 0xf:
+            *tag = mpack_tag_make_int((int8_t)type);
+            return 1;
+
+        // fixmap
+        case 0x8:
+            *tag = mpack_tag_make_map(type & ~0xf0u);
+            return 1;
+
+        // fixarray
+        case 0x9:
+            *tag = mpack_tag_make_array(type & ~0xf0u);
+            return 1;
+
+        // fixstr
+        case 0xa: case 0xb:
+            *tag = mpack_tag_make_str(type & ~0xe0u);
+            return 1;
+
+        // not one of the common infix types
+        default:
+            break;
+
+    }
+    #endif
+
+    // handle individual type tags
+    switch (type) {
+
+        #if !MPACK_OPTIMIZE_FOR_SIZE
+        // positive fixnum
+        case 0x00: case 0x01: case 0x02: case 0x03: case 0x04: case 0x05: case 0x06: case 0x07:
+        case 0x08: case 0x09: case 0x0a: case 0x0b: case 0x0c: case 0x0d: case 0x0e: case 0x0f:
+        case 0x10: case 0x11: case 0x12: case 0x13: case 0x14: case 0x15: case 0x16: case 0x17:
+        case 0x18: case 0x19: case 0x1a: case 0x1b: case 0x1c: case 0x1d: case 0x1e: case 0x1f:
+        case 0x20: case 0x21: case 0x22: case 0x23: case 0x24: case 0x25: case 0x26: case 0x27:
+        case 0x28: case 0x29: case 0x2a: case 0x2b: case 0x2c: case 0x2d: case 0x2e: case 0x2f:
+        case 0x30: case 0x31: case 0x32: case 0x33: case 0x34: case 0x35: case 0x36: case 0x37:
+        case 0x38: case 0x39: case 0x3a: case 0x3b: case 0x3c: case 0x3d: case 0x3e: case 0x3f:
+        case 0x40: case 0x41: case 0x42: case 0x43: case 0x44: case 0x45: case 0x46: case 0x47:
+        case 0x48: case 0x49: case 0x4a: case 0x4b: case 0x4c: case 0x4d: case 0x4e: case 0x4f:
+        case 0x50: case 0x51: case 0x52: case 0x53: case 0x54: case 0x55: case 0x56: case 0x57:
+        case 0x58: case 0x59: case 0x5a: case 0x5b: case 0x5c: case 0x5d: case 0x5e: case 0x5f:
+        case 0x60: case 0x61: case 0x62: case 0x63: case 0x64: case 0x65: case 0x66: case 0x67:
+        case 0x68: case 0x69: case 0x6a: case 0x6b: case 0x6c: case 0x6d: case 0x6e: case 0x6f:
+        case 0x70: case 0x71: case 0x72: case 0x73: case 0x74: case 0x75: case 0x76: case 0x77:
+        case 0x78: case 0x79: case 0x7a: case 0x7b: case 0x7c: case 0x7d: case 0x7e: case 0x7f:
+            *tag = mpack_tag_make_uint(type);
+            return 1;
+
+        // negative fixnum
+        case 0xe0: case 0xe1: case 0xe2: case 0xe3: case 0xe4: case 0xe5: case 0xe6: case 0xe7:
+        case 0xe8: case 0xe9: case 0xea: case 0xeb: case 0xec: case 0xed: case 0xee: case 0xef:
+        case 0xf0: case 0xf1: case 0xf2: case 0xf3: case 0xf4: case 0xf5: case 0xf6: case 0xf7:
+        case 0xf8: case 0xf9: case 0xfa: case 0xfb: case 0xfc: case 0xfd: case 0xfe: case 0xff:
+            *tag = mpack_tag_make_int((int8_t)type);
+            return 1;
+
+        // fixmap
+        case 0x80: case 0x81: case 0x82: case 0x83: case 0x84: case 0x85: case 0x86: case 0x87:
+        case 0x88: case 0x89: case 0x8a: case 0x8b: case 0x8c: case 0x8d: case 0x8e: case 0x8f:
+            *tag = mpack_tag_make_map(type & ~0xf0u);
+            return 1;
+
+        // fixarray
+        case 0x90: case 0x91: case 0x92: case 0x93: case 0x94: case 0x95: case 0x96: case 0x97:
+        case 0x98: case 0x99: case 0x9a: case 0x9b: case 0x9c: case 0x9d: case 0x9e: case 0x9f:
+            *tag = mpack_tag_make_array(type & ~0xf0u);
+            return 1;
+
+        // fixstr
+        case 0xa0: case 0xa1: case 0xa2: case 0xa3: case 0xa4: case 0xa5: case 0xa6: case 0xa7:
+        case 0xa8: case 0xa9: case 0xaa: case 0xab: case 0xac: case 0xad: case 0xae: case 0xaf:
+        case 0xb0: case 0xb1: case 0xb2: case 0xb3: case 0xb4: case 0xb5: case 0xb6: case 0xb7:
+        case 0xb8: case 0xb9: case 0xba: case 0xbb: case 0xbc: case 0xbd: case 0xbe: case 0xbf:
+            *tag = mpack_tag_make_str(type & ~0xe0u);
+            return 1;
+        #endif
+
+        // nil
+        case 0xc0:
+            *tag = mpack_tag_make_nil();
+            return 1;
+
+        // bool (0xc2 is false, 0xc3 is true: the low bit carries the value)
+        case 0xc2: case 0xc3:
+            *tag = mpack_tag_make_bool((bool)(type & 1));
+            return 1;
+
+        // bin8
+        case 0xc4:
+            if (!mpack_reader_ensure(reader, MPACK_TAG_SIZE_BIN8))
+                return 0;
+            *tag = mpack_tag_make_bin(mpack_load_u8(reader->data + 1));
+            return MPACK_TAG_SIZE_BIN8;
+
+        // bin16
+        case 0xc5:
+            if (!mpack_reader_ensure(reader, MPACK_TAG_SIZE_BIN16))
+                return 0;
+            *tag = mpack_tag_make_bin(mpack_load_u16(reader->data + 1));
+            return MPACK_TAG_SIZE_BIN16;
+
+        // bin32
+        case 0xc6:
+            if (!mpack_reader_ensure(reader, MPACK_TAG_SIZE_BIN32))
+                return 0;
+            *tag = mpack_tag_make_bin(mpack_load_u32(reader->data + 1));
+            return MPACK_TAG_SIZE_BIN32;
+
+        #if MPACK_EXTENSIONS
+        // ext8 (length comes first, the ext type byte follows it)
+        case 0xc7:
+            if (!mpack_reader_ensure(reader, MPACK_TAG_SIZE_EXT8))
+                return 0;
+            *tag = mpack_tag_make_ext(mpack_load_i8(reader->data + 2), mpack_load_u8(reader->data + 1));
+            return MPACK_TAG_SIZE_EXT8;
+
+        // ext16
+        case 0xc8:
+            if (!mpack_reader_ensure(reader, MPACK_TAG_SIZE_EXT16))
+                return 0;
+            *tag = mpack_tag_make_ext(mpack_load_i8(reader->data + 3), mpack_load_u16(reader->data + 1));
+            return MPACK_TAG_SIZE_EXT16;
+
+        // ext32
+        case 0xc9:
+            if (!mpack_reader_ensure(reader, MPACK_TAG_SIZE_EXT32))
+                return 0;
+            *tag = mpack_tag_make_ext(mpack_load_i8(reader->data + 5), mpack_load_u32(reader->data + 1));
+            return MPACK_TAG_SIZE_EXT32;
+        #endif
+
+        // float
+        case 0xca:
+            if (!mpack_reader_ensure(reader, MPACK_TAG_SIZE_FLOAT))
+                return 0;
+            #if MPACK_FLOAT
+            *tag = mpack_tag_make_float(mpack_load_float(reader->data + 1));
+            #else
+            *tag = mpack_tag_make_raw_float(mpack_load_u32(reader->data + 1));
+            #endif
+            return MPACK_TAG_SIZE_FLOAT;
+
+        // double
+        case 0xcb:
+            if (!mpack_reader_ensure(reader, MPACK_TAG_SIZE_DOUBLE))
+                return 0;
+            #if MPACK_DOUBLE
+            *tag = mpack_tag_make_double(mpack_load_double(reader->data + 1));
+            #else
+            *tag = mpack_tag_make_raw_double(mpack_load_u64(reader->data + 1));
+            #endif
+            return MPACK_TAG_SIZE_DOUBLE;
+
+        // uint8
+        case 0xcc:
+            if (!mpack_reader_ensure(reader, MPACK_TAG_SIZE_U8))
+                return 0;
+            *tag = mpack_tag_make_uint(mpack_load_u8(reader->data + 1));
+            return MPACK_TAG_SIZE_U8;
+
+        // uint16
+        case 0xcd:
+            if (!mpack_reader_ensure(reader, MPACK_TAG_SIZE_U16))
+                return 0;
+            *tag = mpack_tag_make_uint(mpack_load_u16(reader->data + 1));
+            return MPACK_TAG_SIZE_U16;
+
+        // uint32
+        case 0xce:
+            if (!mpack_reader_ensure(reader, MPACK_TAG_SIZE_U32))
+                return 0;
+            *tag = mpack_tag_make_uint(mpack_load_u32(reader->data + 1));
+            return MPACK_TAG_SIZE_U32;
+
+        // uint64
+        case 0xcf:
+            if (!mpack_reader_ensure(reader, MPACK_TAG_SIZE_U64))
+                return 0;
+            *tag = mpack_tag_make_uint(mpack_load_u64(reader->data + 1));
+            return MPACK_TAG_SIZE_U64;
+
+        // int8
+        case 0xd0:
+            if (!mpack_reader_ensure(reader, MPACK_TAG_SIZE_I8))
+                return 0;
+            *tag = mpack_tag_make_int(mpack_load_i8(reader->data + 1));
+            return MPACK_TAG_SIZE_I8;
+
+        // int16
+        case 0xd1:
+            if (!mpack_reader_ensure(reader, MPACK_TAG_SIZE_I16))
+                return 0;
+            *tag = mpack_tag_make_int(mpack_load_i16(reader->data + 1));
+            return MPACK_TAG_SIZE_I16;
+
+        // int32
+        case 0xd2:
+            if (!mpack_reader_ensure(reader, MPACK_TAG_SIZE_I32))
+                return 0;
+            *tag = mpack_tag_make_int(mpack_load_i32(reader->data + 1));
+            return MPACK_TAG_SIZE_I32;
+
+        // int64
+        case 0xd3:
+            if (!mpack_reader_ensure(reader, MPACK_TAG_SIZE_I64))
+                return 0;
+            *tag = mpack_tag_make_int(mpack_load_i64(reader->data + 1));
+            return MPACK_TAG_SIZE_I64;
+
+        #if MPACK_EXTENSIONS
+        // fixext1
+        case 0xd4:
+            if (!mpack_reader_ensure(reader, MPACK_TAG_SIZE_FIXEXT1))
+                return 0;
+            *tag = mpack_tag_make_ext(mpack_load_i8(reader->data + 1), 1);
+            return MPACK_TAG_SIZE_FIXEXT1;
+
+        // fixext2
+        case 0xd5:
+            if (!mpack_reader_ensure(reader, MPACK_TAG_SIZE_FIXEXT2))
+                return 0;
+            *tag = mpack_tag_make_ext(mpack_load_i8(reader->data + 1), 2);
+            return MPACK_TAG_SIZE_FIXEXT2;
+
+        // fixext4
+        case 0xd6:
+            if (!mpack_reader_ensure(reader, MPACK_TAG_SIZE_FIXEXT4))
+                return 0;
+            *tag = mpack_tag_make_ext(mpack_load_i8(reader->data + 1), 4);
+            // return the same constant that was ensured above, like every
+            // other case, rather than a hard-coded size
+            return MPACK_TAG_SIZE_FIXEXT4;
+
+        // fixext8
+        case 0xd7:
+            if (!mpack_reader_ensure(reader, MPACK_TAG_SIZE_FIXEXT8))
+                return 0;
+            *tag = mpack_tag_make_ext(mpack_load_i8(reader->data + 1), 8);
+            return MPACK_TAG_SIZE_FIXEXT8;
+
+        // fixext16
+        case 0xd8:
+            if (!mpack_reader_ensure(reader, MPACK_TAG_SIZE_FIXEXT16))
+                return 0;
+            *tag = mpack_tag_make_ext(mpack_load_i8(reader->data + 1), 16);
+            return MPACK_TAG_SIZE_FIXEXT16;
+        #endif
+
+        // str8
+        case 0xd9:
+            if (!mpack_reader_ensure(reader, MPACK_TAG_SIZE_STR8))
+                return 0;
+            *tag = mpack_tag_make_str(mpack_load_u8(reader->data + 1));
+            return MPACK_TAG_SIZE_STR8;
+
+        // str16
+        case 0xda:
+            if (!mpack_reader_ensure(reader, MPACK_TAG_SIZE_STR16))
+                return 0;
+            *tag = mpack_tag_make_str(mpack_load_u16(reader->data + 1));
+            return MPACK_TAG_SIZE_STR16;
+
+        // str32
+        case 0xdb:
+            if (!mpack_reader_ensure(reader, MPACK_TAG_SIZE_STR32))
+                return 0;
+            *tag = mpack_tag_make_str(mpack_load_u32(reader->data + 1));
+            return MPACK_TAG_SIZE_STR32;
+
+        // array16
+        case 0xdc:
+            if (!mpack_reader_ensure(reader, MPACK_TAG_SIZE_ARRAY16))
+                return 0;
+            *tag = mpack_tag_make_array(mpack_load_u16(reader->data + 1));
+            return MPACK_TAG_SIZE_ARRAY16;
+
+        // array32
+        case 0xdd:
+            if (!mpack_reader_ensure(reader, MPACK_TAG_SIZE_ARRAY32))
+                return 0;
+            *tag = mpack_tag_make_array(mpack_load_u32(reader->data + 1));
+            return MPACK_TAG_SIZE_ARRAY32;
+
+        // map16
+        case 0xde:
+            if (!mpack_reader_ensure(reader, MPACK_TAG_SIZE_MAP16))
+                return 0;
+            *tag = mpack_tag_make_map(mpack_load_u16(reader->data + 1));
+            return MPACK_TAG_SIZE_MAP16;
+
+        // map32
+        case 0xdf:
+            if (!mpack_reader_ensure(reader, MPACK_TAG_SIZE_MAP32))
+                return 0;
+            *tag = mpack_tag_make_map(mpack_load_u32(reader->data + 1));
+            return MPACK_TAG_SIZE_MAP32;
+
+        // reserved
+        case 0xc1:
+            mpack_reader_flag_error(reader, mpack_error_invalid);
+            return 0;
+
+        #if !MPACK_EXTENSIONS
+        // ext
+        case 0xc7: // fallthrough
+        case 0xc8: // fallthrough
+        case 0xc9: // fallthrough
+        // fixext
+        case 0xd4: // fallthrough
+        case 0xd5: // fallthrough
+        case 0xd6: // fallthrough
+        case 0xd7: // fallthrough
+        case 0xd8:
+            mpack_reader_flag_error(reader, mpack_error_unsupported);
+            return 0;
+        #endif
+
+        #if MPACK_OPTIMIZE_FOR_SIZE
+        // any other bytes should have been handled by the infix switch
+        default:
+            break;
+        #endif
+    }
+
+    mpack_assert(0, "unreachable");
+    return 0;
+}
+
+// Reads the next tag and consumes it from the input. Returns a nil tag if
+// the reader is (or ends up) in an error state, if element tracking rejects
+// the read, or if the tag cannot be parsed. With read tracking enabled, a
+// compound tag (map, array, str, bin, ext) is pushed onto the tracking
+// stack before its bytes are consumed.
+mpack_tag_t mpack_read_tag(mpack_reader_t* reader) {
+    mpack_log("reading tag\n");
+
+    // make sure we can read a tag
+    if (mpack_reader_error(reader) != mpack_ok ||
+            mpack_reader_track_element(reader) != mpack_ok)
+        return mpack_tag_nil();
+
+    mpack_tag_t tag = MPACK_TAG_ZERO;
+    const size_t tag_size = mpack_parse_tag(reader, &tag);
+    if (tag_size == 0)
+        return mpack_tag_nil();
+
+    #if MPACK_READ_TRACKING
+    mpack_error_t push_result = mpack_ok;
+
+    switch (tag.type) {
+        // containers are tracked by element count
+        case mpack_type_map:
+        case mpack_type_array:
+            push_result = mpack_track_push(&reader->track, tag.type, tag.v.n);
+            break;
+
+        // byte-oriented types are tracked by length
+        #if MPACK_EXTENSIONS
+        case mpack_type_ext:
+        #endif
+        case mpack_type_str:
+        case mpack_type_bin:
+            push_result = mpack_track_push(&reader->track, tag.type, tag.v.l);
+            break;
+
+        default:
+            break;
+    }
+
+    if (push_result != mpack_ok) {
+        mpack_reader_flag_error(reader, push_result);
+        return mpack_tag_nil();
+    }
+    #endif
+
+    // everything succeeded: step over the tag bytes
+    reader->data += tag_size;
+    return tag;
+}
+
+// Parses the next tag without advancing reader->data. Returns a nil tag if
+// the reader is in an error state, if the tracking peek check fails, or if
+// the tag cannot be parsed.
+mpack_tag_t mpack_peek_tag(mpack_reader_t* reader) {
+    mpack_log("peeking tag\n");
+
+    // make sure we can peek a tag
+    if (mpack_reader_error(reader) != mpack_ok ||
+            mpack_reader_track_peek_element(reader) != mpack_ok)
+        return mpack_tag_nil();
+
+    mpack_tag_t tag = MPACK_TAG_ZERO;
+    const size_t tag_size = mpack_parse_tag(reader, &tag);
+    return (tag_size == 0) ? mpack_tag_nil() : tag;
+}
+
+// Reads and throws away the next element. For str/bin/ext the payload bytes
+// are skipped; for arrays and maps each contained element (and each key and
+// value) is discarded recursively. Stops early once the reader is in an
+// error state.
+void mpack_discard(mpack_reader_t* reader) {
+    mpack_tag_t tag = mpack_read_tag(reader);
+    if (mpack_reader_error(reader))
+        return;
+
+    switch (tag.type) {
+        case mpack_type_str:
+            mpack_skip_bytes(reader, tag.v.l);
+            mpack_done_str(reader);
+            break;
+
+        case mpack_type_bin:
+            mpack_skip_bytes(reader, tag.v.l);
+            mpack_done_bin(reader);
+            break;
+
+        #if MPACK_EXTENSIONS
+        case mpack_type_ext:
+            mpack_skip_bytes(reader, tag.v.l);
+            mpack_done_ext(reader);
+            break;
+        #endif
+
+        case mpack_type_array: {
+            // one discard per element
+            while (tag.v.n > 0) {
+                mpack_discard(reader);
+                if (mpack_reader_error(reader))
+                    break;
+                --tag.v.n;
+            }
+            mpack_done_array(reader);
+            break;
+        }
+
+        case mpack_type_map: {
+            // one discard for the key and one for the value of each pair
+            while (tag.v.n > 0) {
+                mpack_discard(reader);
+                mpack_discard(reader);
+                if (mpack_reader_error(reader))
+                    break;
+                --tag.v.n;
+            }
+            mpack_done_map(reader);
+            break;
+        }
+
+        default:
+            break;
+    }
+}
+
+#if MPACK_EXTENSIONS
+// Reads a timestamp payload of `size` bytes (4, 8 or 12) and finishes the
+// enclosing ext element. Any other size, or a nanoseconds value above
+// MPACK_TIMESTAMP_NANOSECONDS_MAX, flags mpack_error_invalid. A zeroed
+// timestamp is returned whenever an error occurs.
+mpack_timestamp_t mpack_read_timestamp(mpack_reader_t* reader, size_t size) {
+    mpack_timestamp_t timestamp = {0, 0};
+
+    if (!(size == 4 || size == 8 || size == 12)) {
+        mpack_reader_flag_error(reader, mpack_error_invalid);
+        return timestamp;
+    }
+
+    // 12 bytes is the largest of the three encodings
+    char raw[12];
+    mpack_read_bytes(reader, raw, size);
+    mpack_done_ext(reader);
+    if (mpack_reader_error(reader) != mpack_ok)
+        return timestamp;
+
+    if (size == 4) {
+        // 32-bit unsigned seconds, no nanoseconds
+        timestamp.seconds = (int64_t)(uint64_t)mpack_load_u32(raw);
+
+    } else if (size == 8) {
+        // one 64-bit word: nanoseconds in the top 30 bits, seconds in the
+        // low 34 bits
+        const uint64_t packed = mpack_load_u64(raw);
+        timestamp.nanoseconds = (uint32_t)(packed >> 34);
+        timestamp.seconds = (int64_t)(packed & ((MPACK_UINT64_C(1) << 34) - 1));
+
+    } else if (size == 12) {
+        // 32-bit nanoseconds followed by 64-bit signed seconds
+        timestamp.nanoseconds = mpack_load_u32(raw);
+        timestamp.seconds = mpack_load_i64(raw + 4);
+
+    } else {
+        mpack_assert(false, "unreachable");
+    }
+
+    if (timestamp.nanoseconds <= MPACK_TIMESTAMP_NANOSECONDS_MAX)
+        return timestamp;
+
+    mpack_reader_flag_error(reader, mpack_error_invalid);
+    mpack_timestamp_t cleared = {0, 0};
+    return cleared;
+}
+#endif
+
+#if MPACK_READ_TRACKING
+// Pops an element of the given type from the read-tracking stack, flagging
+// any error the pop reports. Does nothing if the reader is already in an
+// error state.
+void mpack_done_type(mpack_reader_t* reader, mpack_type_t type) {
+    if (mpack_reader_error(reader) != mpack_ok)
+        return;
+    mpack_reader_flag_if_error(reader, mpack_track_pop(&reader->track, type));
+}
+#endif
+
+#if MPACK_DEBUG && MPACK_STDIO
+// Reads up to `buffer_size` leading bytes of a `length`-byte payload into
+// `buffer` and skips the rest of the payload. Returns the number of bytes
+// stored in `buffer`, or 0 if the payload is empty or the read fails.
+static size_t mpack_print_read_prefix(mpack_reader_t* reader, size_t length, char* buffer, size_t buffer_size) {
+    if (length == 0)
+        return 0;
+
+    // the prefix is the whole payload if it fits, otherwise a full buffer
+    size_t prefix = buffer_size;
+    if (length < buffer_size)
+        prefix = length;
+
+    mpack_read_bytes(reader, buffer, prefix);
+    if (mpack_reader_error(reader) != mpack_ok)
+        return 0;
+
+    // skip whatever did not fit in the buffer
+    mpack_skip_bytes(reader, length - prefix);
+    return prefix;
+}
+
+// Reads one element from `reader` and appends a textual rendering of it to
+// `print`. Strings, arrays and maps are written out directly (arrays and maps
+// recurse with depth + 1, and each nesting level is indented by four spaces);
+// every other tag is rendered by mpack_tag_debug_pseudo_json(). The function
+// returns early, leaving the output partial, as soon as the reader reports an
+// error.
+static void mpack_print_element(mpack_reader_t* reader, mpack_print_t* print, size_t depth) {
+    mpack_tag_t val = mpack_read_tag(reader);
+    if (mpack_reader_error(reader) != mpack_ok)
+        return;
+
+    // We read some bytes from bin and ext so we can print its prefix in hex.
+    // `count` is how many bytes of `buffer` were filled; it stays zero for
+    // every other tag type.
+    char buffer[MPACK_PRINT_BYTE_COUNT];
+    size_t count = 0;
+    size_t i, j;
+
+    switch (val.type) {
+        case mpack_type_str:
+            // Strings are read one byte at a time so that newline, backslash
+            // and double quote can be escaped as they are appended.
+            mpack_print_append_cstr(print, "\"");
+            for (i = 0; i < val.v.l; ++i) {
+                char c;
+                mpack_read_bytes(reader, &c, 1);
+                if (mpack_reader_error(reader) != mpack_ok)
+                    return;
+                switch (c) {
+                    case '\n': mpack_print_append_cstr(print, "\\n"); break;
+                    case '\\': mpack_print_append_cstr(print, "\\\\"); break;
+                    case '"': mpack_print_append_cstr(print, "\\\""); break;
+                    default: mpack_print_append(print, &c, 1); break;
+                }
+            }
+            mpack_print_append_cstr(print, "\"");
+            mpack_done_str(reader);
+            return;
+
+        case mpack_type_array:
+            // One element per line, indented one level deeper than the array.
+            mpack_print_append_cstr(print, "[\n");
+            for (i = 0; i < val.v.n; ++i) {
+                for (j = 0; j < depth + 1; ++j)
+                    mpack_print_append_cstr(print, "    ");
+                mpack_print_element(reader, print, depth + 1);
+                if (mpack_reader_error(reader) != mpack_ok)
+                    return;
+                // no separator after the last element
+                if (i != val.v.n - 1)
+                    mpack_print_append_cstr(print, ",");
+                mpack_print_append_cstr(print, "\n");
+            }
+            // the closing bracket lines up with the array's own depth
+            for (i = 0; i < depth; ++i)
+                mpack_print_append_cstr(print, "    ");
+            mpack_print_append_cstr(print, "]");
+            mpack_done_array(reader);
+            return;
+
+        case mpack_type_map:
+            // One "key: value" pair per line; key and value are each read as
+            // a full element.
+            mpack_print_append_cstr(print, "{\n");
+            for (i = 0; i < val.v.n; ++i) {
+                for (j = 0; j < depth + 1; ++j)
+                    mpack_print_append_cstr(print, "    ");
+                mpack_print_element(reader, print, depth + 1);
+                if (mpack_reader_error(reader) != mpack_ok)
+                    return;
+                mpack_print_append_cstr(print, ": ");
+                mpack_print_element(reader, print, depth + 1);
+                if (mpack_reader_error(reader) != mpack_ok)
+                    return;
+                // no separator after the last pair
+                if (i != val.v.n - 1)
+                    mpack_print_append_cstr(print, ",");
+                mpack_print_append_cstr(print, "\n");
+            }
+            for (i = 0; i < depth; ++i)
+                mpack_print_append_cstr(print, "    ");
+            mpack_print_append_cstr(print, "}");
+            mpack_done_map(reader);
+            return;
+
+        // The above cases return so as not to print a pseudo-json value. The
+        // below cases break and print pseudo-json.
+
+        case mpack_type_bin:
+            // keep a prefix of the payload for display and skip the remainder
+            count = mpack_print_read_prefix(reader, mpack_tag_bin_length(&val), buffer, sizeof(buffer));
+            mpack_done_bin(reader);
+            break;
+
+        #if MPACK_EXTENSIONS
+        case mpack_type_ext:
+            // same treatment as bin
+            count = mpack_print_read_prefix(reader, mpack_tag_ext_length(&val), buffer, sizeof(buffer));
+            mpack_done_ext(reader);
+            break;
+        #endif
+
+        default:
+            break;
+    }
+
+    // Render the tag (plus any payload prefix captured above) into a local
+    // buffer and append it to the output.
+    char buf[256];
+    mpack_tag_debug_pseudo_json(val, buf, sizeof(buf), buffer, count);
+    mpack_print_append_cstr(print, buf);
+}
+
+// Prints a single element from `reader` at the given indentation depth, then
+// destroys the reader. If destroying the reader reports an error, a parsing
+// error note is appended; otherwise, if bytes were left unread, a note with
+// the number of extra bytes is appended.
+static void mpack_print_and_destroy(mpack_reader_t* reader, mpack_print_t* print, size_t depth) {
+    size_t level = 0;
+    while (level < depth) {
+        mpack_print_append_cstr(print, "    ");
+        ++level;
+    }
+    mpack_print_element(reader, print, depth);
+
+    // sampled before the reader is destroyed
+    size_t leftover = mpack_reader_remaining(reader, NULL);
+
+    char note[256];
+    if (mpack_reader_destroy(reader) != mpack_ok) {
+        mpack_snprintf(note, sizeof(note), "\n<mpack parsing error %s>", mpack_error_to_string(mpack_reader_error(reader)));
+    } else if (leftover > 0) {
+        mpack_snprintf(note, sizeof(note), "\n<%i extra bytes at end of message>", (int)leftover);
+    } else {
+        // clean parse with nothing left over: no note to add
+        return;
+    }
+
+    note[sizeof(note) - 1] = '\0';
+    mpack_print_append_cstr(print, note);
+}
+
+// Prints the first element found in the `len` bytes at `data`, using a
+// temporary data reader that is destroyed before returning.
+static void mpack_print_data(const char* data, size_t len, mpack_print_t* print, size_t depth) {
+    mpack_reader_t data_reader;
+
+    mpack_reader_init_data(&data_reader, data, len);
+    mpack_print_and_destroy(&data_reader, print, depth);
+}
+
+// Prints the element encoded in `data` into the caller's `buffer` of
+// `buffer_size` bytes. A zero-sized buffer trips an assertion and is otherwise
+// ignored. The last byte of the print buffer is always set to '\0'.
+void mpack_print_data_to_buffer(const char* data, size_t data_size, char* buffer, size_t buffer_size) {
+    if (buffer_size == 0) {
+        mpack_assert(false, "buffer size is zero!");
+        return;
+    }
+
+    mpack_print_t out;
+    mpack_memset(&out, 0, sizeof(out));
+    out.size = buffer_size;
+    out.buffer = buffer;
+
+    mpack_print_data(data, data_size, &out, 0);
+    mpack_print_append(&out, "",  1); // append the terminating null byte
+    mpack_print_flush(&out);
+
+    // force termination at the very end in case the output did not fit
+    out.buffer[out.size - 1] = '\0';
+}
+
+// Prints the element encoded in `data`, staging output in a 1024-byte stack
+// buffer attached to a print state that carries `callback` and `context`.
+void mpack_print_data_to_callback(const char* data, size_t size, mpack_print_callback_t callback, void* context) {
+    char scratch[1024];
+
+    mpack_print_t out;
+    mpack_memset(&out, 0, sizeof(out));
+    out.callback = callback;
+    out.context = context;
+    out.buffer = scratch;
+    out.size = sizeof(scratch);
+
+    mpack_print_data(data, size, &out, 0);
+    mpack_print_flush(&out);
+}
+
+// Prints the element encoded in `data` to `file` through
+// mpack_print_file_callback, starting at indentation depth 2 and ending with a
+// newline. Both pointers are asserted to be non-NULL.
+void mpack_print_data_to_file(const char* data, size_t len, FILE* file) {
+    mpack_assert(data != NULL, "data is NULL");
+    mpack_assert(file != NULL, "file is NULL");
+
+    char scratch[1024];
+
+    mpack_print_t out;
+    mpack_memset(&out, 0, sizeof(out));
+    out.callback = &mpack_print_file_callback;
+    out.context = file;
+    out.buffer = scratch;
+    out.size = sizeof(scratch);
+
+    mpack_print_data(data, len, &out, 2);
+    mpack_print_append_cstr(&out, "\n");
+    mpack_print_flush(&out);
+}
+
+// Prints the first element read from `file`, staging output in a 1024-byte
+// stack buffer attached to a print state that carries `callback` and
+// `context`. The reader is initialised with `false` as the last argument of
+// mpack_reader_init_stdfile().
+void mpack_print_stdfile_to_callback(FILE* file, mpack_print_callback_t callback, void* context) {
+    char scratch[1024];
+
+    mpack_print_t out;
+    mpack_memset(&out, 0, sizeof(out));
+    out.callback = callback;
+    out.context = context;
+    out.buffer = scratch;
+    out.size = sizeof(scratch);
+
+    mpack_reader_t file_reader;
+    mpack_reader_init_stdfile(&file_reader, file, false);
+    mpack_print_and_destroy(&file_reader, &out, 0);
+    mpack_print_flush(&out);
+}
+#endif
+
+#endif
+
+MPACK_SILENCE_WARNINGS_END
+
+/* mpack/mpack-expect.c.c */
+
+#define MPACK_INTERNAL 1
+
+/* #include "mpack-expect.h" */
+
+MPACK_SILENCE_WARNINGS_BEGIN
+
+#if MPACK_EXPECT
+
+
+// Helpers
+
+// Loads one uint8_t from the reader's current data pointer and advances the
+// pointer past it. Returns 0 if the reader is already in an error state or
+// mpack_reader_ensure() fails.
+MPACK_STATIC_INLINE uint8_t mpack_expect_native_u8(mpack_reader_t* reader) {
+    if (mpack_reader_error(reader) != mpack_ok)
+        return 0;
+    if (!mpack_reader_ensure(reader, sizeof(uint8_t)))
+        return 0;
+
+    uint8_t value = mpack_load_u8(reader->data);
+    reader->data += sizeof(value);
+    return value;
+}
+
+#if !MPACK_OPTIMIZE_FOR_SIZE
+// Loads one uint16_t (via mpack_load_u16) from the reader's current data
+// pointer and advances the pointer past it. Returns 0 if the reader is already
+// in an error state or mpack_reader_ensure() fails.
+MPACK_STATIC_INLINE uint16_t mpack_expect_native_u16(mpack_reader_t* reader) {
+    if (mpack_reader_error(reader) != mpack_ok)
+        return 0;
+    if (!mpack_reader_ensure(reader, sizeof(uint16_t)))
+        return 0;
+
+    uint16_t value = mpack_load_u16(reader->data);
+    reader->data += sizeof(value);
+    return value;
+}
+
+// Loads one uint32_t (via mpack_load_u32) from the reader's current data
+// pointer and advances the pointer past it. Returns 0 if the reader is already
+// in an error state or mpack_reader_ensure() fails.
+MPACK_STATIC_INLINE uint32_t mpack_expect_native_u32(mpack_reader_t* reader) {
+    if (mpack_reader_error(reader) != mpack_ok)
+        return 0;
+    if (!mpack_reader_ensure(reader, sizeof(uint32_t)))
+        return 0;
+
+    uint32_t value = mpack_load_u32(reader->data);
+    reader->data += sizeof(value);
+    return value;
+}
+#endif
+
+// Registers a new element with the reader's tracking, then reads and returns
+// the element's first byte (0 if that read fails).
+MPACK_STATIC_INLINE uint8_t mpack_expect_type_byte(mpack_reader_t* reader) {
+    mpack_reader_track_element(reader);
+
+    uint8_t first_byte = mpack_expect_native_u8(reader);
+    return first_byte;
+}
+
+
+// Basic Number Functions
+
+// Reads a tag and returns its value as a uint8_t. Accepts an unsigned or
+// signed integer tag whose value lies in [0, MPACK_UINT8_MAX]; anything else
+// flags mpack_error_type and returns 0.
+uint8_t mpack_expect_u8(mpack_reader_t* reader) {
+    mpack_tag_t tag = mpack_read_tag(reader);
+
+    if (tag.type == mpack_type_uint && tag.v.u <= MPACK_UINT8_MAX)
+        return (uint8_t)tag.v.u;
+    if (tag.type == mpack_type_int && tag.v.i >= 0 && tag.v.i <= MPACK_UINT8_MAX)
+        return (uint8_t)tag.v.i;
+
+    mpack_reader_flag_error(reader, mpack_error_type);
+    return 0;
+}
+
+// Reads a tag and returns its value as a uint16_t. Accepts an unsigned or
+// signed integer tag whose value lies in [0, MPACK_UINT16_MAX]; anything else
+// flags mpack_error_type and returns 0.
+uint16_t mpack_expect_u16(mpack_reader_t* reader) {
+    mpack_tag_t tag = mpack_read_tag(reader);
+
+    if (tag.type == mpack_type_uint && tag.v.u <= MPACK_UINT16_MAX)
+        return (uint16_t)tag.v.u;
+    if (tag.type == mpack_type_int && tag.v.i >= 0 && tag.v.i <= MPACK_UINT16_MAX)
+        return (uint16_t)tag.v.i;
+
+    mpack_reader_flag_error(reader, mpack_error_type);
+    return 0;
+}
+
+// Reads a tag and returns its value as a uint32_t. Accepts an unsigned or
+// signed integer tag whose value lies in [0, MPACK_UINT32_MAX]; anything else
+// flags mpack_error_type and returns 0.
+uint32_t mpack_expect_u32(mpack_reader_t* reader) {
+    mpack_tag_t tag = mpack_read_tag(reader);
+
+    if (tag.type == mpack_type_uint && tag.v.u <= MPACK_UINT32_MAX)
+        return (uint32_t)tag.v.u;
+    if (tag.type == mpack_type_int && tag.v.i >= 0 && tag.v.i <= MPACK_UINT32_MAX)
+        return (uint32_t)tag.v.i;
+
+    mpack_reader_flag_error(reader, mpack_error_type);
+    return 0;
+}
+
+// Reads a tag and returns its value as a uint64_t. Accepts any unsigned
+// integer tag, or a signed integer tag that is not negative; anything else
+// flags mpack_error_type and returns 0.
+uint64_t mpack_expect_u64(mpack_reader_t* reader) {
+    mpack_tag_t tag = mpack_read_tag(reader);
+
+    if (tag.type == mpack_type_uint)
+        return tag.v.u;
+    if (tag.type == mpack_type_int && tag.v.i >= 0)
+        return (uint64_t)tag.v.i;
+
+    mpack_reader_flag_error(reader, mpack_error_type);
+    return 0;
+}
+
+// Reads a tag and returns its value as an int8_t. Accepts an unsigned tag no
+// larger than MPACK_INT8_MAX or a signed tag within
+// [MPACK_INT8_MIN, MPACK_INT8_MAX]; anything else flags mpack_error_type and
+// returns 0.
+int8_t mpack_expect_i8(mpack_reader_t* reader) {
+    mpack_tag_t tag = mpack_read_tag(reader);
+
+    if (tag.type == mpack_type_uint && tag.v.u <= MPACK_INT8_MAX)
+        return (int8_t)tag.v.u;
+    if (tag.type == mpack_type_int && tag.v.i >= MPACK_INT8_MIN && tag.v.i <= MPACK_INT8_MAX)
+        return (int8_t)tag.v.i;
+
+    mpack_reader_flag_error(reader, mpack_error_type);
+    return 0;
+}
+
+// Reads a tag and returns its value as an int16_t. Accepts an unsigned tag no
+// larger than MPACK_INT16_MAX or a signed tag within
+// [MPACK_INT16_MIN, MPACK_INT16_MAX]; anything else flags mpack_error_type and
+// returns 0.
+int16_t mpack_expect_i16(mpack_reader_t* reader) {
+    mpack_tag_t tag = mpack_read_tag(reader);
+
+    if (tag.type == mpack_type_uint && tag.v.u <= MPACK_INT16_MAX)
+        return (int16_t)tag.v.u;
+    if (tag.type == mpack_type_int && tag.v.i >= MPACK_INT16_MIN && tag.v.i <= MPACK_INT16_MAX)
+        return (int16_t)tag.v.i;
+
+    mpack_reader_flag_error(reader, mpack_error_type);
+    return 0;
+}
+
+// Reads a tag and returns its value as an int32_t. Accepts an unsigned tag no
+// larger than MPACK_INT32_MAX or a signed tag within
+// [MPACK_INT32_MIN, MPACK_INT32_MAX]; anything else flags mpack_error_type and
+// returns 0.
+int32_t mpack_expect_i32(mpack_reader_t* reader) {
+    mpack_tag_t tag = mpack_read_tag(reader);
+
+    if (tag.type == mpack_type_uint && tag.v.u <= MPACK_INT32_MAX)
+        return (int32_t)tag.v.u;
+    if (tag.type == mpack_type_int && tag.v.i >= MPACK_INT32_MIN && tag.v.i <= MPACK_INT32_MAX)
+        return (int32_t)tag.v.i;
+
+    mpack_reader_flag_error(reader, mpack_error_type);
+    return 0;
+}
+
+// Reads a tag and returns its value as an int64_t. Accepts any signed integer
+// tag, or an unsigned tag no larger than MPACK_INT64_MAX; anything else flags
+// mpack_error_type and returns 0.
+int64_t mpack_expect_i64(mpack_reader_t* reader) {
+    mpack_tag_t tag = mpack_read_tag(reader);
+
+    if (tag.type == mpack_type_uint && tag.v.u <= MPACK_INT64_MAX)
+        return (int64_t)tag.v.u;
+    if (tag.type == mpack_type_int)
+        return tag.v.i;
+
+    mpack_reader_flag_error(reader, mpack_error_type);
+    return 0;
+}
+
+#if MPACK_FLOAT
+// Reads a tag and returns its value converted to float. Integer, float and
+// double tags are accepted; any other tag flags mpack_error_type and returns
+// 0.0f.
+float mpack_expect_float(mpack_reader_t* reader) {
+    mpack_tag_t tag = mpack_read_tag(reader);
+
+    switch (tag.type) {
+        case mpack_type_uint:
+            return (float)tag.v.u;
+
+        case mpack_type_int:
+            return (float)tag.v.i;
+
+        case mpack_type_float:
+            return tag.v.f;
+
+        case mpack_type_double:
+            // without double support the raw double is narrowed by a helper
+            #if MPACK_DOUBLE
+            return (float)tag.v.d;
+            #else
+            return mpack_shorten_raw_double_to_float(tag.v.d);
+            #endif
+
+        default:
+            break;
+    }
+
+    mpack_reader_flag_error(reader, mpack_error_type);
+    return 0.0f;
+}
+#endif
+
+#if MPACK_DOUBLE
+// Reads a tag and returns its value converted to double. Integer, float and
+// double tags are accepted; any other tag flags mpack_error_type and returns
+// 0.0.
+double mpack_expect_double(mpack_reader_t* reader) {
+    mpack_tag_t tag = mpack_read_tag(reader);
+
+    switch (tag.type) {
+        case mpack_type_uint:
+            return (double)tag.v.u;
+        case mpack_type_int:
+            return (double)tag.v.i;
+        case mpack_type_float:
+            return (double)tag.v.f;
+        case mpack_type_double:
+            return tag.v.d;
+        default:
+            break;
+    }
+
+    mpack_reader_flag_error(reader, mpack_error_type);
+    return 0.0;
+}
+#endif
+
+#if MPACK_FLOAT
+// Reads a tag that must be a float and returns its value. Any other tag type
+// flags mpack_error_type and returns 0.0f.
+float mpack_expect_float_strict(mpack_reader_t* reader) {
+    mpack_tag_t tag = mpack_read_tag(reader);
+
+    if (tag.type != mpack_type_float) {
+        mpack_reader_flag_error(reader, mpack_error_type);
+        return 0.0f;
+    }
+    return tag.v.f;
+}
+#endif
+
+#if MPACK_DOUBLE
+// Reads a tag that must be a float or a double and returns its value as a
+// double. Any other tag type flags mpack_error_type and returns 0.0.
+double mpack_expect_double_strict(mpack_reader_t* reader) {
+    mpack_tag_t tag = mpack_read_tag(reader);
+
+    switch (tag.type) {
+        case mpack_type_float:
+            return (double)tag.v.f;
+        case mpack_type_double:
+            return tag.v.d;
+        default:
+            break;
+    }
+
+    mpack_reader_flag_error(reader, mpack_error_type);
+    return 0.0;
+}
+#endif
+
+#if !MPACK_FLOAT
+// Variant used when MPACK_FLOAT is disabled: reads a tag that must be a float
+// and returns the tag's `f` field as a uint32_t. Any other tag type flags
+// mpack_error_type and returns 0.
+uint32_t mpack_expect_raw_float(mpack_reader_t* reader) {
+    mpack_tag_t tag = mpack_read_tag(reader);
+
+    if (tag.type != mpack_type_float) {
+        mpack_reader_flag_error(reader, mpack_error_type);
+        return 0;
+    }
+    return tag.v.f;
+}
+#endif
+
+#if !MPACK_DOUBLE
+// Variant used when MPACK_DOUBLE is disabled: reads a tag that must be a
+// double and returns the tag's `d` field as a uint64_t. Any other tag type
+// flags mpack_error_type and returns 0.
+uint64_t mpack_expect_raw_double(mpack_reader_t* reader) {
+    mpack_tag_t tag = mpack_read_tag(reader);
+
+    if (tag.type != mpack_type_double) {
+        mpack_reader_flag_error(reader, mpack_error_type);
+        return 0;
+    }
+    return tag.v.d;
+}
+#endif
+
+
+// Ranged Number Functions
+//
+// All ranged functions are identical other than the type, so we
+// define their content with a macro. The prototypes are still written
+// out in full to support ctags/IDE tools.
+//
+// MPACK_EXPECT_RANGE_IMPL(name, type_t) expands to a complete function body.
+// It relies on `reader`, `min_value` and `max_value` being in scope at the
+// point of expansion. The body asserts min_value <= max_value, calls
+// mpack_expect_<name>(reader), returns min_value if the reader is in an error
+// state afterwards, and flags mpack_error_type (again returning min_value)
+// when the value read lies outside [min_value, max_value].
+//
+// NOTE(review): the assertion message formats both bounds with %i, yet the
+// macro is also expanded with 64-bit, float and double bounds -- confirm how
+// mpack_assert consumes its format arguments.
+
+#define MPACK_EXPECT_RANGE_IMPL(name, type_t)                           \
+                                                                        \
+    /* make sure the range is sensible */                               \
+    mpack_assert(min_value <= max_value,                                \
+            "min_value %i must be less than or equal to max_value %i",  \
+            min_value, max_value);                                      \
+                                                                        \
+    /* read the value */                                                \
+    type_t val = mpack_expect_##name(reader);                           \
+    if (mpack_reader_error(reader) != mpack_ok)                         \
+        return min_value;                                               \
+                                                                        \
+    /* make sure it fits */                                             \
+    if (val < min_value || val > max_value) {                           \
+        mpack_reader_flag_error(reader, mpack_error_type);              \
+        return min_value;                                               \
+    }                                                                   \
+                                                                        \
+    return val;
+
+// Each function below reads a value with the matching mpack_expect_<name>()
+// and returns it when it lies in [min_value, max_value]. On a reader error or
+// an out-of-range value it returns min_value (out-of-range flags
+// mpack_error_type).
+
+// Unsigned integers.
+uint8_t mpack_expect_u8_range(mpack_reader_t* reader, uint8_t min_value, uint8_t max_value) {MPACK_EXPECT_RANGE_IMPL(u8, uint8_t)}
+uint16_t mpack_expect_u16_range(mpack_reader_t* reader, uint16_t min_value, uint16_t max_value) {MPACK_EXPECT_RANGE_IMPL(u16, uint16_t)}
+uint32_t mpack_expect_u32_range(mpack_reader_t* reader, uint32_t min_value, uint32_t max_value) {MPACK_EXPECT_RANGE_IMPL(u32, uint32_t)}
+uint64_t mpack_expect_u64_range(mpack_reader_t* reader, uint64_t min_value, uint64_t max_value) {MPACK_EXPECT_RANGE_IMPL(u64, uint64_t)}
+
+// Signed integers.
+int8_t mpack_expect_i8_range(mpack_reader_t* reader, int8_t min_value, int8_t max_value) {MPACK_EXPECT_RANGE_IMPL(i8, int8_t)}
+int16_t mpack_expect_i16_range(mpack_reader_t* reader, int16_t min_value, int16_t max_value) {MPACK_EXPECT_RANGE_IMPL(i16, int16_t)}
+int32_t mpack_expect_i32_range(mpack_reader_t* reader, int32_t min_value, int32_t max_value) {MPACK_EXPECT_RANGE_IMPL(i32, int32_t)}
+int64_t mpack_expect_i64_range(mpack_reader_t* reader, int64_t min_value, int64_t max_value) {MPACK_EXPECT_RANGE_IMPL(i64, int64_t)}
+
+// Floating point, available only when the corresponding type is enabled.
+#if MPACK_FLOAT
+float mpack_expect_float_range(mpack_reader_t* reader, float min_value, float max_value) {MPACK_EXPECT_RANGE_IMPL(float, float)}
+#endif
+#if MPACK_DOUBLE
+double mpack_expect_double_range(mpack_reader_t* reader, double min_value, double max_value) {MPACK_EXPECT_RANGE_IMPL(double, double)}
+#endif
+
+// Element counts: the value checked is the count returned by
+// mpack_expect_map() / mpack_expect_array().
+uint32_t mpack_expect_map_range(mpack_reader_t* reader, uint32_t min_value, uint32_t max_value) {MPACK_EXPECT_RANGE_IMPL(map, uint32_t)}
+uint32_t mpack_expect_array_range(mpack_reader_t* reader, uint32_t min_value, uint32_t max_value) {MPACK_EXPECT_RANGE_IMPL(array, uint32_t)}
+
+
+// Matching Number Functions
+
+// Reads an unsigned integer with mpack_expect_u64() and flags
+// mpack_error_type if it differs from `value`.
+void mpack_expect_uint_match(mpack_reader_t* reader, uint64_t value) {
+    uint64_t actual = mpack_expect_u64(reader);
+    if (actual == value)
+        return;
+    mpack_reader_flag_error(reader, mpack_error_type);
+}
+
+// Reads a signed integer with mpack_expect_i64() and flags mpack_error_type
+// if it differs from `value`.
+void mpack_expect_int_match(mpack_reader_t* reader, int64_t value) {
+    int64_t actual = mpack_expect_i64(reader);
+    if (actual == value)
+        return;
+    mpack_reader_flag_error(reader, mpack_error_type);
+}
+
+
+// Other Basic Types
+
+// Reads the next element's type byte and flags mpack_error_type unless it is
+// 0xc0.
+void mpack_expect_nil(mpack_reader_t* reader) {
+    uint8_t type = mpack_expect_type_byte(reader);
+    if (type == 0xc0)
+        return;
+    mpack_reader_flag_error(reader, mpack_error_type);
+}
+
+// Reads the next element's type byte and interprets it as a boolean: 0xc2 is
+// false and 0xc3 is true. Any other byte flags mpack_error_type and yields
+// false, so the return value never reports true alongside a type error (the
+// sibling expect functions likewise return a zero value on error).
+bool mpack_expect_bool(mpack_reader_t* reader) {
+    uint8_t type = mpack_expect_type_byte(reader);
+
+    // masking off the low bit leaves 0xc2 for both boolean encodings
+    if ((type & ~1) != 0xc2) {
+        mpack_reader_flag_error(reader, mpack_error_type);
+        return false;
+    }
+
+    return (bool)(type & 1);
+}
+
+// Reads a boolean and flags mpack_error_type unless it is true.
+void mpack_expect_true(mpack_reader_t* reader) {
+    bool value = mpack_expect_bool(reader);
+    if (!value)
+        mpack_reader_flag_error(reader, mpack_error_type);
+}
+
+// Reads a boolean and flags mpack_error_type unless it is false.
+void mpack_expect_false(mpack_reader_t* reader) {
+    bool value = mpack_expect_bool(reader);
+    if (value)
+        mpack_reader_flag_error(reader, mpack_error_type);
+}
+
+#if MPACK_EXTENSIONS
+// Reads a tag that must be an ext element of type MPACK_EXTTYPE_TIMESTAMP and
+// decodes its payload with mpack_read_timestamp(). A tag of any other kind
+// flags mpack_error_type and returns a zeroed timestamp.
+mpack_timestamp_t mpack_expect_timestamp(mpack_reader_t* reader) {
+    mpack_tag_t tag = mpack_read_tag(reader);
+
+    // the ext type is only queried once the tag is known to be an ext
+    if (tag.type != mpack_type_ext || mpack_tag_ext_exttype(&tag) != MPACK_EXTTYPE_TIMESTAMP) {
+        mpack_timestamp_t zero = {0, 0};
+        mpack_reader_flag_error(reader, mpack_error_type);
+        return zero;
+    }
+
+    return mpack_read_timestamp(reader, mpack_tag_ext_length(&tag));
+}
+
+// Reads a timestamp and returns only its seconds field, discarding the
+// nanoseconds.
+int64_t mpack_expect_timestamp_truncate(mpack_reader_t* reader) {
+    mpack_timestamp_t timestamp = mpack_expect_timestamp(reader);
+    return timestamp.seconds;
+}
+#endif
+
+
+// Compound Types
+
+// Reads a tag that must be a map and returns the count stored in the tag. Any
+// other tag type flags mpack_error_type and returns 0.
+uint32_t mpack_expect_map(mpack_reader_t* reader) {
+    mpack_tag_t tag = mpack_read_tag(reader);
+
+    if (tag.type != mpack_type_map) {
+        mpack_reader_flag_error(reader, mpack_error_type);
+        return 0;
+    }
+    return tag.v.n;
+}
+
+// Reads a map header and flags mpack_error_type if its count differs from
+// `count`.
+void mpack_expect_map_match(mpack_reader_t* reader, uint32_t count) {
+    uint32_t actual = mpack_expect_map(reader);
+    if (actual == count)
+        return;
+    mpack_reader_flag_error(reader, mpack_error_type);
+}
+
+// Reads a tag that must be either nil or a map. Returns true and stores the
+// map's count in *count for a map; returns false with *count set to 0 for nil.
+// Any other tag type flags mpack_error_type, sets *count to 0 and returns
+// false.
+bool mpack_expect_map_or_nil(mpack_reader_t* reader, uint32_t* count) {
+    mpack_assert(count != NULL, "count cannot be NULL");
+
+    mpack_tag_t tag = mpack_read_tag(reader);
+    switch (tag.type) {
+        case mpack_type_map:
+            *count = tag.v.n;
+            return true;
+
+        case mpack_type_nil:
+            *count = 0;
+            return false;
+
+        default:
+            break;
+    }
+
+    mpack_reader_flag_error(reader, mpack_error_type);
+    *count = 0;
+    return false;
+}
+
+// Like mpack_expect_map_or_nil(), but a map with more than max_count entries
+// is rejected: *count is reset to 0, mpack_error_type is flagged and false is
+// returned.
+bool mpack_expect_map_max_or_nil(mpack_reader_t* reader, uint32_t max_count, uint32_t* count) {
+    mpack_assert(count != NULL, "count cannot be NULL");
+
+    if (!mpack_expect_map_or_nil(reader, count))
+        return false;
+    if (*count <= max_count)
+        return true;
+
+    *count = 0;
+    mpack_reader_flag_error(reader, mpack_error_type);
+    return false;
+}
+
+// Reads a tag that must be an array and returns the count stored in the tag.
+// Any other tag type flags mpack_error_type and returns 0.
+uint32_t mpack_expect_array(mpack_reader_t* reader) {
+    mpack_tag_t tag = mpack_read_tag(reader);
+
+    if (tag.type != mpack_type_array) {
+        mpack_reader_flag_error(reader, mpack_error_type);
+        return 0;
+    }
+    return tag.v.n;
+}
+
+// Reads an array header and flags mpack_error_type if its count differs from
+// `count`.
+void mpack_expect_array_match(mpack_reader_t* reader, uint32_t count) {
+    uint32_t actual = mpack_expect_array(reader);
+    if (actual == count)
+        return;
+    mpack_reader_flag_error(reader, mpack_error_type);
+}
+
+// Reads a tag that must be either nil or an array. Returns true and stores the
+// array's count in *count for an array; returns false with *count set to 0 for
+// nil. Any other tag type flags mpack_error_type, sets *count to 0 and returns
+// false.
+bool mpack_expect_array_or_nil(mpack_reader_t* reader, uint32_t* count) {
+    mpack_assert(count != NULL, "count cannot be NULL");
+
+    mpack_tag_t tag = mpack_read_tag(reader);
+    switch (tag.type) {
+        case mpack_type_array:
+            *count = tag.v.n;
+            return true;
+
+        case mpack_type_nil:
+            *count = 0;
+            return false;
+
+        default:
+            break;
+    }
+
+    mpack_reader_flag_error(reader, mpack_error_type);
+    *count = 0;
+    return false;
+}
+
+// Like mpack_expect_array_or_nil(), but an array with more than max_count
+// elements is rejected: *count is reset to 0, mpack_error_type is flagged and
+// false is returned.
+bool mpack_expect_array_max_or_nil(mpack_reader_t* reader, uint32_t max_count, uint32_t* count) {
+    mpack_assert(count != NULL, "count cannot be NULL");
+
+    if (!mpack_expect_array_or_nil(reader, count))
+        return false;
+    if (*count <= max_count)
+        return true;
+
+    *count = 0;
+    mpack_reader_flag_error(reader, mpack_error_type);
+    return false;
+}
+
+#ifdef MPACK_MALLOC
+// Reads an array header of at most max_count elements and allocates storage
+// for that many elements of element_size bytes each using MPACK_MALLOC.
+//
+// reader       - reader to read the array header from
+// element_size - size in bytes of one element
+// max_count    - largest element count accepted
+// out_count    - receives the element count; set to 0 whenever NULL is returned
+// allow_nil    - when true, a nil is accepted in place of an array
+//
+// Returns the allocated block, or NULL if the reader is in an error state, the
+// array is empty (or nil), the total size does not fit in size_t, or the
+// allocation fails. The last two cases flag mpack_error_memory.
+void* mpack_expect_array_alloc_impl(mpack_reader_t* reader, size_t element_size, uint32_t max_count, uint32_t* out_count, bool allow_nil) {
+    mpack_assert(out_count != NULL, "out_count cannot be NULL");
+    *out_count = 0;
+
+    uint32_t count;
+    bool has_array = true;
+    if (allow_nil)
+        has_array = mpack_expect_array_max_or_nil(reader, max_count, &count);
+    else
+        count = mpack_expect_array_max(reader, max_count);
+    if (mpack_reader_error(reader))
+        return NULL;
+
+    // size 0 is not an error; we return NULL for no elements.
+    if (count == 0) {
+        // we call mpack_done_array() automatically ONLY if we are using
+        // the _or_nil variant. this is the only way to allow nil and empty
+        // to work the same way.
+        if (allow_nil && has_array)
+            mpack_done_array(reader);
+        return NULL;
+    }
+
+    // Refuse sizes whose byte count cannot be represented in size_t: a wrapped
+    // product would allocate a block smaller than the caller expects.
+    if (element_size != 0 && count > SIZE_MAX / element_size) {
+        mpack_reader_flag_error(reader, mpack_error_memory);
+        return NULL;
+    }
+
+    void* p = MPACK_MALLOC(element_size * count);
+    if (p == NULL) {
+        mpack_reader_flag_error(reader, mpack_error_memory);
+        return NULL;
+    }
+
+    *out_count = count;
+    return p;
+}
+#endif
+
+
+// Str, Bin and Ext Functions
+
+// Reads a string header and returns the string's length in bytes. Anything
+// that is not a string flags mpack_error_type and returns 0. The
+// size-optimized build goes through mpack_read_tag(); otherwise the header is
+// decoded directly from the type byte.
+uint32_t mpack_expect_str(mpack_reader_t* reader) {
+    #if MPACK_OPTIMIZE_FOR_SIZE
+    mpack_tag_t tag = mpack_read_tag(reader);
+    if (tag.type != mpack_type_str) {
+        mpack_reader_flag_error(reader, mpack_error_type);
+        return 0;
+    }
+    return tag.v.l;
+    #else
+    uint8_t type = mpack_expect_type_byte(reader);
+    uint32_t length;
+
+    if ((type & 0xe0) == 0xa0) {
+        // short form: the length is the low five bits of the type byte
+        length = type & 0x1f;
+    } else {
+        // long forms: the type byte is followed by an 8-, 16- or 32-bit length
+        switch (type) {
+            case 0xd9:
+                length = mpack_expect_native_u8(reader);
+                break;
+            case 0xda:
+                length = mpack_expect_native_u16(reader);
+                break;
+            case 0xdb:
+                length = mpack_expect_native_u32(reader);
+                break;
+            default:
+                mpack_reader_flag_error(reader, mpack_error_type);
+                return 0;
+        }
+    }
+
+    #if MPACK_READ_TRACKING
+    mpack_reader_flag_if_error(reader, mpack_track_push(&reader->track, mpack_type_str, length));
+    #endif
+    return length;
+    #endif
+}
+
+// Reads a string into `buf`, which holds `bufsize` bytes, and closes the str
+// element. Returns the string's length, or 0 on any reader error. A string
+// longer than bufsize flags mpack_error_too_big. This function does not write
+// a null-terminator itself.
+size_t mpack_expect_str_buf(mpack_reader_t* reader, char* buf, size_t bufsize) {
+    mpack_assert(buf != NULL, "buf cannot be NULL");
+
+    size_t str_len = mpack_expect_str(reader);
+    if (mpack_reader_error(reader))
+        return 0;
+
+    if (bufsize < str_len) {
+        mpack_reader_flag_error(reader, mpack_error_too_big);
+        return 0;
+    }
+
+    mpack_read_bytes(reader, buf, str_len);
+    if (mpack_reader_error(reader))
+        return 0;
+
+    mpack_done_str(reader);
+    return str_len;
+}
+
+// Reads a string into `buf` via mpack_expect_str_buf() and checks the bytes
+// read with mpack_utf8_check(). Returns the string's length, or flags
+// mpack_error_type and returns 0 when the check fails.
+size_t mpack_expect_utf8(mpack_reader_t* reader, char* buf, size_t size) {
+    mpack_assert(buf != NULL, "buf cannot be NULL");
+
+    size_t str_len = mpack_expect_str_buf(reader, buf, size);
+    if (mpack_utf8_check(buf, str_len))
+        return str_len;
+
+    mpack_reader_flag_error(reader, mpack_error_type);
+    return 0;
+}
+
+// Reads a tag that must be a bin and returns its length in bytes. Any other
+// tag type flags mpack_error_type and returns 0.
+uint32_t mpack_expect_bin(mpack_reader_t* reader) {
+    mpack_tag_t tag = mpack_read_tag(reader);
+
+    if (tag.type != mpack_type_bin) {
+        mpack_reader_flag_error(reader, mpack_error_type);
+        return 0;
+    }
+    return tag.v.l;
+}
+
+// Reads a bin into `buf`, which holds `bufsize` bytes, and closes the bin
+// element. Returns the number of bytes read, or 0 on any reader error. A bin
+// larger than bufsize flags mpack_error_too_big.
+size_t mpack_expect_bin_buf(mpack_reader_t* reader, char* buf, size_t bufsize) {
+    mpack_assert(buf != NULL, "buf cannot be NULL");
+
+    size_t payload = mpack_expect_bin(reader);
+    if (mpack_reader_error(reader))
+        return 0;
+
+    if (bufsize < payload) {
+        mpack_reader_flag_error(reader, mpack_error_too_big);
+        return 0;
+    }
+
+    mpack_read_bytes(reader, buf, payload);
+    if (mpack_reader_error(reader))
+        return 0;
+
+    mpack_done_bin(reader);
+    return payload;
+}
+
+// Expects a bin of exactly `size` bytes (via mpack_expect_bin_size()), reads
+// its contents into `buf` and closes the bin element. `buf` is asserted to be
+// non-NULL. This function does not inspect the reader's error state between
+// the three calls.
+void mpack_expect_bin_size_buf(mpack_reader_t* reader, char* buf, uint32_t size) {
+    mpack_assert(buf != NULL, "buf cannot be NULL");
+    mpack_expect_bin_size(reader, size);
+    mpack_read_bytes(reader, buf, size);
+    mpack_done_bin(reader);
+}
+
+#if MPACK_EXTENSIONS
+// Reads a tag that must be an ext. On success stores the extension type in
+// *type and returns the payload length. Any other tag type sets *type to 0,
+// flags mpack_error_type and returns 0.
+uint32_t mpack_expect_ext(mpack_reader_t* reader, int8_t* type) {
+    mpack_tag_t tag = mpack_read_tag(reader);
+
+    if (tag.type != mpack_type_ext) {
+        *type = 0;
+        mpack_reader_flag_error(reader, mpack_error_type);
+        return 0;
+    }
+
+    *type = mpack_tag_ext_exttype(&tag);
+    return mpack_tag_ext_length(&tag);
+}
+
+// Reads an ext into `buf`, which holds `bufsize` bytes, stores its extension
+// type in *type and closes the ext element. Returns the payload size, or 0 on
+// any reader error. A payload larger than bufsize flags mpack_error_too_big.
+// *type is reset to 0 when the size check or the payload read fails.
+size_t mpack_expect_ext_buf(mpack_reader_t* reader, int8_t* type, char* buf, size_t bufsize) {
+    mpack_assert(buf != NULL, "buf cannot be NULL");
+
+    size_t payload = mpack_expect_ext(reader, type);
+    if (mpack_reader_error(reader))
+        return 0;
+
+    if (bufsize < payload) {
+        *type = 0;
+        mpack_reader_flag_error(reader, mpack_error_too_big);
+        return 0;
+    }
+
+    mpack_read_bytes(reader, buf, payload);
+    if (mpack_reader_error(reader)) {
+        *type = 0;
+        return 0;
+    }
+
+    mpack_done_ext(reader);
+    return payload;
+}
+#endif
+
+// Reads a string header, hands the string's length to mpack_read_cstr() to
+// fill `buf` (of `bufsize` bytes), then closes the str element.
+void mpack_expect_cstr(mpack_reader_t* reader, char* buf, size_t bufsize) {
+    uint32_t str_len = mpack_expect_str(reader);
+
+    mpack_read_cstr(reader, buf, bufsize, str_len);
+    mpack_done_str(reader);
+}
+
+// Reads a string header, hands the string's length to mpack_read_utf8_cstr()
+// to fill `buf` (of `bufsize` bytes), then closes the str element.
+void mpack_expect_utf8_cstr(mpack_reader_t* reader, char* buf, size_t bufsize) {
+    uint32_t str_len = mpack_expect_str(reader);
+
+    mpack_read_utf8_cstr(reader, buf, bufsize, str_len);
+    mpack_done_str(reader);
+}
+
+#ifdef MPACK_MALLOC
+// Reads a string of at most maxsize - 1 bytes into a newly allocated buffer
+// obtained from mpack_read_bytes_alloc_impl() and closes the str element. The
+// string's contents are not validated here. *out_length receives the string's
+// length when a buffer is returned and is 0 otherwise. A maxsize of zero is
+// treated as a caller bug: it breaks, flags mpack_error_bug and returns NULL.
+static char* mpack_expect_cstr_alloc_unchecked(mpack_reader_t* reader, size_t maxsize, size_t* out_length) {
+    mpack_assert(out_length != NULL, "out_length cannot be NULL");
+    *out_length = 0;
+
+    // make sure argument makes sense
+    if (maxsize < 1) {
+        mpack_break("maxsize is zero; you must have room for at least a null-terminator");
+        mpack_reader_flag_error(reader, mpack_error_bug);
+        return NULL;
+    }
+
+    // clamp the limit so the cast to uint32_t below cannot truncate it
+    size_t limit = maxsize;
+    if (SIZE_MAX < MPACK_UINT32_MAX) {
+        if (limit > SIZE_MAX)
+            limit = SIZE_MAX;
+    } else {
+        if (limit > (size_t)MPACK_UINT32_MAX)
+            limit = (size_t)MPACK_UINT32_MAX;
+    }
+
+    size_t str_len = mpack_expect_str_max(reader, (uint32_t)limit - 1);
+    char* result = mpack_read_bytes_alloc_impl(reader, str_len, true);
+    mpack_done_str(reader);
+
+    if (result)
+        *out_length = str_len;
+    return result;
+}
+
+// Reads a string into a newly allocated buffer and checks it with
+// mpack_str_check_no_null(). If the check fails the buffer is freed,
+// mpack_error_type is flagged and NULL is returned. On success the caller
+// receives the buffer.
+char* mpack_expect_cstr_alloc(mpack_reader_t* reader, size_t maxsize) {
+    size_t str_len;
+    char* result = mpack_expect_cstr_alloc_unchecked(reader, maxsize, &str_len);
+
+    // nothing to validate when the read itself failed
+    if (result == NULL || mpack_str_check_no_null(result, str_len))
+        return result;
+
+    MPACK_FREE(result);
+    mpack_reader_flag_error(reader, mpack_error_type);
+    return NULL;
+}
+
+// Reads a string into a newly allocated buffer and checks it with
+// mpack_utf8_check_no_null(). If the check fails the buffer is freed,
+// mpack_error_type is flagged and NULL is returned. On success the caller
+// receives the buffer.
+char* mpack_expect_utf8_cstr_alloc(mpack_reader_t* reader, size_t maxsize) {
+    size_t str_len;
+    char* result = mpack_expect_cstr_alloc_unchecked(reader, maxsize, &str_len);
+
+    // nothing to validate when the read itself failed
+    if (result == NULL || mpack_utf8_check_no_null(result, str_len))
+        return result;
+
+    MPACK_FREE(result);
+    mpack_reader_flag_error(reader, mpack_error_type);
+    return NULL;
+}
+#endif
+
+// Expects a string whose bytes are exactly the `len` bytes at `str`, and
+// closes the str element on success. A string of a different length or with
+// different contents flags mpack_error_type.
+//
+// reader - reader to read from
+// str    - expected bytes (need not be null-terminated); must not be NULL
+// len    - number of expected bytes; lengths above MPACK_UINT32_MAX can never
+//          match and flag mpack_error_type immediately
+void mpack_expect_str_match(mpack_reader_t* reader, const char* str, size_t len) {
+    mpack_assert(str != NULL, "str cannot be NULL");
+
+    // A string header cannot carry a length beyond 32 bits; bail out rather
+    // than pass a truncated length on.
+    if (len > MPACK_UINT32_MAX) {
+        mpack_reader_flag_error(reader, mpack_error_type);
+        return;
+    }
+
+    // expect a str the correct length
+    mpack_expect_str_length(reader, (uint32_t)len);
+    if (mpack_reader_error(reader))
+        return;
+    mpack_reader_track_bytes(reader, (uint32_t)len);
+
+    // check each byte one by one (matched strings are likely to be very small)
+    for (; len > 0; --len) {
+        // Compare as unsigned bytes: where plain char is signed, a byte of
+        // 0x80 or above would promote to a negative int and never equal the
+        // uint8_t read from the reader.
+        if (mpack_expect_native_u8(reader) != (uint8_t)*str++) {
+            mpack_reader_flag_error(reader, mpack_error_type);
+            return;
+        }
+    }
+
+    mpack_done_str(reader);
+}
+
+// Reads a tag and flags mpack_error_type unless mpack_tag_equal() reports it
+// equal to `expected`.
+void mpack_expect_tag(mpack_reader_t* reader, mpack_tag_t expected) {
+    mpack_tag_t found = mpack_read_tag(reader);
+    if (mpack_tag_equal(found, expected))
+        return;
+    mpack_reader_flag_error(reader, mpack_error_type);
+}
+
+#ifdef MPACK_MALLOC
+// Reads a bin of at most `maxsize` bytes into a buffer obtained from
+// mpack_read_bytes_alloc() and closes the bin element. *size receives the
+// number of bytes when a buffer is returned and is 0 otherwise. Returns NULL
+// if the header read leaves the reader in an error state or no buffer was
+// produced.
+char* mpack_expect_bin_alloc(mpack_reader_t* reader, size_t maxsize, size_t* size) {
+    mpack_assert(size != NULL, "size cannot be NULL");
+    *size = 0;
+
+    // clamp the limit so the cast to uint32_t below cannot truncate it
+    size_t limit = maxsize;
+    if (SIZE_MAX < MPACK_UINT32_MAX) {
+        if (limit > SIZE_MAX)
+            limit = SIZE_MAX;
+    } else {
+        if (limit > (size_t)MPACK_UINT32_MAX)
+            limit = (size_t)MPACK_UINT32_MAX;
+    }
+
+    size_t payload = mpack_expect_bin_max(reader, (uint32_t)limit);
+    if (mpack_reader_error(reader))
+        return NULL;
+
+    char* result = mpack_read_bytes_alloc(reader, payload);
+    mpack_done_bin(reader);
+
+    if (result == NULL)
+        return NULL;
+    *size = payload;
+    return result;
+}
+#endif
+
+#if MPACK_EXTENSIONS && defined(MPACK_MALLOC)
+// Reads an ext of at most `maxsize` bytes into a buffer obtained from
+// mpack_read_bytes_alloc() and closes the ext element. The extension type is
+// stored through `type` by mpack_expect_ext_max(). *size receives the payload
+// size when a buffer is returned and is 0 otherwise; *type is reset to 0 when
+// no buffer was produced after a successful header read.
+char* mpack_expect_ext_alloc(mpack_reader_t* reader, int8_t* type, size_t maxsize, size_t* size) {
+    mpack_assert(size != NULL, "size cannot be NULL");
+    *size = 0;
+
+    // clamp the limit so the cast to uint32_t below cannot truncate it
+    size_t limit = maxsize;
+    if (SIZE_MAX < MPACK_UINT32_MAX) {
+        if (limit > SIZE_MAX)
+            limit = SIZE_MAX;
+    } else {
+        if (limit > (size_t)MPACK_UINT32_MAX)
+            limit = (size_t)MPACK_UINT32_MAX;
+    }
+
+    size_t payload = mpack_expect_ext_max(reader, type, (uint32_t)limit);
+    if (mpack_reader_error(reader))
+        return NULL;
+
+    char* result = mpack_read_bytes_alloc(reader, payload);
+    mpack_done_ext(reader);
+
+    if (result == NULL) {
+        *type = 0;
+        return NULL;
+    }
+
+    *size = payload;
+    return result;
+}
+#endif
+
+// Reads a string and returns the index of the entry in `strings` (an array of
+// `count` null-terminated strings) that equals it byte for byte. Returns
+// `count` if the reader is in an error state after the read; if no entry
+// matches, flags mpack_error_type and returns `count`.
+size_t mpack_expect_enum(mpack_reader_t* reader, const char* strings[], size_t count) {
+
+    // borrow the string's bytes from the reader instead of copying them
+    size_t name_len = mpack_expect_str(reader);
+    const char* name = mpack_read_bytes_inplace(reader, name_len);
+    mpack_done_str(reader);
+    if (mpack_reader_error(reader) != mpack_ok)
+        return count;
+
+    // linear search for an entry with the same length and bytes
+    size_t index = 0;
+    while (index < count) {
+        const char* candidate = strings[index];
+        size_t candidate_len = mpack_strlen(candidate);
+        if (candidate_len == name_len && mpack_memcmp(name, candidate, name_len) == 0)
+            return index;
+        ++index;
+    }
+
+    // nothing matched
+    mpack_reader_flag_error(reader, mpack_error_type);
+    return count;
+}
+
+// Like mpack_expect_enum(), except that an unrecognized value is not an
+// error: if the next element is not a string it is discarded, and if it is a
+// string that matches no entry nothing is flagged. In both cases `count` is
+// returned. `count` is also returned whenever the reader is in an error state.
+size_t mpack_expect_enum_optional(mpack_reader_t* reader, const char* strings[], size_t count) {
+    if (mpack_reader_error(reader) != mpack_ok)
+        return count;
+
+    mpack_assert(count != 0, "count cannot be zero; no strings are valid!");
+    mpack_assert(strings != NULL, "strings cannot be NULL");
+
+    // anything other than a string is skipped whole
+    if (mpack_peek_tag(reader).type != mpack_type_str) {
+        mpack_discard(reader);
+        return count;
+    }
+
+    // borrow the string's bytes from the reader instead of copying them
+    size_t name_len = mpack_expect_str(reader);
+    const char* name = mpack_read_bytes_inplace(reader, name_len);
+    mpack_done_str(reader);
+    if (mpack_reader_error(reader) != mpack_ok)
+        return count;
+
+    // linear search for an entry with the same length and bytes
+    size_t index = 0;
+    while (index < count) {
+        const char* candidate = strings[index];
+        size_t candidate_len = mpack_strlen(candidate);
+        if (candidate_len == name_len && mpack_memcmp(name, candidate, name_len) == 0)
+            return index;
+        ++index;
+    }
+
+    // nothing matched; unlike mpack_expect_enum() this is not an error
+    return count;
+}
+
+/*
+ * Reads a map key that is expected to be an unsigned integer below count and
+ * records it in found[].
+ *
+ * Returns the key as an index on success. Returns count when the reader is in
+ * an error state, when the next value is not an unsigned integer (it is
+ * discarded), or when the key is >= count. A key already marked in found[]
+ * flags mpack_error_invalid; a count of zero flags mpack_error_bug.
+ */
+size_t mpack_expect_key_uint(mpack_reader_t* reader, bool found[], size_t count) {
+    if (mpack_reader_error(reader) != mpack_ok)
+        return count;
+
+    if (count == 0) {
+        mpack_break("count cannot be zero; no keys are valid!");
+        mpack_reader_flag_error(reader, mpack_error_bug);
+        return count;
+    }
+    mpack_assert(found != NULL, "found cannot be NULL");
+
+    // anything other than an unsigned int is skipped as an unrecognized key
+    if (mpack_peek_tag(reader).type != mpack_type_uint) {
+        mpack_discard(reader);
+        return count;
+    }
+
+    uint64_t key = mpack_expect_u64(reader);
+    if (mpack_reader_error(reader) != mpack_ok)
+        return count;
+
+    // out-of-range keys are simply unrecognized, not an error
+    if (key >= count)
+        return count;
+
+    // key < count here, so it is a valid index into found[]
+    size_t index = (size_t)key;
+    if (found[index]) {
+        // the same key appeared twice
+        mpack_reader_flag_error(reader, mpack_error_invalid);
+        return count;
+    }
+
+    found[index] = true;
+    return index;
+}
+
+/*
+ * Reads a map key via mpack_expect_enum_optional() and records it in found[].
+ *
+ * Returns the index of the matched key, or count if the key was not
+ * recognized. A key already marked in found[] flags mpack_error_invalid and
+ * also returns count.
+ */
+size_t mpack_expect_key_cstr(mpack_reader_t* reader, const char* keys[], bool found[], size_t count) {
+    size_t index = mpack_expect_enum_optional(reader, keys, count);
+
+    if (index != count) {
+        mpack_assert(found != NULL, "found cannot be NULL");
+
+        // seeing the same key a second time is an error
+        if (found[index]) {
+            mpack_reader_flag_error(reader, mpack_error_invalid);
+            return count;
+        }
+
+        found[index] = true;
+    }
+
+    // for unrecognized keys index equals count, which is what we report
+    return index;
+}
+
+#endif
+
+MPACK_SILENCE_WARNINGS_END
+
+/* mpack/mpack-node.c.c */
+
+#define MPACK_INTERNAL 1
+
+/* #include "mpack-node.h" */
+
+MPACK_SILENCE_WARNINGS_BEGIN
+
+#if MPACK_NODE
+
+/*
+ * Returns a pointer to the payload bytes of a str, bin or (with extensions)
+ * ext node, computed as the tree's data pointer plus the node's stored offset.
+ * The node type and the tree's error state are only checked by assertions.
+ */
+MPACK_STATIC_INLINE const char* mpack_node_data_unchecked(mpack_node_t node) {
+    mpack_assert(mpack_node_error(node) == mpack_ok, "tree is in an error state!");
+
+    mpack_type_t type = node.data->type;
+    MPACK_UNUSED(type);
+
+    #if MPACK_EXTENSIONS
+    mpack_assert(type == mpack_type_str || type == mpack_type_bin || type == mpack_type_ext,
+            "node of type %i (%s) is not a data type!", type, mpack_type_to_string(type));
+    #else
+    mpack_assert(type == mpack_type_str || type == mpack_type_bin,
+            "node of type %i (%s) is not a data type!", type, mpack_type_to_string(type));
+    #endif
+
+    const char* base = node.tree->data;
+    return base + node.data->value.offset;
+}
+
+#if MPACK_EXTENSIONS
+/*
+ * Returns the extension type of an ext node. The node type and the tree's
+ * error state are only checked by assertions.
+ */
+MPACK_STATIC_INLINE int8_t mpack_node_exttype_unchecked(mpack_node_t node) {
+    mpack_assert(mpack_node_error(node) == mpack_ok, "tree is in an error state!");
+
+    mpack_type_t type = node.data->type;
+    MPACK_UNUSED(type);
+    mpack_assert(type == mpack_type_ext, "node of type %i (%s) is not an ext type!",
+            type, mpack_type_to_string(type));
+
+    // the exttype byte sits immediately before the node's payload
+    const char* payload = mpack_node_data_unchecked(node);
+    return mpack_load_i8(payload - 1);
+}
+#endif
+
+
+
+/*
+ * Tree Parsing
+ */
+
+#ifdef MPACK_MALLOC
+
+// MPACK_NODES_PER_PAGE is how many nodes fit in a page of MPACK_NODE_PAGE_SIZE bytes, and
+// MPACK_PAGE_ALLOC_SIZE is the page size rounded down to hold exactly that many nodes. The
+// "+ 1" / "- 1" presumably account for one node already counted in sizeof(mpack_tree_page_t).
+
+#define MPACK_NODES_PER_PAGE \
+    ((MPACK_NODE_PAGE_SIZE - sizeof(mpack_tree_page_t)) / sizeof(mpack_node_data_t) + 1)
+
+#define MPACK_PAGE_ALLOC_SIZE \
+    (sizeof(mpack_tree_page_t) + sizeof(mpack_node_data_t) * (MPACK_NODES_PER_PAGE - 1))
+
+#endif
+
+#ifdef MPACK_MALLOC
+/*
+ * Fills the tree until we have at least enough bytes for the current node,
+ * i.e. until parser.possible_nodes_left reaches parser.current_node_reserved.
+ *
+ * The buffer is grown if it cannot hold data_length plus the reserved bytes
+ * (doubling, capped at max_size), then the tree's read function is called
+ * repeatedly to append data to it.
+ *
+ * Returns true once enough bytes are buffered. Returns false when:
+ *  - the required size exceeds max_size (flags mpack_error_too_big);
+ *  - the tree has no read function (flags mpack_error_invalid);
+ *  - the buffer cannot be allocated (flags mpack_error_memory);
+ *  - the read function flags an error, or returns (size_t)-1
+ *    (flags mpack_error_io);
+ *  - the read function returns 0, meaning the data is not available yet
+ *    (no error is flagged, so parsing can be resumed later).
+ */
+static bool mpack_tree_reserve_fill(mpack_tree_t* tree) {
+    mpack_assert(tree->parser.state == mpack_tree_parse_state_in_progress);
+
+    size_t bytes = tree->parser.current_node_reserved;
+    mpack_assert(bytes > tree->parser.possible_nodes_left,
+            "there are already enough bytes! call mpack_tree_ensure() instead.");
+    mpack_log("filling to reserve %i bytes\n", (int)bytes);
+
+    // if the necessary bytes would put us over the maximum tree
+    // size, fail right away. the test is written as a subtraction so
+    // that data_length + bytes cannot wrap around and slip past the limit.
+    if (tree->data_length > tree->max_size || bytes > tree->max_size - tree->data_length) {
+        mpack_tree_flag_error(tree, mpack_error_too_big);
+        return false;
+    }
+
+    // this cannot overflow: the check above bounds the sum by max_size.
+    size_t required = tree->data_length + bytes;
+
+    // we'll need a read function to fetch more data. if there's
+    // no read function, the data should contain an entire message
+    // (or messages), so we flag it as invalid.
+    if (tree->read_fn == NULL) {
+        mpack_log("tree has no read function!\n");
+        mpack_tree_flag_error(tree, mpack_error_invalid);
+        return false;
+    }
+
+    // expand the buffer if needed
+    if (required > tree->buffer_capacity) {
+
+        size_t new_capacity = (tree->buffer_capacity == 0) ? MPACK_BUFFER_SIZE : tree->buffer_capacity;
+        while (new_capacity < required) {
+            // doubling again would wrap around (and could loop forever);
+            // max_size is the largest capacity we may use anyway, and it
+            // is known to be at least as large as what we need.
+            if (new_capacity > SIZE_MAX / 2) {
+                new_capacity = tree->max_size;
+                break;
+            }
+            new_capacity *= 2;
+        }
+        if (new_capacity > tree->max_size)
+            new_capacity = tree->max_size;
+
+        mpack_log("expanding buffer from %i to %i\n", (int)tree->buffer_capacity, (int)new_capacity);
+
+        char* new_buffer;
+        if (tree->buffer == NULL)
+            new_buffer = (char*)MPACK_MALLOC(new_capacity);
+        else
+            new_buffer = (char*)mpack_realloc(tree->buffer, tree->data_length, new_capacity);
+
+        if (new_buffer == NULL) {
+            mpack_tree_flag_error(tree, mpack_error_memory);
+            return false;
+        }
+
+        // the data pointer follows the buffer, so callers must not hold on
+        // to pointers into the old buffer across this call.
+        tree->data = new_buffer;
+        tree->buffer = new_buffer;
+        tree->buffer_capacity = new_capacity;
+    }
+
+    // request as much data as possible, looping until we have
+    // all the data we need
+    do {
+        size_t read = tree->read_fn(tree, tree->buffer + tree->data_length, tree->buffer_capacity - tree->data_length);
+
+        // If the fill function encounters an error, it should flag an error on
+        // the tree.
+        if (mpack_tree_error(tree) != mpack_ok)
+            return false;
+
+        // We guard against fill functions that return -1 just in case.
+        if (read == (size_t)(-1)) {
+            mpack_tree_flag_error(tree, mpack_error_io);
+            return false;
+        }
+
+        // If the fill function returns 0, the data is not available yet. We
+        // return false to stop parsing the current node.
+        if (read == 0) {
+            mpack_log("not enough data.\n");
+            return false;
+        }
+
+        mpack_log("read %" PRIu32 " more bytes\n", (uint32_t)read);
+        tree->data_length += read;
+        tree->parser.possible_nodes_left += read;
+    } while (tree->parser.possible_nodes_left < bytes);
+
+    return true;
+}
+#endif
+
+/*
+ * Ensures there are enough additional bytes in the tree for the current node
+ * (including reserved bytes for the children of this node, and in addition to
+ * the reserved bytes for children of previous compound nodes), reading more
+ * data if needed.
+ *
+ * extra_bytes is the number of additional bytes to reserve for the current
+ * node beyond the type byte (since one byte is already reserved for each node
+ * by its parent array or map.) It is added to parser.current_node_reserved.
+ *
+ * This may reallocate the tree, which means the tree->data pointer may change!
+ *
+ * Returns false if not enough bytes could be read, or if the reservation
+ * would not fit in a size_t (in which case mpack_error_invalid is flagged).
+ * Without MPACK_MALLOC no more data can be read, so false is returned as soon
+ * as the buffered bytes do not suffice.
+ */
+MPACK_STATIC_INLINE bool mpack_tree_reserve_bytes(mpack_tree_t* tree, size_t extra_bytes) {
+    mpack_assert(tree->parser.state == mpack_tree_parse_state_in_progress);
+
+    // We guard against overflow here. A compound type could declare more than
+    // MPACK_UINT32_MAX contents which overflows SIZE_MAX on 32-bit platforms. We
+    // flag mpack_error_invalid instead of mpack_error_too_big since it's far
+    // more likely that the message is corrupt than that the data is valid but
+    // not parseable on this architecture (see test_read_node_possible() in
+    // test-node.c .)
+    //
+    // The test is written as a subtraction so that it is correct whatever the
+    // width of size_t is; a widened addition would itself wrap when size_t is
+    // already 64 bits wide.
+    if (extra_bytes > SIZE_MAX - tree->parser.current_node_reserved) {
+        mpack_tree_flag_error(tree, mpack_error_invalid);
+        return false;
+    }
+
+    tree->parser.current_node_reserved += extra_bytes;
+
+    // Note that possible_nodes_left already accounts for reserved bytes for
+    // children of previous compound nodes. So even if there are hundreds of
+    // bytes left in the buffer, we might need to read anyway.
+    if (tree->parser.current_node_reserved <= tree->parser.possible_nodes_left)
+        return true;
+
+    #ifdef MPACK_MALLOC
+    return mpack_tree_reserve_fill(tree);
+    #else
+    return false;
+    #endif
+}
+
+/*
+ * Returns the number of levels the parse stack can currently hold.
+ */
+MPACK_STATIC_INLINE size_t mpack_tree_parser_stack_capacity(mpack_tree_t* tree) {
+    #ifdef MPACK_MALLOC
+    // the stack can grow at runtime, so its capacity is tracked in the parser
+    size_t capacity = tree->parser.stack_capacity;
+    #else
+    // otherwise the capacity is the element count of the parser's stack array
+    size_t capacity = sizeof(tree->parser.stack) / sizeof(tree->parser.stack[0]);
+    #endif
+    return capacity;
+}
+
+/*
+ * Pushes a new level onto the parse stack describing `total` children that
+ * start at first_child. Empty containers push nothing.
+ *
+ * With MPACK_MALLOC the stack capacity is doubled when it is full; the first
+ * growth copies the stack the parser did not own into a heap allocation,
+ * later growths reallocate it. Without MPACK_MALLOC a full stack flags
+ * mpack_error_too_big.
+ *
+ * Returns false (with mpack_error_memory or mpack_error_too_big flagged) if
+ * the level could not be pushed, true otherwise.
+ */
+static bool mpack_tree_push_stack(mpack_tree_t* tree, mpack_node_data_t* first_child, size_t total) {
+    mpack_tree_parser_t* parser = &tree->parser;
+    mpack_assert(parser->state == mpack_tree_parse_state_in_progress);
+
+    // Nothing to track for a container without children
+    if (total == 0)
+        return true;
+
+    // Grow the stack first if the next level would not fit
+    if (parser->level + 1 == mpack_tree_parser_stack_capacity(tree)) {
+        #ifdef MPACK_MALLOC
+        size_t new_capacity = parser->stack_capacity * 2;
+        mpack_log("growing parse stack to capacity %i\n", (int)new_capacity);
+
+        size_t old_bytes = sizeof(mpack_level_t) * parser->stack_capacity;
+        size_t new_bytes = sizeof(mpack_level_t) * new_capacity;
+        mpack_level_t* grown;
+
+        if (parser->stack_owned) {
+            // We already own a heap stack, so it can be reallocated
+            grown = (mpack_level_t*)mpack_realloc(parser->stack, old_bytes, new_bytes);
+        } else {
+            // The current stack is not ours to free; move its contents
+            // into a fresh allocation instead
+            grown = (mpack_level_t*)MPACK_MALLOC(new_bytes);
+            if (grown)
+                mpack_memcpy(grown, parser->stack, old_bytes);
+        }
+
+        if (!grown) {
+            mpack_tree_flag_error(tree, mpack_error_memory);
+            return false;
+        }
+
+        parser->stack = grown;
+        parser->stack_owned = true;
+        parser->stack_capacity = new_capacity;
+        #else
+        mpack_tree_flag_error(tree, mpack_error_too_big);
+        return false;
+        #endif
+    }
+
+    // Record the children of this node as the new top of the stack
+    ++parser->level;
+    mpack_level_t* top = &parser->stack[parser->level];
+    top->child = first_child;
+    top->left = total;
+    return true;
+}
+
+/*
+ * Sets up the children of a freshly parsed array or map node.
+ *
+ * The number of child nodes is node->len for arrays and twice that for maps
+ * (a key and a value per entry). This function:
+ *  - adds the children to tree->node_count and enforces tree->max_nodes;
+ *  - reserves one byte of input per child, since each node is at least one
+ *    byte long;
+ *  - assigns a contiguous run of node storage to node->value.children, taken
+ *    from the current page or from a newly allocated page;
+ *  - pushes the children onto the parse stack.
+ *
+ * Returns false if the children cannot be set up. mpack_error_too_big is
+ * flagged when a count or limit is exceeded or node storage cannot grow, and
+ * mpack_error_memory when an allocation fails or its size cannot be
+ * represented. False can also come from mpack_tree_reserve_bytes() (for
+ * instance when not enough data is available yet) or mpack_tree_push_stack().
+ */
+static bool mpack_tree_parse_children(mpack_tree_t* tree, mpack_node_data_t* node) {
+    mpack_tree_parser_t* parser = &tree->parser;
+    mpack_assert(parser->state == mpack_tree_parse_state_in_progress);
+
+    mpack_type_t type = node->type;
+    size_t total = node->len;
+
+    // Calculate total elements to read
+    if (type == mpack_type_map) {
+        if ((uint64_t)total * 2 > SIZE_MAX) {
+            mpack_tree_flag_error(tree, mpack_error_too_big);
+            return false;
+        }
+        total *= 2;
+    }
+
+    // Make sure we are under our total node limit. If adding these children
+    // would wrap the running count around, the limit check below could be
+    // fooled, so that case is rejected up front.
+    if (total > SIZE_MAX - tree->node_count) {
+        mpack_tree_flag_error(tree, mpack_error_too_big);
+        return false;
+    }
+    tree->node_count += total;
+    if (tree->node_count > tree->max_nodes) {
+        mpack_tree_flag_error(tree, mpack_error_too_big);
+        return false;
+    }
+
+    // Each node is at least one byte. Count these bytes now to make
+    // sure there is enough data left.
+    if (!mpack_tree_reserve_bytes(tree, total))
+        return false;
+
+    // If there are enough nodes left in the current page, no need to grow
+    if (total <= parser->nodes_left) {
+        node->value.children = parser->nodes;
+        parser->nodes += total;
+        parser->nodes_left -= total;
+
+    } else {
+
+        #ifdef MPACK_MALLOC
+
+        // We can't grow if we're using a fixed pool (i.e. we didn't start with a page)
+        if (!tree->next) {
+            mpack_tree_flag_error(tree, mpack_error_too_big);
+            return false;
+        }
+
+        // Otherwise we need to grow, and the node's children need to be contiguous.
+        // This is a heuristic to decide whether we should waste the remaining space
+        // in the current page and start a new one, or give the children their
+        // own page. With a fraction of 1/8, this causes at most 12% additional
+        // waste. Note that reducing this too much causes less cache coherence and
+        // more malloc() overhead due to smaller allocations, so there's a tradeoff
+        // here. This heuristic could use some improvement, especially with custom
+        // page sizes.
+
+        mpack_tree_page_t* page;
+
+        if (total > MPACK_NODES_PER_PAGE || parser->nodes_left > MPACK_NODES_PER_PAGE / 8) {
+            // The children get a page of their own, sized for exactly `total`
+            // nodes. total is at least 1 here (zero children always fit in the
+            // current page). If the size does not fit in a size_t we must not
+            // allocate a truncated page, so treat it as a failed allocation.
+            if (total - 1 > (SIZE_MAX - sizeof(mpack_tree_page_t)) / sizeof(mpack_node_data_t)) {
+                mpack_tree_flag_error(tree, mpack_error_memory);
+                return false;
+            }
+            page = (mpack_tree_page_t*)MPACK_MALLOC(
+                    sizeof(mpack_tree_page_t) + sizeof(mpack_node_data_t) * (total - 1));
+            if (page == NULL) {
+                mpack_tree_flag_error(tree, mpack_error_memory);
+                return false;
+            }
+            mpack_log("allocated seperate page %p for %i children, %i left in page of %i total\n",
+                    (void*)page, (int)total, (int)parser->nodes_left, (int)MPACK_NODES_PER_PAGE);
+
+            // The current page stays in use for later nodes.
+            node->value.children = page->nodes;
+
+        } else {
+            // Start a fresh standard-size page and abandon what is left of
+            // the current one.
+            page = (mpack_tree_page_t*)MPACK_MALLOC(MPACK_PAGE_ALLOC_SIZE);
+            if (page == NULL) {
+                mpack_tree_flag_error(tree, mpack_error_memory);
+                return false;
+            }
+            mpack_log("allocated new page %p for %i children, wasting %i in page of %i total\n",
+                    (void*)page, (int)total, (int)parser->nodes_left, (int)MPACK_NODES_PER_PAGE);
+
+            node->value.children = page->nodes;
+            parser->nodes = page->nodes + total;
+            parser->nodes_left = MPACK_NODES_PER_PAGE - total;
+        }
+
+        // Link the new page at the head of the tree's page list.
+        page->next = tree->next;
+        tree->next = page;
+
+        #else
+        // We can't grow if we don't have an allocator
+        mpack_tree_flag_error(tree, mpack_error_too_big);
+        return false;
+        #endif
+    }
+
+    return mpack_tree_push_stack(tree, node->value.children, total);
+}
+
+/*
+ * Records where the payload of a str/bin/ext node starts and reserves
+ * node->len bytes of input for it. Returns the result of
+ * mpack_tree_reserve_bytes().
+ */
+static bool mpack_tree_parse_bytes(mpack_tree_t* tree, mpack_node_data_t* node) {
+    // the payload follows the type byte (the + 1) and the bytes reserved so
+    // far for this node
+    size_t header_end = tree->size + tree->parser.current_node_reserved;
+    node->value.offset = header_end + 1;
+
+    return mpack_tree_reserve_bytes(tree, node->len);
+}
+
+#if MPACK_EXTENSIONS
+/*
+ * Marks the node as an ext node, accounts for the one-byte exttype that
+ * precedes the payload, and then handles the payload like any other byte
+ * data.
+ */
+static bool mpack_tree_parse_ext(mpack_tree_t* tree, mpack_node_data_t* node) {
+    node->type = mpack_type_ext;
+
+    // the exttype byte is counted before the payload offset is computed
+    tree->parser.current_node_reserved += sizeof(int8_t);
+
+    return mpack_tree_parse_bytes(tree, node);
+}
+#endif
+
+static bool mpack_tree_parse_node_contents(mpack_tree_t* tree, mpack_node_data_t* node) { // decodes the node starting at tree->size into *node; returns false on error or when more bytes are needed
+    mpack_assert(tree->parser.state == mpack_tree_parse_state_in_progress);
+    mpack_assert(node != NULL, "null node?");
+
+    // read the type. we've already accounted for this byte in
+    // possible_nodes_left, so we already know it is in bounds, and we don't
+    // need to reserve it for this node.
+    mpack_assert(tree->data_length > tree->size);
+    uint8_t type = mpack_load_u8(tree->data + tree->size);
+    mpack_log("node type %x\n", type);
+    tree->parser.current_node_reserved = 0; // bytes reserved beyond the type byte are counted from zero for each node
+
+    // as with mpack_read_tag(), the fastest way to parse a node is to switch
+    // on the first byte, and to explicitly list every possible byte. we switch
+    // on the first four bits in size-optimized builds.
+
+    #if MPACK_OPTIMIZE_FOR_SIZE
+    switch (type >> 4) {
+
+        // positive fixnum (the byte itself is the value)
+        case 0x0: case 0x1: case 0x2: case 0x3:
+        case 0x4: case 0x5: case 0x6: case 0x7:
+            node->type = mpack_type_uint;
+            node->value.u = type;
+            return true;
+
+        // negative fixnum (the byte reinterpreted as int8_t is the value)
+        case 0xe: case 0xf:
+            node->type = mpack_type_int;
+            node->value.i = (int8_t)type;
+            return true;
+
+        // fixmap (length in the low 4 bits)
+        case 0x8:
+            node->type = mpack_type_map;
+            node->len = (uint32_t)(type & ~0xf0);
+            return mpack_tree_parse_children(tree, node);
+
+        // fixarray (length in the low 4 bits)
+        case 0x9:
+            node->type = mpack_type_array;
+            node->len = (uint32_t)(type & ~0xf0);
+            return mpack_tree_parse_children(tree, node);
+
+        // fixstr (length in the low 5 bits)
+        case 0xa: case 0xb:
+            node->type = mpack_type_str;
+            node->len = (uint32_t)(type & ~0xe0);
+            return mpack_tree_parse_bytes(tree, node);
+
+        // not one of the common infix types
+        default:
+            break;
+    }
+    #endif
+
+    switch (type) {
+
+        #if !MPACK_OPTIMIZE_FOR_SIZE
+        // positive fixnum (the byte itself is the value)
+        case 0x00: case 0x01: case 0x02: case 0x03: case 0x04: case 0x05: case 0x06: case 0x07:
+        case 0x08: case 0x09: case 0x0a: case 0x0b: case 0x0c: case 0x0d: case 0x0e: case 0x0f:
+        case 0x10: case 0x11: case 0x12: case 0x13: case 0x14: case 0x15: case 0x16: case 0x17:
+        case 0x18: case 0x19: case 0x1a: case 0x1b: case 0x1c: case 0x1d: case 0x1e: case 0x1f:
+        case 0x20: case 0x21: case 0x22: case 0x23: case 0x24: case 0x25: case 0x26: case 0x27:
+        case 0x28: case 0x29: case 0x2a: case 0x2b: case 0x2c: case 0x2d: case 0x2e: case 0x2f:
+        case 0x30: case 0x31: case 0x32: case 0x33: case 0x34: case 0x35: case 0x36: case 0x37:
+        case 0x38: case 0x39: case 0x3a: case 0x3b: case 0x3c: case 0x3d: case 0x3e: case 0x3f:
+        case 0x40: case 0x41: case 0x42: case 0x43: case 0x44: case 0x45: case 0x46: case 0x47:
+        case 0x48: case 0x49: case 0x4a: case 0x4b: case 0x4c: case 0x4d: case 0x4e: case 0x4f:
+        case 0x50: case 0x51: case 0x52: case 0x53: case 0x54: case 0x55: case 0x56: case 0x57:
+        case 0x58: case 0x59: case 0x5a: case 0x5b: case 0x5c: case 0x5d: case 0x5e: case 0x5f:
+        case 0x60: case 0x61: case 0x62: case 0x63: case 0x64: case 0x65: case 0x66: case 0x67:
+        case 0x68: case 0x69: case 0x6a: case 0x6b: case 0x6c: case 0x6d: case 0x6e: case 0x6f:
+        case 0x70: case 0x71: case 0x72: case 0x73: case 0x74: case 0x75: case 0x76: case 0x77:
+        case 0x78: case 0x79: case 0x7a: case 0x7b: case 0x7c: case 0x7d: case 0x7e: case 0x7f:
+            node->type = mpack_type_uint;
+            node->value.u = type;
+            return true;
+
+        // negative fixnum (the byte reinterpreted as int8_t is the value)
+        case 0xe0: case 0xe1: case 0xe2: case 0xe3: case 0xe4: case 0xe5: case 0xe6: case 0xe7:
+        case 0xe8: case 0xe9: case 0xea: case 0xeb: case 0xec: case 0xed: case 0xee: case 0xef:
+        case 0xf0: case 0xf1: case 0xf2: case 0xf3: case 0xf4: case 0xf5: case 0xf6: case 0xf7:
+        case 0xf8: case 0xf9: case 0xfa: case 0xfb: case 0xfc: case 0xfd: case 0xfe: case 0xff:
+            node->type = mpack_type_int;
+            node->value.i = (int8_t)type;
+            return true;
+
+        // fixmap (length in the low 4 bits)
+        case 0x80: case 0x81: case 0x82: case 0x83: case 0x84: case 0x85: case 0x86: case 0x87:
+        case 0x88: case 0x89: case 0x8a: case 0x8b: case 0x8c: case 0x8d: case 0x8e: case 0x8f:
+            node->type = mpack_type_map;
+            node->len = (uint32_t)(type & ~0xf0);
+            return mpack_tree_parse_children(tree, node);
+
+        // fixarray (length in the low 4 bits)
+        case 0x90: case 0x91: case 0x92: case 0x93: case 0x94: case 0x95: case 0x96: case 0x97:
+        case 0x98: case 0x99: case 0x9a: case 0x9b: case 0x9c: case 0x9d: case 0x9e: case 0x9f:
+            node->type = mpack_type_array;
+            node->len = (uint32_t)(type & ~0xf0);
+            return mpack_tree_parse_children(tree, node);
+
+        // fixstr (length in the low 5 bits)
+        case 0xa0: case 0xa1: case 0xa2: case 0xa3: case 0xa4: case 0xa5: case 0xa6: case 0xa7:
+        case 0xa8: case 0xa9: case 0xaa: case 0xab: case 0xac: case 0xad: case 0xae: case 0xaf:
+        case 0xb0: case 0xb1: case 0xb2: case 0xb3: case 0xb4: case 0xb5: case 0xb6: case 0xb7:
+        case 0xb8: case 0xb9: case 0xba: case 0xbb: case 0xbc: case 0xbd: case 0xbe: case 0xbf:
+            node->type = mpack_type_str;
+            node->len = (uint32_t)(type & ~0xe0);
+            return mpack_tree_parse_bytes(tree, node);
+        #endif
+
+        // nil
+        case 0xc0:
+            node->type = mpack_type_nil;
+            return true;
+
+        // bool (the low bit of the type byte is the value)
+        case 0xc2: case 0xc3:
+            node->type = mpack_type_bool;
+            node->value.b = type & 1;
+            return true;
+
+        // bin8 (8-bit length follows the type byte, then the payload)
+        case 0xc4:
+            node->type = mpack_type_bin;
+            if (!mpack_tree_reserve_bytes(tree, sizeof(uint8_t)))
+                return false;
+            node->len = mpack_load_u8(tree->data + tree->size + 1);
+            return mpack_tree_parse_bytes(tree, node);
+
+        // bin16 (16-bit length follows the type byte, then the payload)
+        case 0xc5:
+            node->type = mpack_type_bin;
+            if (!mpack_tree_reserve_bytes(tree, sizeof(uint16_t)))
+                return false;
+            node->len = mpack_load_u16(tree->data + tree->size + 1);
+            return mpack_tree_parse_bytes(tree, node);
+
+        // bin32 (32-bit length follows the type byte, then the payload)
+        case 0xc6:
+            node->type = mpack_type_bin;
+            if (!mpack_tree_reserve_bytes(tree, sizeof(uint32_t)))
+                return false;
+            node->len = mpack_load_u32(tree->data + tree->size + 1);
+            return mpack_tree_parse_bytes(tree, node);
+
+        #if MPACK_EXTENSIONS
+        // ext8 (8-bit length; mpack_tree_parse_ext() accounts for the exttype byte)
+        case 0xc7:
+            if (!mpack_tree_reserve_bytes(tree, sizeof(uint8_t)))
+                return false;
+            node->len = mpack_load_u8(tree->data + tree->size + 1);
+            return mpack_tree_parse_ext(tree, node);
+
+        // ext16 (16-bit length; mpack_tree_parse_ext() accounts for the exttype byte)
+        case 0xc8:
+            if (!mpack_tree_reserve_bytes(tree, sizeof(uint16_t)))
+                return false;
+            node->len = mpack_load_u16(tree->data + tree->size + 1);
+            return mpack_tree_parse_ext(tree, node);
+
+        // ext32 (32-bit length; mpack_tree_parse_ext() accounts for the exttype byte)
+        case 0xc9:
+            if (!mpack_tree_reserve_bytes(tree, sizeof(uint32_t)))
+                return false;
+            node->len = mpack_load_u32(tree->data + tree->size + 1);
+            return mpack_tree_parse_ext(tree, node);
+        #endif
+
+        // float (loaded with mpack_load_u32() instead when MPACK_FLOAT is off)
+        case 0xca:
+            #if MPACK_FLOAT
+            if (!mpack_tree_reserve_bytes(tree, sizeof(float)))
+                return false;
+            node->value.f = mpack_load_float(tree->data + tree->size + 1);
+            #else
+            if (!mpack_tree_reserve_bytes(tree, sizeof(uint32_t)))
+                return false;
+            node->value.f = mpack_load_u32(tree->data + tree->size + 1);
+            #endif
+            node->type = mpack_type_float;
+            return true;
+
+        // double (loaded with mpack_load_u64() instead when MPACK_DOUBLE is off)
+        case 0xcb:
+            #if MPACK_DOUBLE
+            if (!mpack_tree_reserve_bytes(tree, sizeof(double)))
+                return false;
+            node->value.d = mpack_load_double(tree->data + tree->size + 1);
+            #else
+            if (!mpack_tree_reserve_bytes(tree, sizeof(uint64_t)))
+                return false;
+            node->value.d = mpack_load_u64(tree->data + tree->size + 1);
+            #endif
+            node->type = mpack_type_double;
+            return true;
+
+        // uint8
+        case 0xcc:
+            node->type = mpack_type_uint;
+            if (!mpack_tree_reserve_bytes(tree, sizeof(uint8_t)))
+                return false;
+            node->value.u = mpack_load_u8(tree->data + tree->size + 1);
+            return true;
+
+        // uint16
+        case 0xcd:
+            node->type = mpack_type_uint;
+            if (!mpack_tree_reserve_bytes(tree, sizeof(uint16_t)))
+                return false;
+            node->value.u = mpack_load_u16(tree->data + tree->size + 1);
+            return true;
+
+        // uint32
+        case 0xce:
+            node->type = mpack_type_uint;
+            if (!mpack_tree_reserve_bytes(tree, sizeof(uint32_t)))
+                return false;
+            node->value.u = mpack_load_u32(tree->data + tree->size + 1);
+            return true;
+
+        // uint64
+        case 0xcf:
+            node->type = mpack_type_uint;
+            if (!mpack_tree_reserve_bytes(tree, sizeof(uint64_t)))
+                return false;
+            node->value.u = mpack_load_u64(tree->data + tree->size + 1);
+            return true;
+
+        // int8
+        case 0xd0:
+            node->type = mpack_type_int;
+            if (!mpack_tree_reserve_bytes(tree, sizeof(int8_t)))
+                return false;
+            node->value.i = mpack_load_i8(tree->data + tree->size + 1);
+            return true;
+
+        // int16
+        case 0xd1:
+            node->type = mpack_type_int;
+            if (!mpack_tree_reserve_bytes(tree, sizeof(int16_t)))
+                return false;
+            node->value.i = mpack_load_i16(tree->data + tree->size + 1);
+            return true;
+
+        // int32
+        case 0xd2:
+            node->type = mpack_type_int;
+            if (!mpack_tree_reserve_bytes(tree, sizeof(int32_t)))
+                return false;
+            node->value.i = mpack_load_i32(tree->data + tree->size + 1);
+            return true;
+
+        // int64
+        case 0xd3:
+            node->type = mpack_type_int;
+            if (!mpack_tree_reserve_bytes(tree, sizeof(int64_t)))
+                return false;
+            node->value.i = mpack_load_i64(tree->data + tree->size + 1);
+            return true;
+
+        #if MPACK_EXTENSIONS
+        // fixext1 (fixed 1-byte payload, no length field)
+        case 0xd4:
+            node->len = 1;
+            return mpack_tree_parse_ext(tree, node);
+
+        // fixext2 (fixed 2-byte payload, no length field)
+        case 0xd5:
+            node->len = 2;
+            return mpack_tree_parse_ext(tree, node);
+
+        // fixext4 (fixed 4-byte payload, no length field)
+        case 0xd6:
+            node->len = 4;
+            return mpack_tree_parse_ext(tree, node);
+
+        // fixext8 (fixed 8-byte payload, no length field)
+        case 0xd7:
+            node->len = 8;
+            return mpack_tree_parse_ext(tree, node);
+
+        // fixext16 (fixed 16-byte payload, no length field)
+        case 0xd8:
+            node->len = 16;
+            return mpack_tree_parse_ext(tree, node);
+        #endif
+
+        // str8 (8-bit length follows the type byte, then the payload)
+        case 0xd9:
+            if (!mpack_tree_reserve_bytes(tree, sizeof(uint8_t)))
+                return false;
+            node->len = mpack_load_u8(tree->data + tree->size + 1);
+            node->type = mpack_type_str;
+            return mpack_tree_parse_bytes(tree, node);
+
+        // str16 (16-bit length follows the type byte, then the payload)
+        case 0xda:
+            if (!mpack_tree_reserve_bytes(tree, sizeof(uint16_t)))
+                return false;
+            node->len = mpack_load_u16(tree->data + tree->size + 1);
+            node->type = mpack_type_str;
+            return mpack_tree_parse_bytes(tree, node);
+
+        // str32 (32-bit length follows the type byte, then the payload)
+        case 0xdb:
+            if (!mpack_tree_reserve_bytes(tree, sizeof(uint32_t)))
+                return false;
+            node->len = mpack_load_u32(tree->data + tree->size + 1);
+            node->type = mpack_type_str;
+            return mpack_tree_parse_bytes(tree, node);
+
+        // array16 (16-bit element count follows the type byte)
+        case 0xdc:
+            if (!mpack_tree_reserve_bytes(tree, sizeof(uint16_t)))
+                return false;
+            node->len = mpack_load_u16(tree->data + tree->size + 1);
+            node->type = mpack_type_array;
+            return mpack_tree_parse_children(tree, node);
+
+        // array32 (32-bit element count follows the type byte)
+        case 0xdd:
+            if (!mpack_tree_reserve_bytes(tree, sizeof(uint32_t)))
+                return false;
+            node->len = mpack_load_u32(tree->data + tree->size + 1);
+            node->type = mpack_type_array;
+            return mpack_tree_parse_children(tree, node);
+
+        // map16 (16-bit entry count follows the type byte)
+        case 0xde:
+            if (!mpack_tree_reserve_bytes(tree, sizeof(uint16_t)))
+                return false;
+            node->len = mpack_load_u16(tree->data + tree->size + 1);
+            node->type = mpack_type_map;
+            return mpack_tree_parse_children(tree, node);
+
+        // map32 (32-bit entry count follows the type byte)
+        case 0xdf:
+            if (!mpack_tree_reserve_bytes(tree, sizeof(uint32_t)))
+                return false;
+            node->len = mpack_load_u32(tree->data + tree->size + 1);
+            node->type = mpack_type_map;
+            return mpack_tree_parse_children(tree, node);
+
+        // reserved (flagged as invalid data)
+        case 0xc1:
+            mpack_tree_flag_error(tree, mpack_error_invalid);
+            return false;
+
+        #if !MPACK_EXTENSIONS
+        // ext (flagged as unsupported when extensions are compiled out)
+        case 0xc7: // fallthrough
+        case 0xc8: // fallthrough
+        case 0xc9: // fallthrough
+        // fixext
+        case 0xd4: // fallthrough
+        case 0xd5: // fallthrough
+        case 0xd6: // fallthrough
+        case 0xd7: // fallthrough
+        case 0xd8:
+            mpack_tree_flag_error(tree, mpack_error_unsupported);
+            return false;
+        #endif
+
+        #if MPACK_OPTIMIZE_FOR_SIZE
+        // any other bytes should have been handled by the infix switch
+        default:
+            break;
+        #endif
+    }
+
+    mpack_assert(0, "unreachable"); // every byte value is expected to return from one of the switches above
+    return false;
+}
+
+/*
+ * Parses a single node at the current position (tree->size) into *node and
+ * advances the tree past it.
+ *
+ * On success the bytes reserved for the node are subtracted from
+ * parser.possible_nodes_left, and tree->size grows by the node's own size:
+ * its type byte plus header and payload bytes, but not the one byte per
+ * child that arrays and maps reserve for their contents.
+ *
+ * Returns false if mpack_tree_parse_node_contents() returned false, in which
+ * case neither counter is updated.
+ */
+static bool mpack_tree_parse_node(mpack_tree_t* tree, mpack_node_data_t* node) {
+    mpack_log("parsing a node at position %i in level %i\n",
+            (int)tree->size, (int)tree->parser.level);
+
+    if (!mpack_tree_parse_node_contents(tree, node)) {
+        mpack_log("node parsing returned false\n");
+        return false;
+    }
+
+    tree->parser.possible_nodes_left -= tree->parser.current_node_reserved;
+
+    // The reserve for the current node does not include the initial byte
+    // previously reserved as part of its parent.
+    size_t node_size = tree->parser.current_node_reserved + 1;
+
+    // If the parsed type is a map or array, the reserve includes one byte for
+    // each child. We want to subtract these out of possible_nodes_left, but
+    // not out of the current size of the tree.
+    //
+    // The child count is computed in size_t, exactly as it was when the bytes
+    // were reserved. Doubling a map's length at the width of node->len could
+    // wrap for very large maps and leave tree->size wrong.
+    if (node->type == mpack_type_array)
+        node_size -= (size_t)node->len;
+    else if (node->type == mpack_type_map)
+        node_size -= (size_t)node->len * 2;
+    tree->size += node_size;
+
+    mpack_log("parsed a node of type %s of %i bytes and "
+            "%i additional bytes reserved for children.\n",
+            mpack_type_to_string(node->type), (int)node_size,
+            (int)tree->parser.current_node_reserved + 1 - (int)node_size);
+
+    return true;
+}
+
+/*
+ * Parses nodes iteratively (rather than recursively) until the parse stack
+ * is empty. Each stack level holds the next child to parse and the number of
+ * children still to be read at that depth.
+ *
+ * Returns true when the whole stack has been consumed. Returns false if the
+ * tree is already in an error state or a node could not be parsed; the parser
+ * state is left as it was before that node, so parsing can pause and resume
+ * when more data becomes available.
+ */
+static bool mpack_tree_continue_parsing(mpack_tree_t* tree) {
+    if (mpack_tree_error(tree) != mpack_ok)
+        return false;
+
+    mpack_tree_parser_t* parser = &tree->parser;
+    mpack_assert(parser->state == mpack_tree_parse_state_in_progress);
+    mpack_log("parsing tree elements, %i bytes in buffer\n", (int)tree->data_length);
+
+    // the only ways out of this loop are the return statements inside it.
+    for (;;) {
+        // remember the level we are parsing at: parsing a container pushes a
+        // new level, and may replace the stack allocation while doing so, so
+        // the stack is indexed afresh afterwards instead of keeping a pointer
+        // into it.
+        size_t current = parser->level;
+        mpack_node_data_t* target = parser->stack[current].child;
+        if (!mpack_tree_parse_node(tree, target))
+            return false;
+
+        // this child is done; step to its next sibling.
+        ++parser->stack[current].child;
+        --parser->stack[current].left;
+
+        mpack_assert(mpack_tree_error(tree) == mpack_ok,
+                "mpack_tree_parse_node() should have returned false due to error!");
+
+        // pop every level that has no children left; once the bottom level
+        // is exhausted the whole tree has been parsed. (levels are popped
+        // only after parsing so that the stack stays complete, which is
+        // wanted for reporting the location of errors.)
+        for (; parser->stack[parser->level].left == 0; --parser->level) {
+            if (parser->level == 0)
+                return true;
+        }
+    }
+}
+
+// Releases what a parse allocated: the parse stack, if the parser owns it,
+// and every page in the tree's page list. Does nothing when MPACK_MALLOC is
+// not defined.
+static void mpack_tree_cleanup(mpack_tree_t* tree) {
+    MPACK_UNUSED(tree);
+
+    #ifdef MPACK_MALLOC
+    mpack_tree_parser_t* parser = &tree->parser;
+    if (parser->stack_owned) {
+        MPACK_FREE(parser->stack);
+        parser->stack = NULL;
+        parser->stack_owned = false;
+    }
+
+    // walk the page list, reading each link before its page is freed
+    mpack_tree_page_t* page;
+    mpack_tree_page_t* following;
+    for (page = tree->next; page != NULL; page = following) {
+        following = page->next;
+        mpack_log("freeing page %p\n", (void*)page);
+        MPACK_FREE(page);
+    }
+    tree->next = NULL;
+    #endif
+}
+
+// Prepares the tree and its parser for parsing the next message.
+//
+// Releases what a previously completed parse allocated, discards the bytes of
+// the previously parsed message, reserves the first byte of the new message
+// and places the root node on an otherwise empty parse stack.
+//
+// Returns false if the tree is already in an error state, if the first byte
+// cannot be reserved (the parser state is reset to not_started in that case),
+// or if the first node page cannot be allocated (mpack_error_memory is
+// flagged).
+static bool mpack_tree_parse_start(mpack_tree_t* tree) {
+    if (mpack_tree_error(tree) != mpack_ok)
+        return false;
+
+    mpack_tree_parser_t* parser = &tree->parser;
+    mpack_assert(parser->state != mpack_tree_parse_state_in_progress,
+            "previous parsing was not finished!");
+
+    if (parser->state == mpack_tree_parse_state_parsed)
+        mpack_tree_cleanup(tree);
+
+    mpack_log("starting parse\n");
+    tree->parser.state = mpack_tree_parse_state_in_progress;
+    tree->parser.current_node_reserved = 0;
+
+    // check if we previously parsed a tree
+    if (tree->size > 0) {
+        #ifdef MPACK_MALLOC
+        // if we're buffered, move the remaining data back to the
+        // start of the buffer
+        // TODO: This is not ideal performance-wise. We should only move data
+        // when we need to call the fill function.
+        // TODO: We could consider shrinking the buffer here, especially if we
+        // determine that the fill function is providing less than a quarter of
+        // the buffer size or if messages take up less than a quarter of the
+        // buffer size. Maybe this should be configurable.
+        if (tree->buffer != NULL) {
+            mpack_memmove(tree->buffer, tree->buffer + tree->size, tree->data_length - tree->size);
+        }
+        else
+        #endif
+        // otherwise advance past the parsed data
+        {
+            tree->data += tree->size;
+        }
+        tree->data_length -= tree->size;
+        tree->size = 0;
+        tree->node_count = 0;
+    }
+
+    // make sure we have at least one byte available before allocating anything
+    parser->possible_nodes_left = tree->data_length;
+    if (!mpack_tree_reserve_bytes(tree, sizeof(uint8_t))) {
+        tree->parser.state = mpack_tree_parse_state_not_started;
+        return false;
+    }
+    mpack_log("parsing tree at %p starting with byte %x\n", tree->data, (uint8_t)tree->data[0]);
+    parser->possible_nodes_left -= 1;
+    tree->node_count = 1;
+
+    #ifdef MPACK_MALLOC
+    // start on the parser's built-in stack storage
+    parser->stack = parser->stack_local;
+    parser->stack_owned = false;
+    parser->stack_capacity = sizeof(parser->stack_local) / sizeof(*parser->stack_local);
+
+    if (tree->pool == NULL) {
+
+        // allocate first page
+        mpack_tree_page_t* page = (mpack_tree_page_t*)MPACK_MALLOC(MPACK_PAGE_ALLOC_SIZE);
+        mpack_log("allocated initial page %p of size %i count %i\n",
+                (void*)page, (int)MPACK_PAGE_ALLOC_SIZE, (int)MPACK_NODES_PER_PAGE);
+        if (page == NULL) {
+            // Go through mpack_tree_flag_error() rather than assigning
+            // tree->error directly so the failure is logged and the error
+            // callback runs, as for every other error flagged on the tree.
+            mpack_tree_flag_error(tree, mpack_error_memory);
+            return false;
+        }
+        page->next = NULL;
+        tree->next = page;
+
+        parser->nodes = page->nodes;
+        parser->nodes_left = MPACK_NODES_PER_PAGE;
+    }
+    else
+    #endif
+    {
+        // otherwise use the provided pool
+        mpack_assert(tree->pool != NULL, "no pool provided?");
+        parser->nodes = tree->pool;
+        parser->nodes_left = tree->pool_count;
+    }
+
+    // the root takes the first available node
+    tree->root = parser->nodes;
+    ++parser->nodes;
+    --parser->nodes_left;
+
+    // the stack starts with a single level holding one node left: the root
+    parser->level = 0;
+    parser->stack[0].child = tree->root;
+    parser->stack[0].left = 1;
+
+    return true;
+}
+
+// Parses the next tree synchronously. When the parse cannot be started or
+// does not run to completion, flags mpack_error_invalid if the tree has no
+// read function and mpack_error_io otherwise (unless an error is already
+// set).
+void mpack_tree_parse(mpack_tree_t* tree) {
+    if (mpack_tree_error(tree) != mpack_ok)
+        return;
+
+    // begin a new parse unless one is already underway
+    if (tree->parser.state != mpack_tree_parse_state_in_progress &&
+            !mpack_tree_parse_start(tree)) {
+        mpack_tree_flag_error(tree, (tree->read_fn == NULL) ?
+                mpack_error_invalid : mpack_error_io);
+        return;
+    }
+
+    if (!mpack_tree_continue_parsing(tree)) {
+        // We're parsing synchronously on a blocking fill function. If we
+        // didn't completely finish parsing the tree and nothing else has
+        // been flagged, it's an error.
+        if (mpack_tree_error(tree) == mpack_ok) {
+            mpack_log("tree parsing incomplete. flagging error.\n");
+            mpack_tree_flag_error(tree, (tree->read_fn == NULL) ?
+                    mpack_error_invalid : mpack_error_io);
+        }
+        return;
+    }
+
+    mpack_assert(mpack_tree_error(tree) == mpack_ok);
+    mpack_assert(tree->parser.level == 0);
+    tree->parser.state = mpack_tree_parse_state_parsed;
+    mpack_log("parsed tree of %i bytes, %i bytes left\n", (int)tree->size, (int)tree->parser.possible_nodes_left);
+    mpack_log("%i nodes in final page\n", (int)tree->parser.nodes_left);
+}
+
+// Attempts to parse the next tree without treating an incomplete parse as
+// an error. Returns true when a whole tree was parsed; returns false when
+// the tree is in an error state or when starting or continuing the parse
+// returned false.
+bool mpack_tree_try_parse(mpack_tree_t* tree) {
+    if (mpack_tree_error(tree) != mpack_ok)
+        return false;
+
+    // begin a new parse unless one is already underway
+    bool in_progress = (tree->parser.state == mpack_tree_parse_state_in_progress);
+    if (!in_progress && !mpack_tree_parse_start(tree))
+        return false;
+
+    if (!mpack_tree_continue_parsing(tree))
+        return false;
+
+    mpack_assert(mpack_tree_error(tree) == mpack_ok);
+    mpack_assert(tree->parser.level == 0);
+    tree->parser.state = mpack_tree_parse_state_parsed;
+    return true;
+}
+
+
+
+/*
+ * Tree functions
+ */
+
+// Returns the root node of a parsed tree. Returns the tree's nil node if the
+// tree is in an error state, or if no tree has been parsed (which also
+// flags mpack_error_bug).
+mpack_node_t mpack_tree_root(mpack_tree_t* tree) {
+    if (mpack_tree_error(tree) != mpack_ok)
+        return mpack_tree_nil_node(tree);
+
+    if (tree->parser.state == mpack_tree_parse_state_parsed)
+        return mpack_node(tree, tree->root);
+
+    // A tree must have been parsed successfully before its root can be
+    // accessed: call mpack_tree_parse(), or mpack_tree_try_parse() with a
+    // success result, first.
+    mpack_break("Tree has not been parsed! "
+            "Did you call mpack_tree_parse() or mpack_tree_try_parse()?");
+    mpack_tree_flag_error(tree, mpack_error_bug);
+    return mpack_tree_nil_node(tree);
+}
+
+// Zeroes the whole tree structure, then sets the non-zero defaults: the
+// types of the built-in nil and missing nodes and unlimited size and node
+// limits.
+static void mpack_tree_init_clear(mpack_tree_t* tree) {
+    mpack_memset(tree, 0, sizeof(*tree));
+
+    tree->max_size = SIZE_MAX;
+    tree->max_nodes = SIZE_MAX;
+
+    tree->nil_node.type = mpack_type_nil;
+    tree->missing_node.type = mpack_type_missing;
+}
+
+#ifdef MPACK_MALLOC
+// Initializes a tree over `length` bytes at `data`, with no caller-supplied
+// node pool (pool is NULL and pool_count is 0).
+void mpack_tree_init_data(mpack_tree_t* tree, const char* data, size_t length) {
+    MPACK_STATIC_ASSERT(MPACK_NODE_PAGE_SIZE >= sizeof(mpack_tree_page_t),
+            "MPACK_NODE_PAGE_SIZE is too small");
+
+    MPACK_STATIC_ASSERT(MPACK_PAGE_ALLOC_SIZE <= MPACK_NODE_PAGE_SIZE,
+            "incorrect page rounding?");
+
+    mpack_tree_init_clear(tree);
+
+    // no node pool and no pages yet
+    tree->pool = NULL;
+    tree->pool_count = 0;
+    tree->next = NULL;
+
+    // the message bytes to parse
+    tree->data = data;
+    tree->data_length = length;
+
+    mpack_log("===========================\n");
+    mpack_log("initializing tree with data of size %i\n", (int)length);
+}
+#endif
+
+// Initializes a tree over `length` bytes at `data` that takes its nodes from
+// the caller-supplied array `node_pool` of `node_pool_count` entries. An
+// empty pool is a usage error and flags mpack_error_bug.
+void mpack_tree_init_pool(mpack_tree_t* tree, const char* data, size_t length,
+        mpack_node_data_t* node_pool, size_t node_pool_count)
+{
+    mpack_tree_init_clear(tree);
+    #ifdef MPACK_MALLOC
+    tree->next = NULL;
+    #endif
+
+    if (node_pool_count == 0) {
+        mpack_break("initial page has no nodes!");
+        mpack_tree_flag_error(tree, mpack_error_bug);
+        return;
+    }
+
+    // the caller-provided node storage
+    tree->pool = node_pool;
+    tree->pool_count = node_pool_count;
+
+    // the message bytes to parse
+    tree->data = data;
+    tree->data_length = length;
+
+    mpack_log("===========================\n");
+    mpack_log("initializing tree with data of size %i and pool of count %i\n",
+            (int)length, (int)node_pool_count);
+}
+
+void mpack_tree_init_error(mpack_tree_t* tree, mpack_error_t error) {
+    mpack_tree_init_clear(tree);
+    tree->error = error;
+
+    mpack_log("===========================\n");
+    mpack_log("initializing tree error state %i\n", (int)error);
+}
+
+#ifdef MPACK_MALLOC
+// Initializes a tree that obtains its data through `read_fn`, which is
+// stored together with `context` on the tree.
+//
+// max_message_size and max_message_nodes become the tree's limits; both are
+// asserted to be greater than zero by mpack_tree_set_limits().
+void mpack_tree_init_stream(mpack_tree_t* tree, mpack_tree_read_t read_fn, void* context,
+        size_t max_message_size, size_t max_message_nodes) {
+    mpack_tree_init_clear(tree);
+
+    tree->read_fn = read_fn;
+    tree->context = context;
+
+    // mpack_tree_set_limits() validates and stores both limits, so they are
+    // not assigned a second time here.
+    mpack_tree_set_limits(tree, max_message_size, max_message_nodes);
+
+    mpack_log("===========================\n");
+    mpack_log("initializing tree with stream, max size %i max nodes %i\n",
+            (int)max_message_size, (int)max_message_nodes);
+}
+#endif
+
+// Stores the maximum message size and the maximum node count on the tree.
+// Both limits are asserted to be greater than zero.
+void mpack_tree_set_limits(mpack_tree_t* tree, size_t max_message_size, size_t max_message_nodes) {
+    mpack_assert(max_message_size > 0);
+    tree->max_size = max_message_size;
+
+    mpack_assert(max_message_nodes > 0);
+    tree->max_nodes = max_message_nodes;
+}
+
+#if MPACK_STDIO
+typedef struct mpack_file_tree_t { // state attached as the tree's context when a tree is initialized from a file
+    char* data; // heap buffer holding the file contents; released by mpack_file_tree_teardown()
+    size_t size; // length of data in bytes, set by mpack_file_tree_read()
+    char buffer[MPACK_BUFFER_SIZE]; // NOTE(review): not referenced by the file-tree functions in this part of the file -- confirm it is still needed
+} mpack_file_tree_t;
+
+// Teardown callback for trees initialized from a file: frees the file
+// contents and then the mpack_file_tree_t stored as the tree's context.
+static void mpack_file_tree_teardown(mpack_tree_t* tree) {
+    mpack_file_tree_t* owned = (mpack_file_tree_t*)tree->context;
+
+    // the contents first, then the structure that points to them
+    MPACK_FREE(owned->data);
+    MPACK_FREE(owned);
+}
+
+// Reads the whole of `file` into a newly allocated buffer and stores the
+// buffer and its length in `file_tree`.
+//
+// max_bytes is the largest file size accepted; 0 disables the check.
+//
+// Returns true on success. On failure returns false, initializes `tree` into
+// an error state and leaves no allocation behind:
+//   mpack_error_io       seeking, telling or reading failed
+//   mpack_error_invalid  the file is empty
+//   mpack_error_too_big  the file is larger than max_bytes
+//   mpack_error_memory   the buffer could not be allocated
+static bool mpack_file_tree_read(mpack_tree_t* tree, mpack_file_tree_t* file_tree, FILE* file, size_t max_bytes) {
+
+    // Get the file size by seeking to the end and back. Failure is detected
+    // from the return values: errno is allowed to be non-zero after a call
+    // that succeeded, so it is not a reliable failure indicator by itself.
+    long size = -1;
+    bool seek_ok = (fseek(file, 0, SEEK_END) == 0);
+    if (seek_ok) {
+        size = ftell(file);
+        seek_ok = (fseek(file, 0, SEEK_SET) == 0);
+    }
+
+    // check for errors (ftell() reports failure with a negative result)
+    if (!seek_ok || size < 0) {
+        mpack_tree_init_error(tree, mpack_error_io);
+        return false;
+    }
+    if (size == 0) {
+        mpack_tree_init_error(tree, mpack_error_invalid);
+        return false;
+    }
+
+    // make sure the size is less than max_bytes
+    // (this mess exists to safely convert between long and size_t regardless of their widths)
+    if (max_bytes != 0 && (((uint64_t)LONG_MAX > (uint64_t)SIZE_MAX && size > (long)SIZE_MAX) || (size_t)size > max_bytes)) {
+        mpack_tree_init_error(tree, mpack_error_too_big);
+        return false;
+    }
+
+    // allocate data
+    file_tree->data = (char*)MPACK_MALLOC((size_t)size);
+    if (file_tree->data == NULL) {
+        mpack_tree_init_error(tree, mpack_error_memory);
+        return false;
+    }
+
+    // read the file; fread() may return fewer bytes than requested, so keep
+    // going until everything has arrived
+    long total = 0;
+    while (total < size) {
+        size_t count = fread(file_tree->data + total, 1, (size_t)(size - total), file);
+        if (count == 0) {
+            // nothing was read: an error, or the file ended early
+            mpack_tree_init_error(tree, mpack_error_io);
+            MPACK_FREE(file_tree->data);
+            file_tree->data = NULL; // don't leave a dangling pointer behind
+            return false;
+        }
+        total += (long)count;
+    }
+
+    file_tree->size = (size_t)size;
+    return true;
+}
+
+// Validates a caller-supplied max_bytes. Returns true when it does not
+// exceed LONG_MAX; otherwise breaks, initializes the tree into
+// mpack_error_bug and returns false.
+static bool mpack_tree_file_check_max_bytes(mpack_tree_t* tree, size_t max_bytes) {
+
+    // the C STDIO family of file functions use long (e.g. ftell)
+    if (max_bytes <= LONG_MAX)
+        return true;
+
+    mpack_break("max_bytes of %" PRIu64 " is invalid, maximum is LONG_MAX", (uint64_t)max_bytes);
+    mpack_tree_init_error(tree, mpack_error_bug);
+    return false;
+}
+
+// Initializes the tree from the full contents of `stdfile` without closing
+// the file. On success the loaded data is attached as the tree's context and
+// released by mpack_file_tree_teardown(). On failure the tree is left in the
+// error state set by the step that failed.
+static void mpack_tree_init_stdfile_noclose(mpack_tree_t* tree, FILE* stdfile, size_t max_bytes) {
+
+    // allocate file tree
+    mpack_file_tree_t* file_tree = (mpack_file_tree_t*) MPACK_MALLOC(sizeof(*file_tree));
+    if (file_tree == NULL) {
+        mpack_tree_init_error(tree, mpack_error_memory);
+        return;
+    }
+
+    // read all data and hand it over to the tree
+    if (mpack_file_tree_read(tree, file_tree, stdfile, max_bytes)) {
+        mpack_tree_init_data(tree, file_tree->data, file_tree->size);
+        mpack_tree_set_context(tree, file_tree);
+        mpack_tree_set_teardown(tree, mpack_file_tree_teardown);
+        return;
+    }
+
+    // reading failed; the tree has already been put into an error state
+    MPACK_FREE(file_tree);
+}
+
+// Initializes the tree from the full contents of an open stdio file.
+//
+// max_bytes is the largest file size accepted (0 for no limit); it must not
+// exceed LONG_MAX or the tree is initialized into mpack_error_bug.
+//
+// When close_when_done is true the file is closed before this function
+// returns on every path, including the invalid max_bytes path, so the
+// caller never ends up holding a file it handed over.
+void mpack_tree_init_stdfile(mpack_tree_t* tree, FILE* stdfile, size_t max_bytes, bool close_when_done) {
+    if (!mpack_tree_file_check_max_bytes(tree, max_bytes)) {
+        if (close_when_done)
+            fclose(stdfile);
+        return;
+    }
+
+    mpack_tree_init_stdfile_noclose(tree, stdfile, max_bytes);
+
+    if (close_when_done)
+        fclose(stdfile);
+}
+
+// Initializes the tree from the contents of the file at `filename`, opened
+// in binary read mode. The tree is initialized into mpack_error_io if the
+// file cannot be opened. max_bytes is validated before the file is opened.
+void mpack_tree_init_filename(mpack_tree_t* tree, const char* filename, size_t max_bytes) {
+    FILE* file;
+
+    if (!mpack_tree_file_check_max_bytes(tree, max_bytes))
+        return;
+
+    // open the file
+    file = fopen(filename, "rb");
+    if (file == NULL) {
+        mpack_tree_init_error(tree, mpack_error_io);
+        return;
+    }
+
+    // the file was opened here, so ask for it to be closed when done
+    mpack_tree_init_stdfile(tree, file, max_bytes, true);
+}
+#endif
+
+// Destroys the tree: releases what parsing allocated, frees the tree's
+// buffer if it has one, and runs the teardown callback if one is set.
+// Returns the tree's error state as it is after the teardown has run.
+mpack_error_t mpack_tree_destroy(mpack_tree_t* tree) {
+    mpack_tree_cleanup(tree);
+
+    #ifdef MPACK_MALLOC
+    if (tree->buffer != NULL) {
+        MPACK_FREE(tree->buffer);
+    }
+    #endif
+
+    if (tree->teardown != NULL) {
+        tree->teardown(tree);
+    }
+    // cleared whether or not a callback was set
+    tree->teardown = NULL;
+
+    return tree->error;
+}
+
+void mpack_tree_flag_error(mpack_tree_t* tree, mpack_error_t error) {
+    if (tree->error == mpack_ok) {
+        mpack_log("tree %p setting error %i: %s\n", (void*)tree, (int)error, mpack_error_to_string(error));
+        tree->error = error;
+        if (tree->error_fn)
+            tree->error_fn(tree, error);
+    }
+
+}
+
+
+
+/*
+ * Node misc functions
+ */
+
+void mpack_node_flag_error(mpack_node_t node, mpack_error_t error) {
+    mpack_tree_flag_error(node.tree, error);
+}
+
+// Builds a tag describing the node: its type together with its value (for
+// scalar types), its byte length (str, bin, ext) or its element count
+// (array, map). Returns a nil tag if the node's tree is in an error state.
+mpack_tag_t mpack_node_tag(mpack_node_t node) {
+    if (mpack_node_error(node) != mpack_ok)
+        return mpack_tag_nil();
+
+    mpack_node_data_t* data = node.data;
+    mpack_tag_t tag = MPACK_TAG_ZERO;
+    tag.type = data->type;
+
+    switch (data->type) {
+        // If a node is missing, I don't know if it makes sense to ask for
+        // a tag for it. We'll return a missing tag to match the missing
+        // node I guess, but attempting to use the tag for anything (like
+        // writing it for example) will flag mpack_error_bug.
+        case mpack_type_missing: // fallthrough
+        case mpack_type_nil:
+            break;
+
+        // scalar values are copied over as they are
+        case mpack_type_bool:   tag.v.b = data->value.b; break;
+        case mpack_type_float:  tag.v.f = data->value.f; break;
+        case mpack_type_double: tag.v.d = data->value.d; break;
+        case mpack_type_int:    tag.v.i = data->value.i; break;
+        case mpack_type_uint:   tag.v.u = data->value.u; break;
+
+        // data types carry their length in bytes
+        case mpack_type_str: // fallthrough
+        case mpack_type_bin:
+            tag.v.l = data->len;
+            break;
+
+        #if MPACK_EXTENSIONS
+        case mpack_type_ext:
+            tag.v.l = data->len;
+            tag.exttype = mpack_node_exttype_unchecked(node);
+            break;
+        #endif
+
+        // compound types carry their element count
+        case mpack_type_array: // fallthrough
+        case mpack_type_map:
+            tag.v.n = data->len;
+            break;
+
+        default:
+            mpack_assert(0, "unrecognized type %i", (int)node.data->type);
+            break;
+    }
+
+    return tag;
+}
+
+#if MPACK_DEBUG && MPACK_STDIO
+// Appends a pseudo-JSON rendering of the node to `print`, recursing into
+// arrays and maps. `depth` is the nesting level of the node: children are
+// indented by depth + 1 groups of four spaces and the closing bracket by
+// depth groups.
+static void mpack_node_print_element(mpack_node_t node, mpack_print_t* print, size_t depth) {
+    mpack_node_data_t* data = node.data;
+    size_t index;
+    size_t pad;
+
+    switch (data->type) {
+        case mpack_type_str:
+            {
+                // quoted, with newline, backslash and double quote escaped
+                const char* bytes = mpack_node_data_unchecked(node);
+                mpack_print_append_cstr(print, "\"");
+                for (index = 0; index < data->len; ++index) {
+                    char c = bytes[index];
+                    if (c == '\n')
+                        mpack_print_append_cstr(print, "\\n");
+                    else if (c == '\\')
+                        mpack_print_append_cstr(print, "\\\\");
+                    else if (c == '"')
+                        mpack_print_append_cstr(print, "\\\"");
+                    else
+                        mpack_print_append(print, &c, 1);
+                }
+                mpack_print_append_cstr(print, "\"");
+            }
+            break;
+
+        case mpack_type_array:
+            mpack_print_append_cstr(print, "[\n");
+            for (index = 0; index < data->len; ++index) {
+                for (pad = 0; pad < depth + 1; ++pad)
+                    mpack_print_append_cstr(print, "    ");
+                mpack_node_print_element(mpack_node_array_at(node, index), print, depth + 1);
+                // a comma after every element except the last
+                if (index + 1 < data->len)
+                    mpack_print_append_cstr(print, ",");
+                mpack_print_append_cstr(print, "\n");
+            }
+            for (pad = 0; pad < depth; ++pad)
+                mpack_print_append_cstr(print, "    ");
+            mpack_print_append_cstr(print, "]");
+            break;
+
+        case mpack_type_map:
+            mpack_print_append_cstr(print, "{\n");
+            for (index = 0; index < data->len; ++index) {
+                for (pad = 0; pad < depth + 1; ++pad)
+                    mpack_print_append_cstr(print, "    ");
+                mpack_node_print_element(mpack_node_map_key_at(node, index), print, depth + 1);
+                mpack_print_append_cstr(print, ": ");
+                mpack_node_print_element(mpack_node_map_value_at(node, index), print, depth + 1);
+                // a comma after every entry except the last
+                if (index + 1 < data->len)
+                    mpack_print_append_cstr(print, ",");
+                mpack_print_append_cstr(print, "\n");
+            }
+            for (pad = 0; pad < depth; ++pad)
+                mpack_print_append_cstr(print, "    ");
+            mpack_print_append_cstr(print, "}");
+            break;
+
+        default:
+            {
+                // everything else is rendered from its tag; bin (and ext)
+                // nodes additionally pass their data along as a prefix
+                char buf[256];
+                const char* prefix = NULL;
+                size_t prefix_length = 0;
+
+                if (mpack_node_type(node) == mpack_type_bin
+                        #if MPACK_EXTENSIONS
+                        || mpack_node_type(node) == mpack_type_ext
+                        #endif
+                ) {
+                    prefix = mpack_node_data(node);
+                    prefix_length = mpack_node_data_len(node);
+                }
+
+                mpack_tag_t tag = mpack_node_tag(node);
+                mpack_tag_debug_pseudo_json(tag, buf, sizeof(buf), prefix, prefix_length);
+                mpack_print_append_cstr(print, buf);
+            }
+            break;
+    }
+}
+
+// Renders the node as pseudo-JSON into `buffer` of `buffer_size` bytes. The
+// last byte of the buffer is always set to a null-terminator. A buffer size
+// of zero asserts and writes nothing.
+void mpack_node_print_to_buffer(mpack_node_t node, char* buffer, size_t buffer_size) {
+    mpack_print_t out;
+
+    if (buffer_size == 0) {
+        mpack_assert(false, "buffer size is zero!");
+        return;
+    }
+
+    mpack_memset(&out, 0, sizeof(out));
+    out.size = buffer_size;
+    out.buffer = buffer;
+
+    mpack_node_print_element(node, &out, 0);
+    mpack_print_append(&out, "",  1); // null-terminator
+    mpack_print_flush(&out);
+
+    // we always make sure there's a null-terminator at the end of the buffer
+    // in case we ran out of space.
+    out.buffer[out.size - 1] = '\0';
+}
+
+// Renders the node as pseudo-JSON through a 1024-byte local buffer, handing
+// `callback` and `context` to the print state that the output goes through.
+void mpack_node_print_to_callback(mpack_node_t node, mpack_print_callback_t callback, void* context) {
+    char chunk[1024];
+    mpack_print_t out;
+
+    mpack_memset(&out, 0, sizeof(out));
+    out.callback = callback;
+    out.context = context;
+    out.buffer = chunk;
+    out.size = sizeof(chunk);
+
+    mpack_node_print_element(node, &out, 0);
+    mpack_print_flush(&out);
+}
+
+// Renders the node as pseudo-JSON to `file`, followed by a newline. The
+// output starts at nesting depth 2, i.e. indented by two groups of four
+// spaces.
+void mpack_node_print_to_file(mpack_node_t node, FILE* file) {
+    mpack_assert(file != NULL, "file is NULL");
+
+    char chunk[1024];
+    mpack_print_t out;
+    mpack_memset(&out, 0, sizeof(out));
+    out.callback = &mpack_print_file_callback;
+    out.context = file;
+    out.buffer = chunk;
+    out.size = sizeof(chunk);
+
+    // indent the first line to match the depth the element is printed at
+    const size_t depth = 2;
+    size_t remaining = depth;
+    while (remaining > 0) {
+        mpack_print_append_cstr(&out, "    ");
+        --remaining;
+    }
+
+    mpack_node_print_element(node, &out, depth);
+    mpack_print_append_cstr(&out, "\n");
+    mpack_print_flush(&out);
+}
+#endif
+
+
+
+/*
+ * Node Value Functions
+ */
+
+#if MPACK_EXTENSIONS
+// Decodes a timestamp from an ext node of type MPACK_EXTTYPE_TIMESTAMP.
+//
+// The payload may be 4, 8 or 12 bytes long. Returns a zero timestamp and
+// flags mpack_error_type if the node is not a timestamp ext, or
+// mpack_error_invalid if the payload length or the nanoseconds value is out
+// of range.
+mpack_timestamp_t mpack_node_timestamp(mpack_node_t node) {
+    mpack_timestamp_t timestamp = {0, 0};
+
+    // we'll let mpack_node_exttype() do most checks
+    if (mpack_node_exttype(node) != MPACK_EXTTYPE_TIMESTAMP) {
+        mpack_log("exttype %i\n", mpack_node_exttype(node));
+        mpack_node_flag_error(node, mpack_error_type);
+        return timestamp;
+    }
+
+    const char* payload = mpack_node_data_unchecked(node);
+
+    switch (node.data->len) {
+        case 4:
+            // 32 bits of seconds only
+            timestamp.seconds = mpack_load_u32(payload);
+            timestamp.nanoseconds = 0;
+            break;
+
+        case 8: {
+            // nanoseconds above bit 34, seconds in the low 34 bits
+            const uint64_t seconds_mask = (MPACK_UINT64_C(1) << 34) - 1;
+            uint64_t packed = mpack_load_u64(payload);
+            timestamp.seconds = packed & seconds_mask;
+            timestamp.nanoseconds = (uint32_t)(packed >> 34);
+            break;
+        }
+
+        case 12:
+            // 32 bits of nanoseconds followed by 64 bits of signed seconds
+            timestamp.nanoseconds = mpack_load_u32(payload);
+            timestamp.seconds = mpack_load_i64(payload + 4);
+            break;
+
+        default:
+            mpack_tree_flag_error(node.tree, mpack_error_invalid);
+            return timestamp;
+    }
+
+    if (timestamp.nanoseconds > MPACK_TIMESTAMP_NANOSECONDS_MAX) {
+        // don't hand back the partially decoded value
+        mpack_tree_flag_error(node.tree, mpack_error_invalid);
+        timestamp.seconds = 0;
+        timestamp.nanoseconds = 0;
+    }
+
+    return timestamp;
+}
+
+// Returns the seconds field of the timestamp decoded from the node by
+// mpack_node_timestamp().
+int64_t mpack_node_timestamp_seconds(mpack_node_t node) {
+    mpack_timestamp_t timestamp = mpack_node_timestamp(node);
+    return timestamp.seconds;
+}
+
+// Returns the nanoseconds field of the timestamp decoded from the node by
+// mpack_node_timestamp().
+uint32_t mpack_node_timestamp_nanoseconds(mpack_node_t node) {
+    mpack_timestamp_t timestamp = mpack_node_timestamp(node);
+    return timestamp.nanoseconds;
+}
+#endif
+
+
+
+/*
+ * Node Data Functions
+ */
+
+// Flags mpack_error_type unless the node is a str whose bytes pass
+// mpack_utf8_check(). Does nothing if the tree is already in an error state.
+void mpack_node_check_utf8(mpack_node_t node) {
+    if (mpack_node_error(node) != mpack_ok)
+        return;
+
+    mpack_node_data_t* data = node.data;
+
+    // the bytes are only examined once the node is known to be a str
+    bool valid = data->type == mpack_type_str &&
+            mpack_utf8_check(mpack_node_data_unchecked(node), data->len);
+    if (!valid)
+        mpack_node_flag_error(node, mpack_error_type);
+}
+
+// Flags mpack_error_type unless the node is a str whose bytes pass
+// mpack_utf8_check_no_null(). Does nothing if the tree is already in an
+// error state.
+void mpack_node_check_utf8_cstr(mpack_node_t node) {
+    if (mpack_node_error(node) != mpack_ok)
+        return;
+
+    mpack_node_data_t* data = node.data;
+
+    // the bytes are only examined once the node is known to be a str
+    bool valid = data->type == mpack_type_str &&
+            mpack_utf8_check_no_null(mpack_node_data_unchecked(node), data->len);
+    if (!valid)
+        mpack_node_flag_error(node, mpack_error_type);
+}
+
+// Copies the bytes of a str, bin or ext node into `buffer` and returns the
+// number of bytes copied. No null-terminator is added.
+//
+// Returns 0 if the tree is in an error state, and returns 0 after flagging
+// mpack_error_type (wrong node type) or mpack_error_too_big (the data is
+// longer than bufsize).
+size_t mpack_node_copy_data(mpack_node_t node, char* buffer, size_t bufsize) {
+    if (mpack_node_error(node) != mpack_ok)
+        return 0;
+
+    mpack_assert(bufsize == 0 || buffer != NULL, "buffer is NULL for maximum of %i bytes", (int)bufsize);
+
+    // only types that carry a byte payload can be copied
+    bool has_payload;
+    switch (node.data->type) {
+        case mpack_type_str: // fallthrough
+        case mpack_type_bin:
+        #if MPACK_EXTENSIONS
+        case mpack_type_ext:
+        #endif
+            has_payload = true;
+            break;
+        default:
+            has_payload = false;
+            break;
+    }
+    if (!has_payload) {
+        mpack_node_flag_error(node, mpack_error_type);
+        return 0;
+    }
+
+    if (node.data->len > bufsize) {
+        mpack_node_flag_error(node, mpack_error_too_big);
+        return 0;
+    }
+
+    mpack_memcpy(buffer, mpack_node_data_unchecked(node), node.data->len);
+    return (size_t)node.data->len;
+}
+
+// Copies the bytes of a str node into `buffer` after checking them with
+// mpack_utf8_check(), and returns the number of bytes copied. No
+// null-terminator is added.
+//
+// Returns 0 if the tree is in an error state, and returns 0 after flagging
+// mpack_error_type (not a str, or the UTF-8 check failed) or
+// mpack_error_too_big (the string is longer than bufsize).
+size_t mpack_node_copy_utf8(mpack_node_t node, char* buffer, size_t bufsize) {
+    if (mpack_node_error(node) != mpack_ok)
+        return 0;
+
+    mpack_assert(bufsize == 0 || buffer != NULL, "buffer is NULL for maximum of %i bytes", (int)bufsize);
+
+    // the checks run in this order; the first one that fails decides the error
+    mpack_error_t failure = mpack_ok;
+    if (node.data->type != mpack_type_str)
+        failure = mpack_error_type;
+    else if (node.data->len > bufsize)
+        failure = mpack_error_too_big;
+    else if (!mpack_utf8_check(mpack_node_data_unchecked(node), node.data->len))
+        failure = mpack_error_type;
+
+    if (failure != mpack_ok) {
+        mpack_node_flag_error(node, failure);
+        return 0;
+    }
+
+    mpack_memcpy(buffer, mpack_node_data_unchecked(node), node.data->len);
+    return (size_t)node.data->len;
+}
+
+// Copies a str node into `buffer` as a null-terminated string.
+//
+// On any failure the buffer is set to the empty string. Flags
+// mpack_error_type if the node is not a str or fails
+// mpack_str_check_no_null(), and mpack_error_too_big if the string plus
+// terminator does not fit in bufsize bytes.
+void mpack_node_copy_cstr(mpack_node_t node, char* buffer, size_t bufsize) {
+
+    // we can't break here because the error isn't recoverable; we
+    // have to add a null-terminator.
+    mpack_assert(buffer != NULL, "buffer is NULL");
+    mpack_assert(bufsize >= 1, "buffer size is zero; you must have room for at least a null-terminator");
+
+    if (mpack_node_error(node) != mpack_ok) {
+        buffer[0] = '\0';
+        return;
+    }
+
+    // the checks run in this order; the first one that fails decides the error
+    mpack_error_t failure = mpack_ok;
+    if (node.data->type != mpack_type_str)
+        failure = mpack_error_type;
+    else if (node.data->len > bufsize - 1)
+        failure = mpack_error_too_big;
+    else if (!mpack_str_check_no_null(mpack_node_data_unchecked(node), node.data->len))
+        failure = mpack_error_type;
+
+    if (failure != mpack_ok) {
+        // terminate the buffer before flagging the error
+        buffer[0] = '\0';
+        mpack_node_flag_error(node, failure);
+        return;
+    }
+
+    mpack_memcpy(buffer, mpack_node_data_unchecked(node), node.data->len);
+    buffer[node.data->len] = '\0';
+}
+
+// Copies a str node into `buffer` as a null-terminated string after checking
+// it with mpack_utf8_check_no_null().
+//
+// On any failure the buffer is set to the empty string. Flags
+// mpack_error_type if the node is not a str or fails the check, and
+// mpack_error_too_big if the string plus terminator does not fit in bufsize
+// bytes.
+void mpack_node_copy_utf8_cstr(mpack_node_t node, char* buffer, size_t bufsize) {
+
+    // we can't break here because the error isn't recoverable; we
+    // have to add a null-terminator.
+    mpack_assert(buffer != NULL, "buffer is NULL");
+    mpack_assert(bufsize >= 1, "buffer size is zero; you must have room for at least a null-terminator");
+
+    if (mpack_node_error(node) != mpack_ok) {
+        buffer[0] = '\0';
+        return;
+    }
+
+    // the checks run in this order; the first one that fails decides the error
+    mpack_error_t failure = mpack_ok;
+    if (node.data->type != mpack_type_str)
+        failure = mpack_error_type;
+    else if (node.data->len > bufsize - 1)
+        failure = mpack_error_too_big;
+    else if (!mpack_utf8_check_no_null(mpack_node_data_unchecked(node), node.data->len))
+        failure = mpack_error_type;
+
+    if (failure != mpack_ok) {
+        // terminate the buffer before flagging the error
+        buffer[0] = '\0';
+        mpack_node_flag_error(node, failure);
+        return;
+    }
+
+    mpack_memcpy(buffer, mpack_node_data_unchecked(node), node.data->len);
+    buffer[node.data->len] = '\0';
+}
+
+#ifdef MPACK_MALLOC
+// Returns a newly allocated copy of the bytes of a str, bin or ext node. No
+// null-terminator is added. The copy is allocated with MPACK_MALLOC() and
+// is not released by this function.
+//
+// Returns NULL if the tree is in an error state, and returns NULL after
+// flagging mpack_error_type (wrong node type), mpack_error_too_big (the
+// data is longer than maxlen) or mpack_error_memory (allocation failed).
+char* mpack_node_data_alloc(mpack_node_t node, size_t maxlen) {
+    if (mpack_node_error(node) != mpack_ok)
+        return NULL;
+
+    // make sure this is a valid data type
+    mpack_type_t type = node.data->type;
+    if (type != mpack_type_str && type != mpack_type_bin
+            #if MPACK_EXTENSIONS
+            && type != mpack_type_ext
+            #endif
+    ) {
+        mpack_node_flag_error(node, mpack_error_type);
+        return NULL;
+    }
+
+    if (node.data->len > maxlen) {
+        mpack_node_flag_error(node, mpack_error_too_big);
+        return NULL;
+    }
+
+    // An allocator may return NULL for a zero-byte request, which would be
+    // indistinguishable from running out of memory. Always ask for at least
+    // one byte so that an empty node yields a valid allocation.
+    const size_t length = (size_t)node.data->len;
+    char* ret = (char*) MPACK_MALLOC(length != 0 ? length : 1);
+    if (ret == NULL) {
+        mpack_node_flag_error(node, mpack_error_memory);
+        return NULL;
+    }
+
+    mpack_memcpy(ret, mpack_node_data_unchecked(node), length);
+    return ret;
+}
+
+// Returns a newly allocated, null-terminated copy of a str node. The copy is
+// allocated with MPACK_MALLOC() and is not released by this function.
+//
+// maxlen is the largest allocation allowed, terminator included, so it must
+// be at least 1.
+//
+// Returns NULL if the tree is in an error state, and returns NULL after
+// flagging mpack_error_bug (maxlen is zero), mpack_error_type (not a str, or
+// mpack_str_check_no_null() failed), mpack_error_too_big (the string plus
+// terminator exceeds maxlen) or mpack_error_memory (allocation failed).
+char* mpack_node_cstr_alloc(mpack_node_t node, size_t maxlen) {
+    if (mpack_node_error(node) != mpack_ok)
+        return NULL;
+
+    // make sure maxlen makes sense
+    if (maxlen < 1) {
+        mpack_break("maxlen is zero; you must have room for at least a null-terminator");
+        mpack_node_flag_error(node, mpack_error_bug);
+        return NULL;
+    }
+
+    if (node.data->type != mpack_type_str) {
+        mpack_node_flag_error(node, mpack_error_type);
+        return NULL;
+    }
+
+    if (node.data->len > maxlen - 1) {
+        mpack_node_flag_error(node, mpack_error_too_big);
+        return NULL;
+    }
+
+    if (!mpack_str_check_no_null(mpack_node_data_unchecked(node), node.data->len)) {
+        mpack_node_flag_error(node, mpack_error_type);
+        return NULL;
+    }
+
+    // Widen the length to size_t before adding the terminator. If the length
+    // field is narrower than size_t, adding 1 in its own type could wrap to
+    // zero and the copy below would overrun the allocation. The maxlen check
+    // above guarantees that length + 1 fits in size_t.
+    const size_t length = (size_t)node.data->len;
+    char* ret = (char*) MPACK_MALLOC(length + 1);
+    if (ret == NULL) {
+        mpack_node_flag_error(node, mpack_error_memory);
+        return NULL;
+    }
+
+    mpack_memcpy(ret, mpack_node_data_unchecked(node), length);
+    ret[length] = '\0';
+    return ret;
+}
+
+// Returns a newly allocated, null-terminated copy of a str node after
+// checking it with mpack_utf8_check_no_null(). The copy is allocated with
+// MPACK_MALLOC() and is not released by this function.
+//
+// maxlen is the largest allocation allowed, terminator included, so it must
+// be at least 1.
+//
+// Returns NULL if the tree is in an error state, and returns NULL after
+// flagging mpack_error_bug (maxlen is zero), mpack_error_type (not a str, or
+// the check failed), mpack_error_too_big (the string plus terminator exceeds
+// maxlen) or mpack_error_memory (allocation failed).
+char* mpack_node_utf8_cstr_alloc(mpack_node_t node, size_t maxlen) {
+    if (mpack_node_error(node) != mpack_ok)
+        return NULL;
+
+    // make sure maxlen makes sense
+    if (maxlen < 1) {
+        mpack_break("maxlen is zero; you must have room for at least a null-terminator");
+        mpack_node_flag_error(node, mpack_error_bug);
+        return NULL;
+    }
+
+    if (node.data->type != mpack_type_str) {
+        mpack_node_flag_error(node, mpack_error_type);
+        return NULL;
+    }
+
+    if (node.data->len > maxlen - 1) {
+        mpack_node_flag_error(node, mpack_error_too_big);
+        return NULL;
+    }
+
+    if (!mpack_utf8_check_no_null(mpack_node_data_unchecked(node), node.data->len)) {
+        mpack_node_flag_error(node, mpack_error_type);
+        return NULL;
+    }
+
+    // Widen the length to size_t before adding the terminator. If the length
+    // field is narrower than size_t, adding 1 in its own type could wrap to
+    // zero and the copy below would overrun the allocation. The maxlen check
+    // above guarantees that length + 1 fits in size_t.
+    const size_t length = (size_t)node.data->len;
+    char* ret = (char*) MPACK_MALLOC(length + 1);
+    if (ret == NULL) {
+        mpack_node_flag_error(node, mpack_error_memory);
+        return NULL;
+    }
+
+    mpack_memcpy(ret, mpack_node_data_unchecked(node), length);
+    ret[length] = '\0';
+    return ret;
+}
+#endif
+
+
+/*
+ * Compound Node Functions
+ */
+
+// Looks up the value stored in a map under the integer key `num`. An int key
+// matches when it equals num; a uint key matches when num is non-negative
+// and equal to it.
+//
+// Returns the value's node data, or NULL if no key matches. Also returns
+// NULL if the tree is in an error state, and after flagging
+// mpack_error_type (the node is not a map) or mpack_error_data (more than
+// one key matches).
+static mpack_node_data_t* mpack_node_map_int_impl(mpack_node_t node, int64_t num) {
+    if (mpack_node_error(node) != mpack_ok)
+        return NULL;
+
+    if (node.data->type != mpack_type_map) {
+        mpack_node_flag_error(node, mpack_error_type);
+        return NULL;
+    }
+
+    mpack_node_data_t* match = NULL;
+
+    // keys sit at the even child indices, each followed by its value
+    size_t entry;
+    for (entry = 0; entry < node.data->len; ++entry) {
+        mpack_node_data_t* key = mpack_node_child(node, entry * 2);
+
+        bool same;
+        if (key->type == mpack_type_int)
+            same = (key->value.i == num);
+        else if (key->type == mpack_type_uint)
+            same = (num >= 0 && key->value.u == (uint64_t)num);
+        else
+            same = false;
+        if (!same)
+            continue;
+
+        // a second matching key makes the map ambiguous
+        if (match) {
+            mpack_node_flag_error(node, mpack_error_data);
+            return NULL;
+        }
+        match = mpack_node_child(node, entry * 2 + 1);
+    }
+
+    return match;
+}
+
+// Looks up the value stored in a map under the unsigned integer key `num`. A
+// uint key matches when it equals num; an int key matches when it is
+// non-negative and equal to num.
+//
+// Returns the value's node data, or NULL if no key matches. Also returns
+// NULL if the tree is in an error state, and after flagging
+// mpack_error_type (the node is not a map) or mpack_error_data (more than
+// one key matches).
+static mpack_node_data_t* mpack_node_map_uint_impl(mpack_node_t node, uint64_t num) {
+    if (mpack_node_error(node) != mpack_ok)
+        return NULL;
+
+    if (node.data->type != mpack_type_map) {
+        mpack_node_flag_error(node, mpack_error_type);
+        return NULL;
+    }
+
+    mpack_node_data_t* match = NULL;
+
+    // keys sit at the even child indices, each followed by its value
+    size_t entry;
+    for (entry = 0; entry < node.data->len; ++entry) {
+        mpack_node_data_t* key = mpack_node_child(node, entry * 2);
+
+        bool same;
+        if (key->type == mpack_type_uint)
+            same = (key->value.u == num);
+        else if (key->type == mpack_type_int)
+            same = (key->value.i >= 0 && (uint64_t)key->value.i == num);
+        else
+            same = false;
+        if (!same)
+            continue;
+
+        // a second matching key makes the map ambiguous
+        if (match) {
+            mpack_node_flag_error(node, mpack_error_data);
+            return NULL;
+        }
+        match = mpack_node_child(node, entry * 2 + 1);
+    }
+
+    return match;
+}
+
+// Looks up the value whose key is a string of exactly `length` bytes that
+// compares equal to str under mpack_memcmp() in a map node.
+// Returns NULL if the node is already in error, if it is not a map (flags
+// mpack_error_type), if more than one key matches (flags mpack_error_data),
+// or if no key matches (no error is flagged in that case).
+static mpack_node_data_t* mpack_node_map_str_impl(mpack_node_t node, const char* str, size_t length) {
+    mpack_node_data_t* match = NULL;
+    size_t pair;
+
+    if (mpack_node_error(node) != mpack_ok)
+        return NULL;
+
+    mpack_assert(length == 0 || str != NULL, "str of length %i is NULL", (int)length);
+
+    if (node.data->type != mpack_type_map) {
+        mpack_node_flag_error(node, mpack_error_type);
+        return NULL;
+    }
+
+    for (pair = 0; pair < node.data->len; ++pair) {
+        mpack_node_data_t* key = mpack_node_child(node, pair * 2);
+        if (key->type != mpack_type_str || key->len != length)
+            continue;
+        if (mpack_memcmp(str, mpack_node_data_unchecked(mpack_node(node.tree, key)), length) != 0)
+            continue;
+        if (match != NULL) {
+            mpack_node_flag_error(node, mpack_error_data);
+            return NULL;
+        }
+        match = mpack_node_child(node, pair * 2 + 1);
+    }
+    return match;
+}
+
+static mpack_node_t mpack_node_wrap_lookup(mpack_tree_t* tree, mpack_node_data_t* data) {
+    if (!data) {
+        if (tree->error == mpack_ok)
+            mpack_tree_flag_error(tree, mpack_error_data);
+        return mpack_tree_nil_node(tree);
+    }
+    return mpack_node(tree, data);
+}
+
+static mpack_node_t mpack_node_wrap_lookup_optional(mpack_tree_t* tree, mpack_node_data_t* data) {
+    if (!data) {
+        if (tree->error == mpack_ok)
+            return mpack_tree_missing_node(tree);
+        return mpack_tree_nil_node(tree);
+    }
+    return mpack_node(tree, data);
+}
+
+// Map lookup by signed integer key: the plain form requires the key, _optional tolerates its absence.
+mpack_node_t mpack_node_map_int(mpack_node_t node, int64_t num) {
+    return mpack_node_wrap_lookup(node.tree, mpack_node_map_int_impl(node, num));
+}
+mpack_node_t mpack_node_map_int_optional(mpack_node_t node, int64_t num) {
+    return mpack_node_wrap_lookup_optional(node.tree, mpack_node_map_int_impl(node, num));
+}
+
+// Map lookup by unsigned integer key: the plain form requires the key, _optional tolerates its absence.
+mpack_node_t mpack_node_map_uint(mpack_node_t node, uint64_t num) {
+    return mpack_node_wrap_lookup(node.tree, mpack_node_map_uint_impl(node, num));
+}
+mpack_node_t mpack_node_map_uint_optional(mpack_node_t node, uint64_t num) {
+    return mpack_node_wrap_lookup_optional(node.tree, mpack_node_map_uint_impl(node, num));
+}
+
+// Map lookup by string key of explicit length: the plain form requires the key, _optional tolerates its absence.
+mpack_node_t mpack_node_map_str(mpack_node_t node, const char* str, size_t length) {
+    return mpack_node_wrap_lookup(node.tree, mpack_node_map_str_impl(node, str, length));
+}
+mpack_node_t mpack_node_map_str_optional(mpack_node_t node, const char* str, size_t length) {
+    return mpack_node_wrap_lookup_optional(node.tree, mpack_node_map_str_impl(node, str, length));
+}
+
+// C-string key variants: assert cstr is non-NULL, measure it with mpack_strlen(), then defer to the length-based lookups.
+mpack_node_t mpack_node_map_cstr(mpack_node_t node, const char* cstr) {
+    mpack_assert(cstr != NULL, "cstr is NULL");
+    return mpack_node_map_str(node, cstr, mpack_strlen(cstr));
+}
+mpack_node_t mpack_node_map_cstr_optional(mpack_node_t node, const char* cstr) {
+    mpack_assert(cstr != NULL, "cstr is NULL");
+    return mpack_node_map_str_optional(node, cstr, mpack_strlen(cstr));
+}
+
+// True if the integer key is present; the lookup helper still flags an error for a non-map node or a duplicate key.
+bool mpack_node_map_contains_int(mpack_node_t node, int64_t num) {
+    return mpack_node_map_int_impl(node, num) != NULL;
+}
+bool mpack_node_map_contains_uint(mpack_node_t node, uint64_t num) {
+    return mpack_node_map_uint_impl(node, num) != NULL;
+}
+
+// True if the string key is present; the lookup helper still flags an error for a non-map node or a duplicate key.
+bool mpack_node_map_contains_str(mpack_node_t node, const char* str, size_t length) {
+    return mpack_node_map_str_impl(node, str, length) != NULL;
+}
+bool mpack_node_map_contains_cstr(mpack_node_t node, const char* cstr) {
+    mpack_assert(cstr != NULL, "cstr is NULL");
+    return mpack_node_map_contains_str(node, cstr, mpack_strlen(cstr));
+}
+
+// Matches a string node against a table of C strings.
+// Returns the index of the first entry in strings[0..count) whose bytes equal
+// the node's string, or count if the node is in error, is not a string, or
+// matches no entry. No error is flagged here when nothing matches.
+size_t mpack_node_enum_optional(mpack_node_t node, const char* strings[], size_t count) {
+    // Anything other than an error-free string node is simply "not recognized".
+    if (mpack_node_error(node) != mpack_ok)
+        return count;
+    if (mpack_node_type(node) != mpack_type_str)
+        return count;
+
+    const char* text = mpack_node_str(node);
+    size_t textlen = mpack_node_strlen(node);
+    mpack_assert(mpack_node_error(node) == mpack_ok, "these should not fail");
+
+    size_t index;
+    for (index = 0; index < count; ++index) {
+        const char* candidate = strings[index];
+        size_t candidatelen = mpack_strlen(candidate);
+        if (textlen == candidatelen && mpack_memcmp(text, candidate, textlen) == 0)
+            return index;
+    }
+    return count;
+}
+
+// Strict form of mpack_node_enum_optional(): a result equal to count flags mpack_error_type.
+size_t mpack_node_enum(mpack_node_t node, const char* strings[], size_t count) {
+    const size_t index = mpack_node_enum_optional(node, strings, count);
+    if (index == count)
+        mpack_node_flag_error(node, mpack_error_type);
+    return index;
+}
+
+mpack_type_t mpack_node_type(mpack_node_t node) {
+    if (mpack_node_error(node) != mpack_ok)
+        return mpack_type_nil;
+    return node.data->type;
+}
+
+bool mpack_node_is_nil(mpack_node_t node) {
+    if (mpack_node_error(node) != mpack_ok) {
+        // All nodes are treated as nil nodes when we are in error.
+        return true;
+    }
+    return node.data->type == mpack_type_nil;
+}
+
+bool mpack_node_is_missing(mpack_node_t node) {
+    if (mpack_node_error(node) != mpack_ok) {
+        // errors still return nil nodes, not missing nodes.
+        return false;
+    }
+    return node.data->type == mpack_type_missing;
+}
+
+void mpack_node_nil(mpack_node_t node) {
+    if (mpack_node_error(node) != mpack_ok)
+        return;
+    if (node.data->type != mpack_type_nil)
+        mpack_node_flag_error(node, mpack_error_type);
+}
+
+void mpack_node_missing(mpack_node_t node) {
+    if (mpack_node_error(node) != mpack_ok)
+        return;
+    if (node.data->type != mpack_type_missing)
+        mpack_node_flag_error(node, mpack_error_type);
+}
+
+// Returns a bool node's value; any other type flags mpack_error_type and yields false.
+bool mpack_node_bool(mpack_node_t node) {
+    if (mpack_node_error(node) != mpack_ok)
+        return false;
+    if (node.data->type != mpack_type_bool) {
+        mpack_node_flag_error(node, mpack_error_type);
+        return false;
+    }
+    return node.data->value.b;
+}
+
+// Expect a specific boolean: flag mpack_error_type when mpack_node_bool() returns the opposite value.
+void mpack_node_true(mpack_node_t node) {
+    if (!mpack_node_bool(node))
+        mpack_node_flag_error(node, mpack_error_type);
+}
+void mpack_node_false(mpack_node_t node) {
+    if (mpack_node_bool(node))
+        mpack_node_flag_error(node, mpack_error_type);
+}
+
+uint8_t mpack_node_u8(mpack_node_t node) {
+    if (mpack_node_error(node) != mpack_ok)
+        return 0;
+
+    if (node.data->type == mpack_type_uint) {
+        if (node.data->value.u <= MPACK_UINT8_MAX)
+            return (uint8_t)node.data->value.u;
+    } else if (node.data->type == mpack_type_int) {
+        if (node.data->value.i >= 0 && node.data->value.i <= MPACK_UINT8_MAX)
+            return (uint8_t)node.data->value.i;
+    }
+
+    mpack_node_flag_error(node, mpack_error_type);
+    return 0;
+}
+
+int8_t mpack_node_i8(mpack_node_t node) {
+    if (mpack_node_error(node) != mpack_ok)
+        return 0;
+
+    if (node.data->type == mpack_type_uint) {
+        if (node.data->value.u <= MPACK_INT8_MAX)
+            return (int8_t)node.data->value.u;
+    } else if (node.data->type == mpack_type_int) {
+        if (node.data->value.i >= MPACK_INT8_MIN && node.data->value.i <= MPACK_INT8_MAX)
+            return (int8_t)node.data->value.i;
+    }
+
+    mpack_node_flag_error(node, mpack_error_type);
+    return 0;
+}
+
+uint16_t mpack_node_u16(mpack_node_t node) {
+    if (mpack_node_error(node) != mpack_ok)
+        return 0;
+
+    if (node.data->type == mpack_type_uint) {
+        if (node.data->value.u <= MPACK_UINT16_MAX)
+            return (uint16_t)node.data->value.u;
+    } else if (node.data->type == mpack_type_int) {
+        if (node.data->value.i >= 0 && node.data->value.i <= MPACK_UINT16_MAX)
+            return (uint16_t)node.data->value.i;
+    }
+
+    mpack_node_flag_error(node, mpack_error_type);
+    return 0;
+}
+
+int16_t mpack_node_i16(mpack_node_t node) {
+    if (mpack_node_error(node) != mpack_ok)
+        return 0;
+
+    if (node.data->type == mpack_type_uint) {
+        if (node.data->value.u <= MPACK_INT16_MAX)
+            return (int16_t)node.data->value.u;
+    } else if (node.data->type == mpack_type_int) {
+        if (node.data->value.i >= MPACK_INT16_MIN && node.data->value.i <= MPACK_INT16_MAX)
+            return (int16_t)node.data->value.i;
+    }
+
+    mpack_node_flag_error(node, mpack_error_type);
+    return 0;
+}
+
+uint32_t mpack_node_u32(mpack_node_t node) {
+    if (mpack_node_error(node) != mpack_ok)
+        return 0;
+
+    if (node.data->type == mpack_type_uint) {
+        if (node.data->value.u <= MPACK_UINT32_MAX)
+            return (uint32_t)node.data->value.u;
+    } else if (node.data->type == mpack_type_int) {
+        if (node.data->value.i >= 0 && node.data->value.i <= MPACK_UINT32_MAX)
+            return (uint32_t)node.data->value.i;
+    }
+
+    mpack_node_flag_error(node, mpack_error_type);
+    return 0;
+}
+
+int32_t mpack_node_i32(mpack_node_t node) {
+    if (mpack_node_error(node) != mpack_ok)
+        return 0;
+
+    if (node.data->type == mpack_type_uint) {
+        if (node.data->value.u <= MPACK_INT32_MAX)
+            return (int32_t)node.data->value.u;
+    } else if (node.data->type == mpack_type_int) {
+        if (node.data->value.i >= MPACK_INT32_MIN && node.data->value.i <= MPACK_INT32_MAX)
+            return (int32_t)node.data->value.i;
+    }
+
+    mpack_node_flag_error(node, mpack_error_type);
+    return 0;
+}
+
+// Returns the node's value as a uint64_t; non-integers and negative values flag mpack_error_type and yield 0.
+uint64_t mpack_node_u64(mpack_node_t node) {
+    if (mpack_node_error(node) != mpack_ok)
+        return 0;
+
+    const mpack_node_data_t* data = node.data;
+    if (data->type == mpack_type_uint)
+        return data->value.u;
+    if (data->type == mpack_type_int && data->value.i >= 0)
+        return (uint64_t)data->value.i;
+
+    mpack_node_flag_error(node, mpack_error_type);
+    return 0;
+}
+
+// Returns the node's value as an int64_t; non-integers and values above MPACK_INT64_MAX flag mpack_error_type and yield 0.
+int64_t mpack_node_i64(mpack_node_t node) {
+    if (mpack_node_error(node) != mpack_ok)
+        return 0;
+
+    const mpack_node_data_t* data = node.data;
+    if (data->type == mpack_type_int)
+        return data->value.i;
+    if (data->type == mpack_type_uint && data->value.u <= (uint64_t)MPACK_INT64_MAX)
+        return (int64_t)data->value.u;
+
+    mpack_node_flag_error(node, mpack_error_type);
+    return 0;
+}
+
+// Returns the node's value as an unsigned int; out-of-range values flag mpack_error_type and yield 0.
+unsigned int mpack_node_uint(mpack_node_t node) {
+    // sizeof is a constant expression, so with a 4-byte unsigned int only this path runs.
+    if (sizeof(unsigned int) == 4)
+        return (unsigned int)mpack_node_u32(node);
+
+    // Any other width: read 64 bits and range-check against MPACK_UINT_MAX.
+    const uint64_t wide = mpack_node_u64(node);
+    if (wide > MPACK_UINT_MAX) {
+        mpack_node_flag_error(node, mpack_error_type);
+        return 0;
+    }
+    return (unsigned int)wide;
+}
+
+// Returns the node's value as an int; out-of-range values flag mpack_error_type and yield 0.
+int mpack_node_int(mpack_node_t node) {
+    // sizeof is a constant expression, so with a 4-byte int only this path runs.
+    if (sizeof(int) == 4)
+        return (int)mpack_node_i32(node);
+
+    // Any other width: read 64 bits and range-check against MPACK_INT_MIN/MPACK_INT_MAX.
+    const int64_t wide = mpack_node_i64(node);
+    if (wide < MPACK_INT_MIN || wide > MPACK_INT_MAX) {
+        mpack_node_flag_error(node, mpack_error_type);
+        return 0;
+    }
+    return (int)wide;
+}
+
+#if MPACK_FLOAT
+// Returns the node's value as a float, converting from uint, int or double; other types flag mpack_error_type.
+float mpack_node_float(mpack_node_t node) {
+    if (mpack_node_error(node) != mpack_ok)
+        return 0.0f;
+
+    const mpack_node_data_t* data = node.data;
+    if (data->type == mpack_type_float)
+        return data->value.f;
+    if (data->type == mpack_type_uint)
+        return (float)data->value.u;
+    if (data->type == mpack_type_int)
+        return (float)data->value.i;
+    if (data->type == mpack_type_double) {
+        #if MPACK_DOUBLE
+        return (float)data->value.d;
+        #else
+        return mpack_shorten_raw_double_to_float(data->value.d);
+        #endif
+    }
+    mpack_node_flag_error(node, mpack_error_type);
+    return 0.0f;
+}
+#endif
+
+#if MPACK_DOUBLE
+// Returns the node's value as a double, converting from uint, int or float; other types flag mpack_error_type.
+double mpack_node_double(mpack_node_t node) {
+    if (mpack_node_error(node) != mpack_ok)
+        return 0.0;
+
+    switch (node.data->type) {
+        case mpack_type_uint:   return (double)node.data->value.u;
+        case mpack_type_int:    return (double)node.data->value.i;
+        case mpack_type_float:  return (double)node.data->value.f;
+        case mpack_type_double: return node.data->value.d;
+        default: break;
+    }
+
+    mpack_node_flag_error(node, mpack_error_type);
+    return 0.0;
+}
+#endif
+
+#if MPACK_FLOAT
+// Returns a float node's value without conversion; any other type flags mpack_error_type and yields 0.0f.
+float mpack_node_float_strict(mpack_node_t node) {
+    if (mpack_node_error(node) != mpack_ok)
+        return 0.0f;
+    if (node.data->type != mpack_type_float) {
+        mpack_node_flag_error(node, mpack_error_type);
+        return 0.0f;
+    }
+    return node.data->value.f;
+}
+#endif
+
+#if MPACK_DOUBLE
+// Returns a float or double node's value as a double; integer and other types flag mpack_error_type.
+double mpack_node_double_strict(mpack_node_t node) {
+    if (mpack_node_error(node) != mpack_ok)
+        return 0.0;
+
+    if (node.data->type == mpack_type_double)
+        return node.data->value.d;
+    if (node.data->type == mpack_type_float)
+        return (double)node.data->value.f;
+    mpack_node_flag_error(node, mpack_error_type);
+    return 0.0;
+}
+#endif
+
+#if !MPACK_FLOAT
+// Returns the uint32_t stored in a float node's value.f; any other type flags mpack_error_type and yields 0.
+uint32_t mpack_node_raw_float(mpack_node_t node) {
+    if (mpack_node_error(node) != mpack_ok)
+        return 0;
+    if (node.data->type != mpack_type_float) {
+        mpack_node_flag_error(node, mpack_error_type);
+        return 0;
+    }
+    return node.data->value.f;
+}
+#endif
+
+#if !MPACK_DOUBLE
+// Returns the uint64_t stored in a double node's value.d; any other type flags mpack_error_type and yields 0.
+uint64_t mpack_node_raw_double(mpack_node_t node) {
+    if (mpack_node_error(node) != mpack_ok)
+        return 0;
+    if (node.data->type != mpack_type_double) {
+        mpack_node_flag_error(node, mpack_error_type);
+        return 0;
+    }
+    return node.data->value.d;
+}
+#endif
+
+#if MPACK_EXTENSIONS
+// Returns the extension type of an ext node; any other node type flags mpack_error_type and yields 0.
+int8_t mpack_node_exttype(mpack_node_t node) {
+    if (mpack_node_error(node) != mpack_ok)
+        return 0;
+    if (node.data->type != mpack_type_ext) {
+        mpack_node_flag_error(node, mpack_error_type);
+        return 0;
+    }
+    return mpack_node_exttype_unchecked(node);
+}
+#endif
+
+// Returns the length of a str or bin node (or ext, with MPACK_EXTENSIONS); other types flag mpack_error_type.
+uint32_t mpack_node_data_len(mpack_node_t node) {
+    if (mpack_node_error(node) != mpack_ok)
+        return 0;
+
+    const mpack_type_t kind = node.data->type;
+    bool has_data = (kind == mpack_type_str) || (kind == mpack_type_bin);
+    #if MPACK_EXTENSIONS
+    has_data = has_data || (kind == mpack_type_ext);
+    #endif
+    if (has_data)
+        return (uint32_t)node.data->len;
+    mpack_node_flag_error(node, mpack_error_type);
+    return 0;
+}
+
+// Returns the length of a str node; any other type flags mpack_error_type and yields 0.
+size_t mpack_node_strlen(mpack_node_t node) {
+    if (mpack_node_error(node) != mpack_ok)
+        return 0;
+    if (node.data->type != mpack_type_str) {
+        mpack_node_flag_error(node, mpack_error_type);
+        return 0;
+    }
+    return (size_t)node.data->len;
+}
+
+// Returns a pointer to a str node's bytes; any other type flags mpack_error_type and yields NULL.
+// NOTE(review): nothing here NUL-terminates the data - pair with mpack_node_strlen() (TODO confirm).
+const char* mpack_node_str(mpack_node_t node) {
+    if (mpack_node_error(node) != mpack_ok)
+        return NULL;
+    if (node.data->type != mpack_type_str) {
+        mpack_node_flag_error(node, mpack_error_type);
+        return NULL;
+    }
+    return mpack_node_data_unchecked(node);
+}
+
+// Returns the bytes of a str or bin node (or ext, with MPACK_EXTENSIONS); other types flag mpack_error_type.
+const char* mpack_node_data(mpack_node_t node) {
+    if (mpack_node_error(node) != mpack_ok)
+        return NULL;
+
+    const mpack_type_t kind = node.data->type;
+    bool has_data = (kind == mpack_type_str) || (kind == mpack_type_bin);
+    #if MPACK_EXTENSIONS
+    has_data = has_data || (kind == mpack_type_ext);
+    #endif
+    if (has_data)
+        return mpack_node_data_unchecked(node);
+    mpack_node_flag_error(node, mpack_error_type);
+    return NULL;
+}
+
+// Returns a pointer to a bin node's bytes; any other type flags mpack_error_type and yields NULL.
+const char* mpack_node_bin_data(mpack_node_t node) {
+    if (mpack_node_error(node) != mpack_ok)
+        return NULL;
+    if (node.data->type != mpack_type_bin) {
+        mpack_node_flag_error(node, mpack_error_type);
+        return NULL;
+    }
+    return mpack_node_data_unchecked(node);
+}
+
+// Returns the length of a bin node; any other type flags mpack_error_type and yields 0.
+size_t mpack_node_bin_size(mpack_node_t node) {
+    if (mpack_node_error(node) != mpack_ok)
+        return 0;
+    if (node.data->type != mpack_type_bin) {
+        mpack_node_flag_error(node, mpack_error_type);
+        return 0;
+    }
+    return (size_t)node.data->len;
+}
+
+// Returns the element count of an array node; any other type flags mpack_error_type and yields 0.
+size_t mpack_node_array_length(mpack_node_t node) {
+    if (mpack_node_error(node) != mpack_ok)
+        return 0;
+
+    if (node.data->type == mpack_type_array)
+        return (size_t)node.data->len;
+
+    mpack_node_flag_error(node, mpack_error_type);
+    return 0;
+}
+
+// Returns the element at `index` of an array node. A non-array node flags
+// mpack_error_type, an out-of-range index flags mpack_error_data; both
+// failures (and a node already in error) return the tree's nil node.
+mpack_node_t mpack_node_array_at(mpack_node_t node, size_t index) {
+    if (mpack_node_error(node) != mpack_ok)
+        return mpack_tree_nil_node(node.tree);
+
+    if (node.data->type != mpack_type_array)
+        mpack_node_flag_error(node, mpack_error_type);
+    else if (index >= node.data->len)
+        mpack_node_flag_error(node, mpack_error_data);
+    else
+        return mpack_node(node.tree, mpack_node_child(node, index));
+
+    return mpack_tree_nil_node(node.tree);
+}
+
+// Returns the number of key/value pairs in a map node; any other type flags mpack_error_type and yields 0.
+size_t mpack_node_map_count(mpack_node_t node) {
+    if (mpack_node_error(node) != mpack_ok)
+        return 0;
+
+    if (node.data->type == mpack_type_map)
+        return node.data->len;
+
+    mpack_node_flag_error(node, mpack_error_type);
+    return 0;
+}
+
+// Internal map accessor: returns child `index * 2 + offset` of a map node,
+// i.e. the key (offset 0) or value (offset 1) of pair `index`. A non-map node
+// flags mpack_error_type and an out-of-range index flags mpack_error_data;
+// failures return the tree's nil node.
+static mpack_node_t mpack_node_map_at(mpack_node_t node, size_t index, size_t offset) {
+    if (mpack_node_error(node) != mpack_ok)
+        return mpack_tree_nil_node(node.tree);
+
+    if (node.data->type != mpack_type_map)
+        mpack_node_flag_error(node, mpack_error_type);
+    else if (index >= node.data->len)
+        mpack_node_flag_error(node, mpack_error_data);
+    else
+        return mpack_node(node.tree, mpack_node_child(node, index * 2 + offset));
+
+    return mpack_tree_nil_node(node.tree);
+}
+
+// Public accessors for pair `index` of a map node: offset 0 selects the key, offset 1 the value.
+mpack_node_t mpack_node_map_key_at(mpack_node_t node, size_t index) {
+    return mpack_node_map_at(node, index, 0);
+}
+mpack_node_t mpack_node_map_value_at(mpack_node_t node, size_t index) {
+    return mpack_node_map_at(node, index, 1);
+}
+
+#endif
+
+MPACK_SILENCE_WARNINGS_END
diff --git a/vendors/mpack/mpack.h b/vendors/mpack/mpack.h
new file mode 100644
index 0000000..1f2386a
--- /dev/null
+++ b/vendors/mpack/mpack.h
@@ -0,0 +1,8207 @@
+/**
+ * The MIT License (MIT)
+ * 
+ * Copyright (c) 2015-2021 Nicholas Fraser and the MPack authors
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ * 
+ */
+
+/*
+ * This is the MPack 1.1.1 amalgamation package.
+ *
+ * http://github.com/ludocode/mpack
+ */
+
+#ifndef MPACK_H
+#define MPACK_H 1
+
+#define MPACK_AMALGAMATED 1
+#define MPACK_RELEASE_VERSION 1
+
+#if defined(MPACK_HAS_CONFIG) && MPACK_HAS_CONFIG
+#include "mpack-config.h"
+#endif
+
+
+/* mpack/mpack-platform.h */
+
+/**
+ * @file
+ *
+ * Abstracts all platform-specific code from MPack and handles configuration
+ * options.
+ *
+ * This verifies the configuration and sets defaults based on the platform,
+ * contains implementations of standard C functions when libc is not available,
+ * and provides wrappers to all library functions.
+ *
+ * Documentation for configuration options is available here:
+ *
+ *     https://ludocode.github.io/mpack/group__config.html
+ */
+
+#ifndef MPACK_PLATFORM_H
+#define MPACK_PLATFORM_H 1
+
+
+
+/**
+ * @defgroup config Configuration Options
+ *
+ * Defines the MPack configuration options.
+ *
+ * Custom configuration of MPack is not usually necessary. In almost all
+ * cases you can ignore this and use the defaults.
+ *
+ * If you do want to configure MPack, you can define the below options as part
+ * of your build system or project settings. This will override the below
+ * defaults.
+ *
+ * If you'd like to use a file for configuration instead, define
+ * @ref MPACK_HAS_CONFIG to 1 in your build system or project settings.
+ * This will cause MPack to include a file you create called @c mpack-config.h
+ * in which you can define your configuration. This is useful if you need to
+ * include specific headers (such as a custom allocator) in order to configure
+ * MPack to use it.
+ *
+ * @warning The value of all configuration options must be the same in
+ * all translation units of your project, as well as in the mpack source
+ * itself. These configuration options affect the layout of structs, among
+ * other things, which cannot be different in source files that are linked
+ * together.
+ *
+ * @note MPack does not contain defaults for building inside the Linux kernel.
+ * There is a <a href="https://github.com/ludocode/mpack-linux-kernel">
+ * configuration file for the Linux kernel</a> that can be used instead.
+ *
+ * @{
+ */
+
+
+
+/*
+ * Pre-include checks
+ *
+ * These need to come before the user's mpack-config.h because they might be
+ * including headers in it.
+ */
+
+/** @cond */
+#if defined(_MSC_VER) && _MSC_VER < 1800 && !defined(__cplusplus)
+    #error "In Visual Studio 2012 and earlier, MPack must be compiled as C++. Enable the /Tp compiler flag."
+#endif
+// NOTE(review): MPACK_INTERNAL is presumably set only while building MPack's own sources - confirm.
+#if defined(_WIN32) && MPACK_INTERNAL
+    #define _CRT_SECURE_NO_WARNINGS 1
+#endif
+// By convention these ask C++ builds of <stdint.h>/<inttypes.h> to expose the C99 limit, format and constant macros.
+#ifndef __STDC_LIMIT_MACROS
+    #define __STDC_LIMIT_MACROS 1
+#endif
+#ifndef __STDC_FORMAT_MACROS
+    #define __STDC_FORMAT_MACROS 1
+#endif
+#ifndef __STDC_CONSTANT_MACROS
+    #define __STDC_CONSTANT_MACROS 1
+#endif
+/** @endcond */
+
+
+
+/**
+ * @name File Configuration
+ * @{
+ */
+
+/**
+ * @def MPACK_HAS_CONFIG
+ *
+ * Causes MPack to include a file you create called @c mpack-config.h .
+ *
+ * The file is included before MPack sets any defaults for undefined
+ * configuration options. You can use it to configure MPack.
+ *
+ * This is off by default.
+ */
+#if defined(MPACK_HAS_CONFIG)
+    #if MPACK_HAS_CONFIG
+        #include "mpack-config.h"
+    #endif
+#else // MPACK_HAS_CONFIG was not set by the build: default it to off
+    #define MPACK_HAS_CONFIG 0
+#endif
+
+/**
+ * @}
+ */
+
+// this needs to come first since some stuff depends on it
+/** @cond */
+#ifndef MPACK_NO_BUILTINS
+    #define MPACK_NO_BUILTINS 0
+#endif
+/** @endcond */
+
+
+
+/**
+ * @name Features
+ * @{
+ */
+
+/**
+ * @def MPACK_READER
+ *
+ * Enables compilation of the base Tag Reader.
+ */
+#ifndef MPACK_READER
+#define MPACK_READER 1
+#endif
+
+/**
+ * @def MPACK_EXPECT
+ *
+ * Enables compilation of the static Expect API.
+ */
+#ifndef MPACK_EXPECT
+#define MPACK_EXPECT 1
+#endif
+
+/**
+ * @def MPACK_NODE
+ *
+ * Enables compilation of the dynamic Node API.
+ */
+#ifndef MPACK_NODE
+#define MPACK_NODE 1
+#endif
+
+/**
+ * @def MPACK_WRITER
+ *
+ * Enables compilation of the Writer.
+ */
+#ifndef MPACK_WRITER
+#define MPACK_WRITER 1
+#endif
+
+/**
+ * @def MPACK_BUILDER
+ *
+ * Enables compilation of the Builder.
+ *
+ * The Builder API provides additional functions to the Writer for
+ * automatically determining the element count of compound elements so you do
+ * not have to specify them up-front.
+ *
+ * This requires a @c malloc(). It is enabled by default if MPACK_WRITER is
+ * enabled and MPACK_MALLOC is defined.
+ *
+ * @see mpack_build_map()
+ * @see mpack_build_array()
+ * @see mpack_complete_map()
+ * @see mpack_complete_array()
+ */
+// This is defined further below after we've resolved whether we have malloc().
+
+/**
+ * @def MPACK_COMPATIBILITY
+ *
+ * Enables compatibility features for reading and writing older
+ * versions of MessagePack.
+ *
+ * This is disabled by default. When disabled, the behaviour is equivalent to
+ * using the default version, @ref mpack_version_current.
+ *
+ * Enable this if you need to interoperate with applications or data that do
+ * not support the new (v5) MessagePack spec. See the section on v4
+ * compatibility in @ref docs/protocol.md for more information.
+ */
+#ifndef MPACK_COMPATIBILITY
+#define MPACK_COMPATIBILITY 0
+#endif
+
+/**
+ * @def MPACK_EXTENSIONS
+ *
+ * Enables the use of extension types.
+ *
+ * This is disabled by default. Define it to 1 to enable it. If disabled,
+ * functions to read and write extensions will not exist, and any occurrence of
+ * extension types in parsed messages will flag @ref mpack_error_invalid.
+ *
+ * MPack discourages the use of extension types. See the section on extension
+ * types in @ref docs/protocol.md for more information.
+ */
+#ifndef MPACK_EXTENSIONS
+#define MPACK_EXTENSIONS 0
+#endif
+
+/**
+ * @}
+ */
+
+
+
+// workarounds for Doxygen
+#if defined(MPACK_DOXYGEN)
+#if MPACK_DOXYGEN
+// We give these their default values of 0 here even though they are defined to
+// 1 in the doxyfile. Doxygen will show this as the value in the docs, even
+// though it ignores it when parsing the rest of the source. This is what we
+// want, since we want the documentation to show these defaults but still
+// generate documentation for the functions they add when they're on.
+#define MPACK_COMPATIBILITY 0
+#define MPACK_EXTENSIONS 0
+#endif
+#endif
+
+
+
+/**
+ * @name Dependencies
+ * @{
+ */
+
+/**
+ * @def MPACK_CONFORMING
+ *
+ * Enables the inclusion of basic C headers to define standard types and
+ * macros.
+ *
+ * This causes MPack to include headers required for conforming implementations
+ * of C99 even in freestanding, in particular <stddef.h>, <stdint.h>,
+ * <stdbool.h> and <limits.h>. It also includes <inttypes.h>; this is
+ * technically not required for freestanding but MPack needs it to detect
+ * integer limits.
+ *
+ * You can disable this if these headers are unavailable or if they do not
+ * define the standard types and macros (for example inside the Linux kernel.)
+ * If this is disabled, MPack will include no headers and will assume a 32-bit
+ * int. You will probably also want to define @ref MPACK_HAS_CONFIG to 1 and
+ * include your own headers in the config file. You must provide definitions
+ * for standard types such as @c size_t, @c bool, @c int32_t and so on.
+ *
+ * @see <a href="https://en.cppreference.com/w/c/language/conformance">
+ * cppreference.com documentation on Conformance</a>
+ */
+#ifndef MPACK_CONFORMING
+    #define MPACK_CONFORMING 1
+#endif
+
+/**
+ * @def MPACK_STDLIB
+ *
+ * Enables the use of the C stdlib.
+ *
+ * This allows the library to use basic functions like @c memcmp() and @c
+ * strlen(), as well as @c malloc() for debugging and in allocation helpers.
+ *
+ * If this is disabled, allocation helper functions will not be defined, and
+ * MPack will attempt to detect compiler intrinsics for functions like @c
+ * memcmp() (assuming @ref MPACK_NO_BUILTINS is not set.) It will fall back to
+ * its own (slow) implementations if it cannot use builtins. You may want to
+ * define @ref MPACK_MEMCMP and friends if you disable this.
+ *
+ * @see MPACK_MEMCMP
+ * @see MPACK_MEMCPY
+ * @see MPACK_MEMMOVE
+ * @see MPACK_MEMSET
+ * @see MPACK_STRLEN
+ * @see MPACK_MALLOC
+ * @see MPACK_REALLOC
+ * @see MPACK_FREE
+ */
+#ifndef MPACK_STDLIB
+    #if !MPACK_CONFORMING
+        // If we don't even have a proper <limits.h> we assume we won't have
+        // malloc() either.
+        #define MPACK_STDLIB 0
+    #else // MPACK_CONFORMING is on, so default to using the C stdlib
+        #define MPACK_STDLIB 1
+    #endif
+#endif // !defined(MPACK_STDLIB)
+
+/**
+ * @def MPACK_STDIO
+ *
+ * Enables the use of C stdio. This adds helpers for easily
+ * reading/writing C files and makes debugging easier.
+ */
+#ifndef MPACK_STDIO
+    #if !MPACK_STDLIB || defined(__AVR__) // stdio defaults to off without stdlib or when __AVR__ is defined
+        #define MPACK_STDIO 0
+    #else
+        #define MPACK_STDIO 1
+    #endif
+#endif // !defined(MPACK_STDIO)
+
+/**
+ * Whether the 'float' type and floating point operations are supported.
+ *
+ * If @ref MPACK_FLOAT is disabled, floats are read and written as @c uint32_t
+ * instead. This way messages with floats do not result in errors and you can
+ * still perform manual float parsing yourself.
+ */
+#ifndef MPACK_FLOAT
+    #define MPACK_FLOAT 1
+#endif
+
+/**
+ * Whether the 'double' type is supported. This requires support for 'float'.
+ *
+ * If @ref MPACK_DOUBLE is disabled, doubles are read and written as @c
+ * uint64_t instead. This way messages with doubles do not result in errors and
+ * you can still perform manual double parsing yourself.
+ *
+ * If @ref MPACK_FLOAT is enabled but @ref MPACK_DOUBLE is not, doubles can be
+ * read as floats using the shortening conversion functions, e.g. @ref
+ * mpack_expect_float() or @ref mpack_node_float().
+ */
+#ifndef MPACK_DOUBLE
+    #if !MPACK_FLOAT || defined(__AVR__) // double defaults to off without float support or on AVR
+        // AVR supports only float, not double.
+        #define MPACK_DOUBLE 0
+    #else
+        #define MPACK_DOUBLE 1
+    #endif
+#endif // !defined(MPACK_DOUBLE)
+
+/**
+ * @}
+ */
+
+
+
+/**
+ * @name Allocation Functions
+ * @{
+ */
+
+/**
+ * @def MPACK_MALLOC
+ *
+ * Defines the memory allocation function used by MPack. This is used by
+ * helpers for automatically allocating data the correct size, and for
+ * debugging functions. If this macro is undefined, the allocation helpers
+ * will not be compiled.
+ *
+ * Set this to use a custom @c malloc() function. Your function must have the
+ * signature:
+ *
+ * @code
+ * void* malloc(size_t size);
+ * @endcode
+ *
+ * The default is @c malloc() if @ref MPACK_STDLIB is enabled.
+ */
+/**
+ * @def MPACK_FREE
+ *
+ * Defines the memory free function used by MPack. This is used by helpers
+ * for automatically allocating data the correct size. If this macro is
+ * undefined, the allocation helpers will not be compiled.
+ *
+ * Set this to use a custom @c free() function. Your function must have the
+ * signature:
+ *
+ * @code
+ * void free(void* p);
+ * @endcode
+ *
+ * The default is @c free() if @ref MPACK_MALLOC has not been customized and
+ * @ref MPACK_STDLIB is enabled.
+ */
+/**
+ * @def MPACK_REALLOC
+ *
+ * Defines the realloc function used by MPack. It is used by growable
+ * buffers to resize more efficiently.
+ *
+ * The default is @c realloc() if @ref MPACK_MALLOC has not been customized and
+ * @ref MPACK_STDLIB is enabled.
+ *
+ * Set this to use a custom @c realloc() function. Your function must have the
+ * signature:
+ *
+ * @code
+ * void* realloc(void* p, size_t new_size);
+ * @endcode
+ *
+ * This is optional, even when @ref MPACK_MALLOC is used. If @ref MPACK_MALLOC is
+ * set and @ref MPACK_REALLOC is not, @ref MPACK_MALLOC is used with a simple copy
+ * to grow buffers.
+ */
+
+#if defined(MPACK_MALLOC) && !defined(MPACK_FREE)
+    #error "MPACK_MALLOC requires MPACK_FREE."
+#endif
+#if !defined(MPACK_MALLOC) && defined(MPACK_FREE)
+    #error "MPACK_FREE requires MPACK_MALLOC."
+#endif
+
+// These were never configurable in lowercase but we check anyway.
+#ifdef mpack_malloc
+    #error "Define MPACK_MALLOC, not mpack_malloc."
+#endif
+#ifdef mpack_realloc
+    #error "Define MPACK_REALLOC, not mpack_realloc."
+#endif
+#ifdef mpack_free
+    #error "Define MPACK_FREE, not mpack_free."
+#endif
+
+// We don't use calloc() at all.
+#ifdef MPACK_CALLOC
+    #error "Don't define MPACK_CALLOC. MPack does not use calloc()."
+#endif
+#ifdef mpack_calloc
+    #error "Don't define mpack_calloc. MPack does not use calloc()."
+#endif
+
+// Use defaults in stdlib if we have them. Without it we don't use malloc.
+#if defined(MPACK_STDLIB)
+    #if MPACK_STDLIB && !defined(MPACK_MALLOC)
+        #define MPACK_MALLOC malloc
+        #define MPACK_REALLOC realloc
+        #define MPACK_FREE free
+    #endif
+#endif
+
+/**
+ * @}
+ */
+
+
+
+// This needs to be defined after we've decided whether we have malloc().
+#ifndef MPACK_BUILDER
+    #if defined(MPACK_MALLOC) && MPACK_WRITER
+        #define MPACK_BUILDER 1
+    #else
+        #define MPACK_BUILDER 0
+    #endif
+#endif
+
+
+
+/**
+ * @name System Functions
+ * @{
+ */
+
+/**
+ * @def MPACK_MEMCMP
+ *
+ * The function MPack will use to provide @c memcmp().
+ *
+ * Set this to use a custom @c memcmp() function. Your function must have the
+ * signature:
+ *
+ * @code
+ * int memcmp(const void* left, const void* right, size_t count);
+ * @endcode
+ */
+/**
+ * @def MPACK_MEMCPY
+ *
+ * The function MPack will use to provide @c memcpy().
+ *
+ * Set this to use a custom @c memcpy() function. Your function must have the
+ * signature:
+ *
+ * @code
+ * void* memcpy(void* restrict dest, const void* restrict src, size_t count);
+ * @endcode
+ */
+/**
+ * @def MPACK_MEMMOVE
+ *
+ * The function MPack will use to provide @c memmove().
+ *
+ * Set this to use a custom @c memmove() function. Your function must have the
+ * signature:
+ *
+ * @code
+ * void* memmove(void* dest, const void* src, size_t count);
+ * @endcode
+ */
+/**
+ * @def MPACK_MEMSET
+ *
+ * The function MPack will use to provide @c memset().
+ *
+ * Set this to use a custom @c memset() function. Your function must have the
+ * signature:
+ *
+ * @code
+ * void* memset(void* p, int c, size_t count);
+ * @endcode
+ */
+/**
+ * @def MPACK_STRLEN
+ *
+ * The function MPack will use to provide @c strlen().
+ *
+ * Set this to use a custom @c strlen() function. Your function must have the
+ * signature:
+ *
+ * @code
+ * size_t strlen(const char* str);
+ * @endcode
+ */
+
+// These were briefly configurable in lowercase in an unreleased version. Just
+// to make sure no one is doing this, we make sure these aren't already defined.
+#ifdef mpack_memcmp
+    #error "Define MPACK_MEMCMP, not mpack_memcmp."
+#endif
+#ifdef mpack_memcpy
+    #error "Define MPACK_MEMCPY, not mpack_memcpy."
+#endif
+#ifdef mpack_memmove
+    #error "Define MPACK_MEMMOVE, not mpack_memmove."
+#endif
+#ifdef mpack_memset
+    #error "Define MPACK_MEMSET, not mpack_memset."
+#endif
+#ifdef mpack_strlen
+    #error "Define MPACK_STRLEN, not mpack_strlen."
+#endif
+
+// If the standard library is available, we prefer to use its functions.
+#if MPACK_STDLIB
+    #ifndef MPACK_MEMCMP
+        #define MPACK_MEMCMP memcmp
+    #endif
+    #ifndef MPACK_MEMCPY
+        #define MPACK_MEMCPY memcpy
+    #endif
+    #ifndef MPACK_MEMMOVE
+        #define MPACK_MEMMOVE memmove
+    #endif
+    #ifndef MPACK_MEMSET
+        #define MPACK_MEMSET memset
+    #endif
+    #ifndef MPACK_STRLEN
+        #define MPACK_STRLEN strlen
+    #endif
+#endif
+
+#if !MPACK_NO_BUILTINS
+    #ifdef __has_builtin
+        #if !defined(MPACK_MEMCMP) && __has_builtin(__builtin_memcmp)
+            #define MPACK_MEMCMP __builtin_memcmp
+        #endif
+        #if !defined(MPACK_MEMCPY) && __has_builtin(__builtin_memcpy)
+            #define MPACK_MEMCPY __builtin_memcpy
+        #endif
+        #if !defined(MPACK_MEMMOVE) && __has_builtin(__builtin_memmove)
+            #define MPACK_MEMMOVE __builtin_memmove
+        #endif
+        #if !defined(MPACK_MEMSET) && __has_builtin(__builtin_memset)
+            #define MPACK_MEMSET __builtin_memset
+        #endif
+        #if !defined(MPACK_STRLEN) && __has_builtin(__builtin_strlen)
+            #define MPACK_STRLEN __builtin_strlen
+        #endif
+    #elif defined(__GNUC__)
+        #ifndef MPACK_MEMCMP
+            #define MPACK_MEMCMP __builtin_memcmp
+        #endif
+        #ifndef MPACK_MEMCPY
+            #define MPACK_MEMCPY __builtin_memcpy
+        #endif
+        // There's not always a builtin memmove for GCC. If we can't test for
+        // it with __has_builtin above, we don't use it. It's been around for
+        // much longer under clang, but then so has __has_builtin, so we let
+        // the block above handle it.
+        #ifndef MPACK_MEMSET
+            #define MPACK_MEMSET __builtin_memset
+        #endif
+        #ifndef MPACK_STRLEN
+            #define MPACK_STRLEN __builtin_strlen
+        #endif
+    #endif
+#endif
+
+/**
+ * @}
+ */
+
+
+
+/**
+ * @name Debugging Options
+ * @{
+ */
+
+/**
+ * @def MPACK_DEBUG
+ *
+ * Enables debug features. You may want to wrap this around your
+ * own debug preprocs. By default, this is enabled if @c DEBUG or @c _DEBUG
+ * are defined. (@c NDEBUG is not used since it is allowed to have
+ * different values in different translation units.)
+ */
+#if !defined(MPACK_DEBUG)
+    #if defined(DEBUG) || defined(_DEBUG)
+        #define MPACK_DEBUG 1
+    #else
+        #define MPACK_DEBUG 0
+    #endif
+#endif
+
+/**
+ * @def MPACK_STRINGS
+ *
+ * Enables descriptive error and type strings.
+ *
+ * This can be turned off (by defining it to 0) to maximize space savings
+ * on embedded devices. If this is disabled, string functions such as
+ * mpack_error_to_string() and mpack_type_to_string() return an empty string.
+ */
+#ifndef MPACK_STRINGS
+    #ifdef __AVR__
+        #define MPACK_STRINGS 0
+    #else
+        #define MPACK_STRINGS 1
+    #endif
+#endif
+
+/**
+ * Set this to 1 to implement a custom @ref mpack_assert_fail() function.
+ * See the documentation on @ref mpack_assert_fail() for details.
+ *
+ * Asserts are only used when @ref MPACK_DEBUG is enabled, and can be
+ * triggered by bugs in MPack or bugs due to incorrect usage of MPack.
+ */
+#ifndef MPACK_CUSTOM_ASSERT
+#define MPACK_CUSTOM_ASSERT 0
+#endif
+
+/**
+ * @def MPACK_READ_TRACKING
+ *
+ * Enables compound type size tracking for readers. This ensures that the
+ * correct number of elements or bytes are read from a compound type.
+ *
+ * This is enabled by default in debug builds (provided a @c malloc() is
+ * available.)
+ */
+#if !defined(MPACK_READ_TRACKING)
+    #if MPACK_DEBUG && MPACK_READER && defined(MPACK_MALLOC)
+        #define MPACK_READ_TRACKING 1
+    #else
+        #define MPACK_READ_TRACKING 0
+    #endif
+#endif
+#if MPACK_READ_TRACKING && !MPACK_READER
+    #error "MPACK_READ_TRACKING requires MPACK_READER."
+#endif
+
+/**
+ * @def MPACK_WRITE_TRACKING
+ *
+ * Enables compound type size tracking for writers. This ensures that the
+ * correct number of elements or bytes are written in a compound type.
+ *
+ * Note that without write tracking enabled, it is possible for buggy code
+ * to emit invalid MessagePack without flagging an error by writing the wrong
+ * number of elements or bytes in a compound type. With tracking enabled,
+ * MPack will catch such errors and break on the offending line of code.
+ *
+ * This is enabled by default in debug builds (provided a @c malloc() is
+ * available.)
+ */
+#if !defined(MPACK_WRITE_TRACKING)
+    #if MPACK_DEBUG && MPACK_WRITER && defined(MPACK_MALLOC)
+        #define MPACK_WRITE_TRACKING 1
+    #else
+        #define MPACK_WRITE_TRACKING 0
+    #endif
+#endif
+#if MPACK_WRITE_TRACKING && !MPACK_WRITER
+    #error "MPACK_WRITE_TRACKING requires MPACK_WRITER."
+#endif
+
+/**
+ * @}
+ */
+
+
+
+
+/**
+ * @name Miscellaneous Options
+ * @{
+ */
+
+/**
+ * Whether to optimize for size or speed.
+ *
+ * Optimizing for size simplifies some parsing and encoding algorithms
+ * at the expense of speed and saves a few kilobytes of space in the
+ * resulting executable.
+ *
+ * This automatically detects -Os with GCC/Clang. Unfortunately there
+ * doesn't seem to be a macro defined for /Os under MSVC.
+ */
+#ifndef MPACK_OPTIMIZE_FOR_SIZE
+    #ifdef __OPTIMIZE_SIZE__
+        #define MPACK_OPTIMIZE_FOR_SIZE 1
+    #else
+        #define MPACK_OPTIMIZE_FOR_SIZE 0
+    #endif
+#endif
+
+/**
+ * Stack space in bytes to use when initializing a reader or writer
+ * with a stack-allocated buffer.
+ *
+ * @warning Make sure you have sufficient stack space. Some libc use relatively
+ * small stacks even on desktop platforms, e.g. musl.
+ */
+#ifndef MPACK_STACK_SIZE
+#define MPACK_STACK_SIZE 4096
+#endif
+
+/**
+ * Buffer size to use for allocated buffers (such as for a file writer.)
+ *
+ * Starting with a single page and growing as needed seems to
+ * provide the best performance with minimal memory waste.
+ * Increasing this does not improve performance even when writing
+ * huge messages.
+ */
+#ifndef MPACK_BUFFER_SIZE
+#define MPACK_BUFFER_SIZE 4096
+#endif
+
+/**
+ * Minimum size for paged allocations in bytes.
+ *
+ * This is the value used by default for MPACK_NODE_PAGE_SIZE and
+ * MPACK_BUILDER_PAGE_SIZE.
+ */
+#ifndef MPACK_PAGE_SIZE
+#define MPACK_PAGE_SIZE 4096
+#endif
+
+/**
+ * Minimum size of an allocated node page in bytes.
+ *
+ * The children for a given compound element must be contiguous, so
+ * larger pages than this may be allocated as needed. (Safety checks
+ * exist to prevent malicious data from causing too large allocations.)
+ *
+ * See @ref mpack_node_data_t for the size of nodes.
+ *
+ * Using as many nodes as fit in one memory page seems to provide the
+ * best performance, and has very little waste when parsing small
+ * messages.
+ */
+#ifndef MPACK_NODE_PAGE_SIZE
+#define MPACK_NODE_PAGE_SIZE MPACK_PAGE_SIZE
+#endif
+
+/**
+ * Minimum size of an allocated builder page in bytes.
+ *
+ * Builder writes are deferred to the allocated builder buffer which is
+ * composed of a list of buffer pages. This defines the size of those pages.
+ */
+#ifndef MPACK_BUILDER_PAGE_SIZE
+#define MPACK_BUILDER_PAGE_SIZE MPACK_PAGE_SIZE
+#endif
+
+/**
+ * @def MPACK_BUILDER_INTERNAL_STORAGE
+ *
+ * Enables a small amount of internal storage within the writer to avoid some
+ * allocations when using builders.
+ *
+ * This is disabled by default. Enable it to potentially improve performance at
+ * the expense of a larger writer.
+ *
+ * @see MPACK_BUILDER_INTERNAL_STORAGE_SIZE to configure its size.
+ */
+#ifndef MPACK_BUILDER_INTERNAL_STORAGE
+#define MPACK_BUILDER_INTERNAL_STORAGE 0
+#endif
+
+/**
+ * Amount of space reserved inside @ref mpack_writer_t for the Builders. This
+ * can allow small messages to be built with the Builder API without incurring
+ * an allocation.
+ *
+ * Builder metadata is placed in this space in addition to the literal
+ * MessagePack data. It needs to be big enough to be useful, but not so big as
+ * to overflow the stack. If more space is needed, pages are allocated.
+ *
+ * This is only used if MPACK_BUILDER_INTERNAL_STORAGE is enabled.
+ *
+ * @see MPACK_BUILDER_PAGE_SIZE
+ * @see MPACK_BUILDER_INTERNAL_STORAGE
+ *
+ * @warning Writers are typically placed on the stack so make sure you have
+ * sufficient stack space. Some libc use relatively small stacks even on
+ * desktop platforms, e.g. musl.
+ */
+#ifndef MPACK_BUILDER_INTERNAL_STORAGE_SIZE
+#define MPACK_BUILDER_INTERNAL_STORAGE_SIZE 256
+#endif
+
+/**
+ * The initial depth for the node parser. When MPACK_MALLOC is available,
+ * the node parser has no practical depth limit, and it is not recursive
+ * so there is no risk of overflowing the call stack.
+ */
+#ifndef MPACK_NODE_INITIAL_DEPTH
+#define MPACK_NODE_INITIAL_DEPTH 8
+#endif
+
+/**
+ * The maximum depth for the node parser if @ref MPACK_MALLOC is not available.
+ */
+#ifndef MPACK_NODE_MAX_DEPTH_WITHOUT_MALLOC
+#define MPACK_NODE_MAX_DEPTH_WITHOUT_MALLOC 32
+#endif
+
+/**
+ * @def MPACK_NO_BUILTINS
+ *
+ * Whether to disable compiler intrinsics and other built-in functions.
+ *
+ * If this is enabled, MPack won't use `__attribute__`, `__declspec`, any
+ * function starting with `__builtin`, or pretty much anything else that isn't
+ * standard C.
+ */
+#if defined(MPACK_DOXYGEN)
+#if MPACK_DOXYGEN
+    #define MPACK_NO_BUILTINS 0
+#endif
+#endif
+
+/**
+ * @}
+ */
+
+
+
+#if MPACK_DEBUG
+/**
+ * @name Debug Functions
+ * @{
+ */
+/**
+ * Implement this and define @ref MPACK_CUSTOM_ASSERT to use a custom
+ * assertion function.
+ *
+ * This function should not return. If it does, MPack will @c abort().
+ *
+ * If you use C++, make sure you include @c mpack.h where you define
+ * this to get the correct linkage (or define it <code>extern "C"</code>.)
+ *
+ * Asserts are only used when @ref MPACK_DEBUG is enabled, and can be
+ * triggered by bugs in MPack or bugs due to incorrect usage of MPack.
+ */
+void mpack_assert_fail(const char* message);
+/**
+ * @}
+ */
+#endif
+
+
+
+// The rest of this file shouldn't show up in Doxygen docs.
+/** @cond */
+
+
+
+/*
+ * All remaining pseudo-configuration options that have not yet been set must
+ * be defined here in order to support -Wundef.
+ *
+ * These aren't real configuration options; they are implementation details of
+ * MPack.
+ */
+#ifndef MPACK_CUSTOM_BREAK
+#define MPACK_CUSTOM_BREAK 0
+#endif
+#ifndef MPACK_EMIT_INLINE_DEFS
+#define MPACK_EMIT_INLINE_DEFS 0
+#endif
+#ifndef MPACK_AMALGAMATED
+#define MPACK_AMALGAMATED 0
+#endif
+#ifndef MPACK_RELEASE_VERSION
+#define MPACK_RELEASE_VERSION 0
+#endif
+#ifndef MPACK_INTERNAL
+#define MPACK_INTERNAL 0
+#endif
+
+
+
+/* System headers (based on configuration) */
+
+#if MPACK_CONFORMING
+    #include <stddef.h>
+    #include <stdint.h>
+    #include <stdbool.h>
+    #include <inttypes.h>
+    #include <limits.h>
+#endif
+
+#if MPACK_STDLIB
+    #include <string.h>
+    #include <stdlib.h>
+#endif
+
+#if MPACK_STDIO
+    #include <stdio.h>
+    #include <errno.h>
+    #if MPACK_DEBUG
+        #include <stdarg.h>
+    #endif
+#endif
+
+
+
+/*
+ * Integer Constants and Limits
+ */
+
+#if MPACK_CONFORMING
+    #define MPACK_INT64_C INT64_C
+    #define MPACK_UINT64_C UINT64_C
+
+    #define MPACK_INT8_MIN INT8_MIN
+    #define MPACK_INT16_MIN INT16_MIN
+    #define MPACK_INT32_MIN INT32_MIN
+    #define MPACK_INT64_MIN INT64_MIN
+    #define MPACK_INT_MIN INT_MIN
+
+    #define MPACK_INT8_MAX INT8_MAX
+    #define MPACK_INT16_MAX INT16_MAX
+    #define MPACK_INT32_MAX INT32_MAX
+    #define MPACK_INT64_MAX INT64_MAX
+    #define MPACK_INT_MAX INT_MAX
+
+    #define MPACK_UINT8_MAX UINT8_MAX
+    #define MPACK_UINT16_MAX UINT16_MAX
+    #define MPACK_UINT32_MAX UINT32_MAX
+    #define MPACK_UINT64_MAX UINT64_MAX
+    #define MPACK_UINT_MAX UINT_MAX
+#else
+    // For a non-conforming implementation we assume int is 32 bits.
+
+    #define MPACK_INT64_C(x) ((int64_t)(x##LL))
+    #define MPACK_UINT64_C(x) ((uint64_t)(x##LLU))
+
+    #define MPACK_INT8_MIN ((int8_t)(0x80))
+    #define MPACK_INT16_MIN ((int16_t)(0x8000))
+    #define MPACK_INT32_MIN ((int32_t)(0x80000000))
+    #define MPACK_INT64_MIN MPACK_INT64_C(0x8000000000000000)
+    #define MPACK_INT_MIN MPACK_INT32_MIN
+
+    #define MPACK_INT8_MAX ((int8_t)(0x7f))
+    #define MPACK_INT16_MAX ((int16_t)(0x7fff))
+    #define MPACK_INT32_MAX ((int32_t)(0x7fffffff))
+    #define MPACK_INT64_MAX MPACK_INT64_C(0x7fffffffffffffff)
+    #define MPACK_INT_MAX MPACK_INT32_MAX
+
+    #define MPACK_UINT8_MAX ((uint8_t)(0xffu))
+    #define MPACK_UINT16_MAX ((uint16_t)(0xffffu))
+    #define MPACK_UINT32_MAX ((uint32_t)(0xffffffffu))
+    #define MPACK_UINT64_MAX MPACK_UINT64_C(0xffffffffffffffff)
+    #define MPACK_UINT_MAX MPACK_UINT32_MAX
+#endif
+
+
+
+/*
+ * Floating point support
+ */
+
+#if MPACK_DOUBLE && !MPACK_FLOAT
+    #error "MPACK_DOUBLE requires MPACK_FLOAT."
+#endif
+
+// If we don't have support for float or double, we poison the identifiers to
+// make sure we don't define anything related to them.
+#if MPACK_INTERNAL
+    #ifdef __GNUC__
+        #if !MPACK_FLOAT
+            #pragma GCC poison float
+        #endif
+        #if !MPACK_DOUBLE
+            #pragma GCC poison double
+        #endif
+    #endif
+#endif
+
+
+
+/*
+ * extern C
+ */
+
+#ifdef __cplusplus
+    #define MPACK_EXTERN_C_BEGIN extern "C" {
+    #define MPACK_EXTERN_C_END   }
+#else
+    #define MPACK_EXTERN_C_BEGIN /*nothing*/
+    #define MPACK_EXTERN_C_END   /*nothing*/
+#endif
+
+
+
+/*
+ * Warnings
+ */
+
+#if defined(__GNUC__)
+    // Diagnostic push is not supported before GCC 4.6.
+    #if defined(__clang__) || __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)
+        #define MPACK_SILENCE_WARNINGS_PUSH _Pragma ("GCC diagnostic push")
+        #define MPACK_SILENCE_WARNINGS_POP _Pragma ("GCC diagnostic pop")
+    #endif
+#elif defined(_MSC_VER)
+    // To support VS2017 and earlier we need to use __pragma and not _Pragma
+    #define MPACK_SILENCE_WARNINGS_PUSH __pragma(warning(push))
+    #define MPACK_SILENCE_WARNINGS_POP __pragma(warning(pop))
+#endif
+
+#if defined(_MSC_VER)
+    // These are a bunch of mostly useless warnings emitted under MSVC /W4,
+    // some as a result of the expansion of macros.
+    #define MPACK_SILENCE_WARNINGS_MSVC_W4 \
+            __pragma(warning(disable:4996)) /* _CRT_SECURE_NO_WARNINGS */ \
+            __pragma(warning(disable:4127)) /* comparison is constant */ \
+            __pragma(warning(disable:4702)) /* unreachable code */ \
+            __pragma(warning(disable:4310)) /* cast truncates constant value */
+#else
+    #define MPACK_SILENCE_WARNINGS_MSVC_W4 /*nothing*/
+#endif
+
+/* GCC versions before 5.1 warn about defining a C99 non-static inline function
+ * before declaring it (see issue #20). */
+#if defined(__GNUC__) && !defined(__clang__)
+    #if __GNUC__ < 5 || (__GNUC__ == 5 && __GNUC_MINOR__ < 1)
+        #ifdef __cplusplus
+            #define MPACK_SILENCE_WARNINGS_MISSING_PROTOTYPES \
+                _Pragma ("GCC diagnostic ignored \"-Wmissing-declarations\"")
+        #else
+            #define MPACK_SILENCE_WARNINGS_MISSING_PROTOTYPES \
+                _Pragma ("GCC diagnostic ignored \"-Wmissing-prototypes\"")
+        #endif
+    #endif
+#endif
+#ifndef MPACK_SILENCE_WARNINGS_MISSING_PROTOTYPES
+    #define MPACK_SILENCE_WARNINGS_MISSING_PROTOTYPES /*nothing*/
+#endif
+
+/* GCC versions before 4.8 warn about shadowing a function with a variable that
+ * isn't a function or function pointer (like "index"). */
+#if defined(__GNUC__) && !defined(__clang__)
+    #if __GNUC__ == 4 && __GNUC_MINOR__ < 8
+        #define MPACK_SILENCE_WARNINGS_SHADOW \
+            _Pragma ("GCC diagnostic ignored \"-Wshadow\"")
+    #endif
+#endif
+#ifndef MPACK_SILENCE_WARNINGS_SHADOW
+    #define MPACK_SILENCE_WARNINGS_SHADOW /*nothing*/
+#endif
+
+// On platforms with small size_t (e.g. AVR) we get type limits warnings where
+// we compare a size_t to e.g. MPACK_UINT32_MAX.
+#ifdef __AVR__
+    #define MPACK_SILENCE_WARNINGS_TYPE_LIMITS \
+        _Pragma ("GCC diagnostic ignored \"-Wtype-limits\"")
+#else
+    #define MPACK_SILENCE_WARNINGS_TYPE_LIMITS /*nothing*/
+#endif
+
+// MPack uses declarations after statements. This silences warnings about it
+// (e.g. when using MPack in a Linux kernel module.)
+#if defined(__GNUC__) && !defined(__cplusplus)
+    #define MPACK_SILENCE_WARNINGS_DECLARATION_AFTER_STATEMENT \
+        _Pragma ("GCC diagnostic ignored \"-Wdeclaration-after-statement\"")
+#else
+    #define MPACK_SILENCE_WARNINGS_DECLARATION_AFTER_STATEMENT /*nothing*/
+#endif
+
+#ifdef MPACK_SILENCE_WARNINGS_PUSH
+    // We only silence warnings if push/pop is supported, that way we aren't
+    // silencing warnings in code that uses MPack. If your compiler doesn't
+    // support push/pop silencing of warnings, you'll have to turn off
+    // conflicting warnings manually.
+
+    #define MPACK_SILENCE_WARNINGS_BEGIN \
+        MPACK_SILENCE_WARNINGS_PUSH \
+        MPACK_SILENCE_WARNINGS_MSVC_W4 \
+        MPACK_SILENCE_WARNINGS_MISSING_PROTOTYPES \
+        MPACK_SILENCE_WARNINGS_SHADOW \
+        MPACK_SILENCE_WARNINGS_TYPE_LIMITS \
+        MPACK_SILENCE_WARNINGS_DECLARATION_AFTER_STATEMENT
+
+    #define MPACK_SILENCE_WARNINGS_END \
+        MPACK_SILENCE_WARNINGS_POP
+#else
+    #define MPACK_SILENCE_WARNINGS_BEGIN /*nothing*/
+    #define MPACK_SILENCE_WARNINGS_END /*nothing*/
+#endif
+
+MPACK_SILENCE_WARNINGS_BEGIN
+MPACK_EXTERN_C_BEGIN
+
+
+
+/* Miscellaneous helper macros */
+
+#define MPACK_UNUSED(var) ((void)(var))
+
+#define MPACK_STRINGIFY_IMPL(arg) #arg
+#define MPACK_STRINGIFY(arg) MPACK_STRINGIFY_IMPL(arg)
+
+// This is a workaround for MSVC's incorrect expansion of __VA_ARGS__. It
+// treats __VA_ARGS__ as a single preprocessor token when passed in the
+// argument list of another macro unless we use an outer wrapper to expand it
+// lexically first. (For safety/consistency we use this in all variadic macros
+// that don't ignore the variadic arguments regardless of whether __VA_ARGS__
+// is passed to another macro.)
+//     https://stackoverflow.com/a/32400131
+#define MPACK_EXPAND(x) x
+
+// Extracts the first argument of a variadic macro list, where there might only
+// be one argument.
+#define MPACK_EXTRACT_ARG0_IMPL(first, ...) first
+#define MPACK_EXTRACT_ARG0(...) MPACK_EXPAND(MPACK_EXTRACT_ARG0_IMPL( __VA_ARGS__ , ignored))
+
+// Stringifies the first argument of a variadic macro list, where there might
+// only be one argument.
+#define MPACK_STRINGIFY_ARG0_impl(first, ...) #first
+#define MPACK_STRINGIFY_ARG0(...) MPACK_EXPAND(MPACK_STRINGIFY_ARG0_impl( __VA_ARGS__ , ignored))
+
+
+
+/*
+ * Definition of inline macros.
+ *
+ * MPack does not use static inline in header files; only one non-inline definition
+ * of each function should exist in the final build. This can reduce the binary size
+ * in cases where the compiler cannot or chooses not to inline a function.
+ * The addresses of functions should also compare equal across translation units
+ * regardless of whether they are declared inline.
+ *
+ * The above requirements mean that the declaration and definition of non-trivial
+ * inline functions must be separated so that the definitions will only
+ * appear when necessary. In addition, three different linkage models need
+ * to be supported:
+ *
+ *  - The C99 model, where a standalone function is emitted only if there is any
+ *    `extern inline` or non-`inline` declaration (including the definition itself)
+ *
+ *  - The GNU model, where an `inline` definition emits a standalone function and an
+ *    `extern inline` definition does not, regardless of other declarations
+ *
+ *  - The C++ model, where `inline` emits a standalone function with special
+ *    (COMDAT) linkage
+ *
+ * The macros below wrap up everything above. All inline functions defined in header
+ * files have a single non-inline definition emitted in the compilation of
+ * mpack-platform.c. All inline declarations and definitions use the same MPACK_INLINE
+ * specification to simplify the rules on when standalone functions are emitted.
+ * Inline functions in source files are defined MPACK_STATIC_INLINE.
+ *
+ * Additional reading:
+ *     http://www.greenend.org.uk/rjk/tech/inline.html
+ */
+
+#if defined(__cplusplus)
+    // C++ rules
+    // The linker will need COMDAT support to link C++ object files,
+    // so we don't need to worry about emitting definitions from C++
+    // translation units. If mpack-platform.c (or the amalgamation)
+    // is compiled as C, its definition will be used, otherwise a
+    // C++ definition will be used, and no other C files will emit
+    // a definition.
+    #define MPACK_INLINE inline
+
+#elif defined(_MSC_VER)
+    // MSVC 2013 always uses COMDAT linkage, but it doesn't treat 'inline' as a
+    // keyword in C99 mode. (This appears to be fixed in a later version of
+    // MSVC but we don't bother detecting it.)
+    #define MPACK_INLINE __inline
+    #define MPACK_STATIC_INLINE static __inline
+
+#elif defined(__GNUC__) && (defined(__GNUC_GNU_INLINE__) || \
+        (!defined(__GNUC_STDC_INLINE__) && !defined(__GNUC_GNU_INLINE__)))
+    // GNU rules
+    #if MPACK_EMIT_INLINE_DEFS
+        #define MPACK_INLINE inline
+    #else
+        #define MPACK_INLINE extern inline
+    #endif
+
+#elif defined(__TINYC__)
+    // tcc ignores the inline keyword, so we have to use static inline. We
+    // issue a warning to make sure you are aware. You can define the below
+    // macro to disable the warning. Hopefully this will be fixed soon:
+    //     https://lists.nongnu.org/archive/html/tinycc-devel/2019-06/msg00000.html
+    #ifndef MPACK_DISABLE_TINYC_INLINE_WARNING
+        #warning "Single-definition inline is not supported by tcc. All inlines will be static. Define MPACK_DISABLE_TINYC_INLINE_WARNING to disable this warning."
+    #endif
+    #define MPACK_INLINE static inline
+
+#else
+    // C99 rules
+    #if MPACK_EMIT_INLINE_DEFS
+        #define MPACK_INLINE extern inline
+    #else
+        #define MPACK_INLINE inline
+    #endif
+#endif
+
+#ifndef MPACK_STATIC_INLINE
+#define MPACK_STATIC_INLINE static inline
+#endif
+
+#ifdef MPACK_OPTIMIZE_FOR_SPEED
+    #error "You should define MPACK_OPTIMIZE_FOR_SIZE, not MPACK_OPTIMIZE_FOR_SPEED."
+#endif
+
+
+
+/*
+ * Prevent inlining
+ *
+ * When a function is only used once, it is almost always inlined
+ * automatically. This can cause poor instruction cache usage because a
+ * function that should rarely be called (such as buffer exhaustion handling)
+ * will get inlined into the middle of a hot code path.
+ */
+
+#if !MPACK_NO_BUILTINS
+    #if defined(_MSC_VER)
+        #define MPACK_NOINLINE __declspec(noinline)
+    #elif defined(__GNUC__) || defined(__clang__)
+        #define MPACK_NOINLINE __attribute__((__noinline__))
+    #endif
+#endif
+#ifndef MPACK_NOINLINE
+    #define MPACK_NOINLINE /* nothing */
+#endif
+
+
+
+/* restrict */
+
+// We prefer the builtins even though e.g. MSVC's __restrict may not have
+// exactly the same behaviour as the proper C99 restrict keyword because the
+// builtins work in C++, so using the same keyword in both C and C++ prevents
+// any incompatibilities when using MPack compiled as C in C++ code.
+#if !MPACK_NO_BUILTINS
+    #if defined(__GNUC__)
+        #define MPACK_RESTRICT __restrict__
+    #elif defined(_MSC_VER)
+        #define MPACK_RESTRICT __restrict
+    #endif
+#endif
+
+#ifndef MPACK_RESTRICT
+    #ifdef __cplusplus
+        #define MPACK_RESTRICT /* nothing, unavailable in C++ */
+    #endif
+#endif
+
+#ifndef MPACK_RESTRICT
+    #ifdef _MSC_VER
+        // MSVC 2015 apparently doesn't properly support the restrict keyword
+        // in C. We're using builtins above which do work on 2015, but when
+        // MPACK_NO_BUILTINS is enabled we can't use it.
+        #if _MSC_VER < 1910
+            #define MPACK_RESTRICT /*nothing*/
+        #endif
+    #endif
+#endif
+
+#ifndef MPACK_RESTRICT
+    #define MPACK_RESTRICT restrict /* required in C99 */
+#endif
+
+
+
+/* Some compiler-specific keywords and builtins */
+
+#if !MPACK_NO_BUILTINS
+    #if defined(__GNUC__) || defined(__clang__)
+        #define MPACK_UNREACHABLE __builtin_unreachable()
+        #define MPACK_NORETURN(fn) fn __attribute__((__noreturn__))
+    #elif defined(_MSC_VER)
+        #define MPACK_UNREACHABLE __assume(0)
+        #define MPACK_NORETURN(fn) __declspec(noreturn) fn
+    #endif
+#endif
+
+#ifndef MPACK_UNREACHABLE
+#define MPACK_UNREACHABLE ((void)0)
+#endif
+#ifndef MPACK_NORETURN
+#define MPACK_NORETURN(fn) fn
+#endif
+
+
+
+/*
+ * Likely/unlikely
+ *
+ * These should only really be used when a branch is taken (or not taken) less
+ * than 1/1000th of the time. Buffer flush checks when writing very small
+ * elements are a good example.
+ */
+
+#if !MPACK_NO_BUILTINS
+    #if defined(__GNUC__) || defined(__clang__)
+        #ifndef MPACK_LIKELY
+            #define MPACK_LIKELY(x) __builtin_expect((x),1)
+        #endif
+        #ifndef MPACK_UNLIKELY
+            #define MPACK_UNLIKELY(x) __builtin_expect((x),0)
+        #endif
+    #endif
+#endif
+
+#ifndef MPACK_LIKELY
+    #define MPACK_LIKELY(x) (x)
+#endif
+#ifndef MPACK_UNLIKELY
+    #define MPACK_UNLIKELY(x) (x)
+#endif
+
+
+
+/* alignof */
+
+#ifndef MPACK_ALIGNOF
+    #if defined(__STDC_VERSION__)
+        #if __STDC_VERSION__ >= 201112L
+            #define MPACK_ALIGNOF(T) (_Alignof(T))
+        #endif
+    #endif
+#endif
+
+#ifndef MPACK_ALIGNOF
+    #if defined(__cplusplus)
+        #if __cplusplus >= 201103L
+            #define MPACK_ALIGNOF(T) (alignof(T))
+        #endif
+    #endif
+#endif
+
+#ifndef MPACK_ALIGNOF
+    #if defined(__GNUC__) && !MPACK_NO_BUILTINS /* test the value, as every other MPACK_NO_BUILTINS check does; defining it to 0 must not disable __alignof__ */
+        #if defined(__clang__) || __GNUC__ >= 4
+            #define MPACK_ALIGNOF(T) (__alignof__(T))
+        #endif
+    #endif
+#endif
+
+#ifndef MPACK_ALIGNOF
+    #ifdef _MSC_VER
+        #define MPACK_ALIGNOF(T) __alignof(T)
+    #endif
+#endif
+
+// MPACK_ALIGNOF may not exist, in which case a workaround is used.
+
+
+
+/* Static assert */
+
+#ifndef MPACK_STATIC_ASSERT
+    #if defined(__cplusplus)
+        #if __cplusplus >= 201103L
+            #define MPACK_STATIC_ASSERT static_assert
+        #endif
+    #elif defined(__STDC_VERSION__)
+        #if __STDC_VERSION__ >= 201112L
+            #define MPACK_STATIC_ASSERT _Static_assert
+        #endif
+    #endif
+#endif
+
+#if !MPACK_NO_BUILTINS
+    #ifndef MPACK_STATIC_ASSERT
+        #if defined(__has_feature)
+            #if __has_feature(cxx_static_assert)
+                #define MPACK_STATIC_ASSERT static_assert
+            #elif __has_feature(c_static_assert)
+                #define MPACK_STATIC_ASSERT _Static_assert
+            #endif
+        #endif
+    #endif
+
+    #ifndef MPACK_STATIC_ASSERT
+        #if defined(__GNUC__)
+            /* Diagnostic push is not supported before GCC 4.6. */
+            #if defined(__clang__) || __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)
+                #ifndef __cplusplus
+                    #if defined(__clang__) || __GNUC__ >= 5
+                    #define MPACK_IGNORE_PEDANTIC "GCC diagnostic ignored \"-Wpedantic\""
+                    #else
+                    #define MPACK_IGNORE_PEDANTIC "GCC diagnostic ignored \"-pedantic\""
+                    #endif
+                    #define MPACK_STATIC_ASSERT(expr, str) do { \
+                        _Pragma ("GCC diagnostic push") \
+                        _Pragma (MPACK_IGNORE_PEDANTIC) \
+                        _Pragma ("GCC diagnostic ignored \"-Wc++-compat\"") \
+                        _Static_assert(expr, str); \
+                        _Pragma ("GCC diagnostic pop") \
+                    } while (0)
+                #endif
+            #endif
+        #endif
+    #endif
+
+    #ifndef MPACK_STATIC_ASSERT
+        #ifdef _MSC_VER
+            #if _MSC_VER >= 1600
+                #define MPACK_STATIC_ASSERT static_assert
+            #endif
+        #endif
+    #endif
+#endif
+
+#ifndef MPACK_STATIC_ASSERT
+    #define MPACK_STATIC_ASSERT(expr, str) (MPACK_UNUSED(sizeof(char[1 - 2*!(expr)])))
+#endif
+
+
+
+/* _Generic */
+
+/*
+ * MPACK_HAS_GENERIC is set to 1 when _Generic is believed to be usable and
+ * to 0 otherwise. A value defined before this point is left untouched. The
+ * three stages below run in order; each one only acts if the macro is still
+ * undefined.
+ */
+
+/* Stage 1: Clang feature test. */
+#ifndef MPACK_HAS_GENERIC
+    #if defined(__clang__) && defined(__has_feature)
+        // With Clang we can test for _Generic support directly
+        // and ignore C/C++ version
+        #if __has_feature(c_generic_selections)
+            #define MPACK_HAS_GENERIC 1
+        #else
+            #define MPACK_HAS_GENERIC 0
+        #endif
+    #endif
+#endif
+
+/* Stage 2: rely on the advertised C standard version (C11 or later). */
+#ifndef MPACK_HAS_GENERIC
+    #if defined(__STDC_VERSION__)
+        #if __STDC_VERSION__ >= 201112L
+            #if defined(__GNUC__) && !defined(__clang__)
+                // GCC does not have full C11 support in GCC 4.7 and 4.8
+                #if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 9)
+                    #define MPACK_HAS_GENERIC 1
+                #endif
+            #else
+                // We hope other compilers aren't lying about C11/_Generic support
+                #define MPACK_HAS_GENERIC 1
+            #endif
+        #endif
+    #endif
+#endif
+
+/* Stage 3: nothing above matched, so assume _Generic is unavailable. */
+#ifndef MPACK_HAS_GENERIC
+    #define MPACK_HAS_GENERIC 0
+#endif
+
+
+
+/*
+ * Finite Math
+ *
+ * -ffinite-math-only, included in -ffast-math, breaks functions that
+ * check for non-finite real values such as isnan() and isinf().
+ *
+ * We should use this to trap errors when reading data that contains
+ * non-finite reals. This isn't currently implemented.
+ */
+
+/* MPACK_FINITE_MATH: 1 when the compiler reports finite-only math through
+ * __FINITE_MATH_ONLY__, 0 otherwise. A predefined value is kept as-is. */
+#ifndef MPACK_FINITE_MATH
+    #if defined(__FINITE_MATH_ONLY__) && __FINITE_MATH_ONLY__
+        #define MPACK_FINITE_MATH 1
+    #else
+        #define MPACK_FINITE_MATH 0
+    #endif
+#endif
+
+
+
+/*
+ * Endianness checks
+ *
+ * These define MPACK_NHSWAP*() which swap network<->host byte
+ * order when needed.
+ *
+ * We leave them undefined if we can't determine the endianness
+ * at compile-time, in which case we fall back to bit-shifts.
+ *
+ * See the notes in mpack-common.h.
+ */
+
+// Prefer the compiler-provided byte order macros when all three are defined.
+#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && defined(__ORDER_BIG_ENDIAN__)
+    #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+        // Big-endian host: host order already equals network order, so the
+        // swap macros are identity operations.
+        #define MPACK_NHSWAP16(x) (x)
+        #define MPACK_NHSWAP32(x) (x)
+        #define MPACK_NHSWAP64(x) (x)
+    #elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+
+        // Little-endian host: a real byte swap is needed. Builtins are used
+        // only when MPACK_NO_BUILTINS is not set; any macro left undefined
+        // here stays undefined (see the header comment above).
+        #if !MPACK_NO_BUILTINS
+            #if defined(__clang__)
+                #ifdef __has_builtin
+                    // Unlike the GCC builtins, the bswap builtins in Clang
+                    // significantly improve ARM performance.
+                    #if __has_builtin(__builtin_bswap16)
+                        #define MPACK_NHSWAP16(x) __builtin_bswap16(x)
+                    #endif
+                    #if __has_builtin(__builtin_bswap32)
+                        #define MPACK_NHSWAP32(x) __builtin_bswap32(x)
+                    #endif
+                    #if __has_builtin(__builtin_bswap64)
+                        #define MPACK_NHSWAP64(x) __builtin_bswap64(x)
+                    #endif
+                #endif
+
+            #elif defined(__GNUC__)
+
+                // The GCC bswap builtins are apparently poorly optimized on older
+                // versions of GCC, so we set a minimum version here just in case.
+                //     http://hardwarebug.org/2010/01/14/beware-the-builtins/
+
+                // Only the 64-bit swap is defined for GCC (4.5 or later).
+                #if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)
+                    #define MPACK_NHSWAP64(x) __builtin_bswap64(x)
+                #endif
+
+                // __builtin_bswap16() was not implemented on all platforms
+                // until GCC 4.8.0:
+                //     https://gcc.gnu.org/bugzilla/show_bug.cgi?id=52624
+                //
+                // The 16- and 32-bit versions in GCC significantly reduce performance
+                // on ARM with little effect on code size so we don't use them.
+
+            #endif
+        #endif
+    #endif
+    // Any other __BYTE_ORDER__ value defines none of the swap macros.
+
+#elif defined(_MSC_VER) && defined(_WIN32) && MPACK_STDLIB && !MPACK_NO_BUILTINS
+
+    // On Windows, we assume x86 and x86_64 are always little-endian.
+    // We make no assumptions about ARM even though all current
+    // Windows Phone devices are little-endian in case Microsoft's
+    // compiler is ever used with a big-endian ARM device.
+
+    // These are functions in <stdlib.h> so we depend on MPACK_STDLIB.
+    // It's not clear if these are actually faster than just doing the
+    // swap manually; maybe we shouldn't bother with this.
+
+    #if defined(_M_IX86) || defined(_M_X64) || defined(_M_AMD64)
+        #define MPACK_NHSWAP16(x) _byteswap_ushort(x)
+        #define MPACK_NHSWAP32(x) _byteswap_ulong(x)
+        #define MPACK_NHSWAP64(x) _byteswap_uint64(x)
+    #endif
+
+#endif
+
+#if defined(__FLOAT_WORD_ORDER__) && defined(__BYTE_ORDER__)
+
+    // We check where possible that the float byte order matches the
+    // integer byte order. This is extremely unlikely to fail, but
+    // we check anyway just in case.
+    //
+    // (The static assert is placed in float/double encoders instead
+    // of here because our static assert fallback doesn't work at
+    // file scope)
+
+    #define MPACK_CHECK_FLOAT_ORDER() \
+        MPACK_STATIC_ASSERT(__FLOAT_WORD_ORDER__ == __BYTE_ORDER__, \
+            "float byte order does not match int byte order! float/double " \
+            "encoding is not properly implemented on this platform.")
+
+#endif
+
+// When the compiler does not expose both order macros the check cannot be
+// made, so the macro expands to nothing. This is kept as a separate #ifndef
+// so that an existing definition is never replaced.
+#ifndef MPACK_CHECK_FLOAT_ORDER
+    #define MPACK_CHECK_FLOAT_ORDER() /* nothing */
+#endif
+
+
+/*
+ * Here we define mpack_assert() and mpack_break(). They both work like a normal
+ * assertion function in debug mode, causing a trap or abort. However, on some platforms
+ * you can safely resume execution from mpack_break(), whereas mpack_assert() is
+ * always fatal.
+ *
+ * In release mode, mpack_assert() is converted to an assurance to the compiler
+ * that the expression cannot be false (via e.g. __assume() or __builtin_unreachable())
+ * to improve optimization where supported. There is thus no point in "safely" handling
+ * the case of this being false. Writing mpack_assert(0) rarely makes sense (except
+ * possibly as a default handler in a switch) since the compiler will throw away any
+ * code after it. If at any time an mpack_assert() is not true, the behaviour is
+ * undefined. This also means the expression is evaluated even in release.
+ *
+ * mpack_break() on the other hand is compiled to nothing in release. It is
+ * used in situations where we want to highlight a programming error as early as
+ * possible (in the debugger), but we still handle the situation safely if it
+ * happens in release to avoid producing incorrect results (such as in
+ * MPACK_WRITE_TRACKING.) It does not take an expression to test because it
+ * belongs in a safe-handling block after its failing condition has been tested.
+ *
+ * If stdio is available, we can add a format string describing the error, and
+ * on some compilers we can declare it noreturn to get correct results from static
+ * analysis tools. Note that the format string and arguments are not evaluated unless
+ * the assertion is hit.
+ *
+ * Note that any arguments to mpack_assert() beyond the first are only evaluated
+ * if the expression is false (and are never evaluated in release.)
+ *
+ * mpack_assert_fail() and mpack_break_hit() are defined separately
+ * because assert is noreturn and break isn't. This distinction is very
+ * important for static analysis tools to give correct results.
+ */
+
+#if MPACK_DEBUG
+    // Debug build: a failed assertion reports its location and expression
+    // through one of the noreturn failure functions declared here.
+    MPACK_NORETURN(void mpack_assert_fail_wrapper(const char* message));
+    #if MPACK_STDIO
+        // With stdio, the caller's format string and arguments are appended
+        // to the location/expression message.
+        MPACK_NORETURN(void mpack_assert_fail_format(const char* format, ...));
+        #define mpack_assert_fail_at(line, file, exprstr, format, ...) \
+                MPACK_EXPAND(mpack_assert_fail_format("mpack assertion failed at " file ":" #line "\n%s\n" format, exprstr, __VA_ARGS__))
+    #else
+        // Without stdio, the format string and its arguments are dropped and
+        // only a single concatenated string literal is reported.
+        #define mpack_assert_fail_at(line, file, exprstr, format, ...) \
+                mpack_assert_fail_wrapper("mpack assertion failed at " file ":" #line "\n" exprstr "\n")
+    #endif
+
+    // Extra expansion level so that __LINE__ is expanded to its number before
+    // mpack_assert_fail_at() stringifies it with #line. The expr argument
+    // (the repeated first variadic argument) is discarded here.
+    #define mpack_assert_fail_pos(line, file, exprstr, expr, ...) \
+            MPACK_EXPAND(mpack_assert_fail_at(line, file, exprstr, __VA_ARGS__))
+
+    // This contains a workaround to the pedantic C99 requirement of having at
+    // least one argument to a variadic macro. The first argument is the
+    // boolean expression, the optional second argument (if provided) must be a
+    // literal format string, and any additional arguments are the format
+    // argument list.
+    //
+    // Unfortunately this means macros are expanded in the expression before it
+    // gets stringified. I haven't found a workaround to this.
+    //
+    // This adds two unused arguments to the format argument list when a
+    // format string is provided, so this would complicate the use of
+    // -Wformat and __attribute__((__format__)) on mpack_assert_fail_format()
+    // if we ever bothered to implement it.
+    #define mpack_assert(...) \
+            MPACK_EXPAND(((!(MPACK_EXTRACT_ARG0(__VA_ARGS__))) ? \
+                mpack_assert_fail_pos(__LINE__, __FILE__, MPACK_STRINGIFY_ARG0(__VA_ARGS__) , __VA_ARGS__ , "", NULL) : \
+                (void)0))
+
+    // mpack_break(): same location reporting as mpack_assert(), but the
+    // reporting functions are not declared noreturn.
+    void mpack_break_hit(const char* message);
+    #if MPACK_STDIO
+        void mpack_break_hit_format(const char* format, ...);
+        #define mpack_break_hit_at(line, file, ...) \
+                MPACK_EXPAND(mpack_break_hit_format("mpack breakpoint hit at " file ":" #line "\n" __VA_ARGS__))
+    #else
+        #define mpack_break_hit_at(line, file, ...) \
+                mpack_break_hit("mpack breakpoint hit at " file ":" #line )
+    #endif
+    #define mpack_break_hit_pos(line, file, ...) MPACK_EXPAND(mpack_break_hit_at(line, file, __VA_ARGS__))
+    #define mpack_break(...) MPACK_EXPAND(mpack_break_hit_pos(__LINE__, __FILE__, __VA_ARGS__))
+#else
+    // Release build: the first argument is still evaluated; when it is false
+    // control reaches MPACK_UNREACHABLE. All other arguments are discarded.
+    #define mpack_assert(...) \
+            (MPACK_EXPAND((!(MPACK_EXTRACT_ARG0(__VA_ARGS__))) ? \
+                (MPACK_UNREACHABLE, (void)0) : \
+                (void)0))
+    // Release build: mpack_break() discards its arguments and does nothing.
+    #define mpack_break(...) ((void)0)
+#endif
+
+
+
+// During development builds (unit tests of MPack's own sources on a
+// GCC-compatible compiler), forbid direct use of the C library memory and
+// string functions. Any macro definitions of these names are removed first,
+// then the identifiers are poisoned.
+#if MPACK_STDLIB && defined(MPACK_UNIT_TESTS) && MPACK_INTERNAL && defined(__GNUC__)
+    #undef memcmp
+    #undef memcpy
+    #undef memmove
+    #undef memset
+    #undef strlen
+    #undef malloc
+    #undef calloc
+    #undef realloc
+    #undef free
+    #pragma GCC poison memcmp memcpy memmove memset strlen
+    #pragma GCC poison malloc calloc realloc free
+#endif
+
+
+
+// Lowercase mpack_* names for the basic memory and string routines. When the
+// corresponding MPACK_* macro is configured the name is simply an alias for
+// it; otherwise MPack declares its own function with that name.
+
+#ifndef MPACK_MEMCMP
+    int mpack_memcmp(const void* s1, const void* s2, size_t n);
+#else
+    #define mpack_memcmp MPACK_MEMCMP
+#endif
+
+#ifndef MPACK_MEMCPY
+    void* mpack_memcpy(void* MPACK_RESTRICT s1, const void* MPACK_RESTRICT s2, size_t n);
+#else
+    #define mpack_memcpy MPACK_MEMCPY
+#endif
+
+#ifndef MPACK_MEMMOVE
+    void* mpack_memmove(void* s1, const void* s2, size_t n);
+#else
+    #define mpack_memmove MPACK_MEMMOVE
+#endif
+
+#ifndef MPACK_MEMSET
+    void* mpack_memset(void* s, int c, size_t n);
+#else
+    #define mpack_memset MPACK_MEMSET
+#endif
+
+#ifndef MPACK_STRLEN
+    size_t mpack_strlen(const char* s);
+#else
+    #define mpack_strlen MPACK_STRLEN
+#endif
+
+
+
+// Selects the function MPack calls as mpack_snprintf (only when MPACK_STDIO
+// is enabled).
+//
+// NOTE(review): this tests WIN32 without a leading underscore. That macro
+// usually comes from the Windows SDK headers or the build settings rather
+// than from the compiler itself (MSVC predefines _WIN32) -- confirm this is
+// the intended macro.
+// NOTE(review): _snprintf does not null-terminate the buffer on truncation;
+// callers presumably terminate the buffer themselves -- verify.
+#if MPACK_STDIO
+    #if defined(WIN32)
+        #define mpack_snprintf _snprintf
+    #else
+        #define mpack_snprintf snprintf
+    #endif
+#endif
+
+
+
+/*
+ * Debug logging
+ *
+ * Logging is compiled out by default. Change the "#if 0" below to "#if 1" to
+ * route mpack_log() to printf() followed by a flush of stdout. While
+ * disabled, mpack_log() expands to a no-op and its arguments are never
+ * evaluated.
+ */
+#if 0
+    #include <stdio.h>
+    #define mpack_log(...) (MPACK_EXPAND(printf(__VA_ARGS__)), fflush(stdout))
+#else
+    #define mpack_log(...) ((void)0)
+#endif
+
+
+
+/*
+ * Make sure our configuration makes sense.
+ *
+ * MPACK_STDIO, MPACK_READ_TRACKING and MPACK_WRITE_TRACKING each require
+ * an allocator, so enabling any of them without MPACK_MALLOC defined is
+ * rejected at compile time. Each feature is tested independently so that
+ * every offending option gets its own diagnostic.
+ */
+#ifndef MPACK_MALLOC
+    #if MPACK_STDIO
+        #error "MPACK_STDIO requires preprocessor definitions for MPACK_MALLOC and MPACK_FREE."
+    #endif
+    #if MPACK_READ_TRACKING
+        #error "MPACK_READ_TRACKING requires preprocessor definitions for MPACK_MALLOC and MPACK_FREE."
+    #endif
+    #if MPACK_WRITE_TRACKING
+        #error "MPACK_WRITE_TRACKING requires preprocessor definitions for MPACK_MALLOC and MPACK_FREE."
+    #endif
+#endif
+
+
+
+/*
+ * mpack_realloc() is only available when MPACK_MALLOC is configured.
+ *
+ * Without MPACK_REALLOC it is merely declared here. With MPACK_REALLOC it is
+ * a thin inline wrapper that forwards old_ptr and new_size; used_size is not
+ * needed in that case and is ignored.
+ */
+#ifdef MPACK_MALLOC
+    #ifndef MPACK_REALLOC
+        void* mpack_realloc(void* old_ptr, size_t used_size, size_t new_size);
+    #else
+        MPACK_INLINE void* mpack_realloc(void* old_ptr, size_t used_size, size_t new_size) {
+            void* new_ptr = MPACK_REALLOC(old_ptr, new_size);
+            MPACK_UNUSED(used_size);
+            return new_ptr;
+        }
+    #endif
+#endif
+
+
+
+/** @endcond */
+/**
+ * @}
+ */
+
+MPACK_EXTERN_C_END
+MPACK_SILENCE_WARNINGS_END
+
+#endif
+
+/* mpack/mpack-common.h.h */
+
+/**
+ * @file
+ *
+ * Defines types and functions shared by the MPack reader and writer.
+ */
+
+#ifndef MPACK_COMMON_H
+#define MPACK_COMMON_H 1
+
+/* #include "mpack-platform.h" */
+
+/* Default is 12; define MPACK_PRINT_BYTE_COUNT before this point to override.
+ * NOTE(review): presumably the number of leading bin/ext bytes shown in
+ * debug print output -- confirm against the code that uses it. */
+#ifndef MPACK_PRINT_BYTE_COUNT
+#define MPACK_PRINT_BYTE_COUNT 12
+#endif
+
+MPACK_SILENCE_WARNINGS_BEGIN
+MPACK_EXTERN_C_BEGIN
+
+
+
+/**
+ * @defgroup common Tags and Common Elements
+ *
+ * Contains types, constants and functions shared by both the encoding
+ * and decoding portions of MPack.
+ *
+ * @{
+ */
+
+/* Version information */
+
+#define MPACK_VERSION_MAJOR 1  /**< The major version number of MPack. */
+#define MPACK_VERSION_MINOR 1  /**< The minor version number of MPack. */
+#define MPACK_VERSION_PATCH 1  /**< The patch version number of MPack. */
+
+/**
+ * A number containing the version number of MPack for comparison purposes.
+ *
+ * It is computed as major * 10000 + minor * 100 + patch, so version 1.1.1
+ * is 10101.
+ */
+#define MPACK_VERSION ((MPACK_VERSION_MAJOR * 10000) + \
+        (MPACK_VERSION_MINOR * 100) + MPACK_VERSION_PATCH)
+
+/**
+ * A macro to test for a minimum version of MPack.
+ *
+ * Evaluates to true when MPACK_VERSION is greater than or equal to the
+ * number built from the given major, minor and patch components.
+ */
+#define MPACK_VERSION_AT_LEAST(major, minor, patch) \
+        (MPACK_VERSION >= (((major) * 10000) + ((minor) * 100) + (patch)))
+
+/** @cond */
+/* The dotted version number as a string literal ("major.minor.patch"). The
+ * patch component and its dot are left out when the patch number is zero. */
+#if (MPACK_VERSION_PATCH > 0)
+#define MPACK_VERSION_STRING_BASE \
+        MPACK_STRINGIFY(MPACK_VERSION_MAJOR) "." \
+        MPACK_STRINGIFY(MPACK_VERSION_MINOR) "." \
+        MPACK_STRINGIFY(MPACK_VERSION_PATCH)
+#else
+#define MPACK_VERSION_STRING_BASE \
+        MPACK_STRINGIFY(MPACK_VERSION_MAJOR) "." \
+        MPACK_STRINGIFY(MPACK_VERSION_MINOR)
+#endif
+/** @endcond */
+
+/**
+ * @def MPACK_VERSION_STRING
+ * @hideinitializer
+ *
+ * A string containing the MPack version.
+ *
+ * When MPACK_RELEASE_VERSION is not set, the suffix "dev" is appended
+ * directly to the version number.
+ */
+#if MPACK_RELEASE_VERSION
+#define MPACK_VERSION_STRING MPACK_VERSION_STRING_BASE
+#else
+#define MPACK_VERSION_STRING MPACK_VERSION_STRING_BASE "dev"
+#endif
+
+/**
+ * @def MPACK_LIBRARY_STRING
+ * @hideinitializer
+ *
+ * A string describing MPack, containing the library name, version and debug mode.
+ *
+ * The string is "MPack " followed by @ref MPACK_VERSION_STRING, with the
+ * suffix "-debug" appended when MPACK_DEBUG is set.
+ */
+#if MPACK_DEBUG
+#define MPACK_LIBRARY_STRING "MPack " MPACK_VERSION_STRING "-debug"
+#else
+#define MPACK_LIBRARY_STRING "MPack " MPACK_VERSION_STRING
+#endif
+
+/** @cond */
+/**
+ * @def MPACK_MAXIMUM_TAG_SIZE
+ *
+ * The maximum encoded size of a tag in bytes.
+ */
+#define MPACK_MAXIMUM_TAG_SIZE 9
+/** @endcond */
+
+#if MPACK_EXTENSIONS
+/**
+ * @def MPACK_TIMESTAMP_NANOSECONDS_MAX
+ *
+ * The maximum value of nanoseconds for a timestamp.
+ *
+ * @note This requires @ref MPACK_EXTENSIONS.
+ */
+#define MPACK_TIMESTAMP_NANOSECONDS_MAX 999999999
+#endif
+
+
+
+#if MPACK_COMPATIBILITY
+/**
+ * Versions of the MessagePack format.
+ *
+ * A reader, writer, or tree can be configured to serialize in an older
+ * version of the MessagePack spec. This is necessary to interface with
+ * older MessagePack libraries that do not support new MessagePack features.
+ *
+ * @note This requires @ref MPACK_COMPATIBILITY.
+ */
+typedef enum mpack_version_t {
+
+    /**
+     * Version 1.0/v4, supporting only the @c raw type without @c str8.
+     */
+    mpack_version_v4 = 4,
+
+    /**
+     * Version 2.0/v5, supporting the @c str8, @c bin and @c ext types.
+     */
+    mpack_version_v5 = 5,
+
+    /**
+     * The most recent supported version of MessagePack. This is the default.
+     *
+     * This is an alias of @ref mpack_version_v5, not a distinct value.
+     */
+    mpack_version_current = mpack_version_v5,
+
+} mpack_version_t;
+#endif
+
+/**
+ * Error states for MPack objects.
+ *
+ * Once a reader, writer, or tree has entered an error state, every later call
+ * on it is ignored and returns nil/zero. Check whether the source is in an
+ * error state before trusting such values.
+ *
+ * The numeric values are spelled out below; note that 1 is not used.
+ */
+typedef enum mpack_error_t {
+    mpack_ok                = 0,  /**< No error. */
+    mpack_error_io          = 2,  /**< The reader or writer failed to fill or flush, or some other file or socket error occurred. */
+    mpack_error_invalid     = 3,  /**< The data read is not valid MessagePack. */
+    mpack_error_unsupported = 4,  /**< The data read is not supported by this configuration of MPack. (See @ref MPACK_EXTENSIONS.) */
+    mpack_error_type        = 5,  /**< The type or value range did not match what was expected by the caller. */
+    mpack_error_too_big     = 6,  /**< A read or write was bigger than the maximum size allowed for that operation. */
+    mpack_error_memory      = 7,  /**< An allocation failure occurred. */
+    mpack_error_bug         = 8,  /**< The MPack API was used incorrectly. (This will always assert in debug mode.) */
+    mpack_error_data        = 9,  /**< The contained data is not valid. */
+    mpack_error_eof         = 10, /**< The reader failed to read because of file or socket EOF */
+} mpack_error_t;
+
+/**
+ * Converts an MPack error to a string. This function returns an empty
+ * string when MPACK_DEBUG is not set.
+ */
+const char* mpack_error_to_string(mpack_error_t error);
+
+/**
+ * Defines the type of a MessagePack tag.
+ *
+ * Every extension type, whether user defined or built-in, appears in a tag as
+ * @ref mpack_type_ext; the extension type value itself is stored separately.
+ *
+ * The numeric values are spelled out below.
+ */
+typedef enum mpack_type_t {
+    mpack_type_missing = 0,  /**< Special type indicating a missing optional value. */
+    mpack_type_nil     = 1,  /**< A null value. */
+    mpack_type_bool    = 2,  /**< A boolean (true or false.) */
+    mpack_type_int     = 3,  /**< A 64-bit signed integer. */
+    mpack_type_uint    = 4,  /**< A 64-bit unsigned integer. */
+    mpack_type_float   = 5,  /**< A 32-bit IEEE 754 floating point number. */
+    mpack_type_double  = 6,  /**< A 64-bit IEEE 754 floating point number. */
+    mpack_type_str     = 7,  /**< A string. */
+    mpack_type_bin     = 8,  /**< A chunk of binary data. */
+    mpack_type_array   = 9,  /**< An array of MessagePack objects. */
+    mpack_type_map     = 10, /**< An ordered map of key/value pairs of MessagePack objects. */
+
+    #if MPACK_EXTENSIONS
+    /**
+     * A typed MessagePack extension object containing a chunk of binary data.
+     *
+     * @note This requires @ref MPACK_EXTENSIONS.
+     */
+    mpack_type_ext     = 11,
+    #endif
+} mpack_type_t;
+
+/**
+ * Converts an MPack type to a string. This function returns an empty
+ * string when MPACK_DEBUG is not set.
+ */
+const char* mpack_type_to_string(mpack_type_t type);
+
+#if MPACK_EXTENSIONS
+/**
+ * A timestamp.
+ *
+ * @note This requires @ref MPACK_EXTENSIONS.
+ */
+typedef struct mpack_timestamp_t {
+    int64_t seconds; /**< The number of seconds (signed) since 1970-01-01T00:00:00Z. */
+    uint32_t nanoseconds; /**< The number of additional nanoseconds, between 0 and 999,999,999. */
+} mpack_timestamp_t;
+#endif
+
+/**
+ * An MPack tag is a MessagePack object header. It is a variant type
+ * representing any kind of object, and includes the length of compound types
+ * (e.g. map, array, string) or the value of non-compound types (e.g. boolean,
+ * integer, float.)
+ *
+ * If the type is compound (str, bin, ext, array or map), the contained
+ * elements or bytes are stored separately.
+ *
+ * This structure is opaque; its fields should not be accessed outside
+ * of MPack.
+ */
+typedef struct mpack_tag_t mpack_tag_t;
+
+/* Hide internals from documentation */
+/** @cond */
+/*
+ * Storage for a tag: the type, the extension type (only when MPACK_EXTENSIONS
+ * is set) and a union holding either the value of a non-compound type or the
+ * length/count of a compound type.
+ *
+ * Field order matters: MPACK_TAG_ZERO initializes this struct positionally
+ * (type, then exttype when present, then the union), and its {0} for the
+ * union initializes the first union member, u.
+ */
+struct mpack_tag_t {
+    mpack_type_t type; /*< The type of value. */
+
+    #if MPACK_EXTENSIONS
+    int8_t exttype; /*< The extension type if the type is @ref mpack_type_ext. */
+    #endif
+
+    /* The value for non-compound types. */
+    union {
+        uint64_t u; /*< The value if the type is unsigned int. */
+        int64_t  i; /*< The value if the type is signed int. */
+        bool     b; /*< The value if the type is bool. */
+
+        /* f holds a real float when MPACK_FLOAT is set, otherwise the raw
+           32-bit value. */
+        #if MPACK_FLOAT
+        float    f; /*< The value if the type is float. */
+        #else
+        uint32_t f; /*< The raw value if the type is float. */
+        #endif
+
+        /* d holds a real double when MPACK_DOUBLE is set, otherwise the raw
+           64-bit value. */
+        #if MPACK_DOUBLE
+        double   d; /*< The value if the type is double. */
+        #else
+        uint64_t d; /*< The raw value if the type is double. */
+        #endif
+
+        /* The number of bytes if the type is str, bin or ext. */
+        uint32_t l;
+
+        /* The element count if the type is an array, or the number of
+            key/value pairs if the type is map. */
+        uint32_t n;
+    } v;
+};
+/** @endcond */
+
+/**
+ * @name Tag Generators
+ * @{
+ */
+
+/**
+ * @def MPACK_TAG_ZERO
+ *
+ * An @ref mpack_tag_t initializer that zeroes the given tag.
+ *
+ * The initializer is positional, so it has an extra element for the extension
+ * type when @ref MPACK_EXTENSIONS is set.
+ *
+ * @warning This does not make the tag nil! The tag's type is invalid when
+ * initialized this way (its numeric value is 0, which is
+ * @ref mpack_type_missing, not @ref mpack_type_nil). Use
+ * @ref mpack_tag_make_nil() to generate a nil tag.
+ */
+#if MPACK_EXTENSIONS
+#define MPACK_TAG_ZERO {(mpack_type_t)0, 0, {0}}
+#else
+#define MPACK_TAG_ZERO {(mpack_type_t)0, {0}}
+#endif
+
+/** Builds a zeroed tag whose type is @ref mpack_type_nil. */
+MPACK_INLINE mpack_tag_t mpack_tag_make_nil(void) {
+    mpack_tag_t tag = MPACK_TAG_ZERO;
+    tag.type = mpack_type_nil;
+    return tag;
+}
+
+/** Builds a tag of type @ref mpack_type_bool holding the given value. */
+MPACK_INLINE mpack_tag_t mpack_tag_make_bool(bool value) {
+    mpack_tag_t tag = MPACK_TAG_ZERO;
+    tag.v.b = value;
+    tag.type = mpack_type_bool;
+    return tag;
+}
+
+/** Builds a bool tag holding true; shorthand for mpack_tag_make_bool(true). */
+MPACK_INLINE mpack_tag_t mpack_tag_make_true(void) {
+    return mpack_tag_make_bool(true);
+}
+
+/** Builds a bool tag holding false; shorthand for mpack_tag_make_bool(false). */
+MPACK_INLINE mpack_tag_t mpack_tag_make_false(void) {
+    return mpack_tag_make_bool(false);
+}
+
+/** Builds a tag of type @ref mpack_type_int holding the given signed value. */
+MPACK_INLINE mpack_tag_t mpack_tag_make_int(int64_t value) {
+    mpack_tag_t tag = MPACK_TAG_ZERO;
+    tag.v.i = value;
+    tag.type = mpack_type_int;
+    return tag;
+}
+
+/** Builds a tag of type @ref mpack_type_uint holding the given unsigned value. */
+MPACK_INLINE mpack_tag_t mpack_tag_make_uint(uint64_t value) {
+    mpack_tag_t tag = MPACK_TAG_ZERO;
+    tag.v.u = value;
+    tag.type = mpack_type_uint;
+    return tag;
+}
+
+#if MPACK_FLOAT
+/** Builds a tag of type @ref mpack_type_float holding the given float. */
+MPACK_INLINE mpack_tag_t mpack_tag_make_float(float value)
+#else
+/** Builds a tag of type @ref mpack_type_float from a raw uint32_t value. */
+MPACK_INLINE mpack_tag_t mpack_tag_make_raw_float(uint32_t value)
+#endif
+{
+    mpack_tag_t tag = MPACK_TAG_ZERO;
+    tag.v.f = value;
+    tag.type = mpack_type_float;
+    return tag;
+}
+
+#if MPACK_DOUBLE
+/** Builds a tag of type @ref mpack_type_double holding the given double. */
+MPACK_INLINE mpack_tag_t mpack_tag_make_double(double value)
+#else
+/** Builds a tag of type @ref mpack_type_double from a raw uint64_t value. */
+MPACK_INLINE mpack_tag_t mpack_tag_make_raw_double(uint64_t value)
+#endif
+{
+    mpack_tag_t tag = MPACK_TAG_ZERO;
+    tag.v.d = value;
+    tag.type = mpack_type_double;
+    return tag;
+}
+
+/** Builds a tag of type @ref mpack_type_array with the given element count. */
+MPACK_INLINE mpack_tag_t mpack_tag_make_array(uint32_t count) {
+    mpack_tag_t tag = MPACK_TAG_ZERO;
+    tag.v.n = count;
+    tag.type = mpack_type_array;
+    return tag;
+}
+
+/** Builds a tag of type @ref mpack_type_map with the given key/value pair count. */
+MPACK_INLINE mpack_tag_t mpack_tag_make_map(uint32_t count) {
+    mpack_tag_t tag = MPACK_TAG_ZERO;
+    tag.v.n = count;
+    tag.type = mpack_type_map;
+    return tag;
+}
+
+/** Builds a tag of type @ref mpack_type_str with the given length in bytes. */
+MPACK_INLINE mpack_tag_t mpack_tag_make_str(uint32_t length) {
+    mpack_tag_t tag = MPACK_TAG_ZERO;
+    tag.v.l = length;
+    tag.type = mpack_type_str;
+    return tag;
+}
+
+/** Builds a tag of type @ref mpack_type_bin with the given length in bytes. */
+MPACK_INLINE mpack_tag_t mpack_tag_make_bin(uint32_t length) {
+    mpack_tag_t tag = MPACK_TAG_ZERO;
+    tag.v.l = length;
+    tag.type = mpack_type_bin;
+    return tag;
+}
+
+#if MPACK_EXTENSIONS
+/**
+ * Builds a tag of type @ref mpack_type_ext with the given extension type and
+ * length in bytes.
+ *
+ * @note This requires @ref MPACK_EXTENSIONS.
+ */
+MPACK_INLINE mpack_tag_t mpack_tag_make_ext(int8_t exttype, uint32_t length) {
+    mpack_tag_t tag = MPACK_TAG_ZERO;
+    tag.v.l = length;
+    tag.exttype = exttype;
+    tag.type = mpack_type_ext;
+    return tag;
+}
+#endif
+
+/**
+ * @}
+ */
+
+/**
+ * @name Tag Querying Functions
+ * @{
+ */
+
+/**
+ * Gets the type of a tag.
+ *
+ * The tag is only read, so it is taken as a pointer to const; both const and
+ * non-const tags may be passed.
+ */
+MPACK_INLINE mpack_type_t mpack_tag_type(const mpack_tag_t* tag) {
+    return tag->type;
+}
+
+/**
+ * Gets the boolean value of a bool-type tag. The tag must be of type @ref
+ * mpack_type_bool.
+ *
+ * This asserts that the type in the tag is @ref mpack_type_bool. (No check is
+ * performed if MPACK_DEBUG is not set.)
+ *
+ * The tag is only read, so it is taken as a pointer to const.
+ */
+MPACK_INLINE bool mpack_tag_bool_value(const mpack_tag_t* tag) {
+    mpack_assert(tag->type == mpack_type_bool, "tag is not a bool!");
+    return tag->v.b;
+}
+
+/**
+ * Gets the signed integer value of an int-type tag.
+ *
+ * This asserts that the type in the tag is @ref mpack_type_int. (No check is
+ * performed if MPACK_DEBUG is not set.)
+ *
+ * The tag is only read, so it is taken as a pointer to const.
+ *
+ * @warning This does not convert between signed and unsigned tags! A positive
+ * integer may be stored in a tag as either @ref mpack_type_int or @ref
+ * mpack_type_uint. You must check the type first; this can only be used if the
+ * type is @ref mpack_type_int.
+ *
+ * @see mpack_type_int
+ */
+MPACK_INLINE int64_t mpack_tag_int_value(const mpack_tag_t* tag) {
+    mpack_assert(tag->type == mpack_type_int, "tag is not an int!");
+    return tag->v.i;
+}
+
+/**
+ * Gets the unsigned integer value of a uint-type tag.
+ *
+ * This asserts that the type in the tag is @ref mpack_type_uint. (No check is
+ * performed if MPACK_DEBUG is not set.)
+ *
+ * The tag is only read, so it is taken as a pointer to const.
+ *
+ * @warning This does not convert between signed and unsigned tags! A positive
+ * integer may be stored in a tag as either @ref mpack_type_int or @ref
+ * mpack_type_uint. You must check the type first; this can only be used if the
+ * type is @ref mpack_type_uint.
+ *
+ * @see mpack_type_uint
+ */
+MPACK_INLINE uint64_t mpack_tag_uint_value(const mpack_tag_t* tag) {
+    mpack_assert(tag->type == mpack_type_uint, "tag is not a uint!");
+    return tag->v.u;
+}
+
+/**
+ * Gets the float value of a float-type tag.
+ *
+ * When MPACK_FLOAT is not set, this function is instead named
+ * mpack_tag_raw_float_value() and returns the raw value as a uint32_t.
+ *
+ * This asserts that the type in the tag is @ref mpack_type_float. (No check is
+ * performed if MPACK_DEBUG is not set.)
+ *
+ * The tag is only read, so it is taken as a pointer to const.
+ *
+ * @warning This does not convert between float and double tags! This can only
+ * be used if the type is @ref mpack_type_float.
+ *
+ * @see mpack_type_float
+ */
+MPACK_INLINE
+#if MPACK_FLOAT
+float mpack_tag_float_value(const mpack_tag_t* tag)
+#else
+uint32_t mpack_tag_raw_float_value(const mpack_tag_t* tag)
+#endif
+{
+    mpack_assert(tag->type == mpack_type_float, "tag is not a float!");
+    return tag->v.f;
+}
+
+/**
+ * Gets the double value of a double-type tag.
+ *
+ * When MPACK_DOUBLE is not set, this function is instead named
+ * mpack_tag_raw_double_value() and returns the raw value as a uint64_t.
+ *
+ * This asserts that the type in the tag is @ref mpack_type_double. (No check
+ * is performed if MPACK_DEBUG is not set.)
+ *
+ * The tag is only read, so it is taken as a pointer to const.
+ *
+ * @warning This does not convert between float and double tags! This can only
+ * be used if the type is @ref mpack_type_double.
+ *
+ * @see mpack_type_double
+ */
+MPACK_INLINE
+#if MPACK_DOUBLE
+double mpack_tag_double_value(const mpack_tag_t* tag)
+#else
+uint64_t mpack_tag_raw_double_value(const mpack_tag_t* tag)
+#endif
+{
+    mpack_assert(tag->type == mpack_type_double, "tag is not a double!");
+    return tag->v.d;
+}
+
+/**
+ * Gets the number of elements in an array tag.
+ *
+ * This asserts that the type in the tag is @ref mpack_type_array. (No check is
+ * performed if MPACK_DEBUG is not set.)
+ *
+ * The tag is only read, so it is taken as a pointer to const.
+ *
+ * @see mpack_type_array
+ */
+MPACK_INLINE uint32_t mpack_tag_array_count(const mpack_tag_t* tag) {
+    mpack_assert(tag->type == mpack_type_array, "tag is not an array!");
+    return tag->v.n;
+}
+
+/**
+ * Gets the number of key-value pairs in a map tag.
+ *
+ * This asserts that the type in the tag is @ref mpack_type_map. (No check is
+ * performed if MPACK_DEBUG is not set.)
+ *
+ * The tag is only read, so it is taken as a pointer to const.
+ *
+ * @see mpack_type_map
+ */
+MPACK_INLINE uint32_t mpack_tag_map_count(const mpack_tag_t* tag) {
+    mpack_assert(tag->type == mpack_type_map, "tag is not a map!");
+    return tag->v.n;
+}
+
+/**
+ * Gets the length in bytes of a str-type tag.
+ *
+ * This asserts that the type in the tag is @ref mpack_type_str. (No check is
+ * performed if MPACK_DEBUG is not set.)
+ *
+ * The tag is only read, so it is taken as a pointer to const.
+ *
+ * @see mpack_type_str
+ */
+MPACK_INLINE uint32_t mpack_tag_str_length(const mpack_tag_t* tag) {
+    mpack_assert(tag->type == mpack_type_str, "tag is not a str!");
+    return tag->v.l;
+}
+
+/**
+ * Gets the length in bytes of a bin-type tag.
+ *
+ * This asserts that the type in the tag is @ref mpack_type_bin. (No check is
+ * performed if MPACK_DEBUG is not set.)
+ *
+ * The tag is only read, so it is taken as a pointer to const.
+ *
+ * @see mpack_type_bin
+ */
+MPACK_INLINE uint32_t mpack_tag_bin_length(const mpack_tag_t* tag) {
+    mpack_assert(tag->type == mpack_type_bin, "tag is not a bin!");
+    return tag->v.l;
+}
+
+#if MPACK_EXTENSIONS
+/**
+ * Gets the length in bytes of an ext-type tag.
+ *
+ * This asserts that the type in the tag is @ref mpack_type_ext. (No check is
+ * performed if MPACK_DEBUG is not set.)
+ *
+ * The tag is only read, so it is taken as a pointer to const.
+ *
+ * @note This requires @ref MPACK_EXTENSIONS.
+ *
+ * @see mpack_type_ext
+ */
+MPACK_INLINE uint32_t mpack_tag_ext_length(const mpack_tag_t* tag) {
+    mpack_assert(tag->type == mpack_type_ext, "tag is not an ext!");
+    return tag->v.l;
+}
+
+/**
+ * Gets the extension type (exttype) of an ext-type tag.
+ *
+ * This asserts that the type in the tag is @ref mpack_type_ext. (No check is
+ * performed if MPACK_DEBUG is not set.)
+ *
+ * The tag is only read, so it is taken as a pointer to const.
+ *
+ * @note This requires @ref MPACK_EXTENSIONS.
+ *
+ * @see mpack_type_ext
+ */
+MPACK_INLINE int8_t mpack_tag_ext_exttype(const mpack_tag_t* tag) {
+    mpack_assert(tag->type == mpack_type_ext, "tag is not an ext!");
+    return tag->exttype;
+}
+#endif
+
+/**
+ * Gets the length in bytes of a str-, bin- or ext-type tag.
+ *
+ * This asserts that the type in the tag is @ref mpack_type_str, @ref
+ * mpack_type_bin or @ref mpack_type_ext. (No check is performed if MPACK_DEBUG
+ * is not set.) The ext type is only accepted when MPACK_EXTENSIONS is set.
+ *
+ * The tag is only read, so it is taken as a pointer to const.
+ *
+ * @see mpack_type_str
+ * @see mpack_type_bin
+ * @see mpack_type_ext
+ */
+MPACK_INLINE uint32_t mpack_tag_bytes(const mpack_tag_t* tag) {
+    #if MPACK_EXTENSIONS
+    mpack_assert(tag->type == mpack_type_str || tag->type == mpack_type_bin
+            || tag->type == mpack_type_ext, "tag is not a str, bin or ext!");
+    #else
+    mpack_assert(tag->type == mpack_type_str || tag->type == mpack_type_bin,
+            "tag is not a str or bin!");
+    #endif
+    return tag->v.l;
+}
+
+/**
+ * @}
+ */
+
+/**
+ * @name Other tag functions
+ * @{
+ */
+
+#if MPACK_EXTENSIONS
+/**
+ * The extension type for a timestamp.
+ *
+ * @note This requires @ref MPACK_EXTENSIONS.
+ */
+#define MPACK_EXTTYPE_TIMESTAMP ((int8_t)(-1))
+#endif
+
+/**
+ * Compares two tags with an arbitrary fixed ordering. Returns 0 if the tags are
+ * equal, a negative integer if left comes before right, or a positive integer
+ * otherwise.
+ *
+ * @warning The ordering is not guaranteed to be preserved across MPack versions; do
+ * not rely on it in persistent data.
+ *
+ * @warning Floating point numbers are compared bit-for-bit, not using the language's
+ * operator==. This means that NaNs with matching representation will compare equal.
+ * This behaviour is up for debate; see comments in the definition of mpack_tag_cmp().
+ *
+ * See mpack_tag_equal() for more information on when tags are considered equal.
+ */
+int mpack_tag_cmp(mpack_tag_t left, mpack_tag_t right);
+
+/**
+ * Compares two tags for equality. Tags are considered equal if the types are compatible
+ * and the values (for non-compound types) are equal.
+ *
+ * The field width of variable-width fields is ignored (and in fact is not stored
+ * in a tag), and positive numbers in signed integers are considered equal to their
+ * unsigned counterparts. So for example the value 1 stored as a positive fixint
+ * is equal to the value 1 stored in a 64-bit unsigned integer field.
+ *
+ * The "extension type" of an extension object is considered part of the value
+ * and must match exactly.
+ *
+ * \warning Floating point numbers are compared bit-for-bit, not using the language's
+ * operator==. This means that NaNs with matching representation will compare equal.
+ * This behaviour is up for debate; see comments in the definition of mpack_tag_cmp().
+ */
+MPACK_INLINE bool mpack_tag_equal(mpack_tag_t left, mpack_tag_t right) {
+    return !mpack_tag_cmp(left, right); // mpack_tag_cmp() yields 0 only for equal tags
+}
+
+#if MPACK_DEBUG && MPACK_STDIO
+/**
+ * Generates a json-like debug description of the given tag into the given buffer.
+ *
+ * This is only available in debug mode, and only if stdio is available (since
+ * it uses snprintf().) It's strictly for debugging purposes.
+ *
+ * The prefix is used to print the first few hexadecimal bytes of a bin or ext
+ * type. Pass NULL if not a bin or ext.
+ */
+void mpack_tag_debug_pseudo_json(mpack_tag_t tag, char* buffer, size_t buffer_size,
+        const char* prefix, size_t prefix_size);
+
+/**
+ * Generates a debug string description of the given tag into the given buffer.
+ *
+ * This is only available in debug mode, and only if stdio is available (since
+ * it uses snprintf().) It's strictly for debugging purposes.
+ */
+void mpack_tag_debug_describe(mpack_tag_t tag, char* buffer, size_t buffer_size);
+
+/** @cond */
+
+/*
+ * A callback function for printing pseudo-JSON for debugging purposes.
+ *
+ * @see mpack_node_print_callback
+ */
+typedef void (*mpack_print_callback_t)(void* context, const char* data, size_t count);
+
+// State shared by the debug printing helpers declared below: a character
+// buffer plus an output callback and the context pointer that goes with it.
+typedef struct mpack_print_t {
+    char* buffer;                    // NOTE(review): presumably where output is accumulated -- confirm
+    size_t size;                     // NOTE(review): presumably the capacity of buffer -- confirm
+    size_t count;                    // NOTE(review): presumably the bytes currently held in buffer -- confirm
+    mpack_print_callback_t callback; // output sink; its signature is (context, data, count)
+    void* context;                   // NOTE(review): presumably passed to callback as its context -- confirm
+} mpack_print_t;
+
+void mpack_print_append(mpack_print_t* print, const char* data, size_t count);
+
+MPACK_INLINE void mpack_print_append_cstr(mpack_print_t* print, const char* cstr) {
+    mpack_print_append(print, cstr, mpack_strlen(cstr)); // count is measured with mpack_strlen()
+}
+
+void mpack_print_flush(mpack_print_t* print);
+
+void mpack_print_file_callback(void* context, const char* data, size_t count);
+
+/** @endcond */
+
+#endif
+
+/**
+ * @}
+ */
+
+/**
+ * @name Deprecated Tag Generators
+ * @{
+ */
+
+/*
+ * "make" has been added to their names to disambiguate them from the
+ * value-fetching functions (e.g. mpack_tag_make_bool() vs
+ * mpack_tag_bool_value().)
+ *
+ * The length and count for all compound types had the wrong signedness (int32_t
+ * instead of uint32_t.) These preserve the old behaviour; the new "make"
+ * functions use the correct unsigned type.
+ */
+
+/** \deprecated Renamed to mpack_tag_make_nil(); this wrapper only forwards to it. */
+MPACK_INLINE mpack_tag_t mpack_tag_nil(void) {
+    return mpack_tag_make_nil();
+}
+
+/** \deprecated Renamed to mpack_tag_make_bool(); the value is forwarded unchanged. */
+MPACK_INLINE mpack_tag_t mpack_tag_bool(bool value) {
+    return mpack_tag_make_bool(value);
+}
+
+/** \deprecated Renamed to mpack_tag_make_true(); this wrapper only forwards to it. */
+MPACK_INLINE mpack_tag_t mpack_tag_true(void) {
+    return mpack_tag_make_true();
+}
+
+/** \deprecated Renamed to mpack_tag_make_false(); this wrapper only forwards to it. */
+MPACK_INLINE mpack_tag_t mpack_tag_false(void) {
+    return mpack_tag_make_false();
+}
+
+/** \deprecated Renamed to mpack_tag_make_int(); the value is forwarded unchanged. */
+MPACK_INLINE mpack_tag_t mpack_tag_int(int64_t value) {
+    return mpack_tag_make_int(value);
+}
+
+/** \deprecated Renamed to mpack_tag_make_uint(); the value is forwarded unchanged. */
+MPACK_INLINE mpack_tag_t mpack_tag_uint(uint64_t value) {
+    return mpack_tag_make_uint(value);
+}
+
+#if MPACK_FLOAT
+/** \deprecated Renamed to mpack_tag_make_float(); the value is forwarded unchanged. */
+MPACK_INLINE mpack_tag_t mpack_tag_float(float value) {
+    return mpack_tag_make_float(value);
+}
+#endif
+
+#if MPACK_DOUBLE
+/** \deprecated Renamed to mpack_tag_make_double(); the value is forwarded unchanged. */
+MPACK_INLINE mpack_tag_t mpack_tag_double(double value) {
+    return mpack_tag_make_double(value);
+}
+#endif
+
+/** \deprecated Renamed to mpack_tag_make_array(); the legacy signed count is cast to uint32_t. */
+MPACK_INLINE mpack_tag_t mpack_tag_array(int32_t count) {
+    return mpack_tag_make_array((uint32_t)count);
+}
+
+/** \deprecated Renamed to mpack_tag_make_map(); the legacy signed count is cast to uint32_t. */
+MPACK_INLINE mpack_tag_t mpack_tag_map(int32_t count) {
+    return mpack_tag_make_map((uint32_t)count);
+}
+
+/** \deprecated Renamed to mpack_tag_make_str(); the legacy signed length is cast to uint32_t. */
+MPACK_INLINE mpack_tag_t mpack_tag_str(int32_t length) {
+    return mpack_tag_make_str((uint32_t)length);
+}
+
+/** \deprecated Renamed to mpack_tag_make_bin(); the legacy signed length is cast to uint32_t. */
+MPACK_INLINE mpack_tag_t mpack_tag_bin(int32_t length) {
+    return mpack_tag_make_bin((uint32_t)length);
+}
+
+#if MPACK_EXTENSIONS
+/** \deprecated Renamed to mpack_tag_make_ext(); the legacy signed length is cast to uint32_t. */
+MPACK_INLINE mpack_tag_t mpack_tag_ext(int8_t exttype, int32_t length) {
+    return mpack_tag_make_ext(exttype, (uint32_t)length);
+}
+#endif
+
+/**
+ * @}
+ */
+
+/** @cond */
+
+/*
+ * Helpers to perform unaligned network-endian loads and stores
+ * at arbitrary addresses. Byte-swapping builtins are used if they
+ * are available and if they improve performance.
+ *
+ * These will remain available in the public API so feel free to
+ * use them for other purposes, but they are undocumented.
+ */
+
+MPACK_INLINE uint8_t mpack_load_u8(const char* p) {
+    return (uint8_t)*p; // a single byte has no byte order to fix up
+}
+
+MPACK_INLINE uint16_t mpack_load_u16(const char* p) {
+    #ifdef MPACK_NHSWAP16
+    uint16_t raw; // the two bytes exactly as stored (network order)
+    mpack_memcpy(&raw, p, sizeof(raw));
+    return MPACK_NHSWAP16(raw);
+    #else
+    const uint8_t* b = (const uint8_t*)p; // b[0] is the most significant byte
+    return (uint16_t)(((uint16_t)b[0] << 8) | (uint16_t)b[1]);
+    #endif
+}
+
+MPACK_INLINE uint32_t mpack_load_u32(const char* p) {
+    #ifdef MPACK_NHSWAP32
+    uint32_t raw; // the four bytes exactly as stored (network order)
+    mpack_memcpy(&raw, p, sizeof(raw));
+    return MPACK_NHSWAP32(raw);
+    #else
+    const uint8_t* b = (const uint8_t*)p; // b[0] is the most significant byte
+    uint32_t hi = ((uint32_t)b[0] << 24) | ((uint32_t)b[1] << 16);
+    uint32_t lo = ((uint32_t)b[2] <<  8) |  (uint32_t)b[3];
+    return hi | lo;
+    #endif
+}
+
+MPACK_INLINE uint64_t mpack_load_u64(const char* p) {
+    #ifdef MPACK_NHSWAP64
+    uint64_t raw; // the eight bytes exactly as stored (network order)
+    mpack_memcpy(&raw, p, sizeof(raw));
+    return MPACK_NHSWAP64(raw);
+    #else
+    // portable fallback: assemble the value from individual bytes
+    const uint8_t* b = (const uint8_t*)p; // b[0] is the most significant byte
+    uint32_t hi = ((uint32_t)b[0] << 24) | ((uint32_t)b[1] << 16) |
+                  ((uint32_t)b[2] <<  8) |  (uint32_t)b[3];
+    uint32_t lo = ((uint32_t)b[4] << 24) | ((uint32_t)b[5] << 16) |
+                  ((uint32_t)b[6] <<  8) |  (uint32_t)b[7];
+    // join the two big-endian 32-bit halves into one 64-bit value
+    return ((uint64_t)hi << 32) | (uint64_t)lo;
+    #endif
+}
+
+MPACK_INLINE void mpack_store_u8(char* p, uint8_t val) {
+    uint8_t* out = (uint8_t*)p; // write through an unsigned view of the destination
+    *out = val;
+}
+
+MPACK_INLINE void mpack_store_u16(char* p, uint16_t val) {
+    #ifdef MPACK_NHSWAP16
+    uint16_t swapped = MPACK_NHSWAP16(val); // value in network byte order
+    mpack_memcpy(p, &swapped, sizeof(swapped));
+    #else
+    uint8_t* out = (uint8_t*)p; // out[0] receives the most significant byte
+    out[1] = (uint8_t)(val & 0xFF);
+    out[0] = (uint8_t)((val >> 8) & 0xFF);
+    #endif
+}
+
+MPACK_INLINE void mpack_store_u32(char* p, uint32_t val) {
+    #ifdef MPACK_NHSWAP32
+    uint32_t swapped = MPACK_NHSWAP32(val); // value in network byte order
+    mpack_memcpy(p, &swapped, sizeof(swapped));
+    #else
+    uint8_t* out = (uint8_t*)p; // out[0] receives the most significant byte
+    out[3] = (uint8_t)(val & 0xFF);
+    out[2] = (uint8_t)((val >> 8) & 0xFF);
+    out[1] = (uint8_t)((val >> 16) & 0xFF);
+    out[0] = (uint8_t)((val >> 24) & 0xFF);
+    #endif
+}
+
+MPACK_INLINE void mpack_store_u64(char* p, uint64_t val) {
+    #ifdef MPACK_NHSWAP64
+    uint64_t swapped = MPACK_NHSWAP64(val); // value in network byte order
+    mpack_memcpy(p, &swapped, sizeof(swapped));
+    #else
+    uint8_t* out = (uint8_t*)p; // out[0] receives the most significant byte
+    out[7] = (uint8_t)(val & 0xFF);
+    out[6] = (uint8_t)((val >> 8) & 0xFF);
+    out[5] = (uint8_t)((val >> 16) & 0xFF);
+    out[4] = (uint8_t)((val >> 24) & 0xFF);
+    out[3] = (uint8_t)((val >> 32) & 0xFF);
+    out[2] = (uint8_t)((val >> 40) & 0xFF);
+    out[1] = (uint8_t)((val >> 48) & 0xFF);
+    out[0] = (uint8_t)((val >> 56) & 0xFF);
+    #endif
+}
+
+MPACK_INLINE int8_t  mpack_load_i8 (const char* p) {return (int8_t) mpack_load_u8 (p);} // signed loads: cast of the unsigned load
+MPACK_INLINE int16_t mpack_load_i16(const char* p) {return (int16_t)mpack_load_u16(p);}
+MPACK_INLINE int32_t mpack_load_i32(const char* p) {return (int32_t)mpack_load_u32(p);}
+MPACK_INLINE int64_t mpack_load_i64(const char* p) {return (int64_t)mpack_load_u64(p);}
+MPACK_INLINE void mpack_store_i8 (char* p, int8_t  val) {mpack_store_u8 (p, (uint8_t) val);} // signed stores: cast, then the unsigned store
+MPACK_INLINE void mpack_store_i16(char* p, int16_t val) {mpack_store_u16(p, (uint16_t)val);}
+MPACK_INLINE void mpack_store_i32(char* p, int32_t val) {mpack_store_u32(p, (uint32_t)val);}
+MPACK_INLINE void mpack_store_i64(char* p, int64_t val) {mpack_store_u64(p, (uint64_t)val);}
+
+#if MPACK_FLOAT
+MPACK_INLINE float mpack_load_float(const char* p) {
+    MPACK_CHECK_FLOAT_ORDER();
+    MPACK_STATIC_ASSERT(sizeof(float) == sizeof(uint32_t), "float is wrong size??");
+    union {
+        uint32_t raw; // the stored bytes decoded by mpack_load_u32()
+        float value;  // the same 32 bits viewed as a float
+    } pun;
+    pun.raw = mpack_load_u32(p);
+    return pun.value;
+}
+#endif
+
+#if MPACK_DOUBLE
+MPACK_INLINE double mpack_load_double(const char* p) {
+    MPACK_CHECK_FLOAT_ORDER();
+    MPACK_STATIC_ASSERT(sizeof(double) == sizeof(uint64_t), "double is wrong size??");
+    union {
+        uint64_t raw; // the stored bytes decoded by mpack_load_u64()
+        double value; // the same 64 bits viewed as a double
+    } pun;
+    pun.raw = mpack_load_u64(p);
+    return pun.value;
+}
+#endif
+
+#if MPACK_FLOAT
+MPACK_INLINE void mpack_store_float(char* p, float value) {
+    MPACK_CHECK_FLOAT_ORDER();
+    union {
+        uint32_t raw; // the float's bits as an integer, handed to mpack_store_u32()
+        float number;
+    } pun;
+    pun.number = value;
+    mpack_store_u32(p, pun.raw);
+}
+#endif
+
+#if MPACK_DOUBLE
+MPACK_INLINE void mpack_store_double(char* p, double value) {
+    MPACK_CHECK_FLOAT_ORDER();
+    union {
+        uint64_t raw; // the double's bits as an integer, handed to mpack_store_u64()
+        double number;
+    } pun;
+    pun.number = value;
+    mpack_store_u64(p, pun.raw);
+}
+#endif
+
+#if MPACK_FLOAT && !MPACK_DOUBLE
+/**
+ * Performs a manual shortening conversion on the raw 64-bit representation of
+ * a double. This is useful for parsing doubles on platforms that only support
+ * floats (such as AVR.)
+ *
+ * The significand is truncated rather than rounded, subnormal results are
+ * set to 0 and values too large for a float become infinity, so this may not
+ * be quite as accurate as a real double-to-float conversion.
+ */
+MPACK_INLINE float mpack_shorten_raw_double_to_float(uint64_t d) {
+    MPACK_CHECK_FLOAT_ORDER();
+    union {
+        float f;
+        uint32_t u;
+    } v;
+
+    // float has  1 bit sign,  8 bits exponent, 23 bits significand
+    // double has 1 bit sign, 11 bits exponent, 52 bits significand
+
+    uint64_t d_sign = (uint64_t)(d >> 63);
+    uint64_t d_exponent = (uint32_t)(d >> 52) & ((1 << 11) - 1);
+    uint64_t d_significand = d & (((uint64_t)1 << 52) - 1);
+
+    uint32_t f_sign = (uint32_t)d_sign;
+    uint32_t f_exponent;
+    uint32_t f_significand;
+
+    if (MPACK_UNLIKELY(d_exponent == ((1 << 11) - 1))) {
+        // infinity or NAN. shift down to preserve the top bit since it
+        // indicates signaling NAN, but also set the low bit if any bits were
+        // set (that way we can't shift NAN to infinity.)
+        f_exponent = ((1 << 8) - 1);
+        f_significand = (uint32_t)(d_significand >> 29) | (d_significand ? 1 : 0);
+
+    } else {
+        int fix_bias = (int)d_exponent - ((1 << 10) - 1) + ((1 << 7) - 1); // swap the double bias (1023) for the float bias (127)
+        if (MPACK_UNLIKELY(fix_bias <= 0)) {
+            // we don't currently handle subnormal numbers. just set it to zero.
+            f_exponent = 0;
+            f_significand = 0;
+        } else if (MPACK_UNLIKELY(fix_bias >= 0xff)) {
+            // exponent is too large (0xff itself encodes inf/NAN); saturate to infinity
+            f_exponent = 0xff;
+            f_significand = 0;
+        } else {
+            // a normal number that fits in a float. this is the usual case.
+            f_exponent = (uint32_t)fix_bias;
+            f_significand = (uint32_t)(d_significand >> 29);
+        }
+    }
+
+    #if 0
+    printf("\n===============\n");
+    for (size_t i = 0; i < 64; ++i)
+        printf("%i%s",(int)((d>>(63-i))&1),((i%8)==7)?" ":"");
+    printf("\n%lu %lu %lu\n", d_sign, d_exponent, d_significand);
+    printf("%u %u %u\n", f_sign, f_exponent, f_significand);
+    #endif
+
+    v.u = (f_sign << 31) | (f_exponent << 23) | f_significand;
+    return v.f;
+}
+#endif
+
+/** @endcond */
+
+
+
+/** @cond */
+
+// Sizes in bytes for the various possible tags
+#define MPACK_TAG_SIZE_FIXUINT  1
+#define MPACK_TAG_SIZE_U8       2
+#define MPACK_TAG_SIZE_U16      3
+#define MPACK_TAG_SIZE_U32      5
+#define MPACK_TAG_SIZE_U64      9
+#define MPACK_TAG_SIZE_FIXINT   1
+#define MPACK_TAG_SIZE_I8       2
+#define MPACK_TAG_SIZE_I16      3
+#define MPACK_TAG_SIZE_I32      5
+#define MPACK_TAG_SIZE_I64      9
+#define MPACK_TAG_SIZE_FLOAT    5
+#define MPACK_TAG_SIZE_DOUBLE   9
+#define MPACK_TAG_SIZE_FIXARRAY 1
+#define MPACK_TAG_SIZE_ARRAY16  3
+#define MPACK_TAG_SIZE_ARRAY32  5
+#define MPACK_TAG_SIZE_FIXMAP   1
+#define MPACK_TAG_SIZE_MAP16    3
+#define MPACK_TAG_SIZE_MAP32    5
+#define MPACK_TAG_SIZE_FIXSTR   1
+#define MPACK_TAG_SIZE_STR8     2
+#define MPACK_TAG_SIZE_STR16    3
+#define MPACK_TAG_SIZE_STR32    5
+#define MPACK_TAG_SIZE_BIN8     2
+#define MPACK_TAG_SIZE_BIN16    3
+#define MPACK_TAG_SIZE_BIN32    5
+#define MPACK_TAG_SIZE_FIXEXT1  2
+#define MPACK_TAG_SIZE_FIXEXT2  2
+#define MPACK_TAG_SIZE_FIXEXT4  2
+#define MPACK_TAG_SIZE_FIXEXT8  2
+#define MPACK_TAG_SIZE_FIXEXT16 2
+#define MPACK_TAG_SIZE_EXT8     3
+#define MPACK_TAG_SIZE_EXT16    4
+#define MPACK_TAG_SIZE_EXT32    6
+
+// size in bytes for complete ext types
+#define MPACK_EXT_SIZE_TIMESTAMP4 (MPACK_TAG_SIZE_FIXEXT4 + 4)
+#define MPACK_EXT_SIZE_TIMESTAMP8 (MPACK_TAG_SIZE_FIXEXT8 + 8)
+#define MPACK_EXT_SIZE_TIMESTAMP12 (MPACK_TAG_SIZE_EXT8 + 12)
+
+/** @endcond */
+
+
+
+#if MPACK_READ_TRACKING || MPACK_WRITE_TRACKING
+/* Tracks the read/write state of compound elements (maps, arrays, */
+/* strings, binary blobs and extension types) */
+/** @cond */
+
+typedef struct mpack_track_element_t {
+    mpack_type_t type; // type of the compound element being tracked
+    uint32_t left;     // remaining count; see key_needs_value and builder below
+
+    // indicates that a value still needs to be read/written for an already
+    // read/written key. left is not decremented until both key and value are
+    // read/written.
+    bool key_needs_value;
+
+    // tracks whether the map/array being written is using a builder. if true,
+    // the number of elements is automatic, and left is 0.
+    bool builder;
+} mpack_track_element_t;
+
+typedef struct mpack_track_t {
+    size_t count;                    // NOTE(review): presumably the number of elements in use -- confirm
+    size_t capacity;                 // NOTE(review): presumably the allocated length of elements -- confirm
+    mpack_track_element_t* elements; // array of tracked compound elements
+} mpack_track_t;
+
+#if MPACK_INTERNAL
+mpack_error_t mpack_track_init(mpack_track_t* track);
+mpack_error_t mpack_track_grow(mpack_track_t* track);
+mpack_error_t mpack_track_push(mpack_track_t* track, mpack_type_t type, uint32_t count);
+mpack_error_t mpack_track_push_builder(mpack_track_t* track, mpack_type_t type);
+mpack_error_t mpack_track_pop(mpack_track_t* track, mpack_type_t type);
+mpack_error_t mpack_track_pop_builder(mpack_track_t* track, mpack_type_t type);
+mpack_error_t mpack_track_element(mpack_track_t* track, bool read);
+mpack_error_t mpack_track_peek_element(mpack_track_t* track, bool read);
+mpack_error_t mpack_track_bytes(mpack_track_t* track, bool read, size_t count);
+mpack_error_t mpack_track_str_bytes_all(mpack_track_t* track, bool read, size_t count);
+mpack_error_t mpack_track_check_empty(mpack_track_t* track);
+mpack_error_t mpack_track_destroy(mpack_track_t* track, bool cancel);
+#endif
+
+/** @endcond */
+#endif
+
+
+
+#if MPACK_INTERNAL
+/** @cond */
+
+
+
+/* Miscellaneous string functions */
+
+/**
+ * Returns true if the given UTF-8 string is valid.
+ */
+bool mpack_utf8_check(const char* str, size_t bytes);
+
+/**
+ * Returns true if the given UTF-8 string is valid and contains no null characters.
+ */
+bool mpack_utf8_check_no_null(const char* str, size_t bytes);
+
+/**
+ * Returns true if the given string has no null bytes.
+ */
+bool mpack_str_check_no_null(const char* str, size_t bytes);
+
+
+
+/** @endcond */
+#endif
+
+
+
+/**
+ * @}
+ */
+
+MPACK_EXTERN_C_END
+MPACK_SILENCE_WARNINGS_END
+
+#endif
+
+
+/* mpack/mpack-writer.h */
+
+/**
+ * @file
+ *
+ * Declares the MPack Writer.
+ */
+
+#ifndef MPACK_WRITER_H
+#define MPACK_WRITER_H 1
+
+/* #include "mpack-common.h" */
+
+#if MPACK_WRITER
+
+MPACK_SILENCE_WARNINGS_BEGIN
+MPACK_EXTERN_C_BEGIN
+
+#if MPACK_WRITE_TRACKING
+struct mpack_track_t;
+#endif
+
+/**
+ * @defgroup writer Write API
+ *
+ * The MPack Write API encodes structured data of a fixed (hardcoded) schema to MessagePack.
+ *
+ * @{
+ */
+
+/**
+ * @def MPACK_WRITER_MINIMUM_BUFFER_SIZE
+ *
+ * The minimum buffer size for a writer with a flush function.
+ */
+#define MPACK_WRITER_MINIMUM_BUFFER_SIZE 32
+
+/**
+ * A buffered MessagePack encoder.
+ *
+ * The encoder wraps an existing buffer and, optionally, a flush function.
+ * This allows efficiently encoding to an in-memory buffer or to a stream.
+ *
+ * All write operations are synchronous; they will block until the
+ * data is fully written, or an error occurs.
+ */
+typedef struct mpack_writer_t mpack_writer_t;
+
+/**
+ * The MPack writer's flush function to flush the buffer to the output stream.
+ * It should flag an appropriate error on the writer if flushing fails (usually
+ * mpack_error_io or mpack_error_memory.)
+ *
+ * The specified context for callbacks is at writer->context.
+ */
+typedef void (*mpack_writer_flush_t)(mpack_writer_t* writer, const char* buffer, size_t count);
+
+/**
+ * An error handler function to be called when an error is flagged on
+ * the writer.
+ *
+ * The error handler will only be called once on the first error flagged;
+ * any subsequent writes and errors are ignored, and the writer is
+ * permanently in that error state.
+ *
+ * MPack is safe against non-local jumps out of error handler callbacks.
+ * This means you are allowed to longjmp or throw an exception (in C++,
+ * Objective-C, or with SEH) out of this callback.
+ *
+ * Bear in mind when using longjmp that local non-volatile variables that
+ * have changed are undefined when setjmp() returns, so you can't put the
+ * writer on the stack in the same activation frame as the setjmp without
+ * declaring it volatile.
+ *
+ * You must still eventually destroy the writer. It is not destroyed
+ * automatically when an error is flagged. It is safe to destroy the
+ * writer within this error callback, but you will either need to perform
+ * a non-local jump, or store something in your context to identify
+ * that the writer is destroyed since any future accesses to it cause
+ * undefined behavior.
+ */
+typedef void (*mpack_writer_error_t)(mpack_writer_t* writer, mpack_error_t error);
+
+/**
+ * A teardown function to be called when the writer is destroyed.
+ */
+typedef void (*mpack_writer_teardown_t)(mpack_writer_t* writer);
+
+/* Hide internals from documentation */
+/** @cond */
+
+#if MPACK_BUILDER
+/**
+ * Build buffer pages form a linked list.
+ *
+ * They don't always fill up. If there is not enough space within them to write
+ * a tag or place an mpack_build_t, a new page is allocated. For this reason
+ * they store the number of used bytes.
+ */
+typedef struct mpack_builder_page_t {
+    struct mpack_builder_page_t* next; // following page in the linked list of pages
+    size_t bytes_used;                 // number of bytes used in this page (pages don't always fill up)
+} mpack_builder_page_t;
+
+/**
+ * Builds form a linked list of mpack_build_t, interleaved with their encoded
+ * contents directly in the paged builder buffer.
+ */
+typedef struct mpack_build_t {
+    // link to the enclosing build; this is the only pointer stored per build
+    struct mpack_build_t* parent;
+    // NOTE(review): no forward link is stored; the next build is presumably located via bytes below -- confirm
+
+    size_t bytes; // number of bytes between this build and the next one
+    uint32_t count; // number of elements (or key/value pairs) in this map/array
+    mpack_type_t type; // NOTE(review): presumably map or array, given the count comment above -- confirm
+
+    // depth of nested non-build compound elements within this
+    // build.
+    uint32_t nested_compound_elements;
+
+    // indicates that a value still needs to be written for an already
+    // written key. count is not incremented until both key and value are
+    // written.
+    bool key_needs_value;
+} mpack_build_t;
+
+/**
+ * The builder state. This is stored within mpack_writer_t.
+ */
+typedef struct mpack_builder_t {
+    mpack_build_t* current_build; // build which is accumulating elements
+    mpack_build_t* latest_build; // build which is accumulating bytes
+    mpack_builder_page_t* current_page; // NOTE(review): presumably the page currently being filled -- confirm
+    mpack_builder_page_t* pages; // NOTE(review): presumably the head of the page list -- confirm
+    char* stash_buffer; // NOTE(review): stash_* look like saved copies of the writer's buffer/position/end -- confirm
+    char* stash_position;
+    char* stash_end;
+    #if MPACK_BUILDER_INTERNAL_STORAGE
+    char internal[MPACK_BUILDER_INTERNAL_STORAGE_SIZE]; // storage embedded in the builder itself
+    #endif
+} mpack_builder_t;
+#endif
+
+struct mpack_writer_t {
+    #if MPACK_COMPATIBILITY
+    mpack_version_t version;          /* Version of the MessagePack spec to write */
+    #endif
+    mpack_writer_flush_t flush;       /* Function to write bytes to the output stream */
+    mpack_writer_error_t error_fn;    /* Function to call on error */
+    mpack_writer_teardown_t teardown; /* Function to tear down the context on destroy */
+    void* context;                    /* Context for writer callbacks */
+
+    char* buffer;         /* Byte buffer */
+    char* position;       /* Current position within the buffer */
+    char* end;            /* The end of the buffer */
+    mpack_error_t error;  /* Error state */
+
+    #if MPACK_WRITE_TRACKING
+    mpack_track_t track; /* Stack of map/array/str/bin/ext writes */
+    #endif
+
+    #ifdef MPACK_MALLOC
+    /* Reserved. You can use this space to allocate a custom
+     * context in order to reduce heap allocations. */
+    void* reserved[2];
+    #endif
+
+    #if MPACK_BUILDER
+    mpack_builder_t builder; /* Builder state (only present with MPACK_BUILDER) */
+    #endif
+};
+
+
+#if MPACK_WRITE_TRACKING // tracking enabled: the hooks are only declared here
+void mpack_writer_track_push(mpack_writer_t* writer, mpack_type_t type, uint32_t count);
+void mpack_writer_track_push_builder(mpack_writer_t* writer, mpack_type_t type);
+void mpack_writer_track_pop(mpack_writer_t* writer, mpack_type_t type);
+void mpack_writer_track_pop_builder(mpack_writer_t* writer, mpack_type_t type);
+void mpack_writer_track_bytes(mpack_writer_t* writer, size_t count);
+#else // tracking disabled: every hook is an inline function that only marks its arguments unused
+MPACK_INLINE void mpack_writer_track_push(mpack_writer_t* writer, mpack_type_t type, uint32_t count) {
+    MPACK_UNUSED(writer);
+    MPACK_UNUSED(type);
+    MPACK_UNUSED(count);
+}
+MPACK_INLINE void mpack_writer_track_push_builder(mpack_writer_t* writer, mpack_type_t type) {
+    MPACK_UNUSED(writer);
+    MPACK_UNUSED(type);
+}
+MPACK_INLINE void mpack_writer_track_pop(mpack_writer_t* writer, mpack_type_t type) {
+    MPACK_UNUSED(writer);
+    MPACK_UNUSED(type);
+}
+MPACK_INLINE void mpack_writer_track_pop_builder(mpack_writer_t* writer, mpack_type_t type) {
+    MPACK_UNUSED(writer);
+    MPACK_UNUSED(type);
+}
+MPACK_INLINE void mpack_writer_track_bytes(mpack_writer_t* writer, size_t count) {
+    MPACK_UNUSED(writer);
+    MPACK_UNUSED(count);
+}
+#endif
+
+/** @endcond */
+
+/**
+ * @name Lifecycle Functions
+ * @{
+ */
+
+/**
+ * Initializes an MPack writer with the given buffer. The writer
+ * does not assume ownership of the buffer.
+ *
+ * Trying to write past the end of the buffer will result in mpack_error_too_big
+ * unless a flush function is set with mpack_writer_set_flush(). To use the data
+ * without flushing, call mpack_writer_buffer_used() to determine the number of
+ * bytes written.
+ *
+ * @param writer The MPack writer.
+ * @param buffer The buffer into which to write MessagePack data.
+ * @param size The size of the buffer.
+ */
+void mpack_writer_init(mpack_writer_t* writer, char* buffer, size_t size);
+
+#ifdef MPACK_MALLOC
+/**
+ * Initializes an MPack writer using a growable buffer.
+ *
+ * The data is placed in the given data pointer if and when the writer
+ * is destroyed without error. The data pointer is NULL during writing,
+ * and will remain NULL if an error occurs.
+ *
+ * The allocated data must be freed with MPACK_FREE() (or simply free()
+ * if MPack's allocator hasn't been customized.)
+ *
+ * @throws mpack_error_memory if the buffer fails to grow when
+ * flushing.
+ *
+ * @param writer The MPack writer.
+ * @param data Where to place the allocated data.
+ * @param size Where to write the size of the data.
+ */
+void mpack_writer_init_growable(mpack_writer_t* writer, char** data, size_t* size);
+#endif
+
+/**
+ * Initializes an MPack writer directly into an error state. Use this if you
+ * are writing a wrapper to mpack_writer_init() which can fail its setup.
+ */
+void mpack_writer_init_error(mpack_writer_t* writer, mpack_error_t error);
+
+#if MPACK_STDIO
+/**
+ * Initializes an MPack writer that writes to a file.
+ *
+ * @throws mpack_error_memory if allocation fails
+ * @throws mpack_error_io if the file cannot be opened
+ */
+void mpack_writer_init_filename(mpack_writer_t* writer, const char* filename);
+
+/**
+ * Deprecated.
+ *
+ * \deprecated Renamed to mpack_writer_init_filename().
+ */
+MPACK_INLINE void mpack_writer_init_file(mpack_writer_t* writer, const char* filename) {
+    mpack_writer_init_filename(writer, filename); // both arguments are forwarded unchanged
+}
+
+/**
+ * Initializes an MPack writer that writes to a libc FILE. This can be used to
+ * write to stdout or stderr, or to a file opened separately.
+ *
+ * @param writer The MPack writer.
+ * @param stdfile The FILE.
+ * @param close_when_done If true, fclose() will be called on the FILE when it
+ *         is no longer needed. If false, the file will not be flushed or
+ *         closed when writing is done.
+ *
+ * @note The writer is buffered. If you want to write other data to the FILE in
+ *         between messages, you must flush it first.
+ *
+ * @see mpack_writer_flush_message
+ */
+void mpack_writer_init_stdfile(mpack_writer_t* writer, FILE* stdfile, bool close_when_done);
+#endif
+
+/** @cond */
+
+#define mpack_writer_init_stack_line_ex(line, writer) \
+    char mpack_buf_##line[MPACK_STACK_SIZE]; \
+    mpack_writer_init(writer, mpack_buf_##line, sizeof(mpack_buf_##line))
+
+#define mpack_writer_init_stack_line(line, writer) \
+    mpack_writer_init_stack_line_ex(line, writer)
+
+/*
+ * Initializes an MPack writer using stack space as a buffer. A flush function
+ * should be added to the writer to flush the buffer.
+ *
+ * This is currently undocumented since it's not entirely useful on its own.
+ */
+
+#define mpack_writer_init_stack(writer) \
+    mpack_writer_init_stack_line(__LINE__, (writer))
+
+/** @endcond */
+
+/**
+ * Cleans up the MPack writer, flushing and closing the underlying stream,
+ * if any. Returns the final error state of the writer.
+ *
+ * No flushing is performed if the writer is in an error state. The attached
+ * teardown function is called whether or not the writer is in an error state.
+ *
+ * This will assert in tracking mode if the writer is not in an error
+ * state and has any unclosed compound types. If you want to cancel
+ * writing in the middle of a document, you need to flag an error on
+ * the writer before destroying it (such as mpack_error_data).
+ *
+ * Note that a writer may raise an error and call your error handler during
+ * the final flush. It is safe to longjmp or throw out of this error handler,
+ * but if you do, the writer will not be destroyed, and the teardown function
+ * will not be called. You can still get the writer's error state, and you
+ * must call @ref mpack_writer_destroy() again. (The second call is guaranteed
+ * not to call your error handler again since the writer is already in an error
+ * state.)
+ *
+ * @see mpack_writer_set_error_handler
+ * @see mpack_writer_set_flush
+ * @see mpack_writer_set_teardown
+ * @see mpack_writer_flag_error
+ * @see mpack_error_data
+ */
+mpack_error_t mpack_writer_destroy(mpack_writer_t* writer);
+
+/**
+ * @}
+ */
+
+/**
+ * @name Configuration
+ * @{
+ */
+
+#if MPACK_COMPATIBILITY
+/**
+ * Sets the version of the MessagePack spec that will be generated.
+ *
+ * This can be used to interface with older libraries that do not support
+ * the newest MessagePack features (such as the @c str8 type.)
+ *
+ * @note This requires @ref MPACK_COMPATIBILITY.
+ */
+MPACK_INLINE void mpack_writer_set_version(mpack_writer_t* writer, mpack_version_t version) {
+    writer->version = version; // only records the value; nothing is written here
+}
+#endif
+
+/**
+ * Sets the custom pointer to pass to the writer callbacks, such as flush
+ * or teardown.
+ *
+ * @param writer The MPack writer.
+ * @param context User data to pass to the writer callbacks.
+ *
+ * @see mpack_writer_context()
+ */
+MPACK_INLINE void mpack_writer_set_context(mpack_writer_t* writer, void* context) {
+    writer->context = context; // only stores the pointer; no callback is invoked here
+}
+
+/**
+ * Returns the custom context for writer callbacks.
+ *
+ * @see mpack_writer_set_context
+ * @see mpack_writer_set_flush
+ */
+MPACK_INLINE void* mpack_writer_context(mpack_writer_t* writer) {
+    return writer->context; // the pointer last stored in the writer's context field
+}
+
+/**
+ * Sets the flush function to write out the data when the buffer is full.
+ *
+ * If no flush function is used, trying to write past the end of the
+ * buffer will result in mpack_error_too_big.
+ *
+ * This should normally be used with mpack_writer_set_context() to register
+ * a custom pointer to pass to the flush function.
+ *
+ * @param writer The MPack writer.
+ * @param flush The function to write out data from the buffer.
+ *
+ * @see mpack_writer_context()
+ */
+void mpack_writer_set_flush(mpack_writer_t* writer, mpack_writer_flush_t flush);
+
+/**
+ * Sets the error function to call when an error is flagged on the writer.
+ *
+ * This should normally be used with mpack_writer_set_context() to register
+ * a custom pointer to pass to the error function.
+ *
+ * See the definition of mpack_writer_error_t for more information about
+ * what you can do from an error callback.
+ *
+ * @see mpack_writer_error_t
+ * @param writer The MPack writer.
+ * @param error_fn The function to call when an error is flagged on the writer.
+ */
+MPACK_INLINE void mpack_writer_set_error_handler(mpack_writer_t* writer, mpack_writer_error_t error_fn) {
+    writer->error_fn = error_fn; // only stores the callback; it is not invoked here
+}
+
+/**
+ * Sets the teardown function to call when the writer is destroyed.
+ *
+ * This should normally be used with mpack_writer_set_context() to register
+ * a custom pointer to pass to the teardown function.
+ *
+ * @param writer The MPack writer.
+ * @param teardown The function to call when the writer is destroyed.
+ */
+MPACK_INLINE void mpack_writer_set_teardown(mpack_writer_t* writer, mpack_writer_teardown_t teardown) {
+    writer->teardown = teardown; // only stores the callback; it is not invoked here
+}
+
+/**
+ * @}
+ */
+
+/**
+ * @name Core Writer Functions
+ * @{
+ */
+
+/**
+ * Flushes any buffered data to the underlying stream.
+ *
+ * If the writer is connected to a socket and you are keeping it open,
+ * you will want to call this after writing a message (or set of
+ * messages) so that the data is actually sent.
+ *
+ * It is not necessary to call this if you are not keeping the writer
+ * open afterwards. You can just call `mpack_writer_destroy()` and it
+ * will flush before cleaning up.
+ *
+ * This will assert if no flush function is assigned to the writer.
+ *
+ * If write tracking is enabled, this will break and flag @ref
+ * mpack_error_bug if the writer has any open compound types, ensuring
+ * that no compound types are still open. This prevents a "missing
+ * finish" bug from causing a never-ending message.
+ */
+void mpack_writer_flush_message(mpack_writer_t* writer);
+
+/**
+ * Returns the number of bytes currently stored in the buffer. This
+ * may be less than the total number of bytes written if bytes have
+ * been flushed to an underlying stream.
+ */
+MPACK_INLINE size_t mpack_writer_buffer_used(mpack_writer_t* writer) {
+    return (size_t)(writer->position - writer->buffer); // distance from the buffer start to the current position
+}
+
+/**
+ * Returns the amount of space left in the buffer. This may be reset
+ * after a write if bytes are flushed to an underlying stream.
+ */
+MPACK_INLINE size_t mpack_writer_buffer_left(mpack_writer_t* writer) {
+    return (size_t)(writer->end - writer->position); // distance from the current position to the buffer end
+}
+
+/**
+ * Returns the (current) size of the buffer. This may change after a write if
+ * the flush callback changes the buffer.
+ */
+MPACK_INLINE size_t mpack_writer_buffer_size(mpack_writer_t* writer) {
+    return (size_t)(writer->end - writer->buffer);
+}
+
+/**
+ * Places the writer in the given error state, calling the error callback if one
+ * is set.
+ *
+ * This allows you to externally flag errors, for example if you are validating
+ * data as you write it, or if you want to cancel writing in the middle of a
+ * document. (The writer will assert if you try to destroy it without error and
+ * with unclosed compound types. In this case you should flag mpack_error_data
+ * before destroying it.)
+ *
+ * If the writer is already in an error state, this call is ignored and no
+ * error callback is called.
+ *
+ * @see mpack_writer_destroy
+ * @see mpack_error_data
+ */
+void mpack_writer_flag_error(mpack_writer_t* writer, mpack_error_t error);
+
+/**
+ * Queries the error state of the MPack writer.
+ *
+ * If a writer is in an error state, you should discard all data since the
+ * last time the error flag was checked. The error flag cannot be cleared.
+ */
+MPACK_INLINE mpack_error_t mpack_writer_error(mpack_writer_t* writer) {
+    return writer->error;
+}
+
+/**
+ * Writes a MessagePack object header (an MPack Tag.)
+ *
+ * If the value is a map, array, string, binary or extension type, the
+ * containing elements or bytes must be written separately and the
+ * appropriate finish function must be called (as though one of the
+ * mpack_start_*() functions was called.)
+ *
+ * @see mpack_write_bytes()
+ * @see mpack_finish_map()
+ * @see mpack_finish_array()
+ * @see mpack_finish_str()
+ * @see mpack_finish_bin()
+ * @see mpack_finish_ext()
+ * @see mpack_finish_type()
+ */
+void mpack_write_tag(mpack_writer_t* writer, mpack_tag_t tag);
+
+/**
+ * @}
+ */
+
+/**
+ * @name Integers
+ * @{
+ */
+
+/** Writes an 8-bit integer in the most efficient packing available. */
+void mpack_write_i8(mpack_writer_t* writer, int8_t value);
+
+/** Writes a 16-bit integer in the most efficient packing available. */
+void mpack_write_i16(mpack_writer_t* writer, int16_t value);
+
+/** Writes a 32-bit integer in the most efficient packing available. */
+void mpack_write_i32(mpack_writer_t* writer, int32_t value);
+
+/** Writes a 64-bit integer in the most efficient packing available. */
+void mpack_write_i64(mpack_writer_t* writer, int64_t value);
+
+/** Writes an integer in the most efficient packing available. */
+MPACK_INLINE void mpack_write_int(mpack_writer_t* writer, int64_t value) {
+    mpack_write_i64(writer, value);
+}
+
+/** Writes an 8-bit unsigned integer in the most efficient packing available. */
+void mpack_write_u8(mpack_writer_t* writer, uint8_t value);
+
+/** Writes a 16-bit unsigned integer in the most efficient packing available. */
+void mpack_write_u16(mpack_writer_t* writer, uint16_t value);
+
+/** Writes a 32-bit unsigned integer in the most efficient packing available. */
+void mpack_write_u32(mpack_writer_t* writer, uint32_t value);
+
+/** Writes a 64-bit unsigned integer in the most efficient packing available. */
+void mpack_write_u64(mpack_writer_t* writer, uint64_t value);
+
+/** Writes an unsigned integer in the most efficient packing available. */
+MPACK_INLINE void mpack_write_uint(mpack_writer_t* writer, uint64_t value) {
+    mpack_write_u64(writer, value);
+}
+
+/**
+ * @}
+ */
+
+/**
+ * @name Other Basic Types
+ * @{
+ */
+
+#if MPACK_FLOAT
+/** Writes a float. */
+void mpack_write_float(mpack_writer_t* writer, float value);
+#else
+/** Writes a float from a raw uint32_t. */
+void mpack_write_raw_float(mpack_writer_t* writer, uint32_t raw_value);
+#endif
+
+#if MPACK_DOUBLE
+/** Writes a double. */
+void mpack_write_double(mpack_writer_t* writer, double value);
+#else
+/** Writes a double from a raw uint64_t. */
+void mpack_write_raw_double(mpack_writer_t* writer, uint64_t raw_value);
+#endif
+
+/** Writes a boolean. */
+void mpack_write_bool(mpack_writer_t* writer, bool value);
+
+/** Writes a boolean with value true. */
+void mpack_write_true(mpack_writer_t* writer);
+
+/** Writes a boolean with value false. */
+void mpack_write_false(mpack_writer_t* writer);
+
+/** Writes a nil. */
+void mpack_write_nil(mpack_writer_t* writer);
+
+/** Writes a pre-encoded MessagePack object. */
+void mpack_write_object_bytes(mpack_writer_t* writer, const char* data, size_t bytes);
+
+#if MPACK_EXTENSIONS
+/**
+ * Writes a timestamp.
+ *
+ * @note This requires @ref MPACK_EXTENSIONS.
+ *
+ * @param writer The writer
+ * @param seconds The (signed) number of seconds since 1970-01-01T00:00:00Z.
+ * @param nanoseconds The additional number of nanoseconds from 0 to 999,999,999 inclusive.
+ */
+void mpack_write_timestamp(mpack_writer_t* writer, int64_t seconds, uint32_t nanoseconds);
+
+/**
+ * Writes a timestamp with the given number of seconds (and zero nanoseconds).
+ *
+ * @note This requires @ref MPACK_EXTENSIONS.
+ *
+ * @param writer The writer
+ * @param seconds The (signed) number of seconds since 1970-01-01T00:00:00Z.
+ */
+MPACK_INLINE void mpack_write_timestamp_seconds(mpack_writer_t* writer, int64_t seconds) {
+    mpack_write_timestamp(writer, seconds, 0);
+}
+
+/**
+ * Writes a timestamp.
+ *
+ * @note This requires @ref MPACK_EXTENSIONS.
+ */
+MPACK_INLINE void mpack_write_timestamp_struct(mpack_writer_t* writer, mpack_timestamp_t timestamp) {
+    mpack_write_timestamp(writer, timestamp.seconds, timestamp.nanoseconds);
+}
+#endif
+
+/**
+ * @}
+ */
+
+/**
+ * @name Map and Array Functions
+ * @{
+ */
+
+/**
+ * Opens an array.
+ *
+ * `count` elements must follow, and mpack_finish_array() must be called
+ * when done.
+ *
+ * If you do not know the number of elements to be written ahead of time, call
+ * mpack_build_array() instead.
+ *
+ * @see mpack_finish_array()
+ * @see mpack_build_array() to count the number of elements automatically
+ */
+void mpack_start_array(mpack_writer_t* writer, uint32_t count);
+
+/**
+ * Opens a map.
+ *
+ * `count * 2` elements must follow, and mpack_finish_map() must be called
+ * when done.
+ *
+ * If you do not know the number of elements to be written ahead of time, call
+ * mpack_build_map() instead.
+ *
+ * Remember that while map elements in MessagePack are implicitly ordered,
+ * they are not ordered in JSON. If you need elements to be read back
+ * in the order they are written, consider using an array instead.
+ *
+ * @see mpack_finish_map()
+ * @see mpack_build_map() to count the number of key/value pairs automatically
+ */
+void mpack_start_map(mpack_writer_t* writer, uint32_t count);
+
+MPACK_INLINE void mpack_builder_compound_push(mpack_writer_t* writer) {
+    MPACK_UNUSED(writer);
+
+    #if MPACK_BUILDER
+    mpack_build_t* build = writer->builder.current_build;
+    if (build != NULL) {
+        ++build->nested_compound_elements;
+    }
+    #endif
+}
+
+MPACK_INLINE void mpack_builder_compound_pop(mpack_writer_t* writer) {
+    MPACK_UNUSED(writer);
+
+    #if MPACK_BUILDER
+    mpack_build_t* build = writer->builder.current_build;
+    if (build != NULL) {
+        mpack_assert(build->nested_compound_elements > 0);
+        --build->nested_compound_elements;
+    }
+    #endif
+}
+
+/**
+ * Finishes writing an array.
+ *
+ * This should be called only after a corresponding call to mpack_start_array()
+ * and after the array contents are written.
+ *
+ * In debug mode (or if MPACK_WRITE_TRACKING is not 0), this will track writes
+ * to ensure that the correct number of elements are written.
+ *
+ * @see mpack_start_array()
+ */
+MPACK_INLINE void mpack_finish_array(mpack_writer_t* writer) {
+    mpack_writer_track_pop(writer, mpack_type_array);
+    mpack_builder_compound_pop(writer);
+}
+
+/**
+ * Finishes writing a map.
+ *
+ * This should be called only after a corresponding call to mpack_start_map()
+ * and after the map contents are written.
+ *
+ * In debug mode (or if MPACK_WRITE_TRACKING is not 0), this will track writes
+ * to ensure that the correct number of elements are written.
+ *
+ * @see mpack_start_map()
+ */
+MPACK_INLINE void mpack_finish_map(mpack_writer_t* writer) {
+    mpack_writer_track_pop(writer, mpack_type_map);
+    mpack_builder_compound_pop(writer);
+}
+
+/**
+ * Starts building an array.
+ *
+ * Elements must follow, and mpack_complete_array() must be called when done. The
+ * number of elements is determined automatically.
+ *
+ * If you know ahead of time the number of elements in the array, it is more
+ * efficient to call mpack_start_array() instead, even if you are already
+ * within another open build.
+ *
+ * Builder containers can be nested within normal (known size) containers and
+ * vice versa. You can call mpack_build_array(), then mpack_start_array()
+ * inside it, then mpack_build_array() inside that, and so forth.
+ *
+ * @see mpack_complete_array() to complete this array
+ * @see mpack_start_array() if you already know the size of the array
+ * @see mpack_build_map() for implementation details
+ */
+void mpack_build_array(struct mpack_writer_t* writer);
+
+/**
+ * Starts building a map.
+ *
+ * An even number of elements must follow, and mpack_complete_map() must be
+ * called when done. The number of elements is determined automatically.
+ *
+ * If you know ahead of time the number of elements in the map, it is more
+ * efficient to call mpack_start_map() instead, even if you are already within
+ * another open build.
+ *
+ * Builder containers can be nested within normal (known size) containers and
+ * vice versa. You can call mpack_build_map(), then mpack_start_map() inside
+ * it, then mpack_build_map() inside that, and so forth.
+ *
+ * A writer in build mode diverts writes to a builder buffer that allocates as
+ * needed. Once the last map or array being built is completed, the deferred
+ * message is composed with computed array and map sizes into the writer.
+ * Builder maps and arrays are encoded exactly the same as ordinary maps and
+ * arrays in the final message.
+ *
+ * This indirect encoding is costly, as it incurs at least an extra copy of all
+ * data written within a builder (but not additional copies for nested
+ * builders.) Expect a speed penalty of half or more.
+ *
+ * A good strategy is to use this during early development when your messages
+ * are constantly changing, and then closer to release when your message
+ * formats have stabilized, replace all your build calls with start calls with
+ * pre-computed sizes. Or don't, if you find the builder has little impact on
+ * performance, because even with builders MPack is extremely fast.
+ *
+ * @note When an array or map starts being built, nothing will be flushed
+ *       until it is completed. If you are building a large message that
+ *       does not fit in the output stream, you won't get an error about it
+ *       until everything is written.
+ *
+ * @see mpack_complete_map() to complete this map
+ * @see mpack_start_map() if you already know the size of the map
+ */
+void mpack_build_map(struct mpack_writer_t* writer);
+
+/**
+ * Completes an array being built.
+ *
+ * @see mpack_build_array()
+ */
+void mpack_complete_array(struct mpack_writer_t* writer);
+
+/**
+ * Completes a map being built.
+ *
+ * @see mpack_build_map()
+ */
+void mpack_complete_map(struct mpack_writer_t* writer);
+
+/**
+ * @}
+ */
+
+/**
+ * @name Data Helpers
+ * @{
+ */
+
+/**
+ * Writes a string.
+ *
+ * To stream a string in chunks, use mpack_start_str() instead.
+ *
+ * MPack does not care about the underlying encoding, but UTF-8 is highly
+ * recommended, especially for compatibility with JSON. You should consider
+ * calling mpack_write_utf8() instead, especially if you will be reading
+ * it back as UTF-8.
+ *
+ * You should not call mpack_finish_str() after calling this; this
+ * performs both start and finish.
+ */
+void mpack_write_str(mpack_writer_t* writer, const char* str, uint32_t length);
+
+/**
+ * Writes a string, ensuring that it is valid UTF-8.
+ *
+ * This does not accept any UTF-8 variant such as Modified UTF-8, CESU-8 or
+ * WTF-8. Only pure UTF-8 is allowed.
+ *
+ * You should not call mpack_finish_str() after calling this; this
+ * performs both start and finish.
+ *
+ * @throws mpack_error_invalid if the string is not valid UTF-8
+ */
+void mpack_write_utf8(mpack_writer_t* writer, const char* str, uint32_t length);
+
+/**
+ * Writes a null-terminated string. (The null-terminator is not written.)
+ *
+ * MPack does not care about the underlying encoding, but UTF-8 is highly
+ * recommended, especially for compatibility with JSON. You should consider
+ * calling mpack_write_utf8_cstr() instead, especially if you will be reading
+ * it back as UTF-8.
+ *
+ * You should not call mpack_finish_str() after calling this; this
+ * performs both start and finish.
+ */
+void mpack_write_cstr(mpack_writer_t* writer, const char* cstr);
+
+/**
+ * Writes a null-terminated string, or a nil node if the given cstr pointer
+ * is NULL. (The null-terminator is not written.)
+ *
+ * MPack does not care about the underlying encoding, but UTF-8 is highly
+ * recommended, especially for compatibility with JSON. You should consider
+ * calling mpack_write_utf8_cstr_or_nil() instead, especially if you will
+ * be reading it back as UTF-8.
+ *
+ * You should not call mpack_finish_str() after calling this; this
+ * performs both start and finish.
+ */
+void mpack_write_cstr_or_nil(mpack_writer_t* writer, const char* cstr);
+
+/**
+ * Writes a null-terminated string, ensuring that it is valid UTF-8. (The
+ * null-terminator is not written.)
+ *
+ * This does not accept any UTF-8 variant such as Modified UTF-8, CESU-8 or
+ * WTF-8. Only pure UTF-8 is allowed.
+ *
+ * You should not call mpack_finish_str() after calling this; this
+ * performs both start and finish.
+ *
+ * @throws mpack_error_invalid if the string is not valid UTF-8
+ */
+void mpack_write_utf8_cstr(mpack_writer_t* writer, const char* cstr);
+
+/**
+ * Writes a null-terminated string ensuring that it is valid UTF-8, or
+ * writes nil if the given cstr pointer is NULL. (The null-terminator
+ * is not written.)
+ *
+ * This does not accept any UTF-8 variant such as Modified UTF-8, CESU-8 or
+ * WTF-8. Only pure UTF-8 is allowed.
+ *
+ * You should not call mpack_finish_str() after calling this; this
+ * performs both start and finish.
+ *
+ * @throws mpack_error_invalid if the string is not valid UTF-8
+ */
+void mpack_write_utf8_cstr_or_nil(mpack_writer_t* writer, const char* cstr);
+
+/**
+ * Writes a binary blob.
+ *
+ * To stream a binary blob in chunks, use mpack_start_bin() instead.
+ *
+ * You should not call mpack_finish_bin() after calling this; this
+ * performs both start and finish.
+ */
+void mpack_write_bin(mpack_writer_t* writer, const char* data, uint32_t count);
+
+#if MPACK_EXTENSIONS
+/**
+ * Writes an extension type.
+ *
+ * To stream an extension blob in chunks, use mpack_start_ext() instead.
+ *
+ * Extension types [0, 127] are available for application-specific types. Extension
+ * types [-128, -1] are reserved for future extensions of MessagePack.
+ *
+ * You should not call mpack_finish_ext() after calling this; this
+ * performs both start and finish.
+ *
+ * @note This requires @ref MPACK_EXTENSIONS.
+ */
+void mpack_write_ext(mpack_writer_t* writer, int8_t exttype, const char* data, uint32_t count);
+#endif
+
+/**
+ * @}
+ */
+
+/**
+ * @name Chunked Data Functions
+ * @{
+ */
+
+/**
+ * Opens a string. `count` bytes should be written with calls to
+ * mpack_write_bytes(), and mpack_finish_str() should be called
+ * when done.
+ *
+ * To write an entire string at once, use mpack_write_str() or
+ * mpack_write_cstr() instead.
+ *
+ * MPack does not care about the underlying encoding, but UTF-8 is highly
+ * recommended, especially for compatibility with JSON.
+ */
+void mpack_start_str(mpack_writer_t* writer, uint32_t count);
+
+/**
+ * Opens a binary blob. `count` bytes should be written with calls to
+ * mpack_write_bytes(), and mpack_finish_bin() should be called
+ * when done.
+ */
+void mpack_start_bin(mpack_writer_t* writer, uint32_t count);
+
+#if MPACK_EXTENSIONS
+/**
+ * Opens an extension type. `count` bytes should be written with calls
+ * to mpack_write_bytes(), and mpack_finish_ext() should be called
+ * when done.
+ *
+ * Extension types [0, 127] are available for application-specific types. Extension
+ * types [-128, -1] are reserved for future extensions of MessagePack.
+ *
+ * @note This requires @ref MPACK_EXTENSIONS.
+ */
+void mpack_start_ext(mpack_writer_t* writer, int8_t exttype, uint32_t count);
+#endif
+
+/**
+ * Writes a portion of bytes for a string, binary blob or extension type which
+ * was opened by mpack_write_tag() or one of the mpack_start_*() functions.
+ *
+ * This can be called multiple times to write the data in chunks, as long as
+ * the total amount of bytes written matches the count given when the compound
+ * type was started.
+ *
+ * The corresponding mpack_finish_*() function must be called when done.
+ *
+ * To write an entire string, binary blob or extension type at
+ * once, use one of the mpack_write_*() functions instead.
+ *
+ * @see mpack_write_tag()
+ * @see mpack_start_str()
+ * @see mpack_start_bin()
+ * @see mpack_start_ext()
+ * @see mpack_finish_str()
+ * @see mpack_finish_bin()
+ * @see mpack_finish_ext()
+ * @see mpack_finish_type()
+ */
+void mpack_write_bytes(mpack_writer_t* writer, const char* data, size_t count);
+
+/**
+ * Finishes writing a string.
+ *
+ * This should be called only after a corresponding call to mpack_start_str()
+ * and after the string bytes are written with mpack_write_bytes().
+ *
+ * This will track writes to ensure that the correct number of bytes are written.
+ *
+ * @see mpack_start_str()
+ * @see mpack_write_bytes()
+ */
+MPACK_INLINE void mpack_finish_str(mpack_writer_t* writer) {
+    mpack_writer_track_pop(writer, mpack_type_str);
+}
+
+/**
+ * Finishes writing a binary blob.
+ *
+ * This should be called only after a corresponding call to mpack_start_bin()
+ * and after the binary bytes are written with mpack_write_bytes().
+ *
+ * This will track writes to ensure that the correct number of bytes are written.
+ *
+ * @see mpack_start_bin()
+ * @see mpack_write_bytes()
+ */
+MPACK_INLINE void mpack_finish_bin(mpack_writer_t* writer) {
+    mpack_writer_track_pop(writer, mpack_type_bin);
+}
+
+#if MPACK_EXTENSIONS
+/**
+ * Finishes writing an extended type binary data blob.
+ *
+ * This should be called only after a corresponding call to mpack_start_ext()
+ * and after the binary bytes are written with mpack_write_bytes().
+ *
+ * This will track writes to ensure that the correct number of bytes are written.
+ *
+ * @note This requires @ref MPACK_EXTENSIONS.
+ *
+ * @see mpack_start_ext()
+ * @see mpack_write_bytes()
+ */
+MPACK_INLINE void mpack_finish_ext(mpack_writer_t* writer) {
+    mpack_writer_track_pop(writer, mpack_type_ext);
+}
+#endif
+
+/**
+ * Finishes writing the given compound type.
+ *
+ * This will track writes to ensure that the correct number of elements
+ * or bytes are written.
+ *
+ * This can be called with the appropriate type instead of the corresponding
+ * mpack_finish_*() function if you want to finish a dynamic type.
+ */
+MPACK_INLINE void mpack_finish_type(mpack_writer_t* writer, mpack_type_t type) {
+    mpack_writer_track_pop(writer, type);
+}
+
+/**
+ * @}
+ */
+
+#if MPACK_HAS_GENERIC && !defined(__cplusplus)
+
+/**
+ * @name Type-Generic Writers
+ * @{
+ */
+
+/**
+ * @def mpack_write(writer, value)
+ *
+ * Type-generic writer for primitive types.
+ *
+ * The compiler will dispatch to an appropriate write function based
+ * on the type of the @a value parameter.
+ *
+ * @note This requires C11 `_Generic` support. (A set of inline overloads
+ * are used in C++ to provide the same functionality.)
+ *
+ * @warning In C11, the identifiers `true`, `false` and `NULL` are
+ * all of type `int`, not `bool` or `void*`! They will emit unexpected
+ * types when passed uncast, so be careful when using them.
+ */
+#if MPACK_FLOAT
+    #define MPACK_WRITE_GENERIC_FLOAT float: mpack_write_float,
+#else
+    #define MPACK_WRITE_GENERIC_FLOAT /*nothing*/
+#endif
+#if MPACK_DOUBLE
+    #define MPACK_WRITE_GENERIC_DOUBLE double: mpack_write_double,
+#else
+    #define MPACK_WRITE_GENERIC_DOUBLE /*nothing*/
+#endif
+#define mpack_write(writer, value) \
+    _Generic(((void)0, value),                      \
+              int8_t: mpack_write_i8,               \
+             int16_t: mpack_write_i16,              \
+             int32_t: mpack_write_i32,              \
+             int64_t: mpack_write_i64,              \
+             uint8_t: mpack_write_u8,               \
+            uint16_t: mpack_write_u16,              \
+            uint32_t: mpack_write_u32,              \
+            uint64_t: mpack_write_u64,              \
+                bool: mpack_write_bool,             \
+            MPACK_WRITE_GENERIC_FLOAT               \
+            MPACK_WRITE_GENERIC_DOUBLE              \
+              char *: mpack_write_cstr_or_nil,      \
+        const char *: mpack_write_cstr_or_nil       \
+    )(writer, value)
+
+/**
+ * @def mpack_write_kv(writer, key, value)
+ *
+ * Type-generic writer for key-value pairs of null-terminated string
+ * keys and primitive values.
+ *
+ * @warning @a writer may be evaluated multiple times.
+ *
+ * @warning In C11, the identifiers `true`, `false` and `NULL` are
+ * all of type `int`, not `bool` or `void*`! They will emit unexpected
+ * types when passed uncast, so be careful when using them.
+ *
+ * @param writer The writer.
+ * @param key A null-terminated C string.
+ * @param value A primitive type supported by mpack_write().
+ */
+#define mpack_write_kv(writer, key, value) do {     \
+    mpack_write_cstr(writer, key);                  \
+    mpack_write(writer, value);                     \
+} while (0)
+
+/**
+ * @}
+ */
+
+#endif // MPACK_HAS_GENERIC && !defined(__cplusplus)
+
+// The rest of this file contains C++ overloads, so we end extern "C" here.
+MPACK_EXTERN_C_END
+
+#if defined(__cplusplus) || defined(MPACK_DOXYGEN)
+
+/**
+ * @name C++ write overloads
+ * @{
+ */
+
+/*
+ * C++ generic writers for primitive values
+ */
+
+#ifdef MPACK_DOXYGEN
+#undef mpack_write
+#undef mpack_write_kv
+#endif
+
+MPACK_INLINE void mpack_write(mpack_writer_t* writer, int8_t value) {
+    mpack_write_i8(writer, value);
+}
+
+MPACK_INLINE void mpack_write(mpack_writer_t* writer, int16_t value) {
+    mpack_write_i16(writer, value);
+}
+
+MPACK_INLINE void mpack_write(mpack_writer_t* writer, int32_t value) {
+    mpack_write_i32(writer, value);
+}
+
+MPACK_INLINE void mpack_write(mpack_writer_t* writer, int64_t value) {
+    mpack_write_i64(writer, value);
+}
+
+MPACK_INLINE void mpack_write(mpack_writer_t* writer, uint8_t value) {
+    mpack_write_u8(writer, value);
+}
+
+MPACK_INLINE void mpack_write(mpack_writer_t* writer, uint16_t value) {
+    mpack_write_u16(writer, value);
+}
+
+MPACK_INLINE void mpack_write(mpack_writer_t* writer, uint32_t value) {
+    mpack_write_u32(writer, value);
+}
+
+MPACK_INLINE void mpack_write(mpack_writer_t* writer, uint64_t value) {
+    mpack_write_u64(writer, value);
+}
+
+MPACK_INLINE void mpack_write(mpack_writer_t* writer, bool value) {
+    mpack_write_bool(writer, value);
+}
+
+MPACK_INLINE void mpack_write(mpack_writer_t* writer, float value) {
+    mpack_write_float(writer, value);
+}
+
+MPACK_INLINE void mpack_write(mpack_writer_t* writer, double value) {
+    mpack_write_double(writer, value);
+}
+
+MPACK_INLINE void mpack_write(mpack_writer_t* writer, char *value) {
+    mpack_write_cstr_or_nil(writer, value);
+}
+
+MPACK_INLINE void mpack_write(mpack_writer_t* writer, const char *value) {
+    mpack_write_cstr_or_nil(writer, value);
+}
+
+/* C++ generic write for key-value pairs */
+
+MPACK_INLINE void mpack_write_kv(mpack_writer_t* writer, const char *key, int8_t value) {
+    mpack_write_cstr(writer, key);
+    mpack_write_i8(writer, value);
+}
+
+MPACK_INLINE void mpack_write_kv(mpack_writer_t* writer, const char *key, int16_t value) {
+    mpack_write_cstr(writer, key);
+    mpack_write_i16(writer, value);
+}
+
+MPACK_INLINE void mpack_write_kv(mpack_writer_t* writer, const char *key, int32_t value) {
+    mpack_write_cstr(writer, key);
+    mpack_write_i32(writer, value);
+}
+
+MPACK_INLINE void mpack_write_kv(mpack_writer_t* writer, const char *key, int64_t value) {
+    mpack_write_cstr(writer, key);
+    mpack_write_i64(writer, value);
+}
+
+MPACK_INLINE void mpack_write_kv(mpack_writer_t* writer, const char *key, uint8_t value) {
+    mpack_write_cstr(writer, key);
+    mpack_write_u8(writer, value);
+}
+
+MPACK_INLINE void mpack_write_kv(mpack_writer_t* writer, const char *key, uint16_t value) {
+    mpack_write_cstr(writer, key);
+    mpack_write_u16(writer, value);
+}
+
+MPACK_INLINE void mpack_write_kv(mpack_writer_t* writer, const char *key, uint32_t value) {
+    mpack_write_cstr(writer, key);
+    mpack_write_u32(writer, value);
+}
+
+MPACK_INLINE void mpack_write_kv(mpack_writer_t* writer, const char *key, uint64_t value) {
+    mpack_write_cstr(writer, key);
+    mpack_write_u64(writer, value);
+}
+
+MPACK_INLINE void mpack_write_kv(mpack_writer_t* writer, const char *key, bool value) {
+    mpack_write_cstr(writer, key);
+    mpack_write_bool(writer, value);
+}
+
+MPACK_INLINE void mpack_write_kv(mpack_writer_t* writer, const char *key, float value) {
+    mpack_write_cstr(writer, key);
+    mpack_write_float(writer, value);
+}
+
+MPACK_INLINE void mpack_write_kv(mpack_writer_t* writer, const char *key, double value) {
+    mpack_write_cstr(writer, key);
+    mpack_write_double(writer, value);
+}
+
+MPACK_INLINE void mpack_write_kv(mpack_writer_t* writer, const char *key, char *value) {
+    mpack_write_cstr(writer, key);
+    mpack_write_cstr_or_nil(writer, value);
+}
+
+MPACK_INLINE void mpack_write_kv(mpack_writer_t* writer, const char *key, const char *value) {
+    mpack_write_cstr(writer, key);
+    mpack_write_cstr_or_nil(writer, value);
+}
+
+/**
+ * @}
+ */
+
+#endif /* __cplusplus */
+
+/**
+ * @}
+ */
+
+MPACK_SILENCE_WARNINGS_END
+
+#endif // MPACK_WRITER
+
+#endif
+
+/* mpack/mpack-reader.h.h */
+
+/**
+ * @file
+ *
+ * Declares the core MPack Tag Reader.
+ */
+
+#ifndef MPACK_READER_H
+#define MPACK_READER_H 1
+
+/* #include "mpack-common.h" */
+
+MPACK_SILENCE_WARNINGS_BEGIN
+MPACK_EXTERN_C_BEGIN
+
+#if MPACK_READER
+
+#if MPACK_READ_TRACKING
+struct mpack_track_t;
+#endif
+
+// The denominator to determine whether a read is a small
+// fraction of the buffer size.
+#define MPACK_READER_SMALL_FRACTION_DENOMINATOR 32
+
+/**
+ * @defgroup reader Reader API
+ *
+ * The MPack Reader API contains functions for imperatively reading dynamically
+ * typed data from a MessagePack stream.
+ *
+ * See @ref docs/reader.md for examples.
+ *
+ * @note If you are not writing code for an embedded device (or otherwise do
+ * not need maximum performance with minimal memory usage), you should not use
+ * this. You probably want to use the @link node Node API@endlink instead.
+ *
+ * This forms the basis of the @link expect Expect API@endlink, which can be
+ * used to interpret the stream of elements in expected types and value ranges.
+ *
+ * @{
+ */
+
+/**
+ * @def MPACK_READER_MINIMUM_BUFFER_SIZE
+ *
+ * The minimum buffer size for a reader with a fill function.
+ */
+#define MPACK_READER_MINIMUM_BUFFER_SIZE 32
+
+/**
+ * A buffered MessagePack decoder.
+ *
+ * The decoder wraps an existing buffer and, optionally, a fill function.
+ * This allows efficiently decoding data from existing memory buffers, files,
+ * streams, etc.
+ *
+ * All read operations are synchronous; they will block until the
+ * requested data is fully read, or an error occurs.
+ *
+ * This structure is opaque; its fields should not be accessed outside
+ * of MPack.
+ */
+typedef struct mpack_reader_t mpack_reader_t;
+
+/**
+ * The MPack reader's fill function. It should fill the buffer with at
+ * least one byte and at most the given @c count, returning the number
+ * of bytes written to the buffer.
+ *
+ * In case of error, it should flag an appropriate error on the reader
+ * (usually @ref mpack_error_io), or simply return zero. If zero is
+ * returned, mpack_error_io is raised.
+ *
+ * @note When reading from a stream, you should only copy and return
+ * the bytes that are immediately available. It is always safe to return
+ * less than the requested count as long as some non-zero number of bytes
+ * are read; if more bytes are needed, the read function will simply be
+ * called again.
+ *
+ * @see mpack_reader_context()
+ */
+typedef size_t (*mpack_reader_fill_t)(mpack_reader_t* reader, char* buffer, size_t count);
+
+/**
+ * The MPack reader's skip function. It should discard the given number
+ * of bytes from the source (for example by seeking forward.)
+ *
+ * In case of error, it should flag an appropriate error on the reader.
+ *
+ * @see mpack_reader_context()
+ */
+typedef void (*mpack_reader_skip_t)(mpack_reader_t* reader, size_t count);
+
+/**
+ * An error handler function to be called when an error is flagged on
+ * the reader.
+ *
+ * The error handler will only be called once on the first error flagged;
+ * any subsequent reads and errors are ignored, and the reader is
+ * permanently in that error state.
+ *
+ * MPack is safe against non-local jumps out of error handler callbacks.
+ * This means you are allowed to longjmp or throw an exception (in C++,
+ * Objective-C, or with SEH) out of this callback.
+ *
+ * Bear in mind when using longjmp that local non-volatile variables that
+ * have changed are undefined when setjmp() returns, so you can't put the
+ * reader on the stack in the same activation frame as the setjmp without
+ * declaring it volatile.
+ *
+ * You must still eventually destroy the reader. It is not destroyed
+ * automatically when an error is flagged. It is safe to destroy the
+ * reader within this error callback, but you will either need to perform
+ * a non-local jump, or store something in your context to identify
+ * that the reader is destroyed since any future accesses to it cause
+ * undefined behavior.
+ */
+typedef void (*mpack_reader_error_t)(mpack_reader_t* reader, mpack_error_t error);
+
+/**
+ * A teardown function to be called when the reader is destroyed.
+ */
+typedef void (*mpack_reader_teardown_t)(mpack_reader_t* reader);
+
+/* Hide internals from documentation */
+/** @cond */
+
+struct mpack_reader_t {
+    void* context;                    /* Context for reader callbacks */
+    mpack_reader_fill_t fill;         /* Function to read bytes into the buffer */
+    mpack_reader_error_t error_fn;    /* Function to call on error */
+    mpack_reader_teardown_t teardown; /* Function to teardown the context on destroy */
+    mpack_reader_skip_t skip;         /* Function to skip bytes from the source */
+
+    char* buffer;       /* Writeable byte buffer */
+    size_t size;        /* Size of the buffer */
+
+    const char* data;   /* Current data pointer (in the buffer, if it is used) */
+    const char* end;    /* The end of available data (in the buffer, if it is used) */
+
+    mpack_error_t error;  /* Error state */
+
+    #if MPACK_READ_TRACKING
+    mpack_track_t track; /* Stack of map/array/str/bin/ext reads */
+    #endif
+};
+
+/** @endcond */
+
+/**
+ * @name Lifecycle Functions
+ * @{
+ */
+
+/**
+ * Initializes an MPack reader with the given buffer. The reader does
+ * not assume ownership of the buffer, but the buffer must be writeable
+ * if a fill function will be used to refill it.
+ *
+ * @param reader The MPack reader.
+ * @param buffer The buffer with which to read MessagePack data.
+ * @param size The size of the buffer.
+ * @param count The number of bytes already in the buffer.
+ */
+void mpack_reader_init(mpack_reader_t* reader, char* buffer, size_t size, size_t count);
+
+/**
+ * Initializes an MPack reader directly into an error state. Use this if you
+ * are writing a wrapper to mpack_reader_init() which can fail its setup.
+ */
+void mpack_reader_init_error(mpack_reader_t* reader, mpack_error_t error);
+
+/**
+ * Initializes an MPack reader to parse a pre-loaded contiguous chunk of data. The
+ * reader does not assume ownership of the data.
+ *
+ * @param reader The MPack reader.
+ * @param data The data to parse.
+ * @param count The number of bytes pointed to by data.
+ */
+void mpack_reader_init_data(mpack_reader_t* reader, const char* data, size_t count);
+
+#if MPACK_STDIO
+/**
+ * Initializes an MPack reader that reads from a file.
+ *
+ * The file will be automatically opened and closed by the reader.
+ */
+void mpack_reader_init_filename(mpack_reader_t* reader, const char* filename);
+
+/**
+ * Deprecated.
+ *
+ * Forwards both arguments unchanged to mpack_reader_init_filename().
+ *
+ * @param reader The MPack reader.
+ * @param filename Passed through to mpack_reader_init_filename().
+ *
+ * \deprecated Renamed to mpack_reader_init_filename().
+ */
+MPACK_INLINE void mpack_reader_init_file(mpack_reader_t* reader, const char* filename) {
+    mpack_reader_init_filename(reader, filename);
+}
+
+/**
+ * Initializes an MPack reader that reads from a libc FILE. This can be used to
+ * read from stdin, or from a file opened separately.
+ *
+ * @param reader The MPack reader.
+ * @param stdfile The FILE.
+ * @param close_when_done If true, fclose() will be called on the FILE when it
+ *         is no longer needed. If false, the file will not be closed when
+ *         reading is done.
+ *
+ * @warning The reader is buffered. It will read data in advance of parsing it,
+ * and it may read more data than it parsed. See mpack_reader_remaining() to
+ * access the extra data.
+ */
+void mpack_reader_init_stdfile(mpack_reader_t* reader, FILE* stdfile, bool close_when_done);
+#endif
+
+/**
+ * @def mpack_reader_init_stack(reader)
+ * @hideinitializer
+ *
+ * Initializes an MPack reader using stack space as a buffer. A fill function
+ * should be added to the reader to fill the buffer.
+ *
+ * This expands to the declaration of a char array of MPACK_STACK_SIZE bytes
+ * followed by a call to mpack_reader_init(), so it must appear where a
+ * declaration is allowed and cannot be the unbraced body of an if or loop.
+ * The array has the lifetime of the enclosing block, so the reader must not
+ * be used after that block exits.
+ *
+ * The array is named after the current source line (__LINE__); the extra
+ * macro level exists so that __LINE__ is expanded before it is token-pasted.
+ * Using this macro twice on the same source line would declare the same
+ * name twice.
+ *
+ * @see mpack_reader_set_fill
+ */
+
+/** @cond */
+#define mpack_reader_init_stack_line_ex(line, reader) \
+    char mpack_buf_##line[MPACK_STACK_SIZE]; \
+    mpack_reader_init((reader), mpack_buf_##line, sizeof(mpack_buf_##line), 0)
+
+#define mpack_reader_init_stack_line(line, reader) \
+    mpack_reader_init_stack_line_ex(line, reader)
+/** @endcond */
+
+#define mpack_reader_init_stack(reader) \
+    mpack_reader_init_stack_line(__LINE__, (reader))
+
+/**
+ * Cleans up the MPack reader, ensuring that all compound elements
+ * have been completely read. Returns the final error state of the
+ * reader.
+ *
+ * This will assert in tracking mode if the reader is not in an error
+ * state and has any incomplete reads. If you want to cancel reading
+ * in the middle of a document, you need to flag an error on the reader
+ * before destroying it (such as mpack_error_data).
+ *
+ * @see mpack_read_tag()
+ * @see mpack_reader_flag_error()
+ * @see mpack_error_data
+ */
+mpack_error_t mpack_reader_destroy(mpack_reader_t* reader);
+
+/**
+ * @}
+ */
+
+/**
+ * @name Callbacks
+ * @{
+ */
+
+/**
+ * Sets the custom pointer to pass to the reader callbacks, such as fill
+ * or teardown.
+ *
+ * The pointer is stored as-is in the reader, replacing any context stored
+ * earlier. It can be read back with mpack_reader_context().
+ *
+ * @param reader The MPack reader.
+ * @param context User data to pass to the reader callbacks.
+ *
+ * @see mpack_reader_context()
+ */
+MPACK_INLINE void mpack_reader_set_context(mpack_reader_t* reader, void* context) {
+    reader->context = context;
+}
+
+/**
+ * Returns the custom context for reader callbacks.
+ *
+ * @param reader The MPack reader.
+ * @return The context pointer currently stored in the reader.
+ *
+ * @see mpack_reader_set_context
+ * @see mpack_reader_set_fill
+ * @see mpack_reader_set_skip
+ */
+MPACK_INLINE void* mpack_reader_context(mpack_reader_t* reader) {
+    return reader->context;
+}
+
+/**
+ * Sets the fill function to refill the data buffer when it runs out of data.
+ *
+ * If no fill function is used, truncated MessagePack data results in
+ * mpack_error_invalid (since the buffer is assumed to contain a
+ * complete MessagePack object.)
+ *
+ * If a fill function is used, truncated MessagePack data usually
+ * results in mpack_error_io (since the fill function fails to get
+ * the missing data.)
+ *
+ * This should normally be used with mpack_reader_set_context() to register
+ * a custom pointer to pass to the fill function.
+ *
+ * @param reader The MPack reader.
+ * @param fill The function to fetch additional data into the buffer.
+ */
+void mpack_reader_set_fill(mpack_reader_t* reader, mpack_reader_fill_t fill);
+
+/**
+ * Sets the skip function to discard bytes from the source stream.
+ *
+ * It's not necessary to implement this function. If the stream is not
+ * seekable, don't set a skip callback. The reader will fall back to
+ * using the fill function instead.
+ *
+ * This should normally be used with mpack_reader_set_context() to register
+ * a custom pointer to pass to the skip function.
+ *
+ * The skip function is ignored in size-optimized builds to reduce code
+ * size. Data will be skipped with the fill function when necessary.
+ *
+ * @param reader The MPack reader.
+ * @param skip The function to discard bytes from the source stream.
+ */
+void mpack_reader_set_skip(mpack_reader_t* reader, mpack_reader_skip_t skip);
+
+/**
+ * Sets the error function to call when an error is flagged on the reader.
+ *
+ * This should normally be used with mpack_reader_set_context() to register
+ * a custom pointer to pass to the error function.
+ *
+ * The function pointer is simply stored in the reader, replacing any error
+ * function stored earlier.
+ *
+ * See the definition of mpack_reader_error_t for more information about
+ * what you can do from an error callback.
+ *
+ * @see mpack_reader_error_t
+ * @param reader The MPack reader.
+ * @param error_fn The function to call when an error is flagged on the reader.
+ */
+MPACK_INLINE void mpack_reader_set_error_handler(mpack_reader_t* reader, mpack_reader_error_t error_fn) {
+    reader->error_fn = error_fn;
+}
+
+/**
+ * Sets the teardown function to call when the reader is destroyed.
+ *
+ * This should normally be used with mpack_reader_set_context() to register
+ * a custom pointer to pass to the teardown function.
+ *
+ * The function pointer is simply stored in the reader, replacing any
+ * teardown function stored earlier.
+ *
+ * @param reader The MPack reader.
+ * @param teardown The function to call when the reader is destroyed.
+ */
+MPACK_INLINE void mpack_reader_set_teardown(mpack_reader_t* reader, mpack_reader_teardown_t teardown) {
+    reader->teardown = teardown;
+}
+
+/**
+ * @}
+ */
+
+/**
+ * @name Core Reader Functions
+ * @{
+ */
+
+/**
+ * Queries the error state of the MPack reader.
+ *
+ * If a reader is in an error state, you should discard all data since the
+ * last time the error flag was checked. The error flag cannot be cleared.
+ *
+ * @param reader The MPack reader.
+ * @return The error state stored in the reader (mpack_ok if no error has
+ *         been flagged.)
+ */
+MPACK_INLINE mpack_error_t mpack_reader_error(mpack_reader_t* reader) {
+    return reader->error;
+}
+
+/**
+ * Places the reader in the given error state, calling the error callback if one
+ * is set.
+ *
+ * This allows you to externally flag errors, for example if you are validating
+ * data as you read it.
+ *
+ * If the reader is already in an error state, this call is ignored and no
+ * error callback is called.
+ */
+void mpack_reader_flag_error(mpack_reader_t* reader, mpack_error_t error);
+
+/**
+ * Flags the given error on the reader unless it is mpack_ok, then reports
+ * the reader's resulting error state.
+ *
+ * Useful for externally flagging errors, for example when validating data
+ * as it is read.
+ *
+ * The call has no effect if the given error is mpack_ok or if the reader is
+ * already in an error state; the reader's actual error state is returned in
+ * either case.
+ */
+MPACK_INLINE mpack_error_t mpack_reader_flag_if_error(mpack_reader_t* reader, mpack_error_t error) {
+    if (error == mpack_ok)
+        return mpack_reader_error(reader);
+
+    mpack_reader_flag_error(reader, error);
+    return mpack_reader_error(reader);
+}
+
+/**
+ * Returns bytes left in the reader's buffer.
+ *
+ * If you are done reading MessagePack data but there is other interesting data
+ * following it, the reader may have buffered too much data. The number of bytes
+ * remaining in the buffer and a pointer to the position of those bytes can be
+ * queried here.
+ *
+ * If you know the length of the MPack chunk beforehand, it's better to instead
+ * have your fill function limit the data it reads so that the reader does not
+ * have extra data. In this case you can simply check that this returns zero.
+ *
+ * Returns 0 if the reader is in an error state.
+ *
+ * @param reader The MPack reader from which to query remaining data.
+ * @param[out] data A pointer to the remaining data, or NULL.
+ * @return The number of bytes remaining in the buffer.
+ */
+size_t mpack_reader_remaining(mpack_reader_t* reader, const char** data);
+
+/**
+ * Reads a MessagePack object header (an MPack tag.)
+ *
+ * If an error occurs, the reader is placed in an error state and a
+ * nil tag is returned. If the reader is already in an error state,
+ * a nil tag is returned.
+ *
+ * If the type is compound (i.e. is a map, array, string, binary or
+ * extension type), additional reads are required to get the contained
+ * data, and the corresponding done function must be called when done.
+ *
+ * @note Maps in JSON are unordered, so it is recommended not to expect
+ * a specific ordering for your map values in case your data is converted
+ * to/from JSON.
+ *
+ * @see mpack_read_bytes()
+ * @see mpack_done_array()
+ * @see mpack_done_map()
+ * @see mpack_done_str()
+ * @see mpack_done_bin()
+ * @see mpack_done_ext()
+ */
+mpack_tag_t mpack_read_tag(mpack_reader_t* reader);
+
+/**
+ * Parses the next MessagePack object header (an MPack tag) without
+ * advancing the reader.
+ *
+ * If an error occurs, the reader is placed in an error state and a
+ * nil tag is returned. If the reader is already in an error state,
+ * a nil tag is returned.
+ *
+ * @note Maps in JSON are unordered, so it is recommended not to expect
+ * a specific ordering for your map values in case your data is converted
+ * to/from JSON.
+ *
+ * @see mpack_read_tag()
+ * @see mpack_discard()
+ */
+mpack_tag_t mpack_peek_tag(mpack_reader_t* reader);
+
+/**
+ * @}
+ */
+
+/**
+ * @name String and Data Functions
+ * @{
+ */
+
+/**
+ * Skips bytes from the underlying stream. This is used only to
+ * skip the contents of a string, binary blob or extension object.
+ */
+void mpack_skip_bytes(mpack_reader_t* reader, size_t count);
+
+/**
+ * Reads bytes from a string, binary blob or extension object, copying
+ * them into the given buffer.
+ *
+ * A str, bin or ext must have been opened by a call to mpack_read_tag()
+ * which yielded one of these types, or by a call to an expect function
+ * such as mpack_expect_str() or mpack_expect_bin().
+ *
+ * If an error occurs, the buffer contents are undefined.
+ *
+ * This can be called multiple times for a single str, bin or ext
+ * to read the data in chunks. The total data read must add up
+ * to the size of the object.
+ *
+ * @param reader The MPack reader
+ * @param p The buffer in which to copy the bytes
+ * @param count The number of bytes to read
+ */
+void mpack_read_bytes(mpack_reader_t* reader, char* p, size_t count);
+
+/**
+ * Reads bytes from a string, ensures that the string is valid UTF-8,
+ * and copies the bytes into the given buffer.
+ *
+ * A string must have been opened by a call to mpack_read_tag() which
+ * yielded a string, or by a call to an expect function such as
+ * mpack_expect_str().
+ *
+ * The given byte count must match the complete size of the string as
+ * returned by the tag or expect function. You must ensure that the
+ * buffer fits the data.
+ *
+ * This does not accept any UTF-8 variant such as Modified UTF-8, CESU-8 or
+ * WTF-8. Only pure UTF-8 is allowed.
+ *
+ * If an error occurs, the buffer contents are undefined.
+ *
+ * Unlike mpack_read_bytes(), this cannot be used to read the data in
+ * chunks (since this might split a character's UTF-8 bytes, and the
+ * reader does not keep track of the UTF-8 decoding state between reads.)
+ *
+ * @throws mpack_error_type if the string contains invalid UTF-8.
+ */
+void mpack_read_utf8(mpack_reader_t* reader, char* p, size_t byte_count);
+
+/**
+ * Reads bytes from a string, ensures that the string contains no NUL
+ * bytes, copies the bytes into the given buffer and adds a null-terminator.
+ *
+ * A string must have been opened by a call to mpack_read_tag() which
+ * yielded a string, or by a call to an expect function such as
+ * mpack_expect_str().
+ *
+ * The given byte count must match the size of the string as returned
+ * by the tag or expect function. The string will only be copied if
+ * the buffer is large enough to store it.
+ *
+ * If an error occurs, the buffer will contain an empty string.
+ *
+ * @note If you know the object will be a string before reading it,
+ * it is highly recommended to use mpack_expect_cstr() instead.
+ * Alternatively you could use mpack_peek_tag() and call
+ * mpack_expect_cstr() if it's a string.
+ *
+ * @throws mpack_error_too_big if the string plus null-terminator is larger than the given buffer size
+ * @throws mpack_error_type if the string contains a null byte.
+ *
+ * @see mpack_peek_tag()
+ * @see mpack_expect_cstr()
+ * @see mpack_expect_utf8_cstr()
+ */
+void mpack_read_cstr(mpack_reader_t* reader, char* buf, size_t buffer_size, size_t byte_count);
+
+/**
+ * Reads bytes from a string, ensures that the string is valid UTF-8
+ * with no NUL bytes, copies the bytes into the given buffer and adds a
+ * null-terminator.
+ *
+ * A string must have been opened by a call to mpack_read_tag() which
+ * yielded a string, or by a call to an expect function such as
+ * mpack_expect_str().
+ *
+ * The given byte count must match the size of the string as returned
+ * by the tag or expect function. The string will only be copied if
+ * the buffer is large enough to store it.
+ *
+ * This does not accept any UTF-8 variant such as Modified UTF-8, CESU-8 or
+ * WTF-8. Only pure UTF-8 is allowed, but without the NUL character, since
+ * it cannot be represented in a null-terminated string.
+ *
+ * If an error occurs, the buffer will contain an empty string.
+ *
+ * @note If you know the object will be a string before reading it,
+ * it is highly recommended to use mpack_expect_utf8_cstr() instead.
+ * Alternatively you could use mpack_peek_tag() and call
+ * mpack_expect_utf8_cstr() if it's a string.
+ *
+ * @throws mpack_error_too_big if the string plus null-terminator is larger than the given buffer size
+ * @throws mpack_error_type if the string contains invalid UTF-8 or a null byte.
+ *
+ * @see mpack_peek_tag()
+ * @see mpack_expect_utf8_cstr()
+ */
+void mpack_read_utf8_cstr(mpack_reader_t* reader, char* buf, size_t buffer_size, size_t byte_count);
+
+#ifdef MPACK_MALLOC
+/** @cond */
+// This can optionally add a null-terminator, but it does not check
+// whether the data contains null bytes. This must be done separately
+// in a cstring read function (possibly as part of a UTF-8 check.)
+char* mpack_read_bytes_alloc_impl(mpack_reader_t* reader, size_t count, bool null_terminated);
+/** @endcond */
+
+/**
+ * Reads bytes from a string, binary blob or extension object, allocating
+ * storage for them and returning the allocated pointer.
+ *
+ * The allocated string must be freed with MPACK_FREE() (or simply free()
+ * if MPack's allocator hasn't been customized.)
+ *
+ * This calls mpack_read_bytes_alloc_impl() without requesting a
+ * null-terminator, so the returned data is not null-terminated.
+ *
+ * Returns NULL if any error occurs, or if count is zero.
+ *
+ * @param reader The MPack reader.
+ * @param count The number of bytes to read.
+ */
+MPACK_INLINE char* mpack_read_bytes_alloc(mpack_reader_t* reader, size_t count) {
+    return mpack_read_bytes_alloc_impl(reader, count, false);
+}
+#endif
+
+/**
+ * Reads bytes from a string, binary blob or extension object in-place in
+ * the buffer. This can be used to avoid copying the data.
+ *
+ * A str, bin or ext must have been opened by a call to mpack_read_tag()
+ * which yielded one of these types, or by a call to an expect function
+ * such as mpack_expect_str() or mpack_expect_bin().
+ *
+ * If the bytes are from a string, the string is not null-terminated! Use
+ * mpack_read_cstr() to copy the string into a buffer and add a null-terminator.
+ *
+ * The returned pointer is invalidated on the next read, or when the buffer
+ * is destroyed.
+ *
+ * The reader will move data around in the buffer if needed to ensure that
+ * the pointer can always be returned, so this should only be used if
+ * count is very small compared to the buffer size. If you need to check
+ * whether a small size is reasonable (for example you intend to handle small and
+ * large sizes differently), you can call mpack_should_read_bytes_inplace().
+ *
+ * This can be called multiple times for a single str, bin or ext
+ * to read the data in chunks. The total data read must add up
+ * to the size of the object.
+ *
+ * NULL is returned if the reader is in an error state.
+ *
+ * @throws mpack_error_too_big if the requested size is larger than the buffer size
+ *
+ * @see mpack_should_read_bytes_inplace()
+ */
+const char* mpack_read_bytes_inplace(mpack_reader_t* reader, size_t count);
+
+/**
+ * Reads bytes from a string in-place in the buffer and ensures they are
+ * valid UTF-8. This can be used to avoid copying the data.
+ *
+ * A string must have been opened by a call to mpack_read_tag() which
+ * yielded a string, or by a call to an expect function such as
+ * mpack_expect_str().
+ *
+ * The string is not null-terminated! Use mpack_read_utf8_cstr() to
+ * copy the string into a buffer and add a null-terminator.
+ *
+ * The returned pointer is invalidated on the next read, or when the buffer
+ * is destroyed.
+ *
+ * The reader will move data around in the buffer if needed to ensure that
+ * the pointer can always be returned, so this should only be used if
+ * count is very small compared to the buffer size. If you need to check
+ * whether a small size is reasonable (for example you intend to handle small and
+ * large sizes differently), you can call mpack_should_read_bytes_inplace().
+ *
+ * This does not accept any UTF-8 variant such as Modified UTF-8, CESU-8 or
+ * WTF-8. Only pure UTF-8 is allowed.
+ *
+ * Unlike mpack_read_bytes_inplace(), this cannot be used to read the data in
+ * chunks (since this might split a character's UTF-8 bytes, and the
+ * reader does not keep track of the UTF-8 decoding state between reads.)
+ *
+ * NULL is returned if the reader is in an error state.
+ *
+ * @throws mpack_error_type if the string contains invalid UTF-8
+ * @throws mpack_error_too_big if the requested size is larger than the buffer size
+ *
+ * @see mpack_should_read_bytes_inplace()
+ */
+const char* mpack_read_utf8_inplace(mpack_reader_t* reader, size_t count);
+
+/**
+ * Tells whether reading the given number of bytes in-place is advisable.
+ *
+ * A read larger than some small fraction of the buffer size yields false,
+ * since it would shuffle too much data back and forth in the buffer. A
+ * reader whose buffer size is zero always yields true.
+ *
+ * This is meant for arbitrary size data: read in-place for the best
+ * performance when possible, and fall back to a normal read when the data
+ * is too large.
+ *
+ * @see mpack_read_bytes_inplace()
+ */
+MPACK_INLINE bool mpack_should_read_bytes_inplace(mpack_reader_t* reader, size_t count) {
+    if (reader->size == 0)
+        return true;
+
+    return count <= reader->size / MPACK_READER_SMALL_FRACTION_DENOMINATOR;
+}
+
+#if MPACK_EXTENSIONS
+/**
+ * Reads a timestamp contained in an ext object of the given size, closing the
+ * ext type.
+ *
+ * An ext object of exttype @ref MPACK_EXTTYPE_TIMESTAMP must have been opened
+ * by a call to e.g. mpack_read_tag() or mpack_expect_ext().
+ *
+ * You must NOT call mpack_done_ext() after calling this. A timestamp ext
+ * object can only contain a single timestamp value, so this calls
+ * mpack_done_ext() automatically.
+ *
+ * @note This requires @ref MPACK_EXTENSIONS.
+ *
+ * @throws mpack_error_invalid if the size is not one of the supported
+ * timestamp sizes, or if the nanoseconds are out of range.
+ */
+mpack_timestamp_t mpack_read_timestamp(mpack_reader_t* reader, size_t size);
+#endif
+
+/**
+ * @}
+ */
+
+/**
+ * @name Core Reader Functions
+ * @{
+ */
+
+#if MPACK_READ_TRACKING
+/**
+ * Finishes reading the given type.
+ *
+ * This will track reads to ensure that the correct number of elements
+ * or bytes are read.
+ *
+ * When MPACK_READ_TRACKING is disabled, this is instead an inline function
+ * that ignores both arguments and does nothing.
+ *
+ * @param reader The MPack reader.
+ * @param type The type being finished (e.g. mpack_type_array.)
+ */
+void mpack_done_type(mpack_reader_t* reader, mpack_type_t type);
+#else
+MPACK_INLINE void mpack_done_type(mpack_reader_t* reader, mpack_type_t type) {
+    MPACK_UNUSED(reader);
+    MPACK_UNUSED(type);
+}
+#endif
+
+/**
+ * Finishes reading an array.
+ *
+ * This will track reads to ensure that the correct number of elements are read.
+ *
+ * Equivalent to calling mpack_done_type() with mpack_type_array.
+ *
+ * @param reader The MPack reader.
+ */
+MPACK_INLINE void mpack_done_array(mpack_reader_t* reader) {
+    mpack_done_type(reader, mpack_type_array);
+}
+
+/**
+ * @fn mpack_done_map(mpack_reader_t* reader)
+ *
+ * Finishes reading a map.
+ *
+ * This will track reads to ensure that the correct number of elements are read.
+ *
+ * Equivalent to calling mpack_done_type() with mpack_type_map.
+ *
+ * @param reader The MPack reader.
+ */
+MPACK_INLINE void mpack_done_map(mpack_reader_t* reader) {
+    mpack_done_type(reader, mpack_type_map);
+}
+
+/**
+ * @fn mpack_done_str(mpack_reader_t* reader)
+ *
+ * Finishes reading a string.
+ *
+ * This will track reads to ensure that the correct number of bytes are read.
+ *
+ * Equivalent to calling mpack_done_type() with mpack_type_str.
+ *
+ * @param reader The MPack reader.
+ */
+MPACK_INLINE void mpack_done_str(mpack_reader_t* reader) {
+    mpack_done_type(reader, mpack_type_str);
+}
+
+/**
+ * @fn mpack_done_bin(mpack_reader_t* reader)
+ *
+ * Finishes reading a binary data blob.
+ *
+ * This will track reads to ensure that the correct number of bytes are read.
+ *
+ * Equivalent to calling mpack_done_type() with mpack_type_bin.
+ *
+ * @param reader The MPack reader.
+ */
+MPACK_INLINE void mpack_done_bin(mpack_reader_t* reader) {
+    mpack_done_type(reader, mpack_type_bin);
+}
+
+#if MPACK_EXTENSIONS
+/**
+ * @fn mpack_done_ext(mpack_reader_t* reader)
+ *
+ * Finishes reading an extended type binary data blob.
+ *
+ * This will track reads to ensure that the correct number of bytes are read.
+ *
+ * Equivalent to calling mpack_done_type() with mpack_type_ext.
+ *
+ * @param reader The MPack reader.
+ *
+ * @note This requires @ref MPACK_EXTENSIONS.
+ */
+MPACK_INLINE void mpack_done_ext(mpack_reader_t* reader) {
+    mpack_done_type(reader, mpack_type_ext);
+}
+#endif
+
+/**
+ * Reads and discards the next object. This will read and discard all
+ * contained data as well if it is a compound type.
+ */
+void mpack_discard(mpack_reader_t* reader);
+
+/**
+ * @}
+ */
+
+/** @cond */
+
+#if MPACK_DEBUG && MPACK_STDIO
+/**
+ * @name Debugging Functions
+ * @{
+ */
+/*
+ * Converts a blob of MessagePack to a pseudo-JSON string for debugging
+ * purposes, placing the result in the given buffer with a null-terminator.
+ *
+ * If the buffer does not have enough space, the result will be truncated (but
+ * it is guaranteed to be null-terminated.)
+ *
+ * This is only available in debug mode, and only if stdio is available (since
+ * it uses snprintf().) It's strictly for debugging purposes.
+ */
+void mpack_print_data_to_buffer(const char* data, size_t data_size, char* buffer, size_t buffer_size);
+
+/*
+ * Converts a blob of MessagePack to pseudo-JSON for debugging purposes, calling the given
+ * callback as many times as is necessary to output the character data.
+ *
+ * No null-terminator or trailing newline will be written.
+ *
+ * This is only available in debug mode, and only if stdio is available (since
+ * it uses snprintf().) It's strictly for debugging purposes.
+ */
+void mpack_print_data_to_callback(const char* data, size_t size, mpack_print_callback_t callback, void* context);
+
+/*
+ * Converts a blob of MessagePack to pseudo-JSON for debugging purposes
+ * and pretty-prints it to the given file.
+ */
+void mpack_print_data_to_file(const char* data, size_t len, FILE* file);
+
+/*
+ * Converts a blob of MessagePack to pseudo-JSON for debugging purposes
+ * and pretty-prints it to stdout.
+ *
+ * Equivalent to calling mpack_print_data_to_file() with stdout as the file.
+ */
+MPACK_INLINE void mpack_print_data_to_stdout(const char* data, size_t len) {
+    mpack_print_data_to_file(data, len, stdout);
+}
+
+/*
+ * Converts the MessagePack contained in the given `FILE*` to pseudo-JSON for
+ * debugging purposes, calling the given callback as many times as is necessary
+ * to output the character data.
+ */
+void mpack_print_stdfile_to_callback(FILE* file, mpack_print_callback_t callback, void* context);
+
+/*
+ * Deprecated.
+ *
+ * Forwards both arguments unchanged to mpack_print_data_to_stdout().
+ *
+ * \deprecated Renamed to mpack_print_data_to_stdout().
+ */
+MPACK_INLINE void mpack_print(const char* data, size_t len) {
+    mpack_print_data_to_stdout(data, len);
+}
+
+/**
+ * @}
+ */
+#endif
+
+/** @endcond */
+
+/**
+ * @}
+ */
+
+
+
+#if MPACK_INTERNAL
+
+bool mpack_reader_ensure_straddle(mpack_reader_t* reader, size_t count);
+
+/*
+ * Makes sure at least @c count bytes are left in the data. Succeeds at
+ * once when the data already available suffices; otherwise defers to
+ * mpack_reader_ensure_straddle(), raising an error and returning false
+ * if more data cannot be made available.
+ *
+ * @c count must be non-zero and the reader must not be in an error state
+ * (both are asserted.)
+ */
+MPACK_INLINE bool mpack_reader_ensure(mpack_reader_t* reader, size_t count) {
+    mpack_assert(count != 0, "cannot ensure zero bytes!");
+    mpack_assert(reader->error == mpack_ok, "reader cannot be in an error state!");
+
+    return (count <= (size_t)(reader->end - reader->data))
+        || mpack_reader_ensure_straddle(reader, count);
+}
+
+void mpack_read_native_straddle(mpack_reader_t* reader, char* p, size_t count);
+
+// Copies count bytes into p straight from the buffer when that many are
+// available, advancing the data pointer past them; otherwise hands the whole
+// read off to mpack_read_native_straddle().
+MPACK_INLINE void mpack_read_native(mpack_reader_t* reader, char* p, size_t count) {
+    mpack_assert(count == 0 || p != NULL, "data pointer for %i bytes is NULL", (int)count);
+
+    if (count <= (size_t)(reader->end - reader->data)) {
+        mpack_memcpy(p, reader->data, count);
+        reader->data += count;
+        return;
+    }
+
+    mpack_read_native_straddle(reader, p, count);
+}
+
+#if MPACK_READ_TRACKING /* error_expr is evaluated only while the reader has no error; its result is flagged on the reader */
+#define MPACK_READER_TRACK(reader, error_expr) \
+    (((reader)->error == mpack_ok) ? mpack_reader_flag_if_error((reader), (error_expr)) : (reader)->error)
+#else /* error_expr is discarded unevaluated (reader->track does not exist here); the result is always mpack_ok */
+#define MPACK_READER_TRACK(reader, error_expr) (MPACK_UNUSED(reader), mpack_ok)
+#endif /* MPACK_READ_TRACKING */
+
+MPACK_INLINE mpack_error_t mpack_reader_track_element(mpack_reader_t* reader) {
+    return MPACK_READER_TRACK(reader, mpack_track_element(&reader->track, true));
+}
+
+MPACK_INLINE mpack_error_t mpack_reader_track_peek_element(mpack_reader_t* reader) {
+    return MPACK_READER_TRACK(reader, mpack_track_peek_element(&reader->track, true));
+}
+
+MPACK_INLINE mpack_error_t mpack_reader_track_bytes(mpack_reader_t* reader, size_t count) {
+    MPACK_UNUSED(count); /* count is not referenced when MPACK_READER_TRACK discards its expression */
+    return MPACK_READER_TRACK(reader, mpack_track_bytes(&reader->track, true, count));
+}
+
+MPACK_INLINE mpack_error_t mpack_reader_track_str_bytes_all(mpack_reader_t* reader, size_t count) {
+    MPACK_UNUSED(count); /* count is not referenced when MPACK_READER_TRACK discards its expression */
+    return MPACK_READER_TRACK(reader, mpack_track_str_bytes_all(&reader->track, true, count));
+}
+
+#endif
+
+
+
+#endif
+
+MPACK_EXTERN_C_END
+MPACK_SILENCE_WARNINGS_END
+
+#endif
+
+
+/* mpack/mpack-expect.h.h */
+
+/**
+ * @file
+ *
+ * Declares the MPack static Expect API.
+ */
+
+#ifndef MPACK_EXPECT_H
+#define MPACK_EXPECT_H 1
+
+/* #include "mpack-reader.h" */
+
+MPACK_SILENCE_WARNINGS_BEGIN
+MPACK_EXTERN_C_BEGIN
+
+#if MPACK_EXPECT
+
+#if !MPACK_READER
+#error "MPACK_EXPECT requires MPACK_READER."
+#endif
+
+/**
+ * @defgroup expect Expect API
+ *
+ * The MPack Expect API allows you to easily read MessagePack data when you
+ * expect it to follow a predefined schema.
+ *
+ * @note If you are not writing code for an embedded device (or otherwise do
+ * not need maximum performance with minimal memory usage), you should not use
+ * this. You probably want to use the @link node Node API@endlink instead.
+ *
+ * See @ref docs/expect.md for examples.
+ *
+ * The main purpose of the Expect API is convenience, so the API is lax. It
+ * automatically converts between similar types where there is no loss of
+ * precision.
+ *
+ * When using any of the expect functions, if the type or value of what was
+ * read does not match what is expected, @ref mpack_error_type is raised.
+ *
+ * @{
+ */
+
+/**
+ * @name Basic Number Functions
+ * @{
+ */
+
+/**
+ * Reads an 8-bit unsigned integer.
+ *
+ * The underlying type may be an integer type of any size and signedness,
+ * as long as the value can be represented in an 8-bit unsigned int.
+ *
+ * Returns zero if an error occurs.
+ */
+uint8_t mpack_expect_u8(mpack_reader_t* reader);
+
+/**
+ * Reads a 16-bit unsigned integer.
+ *
+ * The underlying type may be an integer type of any size and signedness,
+ * as long as the value can be represented in a 16-bit unsigned int.
+ *
+ * Returns zero if an error occurs.
+ */
+uint16_t mpack_expect_u16(mpack_reader_t* reader);
+
+/**
+ * Reads a 32-bit unsigned integer.
+ *
+ * The underlying type may be an integer type of any size and signedness,
+ * as long as the value can be represented in a 32-bit unsigned int.
+ *
+ * Returns zero if an error occurs.
+ */
+uint32_t mpack_expect_u32(mpack_reader_t* reader);
+
+/**
+ * Reads a 64-bit unsigned integer.
+ *
+ * The underlying type may be an integer type of any size and signedness,
+ * as long as the value can be represented in a 64-bit unsigned int.
+ *
+ * Returns zero if an error occurs.
+ */
+uint64_t mpack_expect_u64(mpack_reader_t* reader);
+
+/**
+ * Reads an 8-bit signed integer.
+ *
+ * The underlying type may be an integer type of any size and signedness,
+ * as long as the value can be represented in an 8-bit signed int.
+ *
+ * Returns zero if an error occurs.
+ */
+int8_t mpack_expect_i8(mpack_reader_t* reader);
+
+/**
+ * Reads a 16-bit signed integer.
+ *
+ * The underlying type may be an integer type of any size and signedness,
+ * as long as the value can be represented in a 16-bit signed int.
+ *
+ * Returns zero if an error occurs.
+ */
+int16_t mpack_expect_i16(mpack_reader_t* reader);
+
+/**
+ * Reads a 32-bit signed integer.
+ *
+ * The underlying type may be an integer type of any size and signedness,
+ * as long as the value can be represented in a 32-bit signed int.
+ *
+ * Returns zero if an error occurs.
+ */
+int32_t mpack_expect_i32(mpack_reader_t* reader);
+
+/**
+ * Reads a 64-bit signed integer.
+ *
+ * The underlying type may be an integer type of any size and signedness,
+ * as long as the value can be represented in a 64-bit signed int.
+ *
+ * Returns zero if an error occurs.
+ */
+int64_t mpack_expect_i64(mpack_reader_t* reader);
+
+#if MPACK_FLOAT
+/**
+ * Reads a number, returning the value as a float. The underlying value can be an
+ * integer, float or double; the value is converted to a float.
+ *
+ * @note Reading a double or a large integer with this function can incur a
+ * loss of precision.
+ *
+ * @throws mpack_error_type if the underlying value is not a float, double or integer.
+ */
+float mpack_expect_float(mpack_reader_t* reader);
+#endif
+
+#if MPACK_DOUBLE
+/**
+ * Reads a number, returning the value as a double. The underlying value can be an
+ * integer, float or double; the value is converted to a double.
+ *
+ * @note Reading a very large integer with this function can incur a
+ * loss of precision.
+ *
+ * @throws mpack_error_type if the underlying value is not a float, double or integer.
+ */
+double mpack_expect_double(mpack_reader_t* reader);
+#endif
+
+#if MPACK_FLOAT
+/**
+ * Reads a float. The underlying value must be a float, not a double or an integer.
+ * This ensures no loss of precision can occur.
+ *
+ * @throws mpack_error_type if the underlying value is not a float.
+ */
+float mpack_expect_float_strict(mpack_reader_t* reader);
+#endif
+
+#if MPACK_DOUBLE
+/**
+ * Reads a double. The underlying value must be a float or double, not an integer.
+ * This ensures no loss of precision can occur.
+ *
+ * @throws mpack_error_type if the underlying value is not a float or double.
+ */
+double mpack_expect_double_strict(mpack_reader_t* reader);
+#endif
+
+#if !MPACK_FLOAT
+/**
+ * Reads a float as a raw uint32_t. The underlying value must be a float, not a
+ * double or an integer.
+ *
+ * @throws mpack_error_type if the underlying value is not a float.
+ */
+uint32_t mpack_expect_raw_float(mpack_reader_t* reader);
+#endif
+
+#if !MPACK_DOUBLE
+/**
+ * Reads a double as a raw uint64_t. The underlying value must be a double, not a
+ * float or an integer.
+ *
+ * @throws mpack_error_type if the underlying value is not a double.
+ */
+uint64_t mpack_expect_raw_double(mpack_reader_t* reader);
+#endif
+
+/**
+ * @}
+ */
+
+/**
+ * @name Ranged Number Functions
+ * @{
+ */
+
+/**
+ * Reads an 8-bit unsigned integer, ensuring that it falls within the given range.
+ *
+ * The underlying type may be an integer type of any size and signedness,
+ * as long as the value can be represented in an 8-bit unsigned int.
+ *
+ * Returns min_value if an error occurs.
+ */
+uint8_t mpack_expect_u8_range(mpack_reader_t* reader, uint8_t min_value, uint8_t max_value);
+
+/**
+ * Reads a 16-bit unsigned integer, ensuring that it falls within the given range.
+ *
+ * The underlying type may be an integer type of any size and signedness,
+ * as long as the value can be represented in a 16-bit unsigned int.
+ *
+ * Returns min_value if an error occurs.
+ */
+uint16_t mpack_expect_u16_range(mpack_reader_t* reader, uint16_t min_value, uint16_t max_value);
+
+/**
+ * Reads a 32-bit unsigned integer, ensuring that it falls within the given range.
+ *
+ * The underlying type may be an integer type of any size and signedness,
+ * as long as the value can be represented in a 32-bit unsigned int.
+ *
+ * Returns min_value if an error occurs.
+ */
+uint32_t mpack_expect_u32_range(mpack_reader_t* reader, uint32_t min_value, uint32_t max_value);
+
+/**
+ * Reads a 64-bit unsigned integer, ensuring that it falls within the given range.
+ *
+ * The underlying type may be an integer type of any size and signedness,
+ * as long as the value can be represented in a 64-bit unsigned int.
+ *
+ * Returns min_value if an error occurs.
+ */
+uint64_t mpack_expect_u64_range(mpack_reader_t* reader, uint64_t min_value, uint64_t max_value);
+
+/**
+ * Reads an unsigned integer and checks that it lies within the given range.
+ *
+ * The encoded value may be an integer of any size and signedness, provided
+ * it can be represented in an unsigned int.
+ *
+ * Returns min_value if an error occurs.
+ */
+MPACK_INLINE unsigned int mpack_expect_uint_range(mpack_reader_t* reader, unsigned int min_value, unsigned int max_value) {
+    // The size test is a compile-time constant. If unsigned int is not 32
+    // bits wide for some reason, fall back to the 64-bit function.
+    if (sizeof(unsigned int) != 4)
+        return (unsigned int)mpack_expect_u64_range(reader, min_value, max_value);
+    return (unsigned int)mpack_expect_u32_range(reader, (uint32_t)min_value, (uint32_t)max_value);
+}
+
+/**
+ * Reads an 8-bit unsigned integer that must not exceed @a max_value.
+ *
+ * Integers of any size and signedness are accepted if they fit in a uint8_t.
+ *
+ * Returns 0 if an error occurs.
+ */
+MPACK_INLINE uint8_t mpack_expect_u8_max(mpack_reader_t* reader, uint8_t max_value) {
+    const uint8_t min_value = 0; // only the upper bound is caller-supplied
+    return mpack_expect_u8_range(reader, min_value, max_value);
+}
+
+/**
+ * Reads a 16-bit unsigned integer that must not exceed @a max_value.
+ *
+ * Integers of any size and signedness are accepted if they fit in a uint16_t.
+ *
+ * Returns 0 if an error occurs.
+ */
+MPACK_INLINE uint16_t mpack_expect_u16_max(mpack_reader_t* reader, uint16_t max_value) {
+    const uint16_t min_value = 0; // only the upper bound is caller-supplied
+    return mpack_expect_u16_range(reader, min_value, max_value);
+}
+
+/**
+ * Reads a 32-bit unsigned integer that must not exceed @a max_value.
+ *
+ * Integers of any size and signedness are accepted if they fit in a uint32_t.
+ *
+ * Returns 0 if an error occurs.
+ */
+MPACK_INLINE uint32_t mpack_expect_u32_max(mpack_reader_t* reader, uint32_t max_value) {
+    const uint32_t min_value = 0; // only the upper bound is caller-supplied
+    return mpack_expect_u32_range(reader, min_value, max_value);
+}
+
+/**
+ * Reads a 64-bit unsigned integer that must not exceed @a max_value.
+ *
+ * Integers of any size and signedness are accepted if they fit in a uint64_t.
+ *
+ * Returns 0 if an error occurs.
+ */
+MPACK_INLINE uint64_t mpack_expect_u64_max(mpack_reader_t* reader, uint64_t max_value) {
+    const uint64_t min_value = 0; // only the upper bound is caller-supplied
+    return mpack_expect_u64_range(reader, min_value, max_value);
+}
+
+/**
+ * Reads an unsigned integer that must not exceed @a max_value.
+ *
+ * Integers of any size and signedness are accepted if they fit in an unsigned int.
+ *
+ * Returns 0 if an error occurs.
+ */
+MPACK_INLINE unsigned int mpack_expect_uint_max(mpack_reader_t* reader, unsigned int max_value) {
+    const unsigned int min_value = 0; // only the upper bound is caller-supplied
+    return mpack_expect_uint_range(reader, min_value, max_value);
+}
+
+/**
+ * Reads an 8-bit signed integer, ensuring that it falls within the given range.
+ *
+ * The underlying type may be an integer type of any size and signedness,
+ * as long as the value can be represented in an 8-bit signed int.
+ *
+ * Returns min_value if an error occurs.
+ */
+int8_t mpack_expect_i8_range(mpack_reader_t* reader, int8_t min_value, int8_t max_value);
+
+/**
+ * Reads a 16-bit signed integer, ensuring that it falls within the given range.
+ *
+ * The underlying type may be an integer type of any size and signedness,
+ * as long as the value can be represented in a 16-bit signed int.
+ *
+ * Returns min_value if an error occurs.
+ */
+int16_t mpack_expect_i16_range(mpack_reader_t* reader, int16_t min_value, int16_t max_value);
+
+/**
+ * Reads a 32-bit signed integer, ensuring that it falls within the given range.
+ *
+ * The underlying type may be an integer type of any size and signedness,
+ * as long as the value can be represented in a 32-bit signed int.
+ *
+ * Returns min_value if an error occurs.
+ */
+int32_t mpack_expect_i32_range(mpack_reader_t* reader, int32_t min_value, int32_t max_value);
+
+/**
+ * Reads a 64-bit signed integer, ensuring that it falls within the given range.
+ *
+ * The underlying type may be an integer type of any size and signedness,
+ * as long as the value can be represented in a 64-bit signed int.
+ *
+ * Returns min_value if an error occurs.
+ */
+int64_t mpack_expect_i64_range(mpack_reader_t* reader, int64_t min_value, int64_t max_value);
+
+/**
+ * Reads a signed integer and checks that it lies within the given range.
+ *
+ * The encoded value may be an integer of any size and signedness, provided
+ * it can be represented in a signed int.
+ *
+ * Returns min_value if an error occurs.
+ */
+MPACK_INLINE int mpack_expect_int_range(mpack_reader_t* reader, int min_value, int max_value) {
+    // The size test is a compile-time constant. If int is not 32 bits wide
+    // for some reason, fall back to the 64-bit function.
+    if (sizeof(int) != 4)
+        return (int)mpack_expect_i64_range(reader, min_value, max_value);
+    return (int)mpack_expect_i32_range(reader, (int32_t)min_value, (int32_t)max_value);
+}
+
+/**
+ * Reads an 8-bit signed integer that must be at least zero and must not
+ * exceed @a max_value.
+ *
+ * Integers of any size and signedness are accepted if they fit in an int8_t.
+ *
+ * Returns 0 if an error occurs.
+ */
+MPACK_INLINE int8_t mpack_expect_i8_max(mpack_reader_t* reader, int8_t max_value) {
+    const int8_t min_value = 0; // the lower bound is fixed at zero
+    return mpack_expect_i8_range(reader, min_value, max_value);
+}
+
+/**
+ * Reads a 16-bit signed integer that must be at least zero and must not
+ * exceed @a max_value.
+ *
+ * Integers of any size and signedness are accepted if they fit in an int16_t.
+ *
+ * Returns 0 if an error occurs.
+ */
+MPACK_INLINE int16_t mpack_expect_i16_max(mpack_reader_t* reader, int16_t max_value) {
+    const int16_t min_value = 0; // the lower bound is fixed at zero
+    return mpack_expect_i16_range(reader, min_value, max_value);
+}
+
+/**
+ * Reads a 32-bit signed integer that must be at least zero and must not
+ * exceed @a max_value.
+ *
+ * Integers of any size and signedness are accepted if they fit in an int32_t.
+ *
+ * Returns 0 if an error occurs.
+ */
+MPACK_INLINE int32_t mpack_expect_i32_max(mpack_reader_t* reader, int32_t max_value) {
+    const int32_t min_value = 0; // the lower bound is fixed at zero
+    return mpack_expect_i32_range(reader, min_value, max_value);
+}
+
+/**
+ * Reads a 64-bit signed integer that must be at least zero and must not
+ * exceed @a max_value.
+ *
+ * Integers of any size and signedness are accepted if they fit in an int64_t.
+ *
+ * Returns 0 if an error occurs.
+ */
+MPACK_INLINE int64_t mpack_expect_i64_max(mpack_reader_t* reader, int64_t max_value) {
+    const int64_t min_value = 0; // the lower bound is fixed at zero
+    return mpack_expect_i64_range(reader, min_value, max_value);
+}
+
+/**
+ * Reads an int that must be at least zero and must not exceed @a max_value.
+ *
+ * Integers of any size and signedness are accepted if they fit in a signed int.
+ *
+ * Returns 0 if an error occurs.
+ */
+MPACK_INLINE int mpack_expect_int_max(mpack_reader_t* reader, int max_value) {
+    const int min_value = 0; // the lower bound is fixed at zero
+    return mpack_expect_int_range(reader, min_value, max_value);
+}
+
+#if MPACK_FLOAT
+/**
+ * Reads a number, ensuring that it falls within the given range and returning
+ * the value as a float. The underlying value can be an integer, float or
+ * double; the value is converted to a float.
+ *
+ * @note Reading a double or a large integer with this function can incur a
+ * loss of precision.
+ *
+ * @throws mpack_error_type if the underlying value is not a float, double or integer.
+ */
+float mpack_expect_float_range(mpack_reader_t* reader, float min_value, float max_value);
+#endif
+
+#if MPACK_DOUBLE
+/**
+ * Reads a number, ensuring that it falls within the given range and returning
+ * the value as a double. The underlying value can be an integer, float or
+ * double; the value is converted to a double.
+ *
+ * @note Reading a very large integer with this function can incur a
+ * loss of precision.
+ *
+ * @throws mpack_error_type if the underlying value is not a float, double or integer.
+ */
+double mpack_expect_double_range(mpack_reader_t* reader, double min_value, double max_value);
+#endif
+
+/**
+ * @}
+ */
+
+
+
+// These are additional Basic Number functions that wrap inline range functions.
+
+/**
+ * @name Basic Number Functions
+ * @{
+ */
+
+/**
+ * Reads an unsigned int.
+ *
+ * The encoded value may be an integer of any size and signedness, provided
+ * it can be represented in an unsigned int.
+ *
+ * Returns zero if an error occurs.
+ */
+MPACK_INLINE unsigned int mpack_expect_uint(mpack_reader_t* reader) {
+
+    // When unsigned int is not 32 bits wide, read a 64-bit value capped at
+    // MPACK_UINT_MAX so that the result is guaranteed to fit.
+    if (sizeof(unsigned int) != 4)
+        return (unsigned int)mpack_expect_u64_max(reader, MPACK_UINT_MAX);
+
+    // The usual case: unsigned int is 32 bits, so wrap the 32-bit function.
+    return (unsigned int)mpack_expect_u32(reader);
+}
+
+/**
+ * Reads a signed int.
+ *
+ * The encoded value may be an integer of any size and signedness, provided
+ * it can be represented in a signed int.
+ *
+ * Returns zero if an error occurs.
+ */
+MPACK_INLINE int mpack_expect_int(mpack_reader_t* reader) {
+
+    // When int is not 32 bits wide, read a 64-bit value restricted to the
+    // MPACK_INT_MIN..MPACK_INT_MAX range so that the result is sure to fit.
+    if (sizeof(int) != 4)
+        return (int)mpack_expect_i64_range(reader, MPACK_INT_MIN, MPACK_INT_MAX);
+
+    // The usual case: int is 32 bits, so wrap the 32-bit function.
+    return (int)mpack_expect_i32(reader);
+}
+
+/**
+ * @}
+ */
+
+
+
+/**
+ * @name Matching Number Functions
+ * @{
+ */
+
+/**
+ * Reads an unsigned integer, ensuring that it exactly matches the given value.
+ *
+ * mpack_error_type is raised if the value is not representable as an unsigned
+ * integer or if it does not exactly match the given value.
+ */
+void mpack_expect_uint_match(mpack_reader_t* reader, uint64_t value);
+
+/**
+ * Reads a signed integer, ensuring that it exactly matches the given value.
+ *
+ * mpack_error_type is raised if the value is not representable as a signed
+ * integer or if it does not exactly match the given value.
+ */
+void mpack_expect_int_match(mpack_reader_t* reader, int64_t value);
+
+/**
+ * @}
+ */
+
+/**
+ * @name Other Basic Types
+ * @{
+ */
+
+/**
+ * Reads a nil, raising @ref mpack_error_type if the value is not nil.
+ */
+void mpack_expect_nil(mpack_reader_t* reader);
+
+/**
+ * Reads a boolean.
+ *
+ * @note Integers will raise mpack_error_type; the value must be strictly a boolean.
+ */
+bool mpack_expect_bool(mpack_reader_t* reader);
+
+/**
+ * Reads a boolean, raising @ref mpack_error_type if its value is not @c true.
+ */
+void mpack_expect_true(mpack_reader_t* reader);
+
+/**
+ * Reads a boolean, raising @ref mpack_error_type if its value is not @c false.
+ */
+void mpack_expect_false(mpack_reader_t* reader);
+
+/**
+ * @}
+ */
+
+/**
+ * @name Extension Functions
+ * @{
+ */
+
+#if MPACK_EXTENSIONS
+/**
+ * Reads a timestamp.
+ *
+ * @note This requires @ref MPACK_EXTENSIONS.
+ */
+mpack_timestamp_t mpack_expect_timestamp(mpack_reader_t* reader);
+
+/**
+ * Reads a timestamp in seconds, truncating the nanoseconds (if any).
+ *
+ * @note This requires @ref MPACK_EXTENSIONS.
+ */
+int64_t mpack_expect_timestamp_truncate(mpack_reader_t* reader);
+#endif
+
+/**
+ * @}
+ */
+
+/**
+ * @name Compound Types
+ * @{
+ */
+
+/**
+ * Reads the start of a map, returning its element count.
+ *
+ * A number of values follow equal to twice the element count of the map,
+ * alternating between keys and values. @ref mpack_done_map() must be called
+ * once all elements have been read.
+ *
+ * @note Maps in JSON are unordered, so it is recommended not to expect
+ * a specific ordering for your map values in case your data is converted
+ * to/from JSON.
+ *
+ * @warning This call is dangerous! It does not have a size limit, and it
+ * does not have any way of checking whether there is enough data in the
+ * message (since the data could be coming from a stream.) When looping
+ * through the map's contents, you must check for errors on each iteration
+ * of the loop. Otherwise an attacker could craft a message declaring a map
+ * of a billion elements which would throw your parsing code into an
+ * infinite loop! You should strongly consider using mpack_expect_map_max()
+ * with a safe maximum size instead.
+ *
+ * @throws mpack_error_type if the value is not a map.
+ */
+uint32_t mpack_expect_map(mpack_reader_t* reader);
+
+/**
+ * Reads the start of a map with a number of elements in the given range, returning
+ * its element count.
+ *
+ * A number of values follow equal to twice the element count of the map,
+ * alternating between keys and values. @ref mpack_done_map() must be called
+ * once all elements have been read.
+ *
+ * @note Maps in JSON are unordered, so it is recommended not to expect
+ * a specific ordering for your map values in case your data is converted
+ * to/from JSON.
+ *
+ * min_count is returned if an error occurs.
+ *
+ * @throws mpack_error_type if the value is not a map or if its size does
+ * not fall within the given range.
+ */
+uint32_t mpack_expect_map_range(mpack_reader_t* reader, uint32_t min_count, uint32_t max_count);
+
+/**
+ * Reads the start of a map holding no more than @a max_count elements and
+ * returns its element count.
+ *
+ * Twice as many values as the element count follow, alternating between
+ * keys and values. @ref mpack_done_map() must be called once all of them
+ * have been read.
+ *
+ * @note Maps in JSON are unordered, so avoid relying on a specific ordering
+ * of map entries in case your data is converted to/from JSON.
+ *
+ * Zero is returned if an error occurs.
+ *
+ * @throws mpack_error_type if the value is not a map or if its size is
+ * greater than max_count.
+ */
+MPACK_INLINE uint32_t mpack_expect_map_max(mpack_reader_t* reader, uint32_t max_count) {
+    const uint32_t min_count = 0; // no lower limit on the element count
+    return mpack_expect_map_range(reader, min_count, max_count);
+}
+
+/**
+ * Reads the start of a map of the exact size given.
+ *
+ * A number of values follow equal to twice the element count of the map,
+ * alternating between keys and values. @ref mpack_done_map() must be called
+ * once all elements have been read.
+ *
+ * @note Maps in JSON are unordered, so it is recommended not to expect
+ * a specific ordering for your map values in case your data is converted
+ * to/from JSON.
+ *
+ * @throws mpack_error_type if the value is not a map or if its size
+ * does not match the given count.
+ */
+void mpack_expect_map_match(mpack_reader_t* reader, uint32_t count);
+
+/**
+ * Reads a nil node or the start of a map, returning whether a map was
+ * read and placing its number of key/value pairs in count.
+ *
+ * If a map was read, a number of values follow equal to twice the element count
+ * of the map, alternating between keys and values. @ref mpack_done_map() should
+ * also be called once all elements have been read (only if a map was read.)
+ *
+ * @note Maps in JSON are unordered, so it is recommended not to expect
+ * a specific ordering for your map values in case your data is converted
+ * to/from JSON.
+ *
+ * @warning This call is dangerous! It does not have a size limit, and it
+ * does not have any way of checking whether there is enough data in the
+ * message (since the data could be coming from a stream.) When looping
+ * through the map's contents, you must check for errors on each iteration
+ * of the loop. Otherwise an attacker could craft a message declaring a map
+ * of a billion elements which would throw your parsing code into an
+ * infinite loop! You should strongly consider using mpack_expect_map_max_or_nil()
+ * with a safe maximum size instead.
+ *
+ * @returns @c true if a map was read successfully; @c false if nil was read
+ *     or an error occurred.
+ * @throws mpack_error_type if the value is not a nil or map.
+ */
+bool mpack_expect_map_or_nil(mpack_reader_t* reader, uint32_t* count);
+
+/**
+ * Reads a nil node or the start of a map with a number of elements at most
+ * max_count, returning whether a map was read and placing its number of
+ * key/value pairs in count.
+ *
+ * If a map was read, a number of values follow equal to twice the element count
+ * of the map, alternating between keys and values. @ref mpack_done_map() should
+ * also be called once all elements have been read (only if a map was read.)
+ *
+ * @note Maps in JSON are unordered, so it is recommended not to expect
+ * a specific ordering for your map values in case your data is converted
+ * to/from JSON. Consider using mpack_expect_key_cstr() or mpack_expect_key_uint()
+ * to switch on the key; see @ref docs/expect.md for examples.
+ *
+ * @returns @c true if a map was read successfully; @c false if nil was read
+ *     or an error occurred.
+ * @throws mpack_error_type if the value is not a nil or map.
+ */
+bool mpack_expect_map_max_or_nil(mpack_reader_t* reader, uint32_t max_count, uint32_t* count);
+
+/**
+ * Reads the start of an array, returning its element count.
+ *
+ * A number of values follow equal to the element count of the array.
+ * @ref mpack_done_array() must be called once all elements have been read.
+ *
+ * @warning This call is dangerous! It does not have a size limit, and it
+ * does not have any way of checking whether there is enough data in the
+ * message (since the data could be coming from a stream.) When looping
+ * through the array's contents, you must check for errors on each iteration
+ * of the loop. Otherwise an attacker could craft a message declaring an array
+ * of a billion elements which would throw your parsing code into an
+ * infinite loop! You should strongly consider using mpack_expect_array_max()
+ * with a safe maximum size instead.
+ */
+uint32_t mpack_expect_array(mpack_reader_t* reader);
+
+/**
+ * Reads the start of an array with a number of elements in the given range,
+ * returning its element count.
+ *
+ * A number of values follow equal to the element count of the array.
+ * @ref mpack_done_array() must be called once all elements have been read.
+ *
+ * min_count is returned if an error occurs.
+ *
+ * @throws mpack_error_type if the value is not an array or if its size does
+ * not fall within the given range.
+ */
+uint32_t mpack_expect_array_range(mpack_reader_t* reader, uint32_t min_count, uint32_t max_count);
+
+/**
+ * Reads the start of an array of at most @a max_count elements, returning its count.
+ *
+ * As many values as the element count follow. @ref mpack_done_array() must
+ * be called once all of them have been read.
+ *
+ * Zero is returned if an error occurs.
+ *
+ * @throws mpack_error_type if the value is not an array or if its size is
+ * greater than max_count.
+ */
+MPACK_INLINE uint32_t mpack_expect_array_max(mpack_reader_t* reader, uint32_t max_count) {
+    const uint32_t min_count = 0; // no lower limit on the element count
+    return mpack_expect_array_range(reader, min_count, max_count);
+}
+
+/**
+ * Reads the start of an array of the exact size given.
+ *
+ * A number of values follow equal to the element count of the array.
+ * @ref mpack_done_array() must be called once all elements have been read.
+ *
+ * @throws mpack_error_type if the value is not an array or if its size does
+ * not match the given count.
+ */
+void mpack_expect_array_match(mpack_reader_t* reader, uint32_t count);
+
+/**
+ * Reads a nil node or the start of an array, returning whether an array was
+ * read and placing its number of elements in count.
+ *
+ * If an array was read, a number of values follow equal to the element count
+ * of the array. @ref mpack_done_array() should also be called once all elements
+ * have been read (only if an array was read.)
+ *
+ * @warning This call is dangerous! It does not have a size limit, and it
+ * does not have any way of checking whether there is enough data in the
+ * message (since the data could be coming from a stream.) When looping
+ * through the array's contents, you must check for errors on each iteration
+ * of the loop. Otherwise an attacker could craft a message declaring an array
+ * of a billion elements which would throw your parsing code into an
+ * infinite loop! You should strongly consider using mpack_expect_array_max_or_nil()
+ * with a safe maximum size instead.
+ *
+ * @returns @c true if an array was read successfully; @c false if nil was read
+ *     or an error occurred.
+ * @throws mpack_error_type if the value is not a nil or array.
+ */
+bool mpack_expect_array_or_nil(mpack_reader_t* reader, uint32_t* count);
+
+/**
+ * Reads a nil node or the start of an array with a number of elements at most
+ * max_count, returning whether an array was read and placing its number of
+ * elements in count.
+ *
+ * If an array was read, a number of values follow equal to the element count
+ * of the array. @ref mpack_done_array() should also be called once all elements
+ * have been read (only if an array was read.)
+ *
+ * @returns @c true if an array was read successfully; @c false if nil was read
+ *     or an error occurred.
+ * @throws mpack_error_type if the value is not a nil or array.
+ */
+bool mpack_expect_array_max_or_nil(mpack_reader_t* reader, uint32_t max_count, uint32_t* count);
+
+#ifdef MPACK_MALLOC
+/**
+ * @hideinitializer
+ *
+ * Reads the start of an array and allocates storage for it, placing its
+ * size in out_count. A number of objects follow equal to the element count
+ * of the array. You must call @ref mpack_done_array() when done (even
+ * if the element count is zero.)
+ *
+ * If an error occurs, NULL is returned and the reader is placed in an
+ * error state.
+ *
+ * If the count is zero, NULL is returned. This does not indicate error.
+ * You should not check the return value for NULL to check for errors; only
+ * check the reader's error state.
+ *
+ * The allocated array must be freed with MPACK_FREE() (or simply free()
+ * if MPack's allocator hasn't been customized.)
+ *
+ * @throws mpack_error_type if the value is not an array or if its size is
+ * greater than max_count.
+ */
+#define mpack_expect_array_alloc(reader, Type, max_count, out_count) \
+    ((Type*)mpack_expect_array_alloc_impl((reader), sizeof(Type), (max_count), (out_count), false))
+
+/**
+ * @hideinitializer
+ *
+ * Reads a nil node or the start of an array and allocates storage for it,
+ * placing its size in out_count. A number of objects follow equal to the element
+ * count of the array if a non-empty array was read.
+ *
+ * If an error occurs, NULL is returned and the reader is placed in an
+ * error state.
+ *
+ * If a nil node was read, NULL is returned. If an empty array was read,
+ * mpack_done_array() is called automatically and NULL is returned. These
+ * do not indicate error. You should not check the return value for NULL
+ * to check for errors; only check the reader's error state.
+ *
+ * The allocated array must be freed with MPACK_FREE() (or simply free()
+ * if MPack's allocator hasn't been customized.)
+ *
+ * @warning You must call @ref mpack_done_array() if and only if a non-zero
+ * element count is read. This function does not differentiate between nil
+ * and an empty array.
+ *
+ * @throws mpack_error_type if the value is not nil or an array, or if its size is
+ * greater than max_count.
+ */
+#define mpack_expect_array_or_nil_alloc(reader, Type, max_count, out_count) \
+    ((Type*)mpack_expect_array_alloc_impl((reader), sizeof(Type), (max_count), (out_count), true))
+#endif
+
+/**
+ * @}
+ */
+
+/** @cond */
+#ifdef MPACK_MALLOC
+void* mpack_expect_array_alloc_impl(mpack_reader_t* reader,
+        size_t element_size, uint32_t max_count, uint32_t* out_count, bool allow_nil);
+#endif
+/** @endcond */
+
+
+/**
+ * @name String Functions
+ * @{
+ */
+
+/**
+ * Reads the start of a string, returning its size in bytes.
+ *
+ * The bytes follow and must be read separately with mpack_read_bytes()
+ * or mpack_read_bytes_inplace(). mpack_done_str() must be called
+ * once all bytes have been read.
+ *
+ * NUL bytes are allowed in the string, and no encoding checks are done.
+ *
+ * mpack_error_type is raised if the value is not a string.
+ */
+uint32_t mpack_expect_str(mpack_reader_t* reader);
+
+/**
+ * Reads a string of at most the given size, writing it into the
+ * given buffer and returning its size in bytes.
+ *
+ * This does not add a null-terminator! Use mpack_expect_cstr() to
+ * add a null-terminator.
+ *
+ * NUL bytes are allowed in the string, and no encoding checks are done.
+ */
+size_t mpack_expect_str_buf(mpack_reader_t* reader, char* buf, size_t bufsize);
+
+/**
+ * Reads a string into the given buffer, ensuring it is a valid UTF-8 string
+ * and returning its size in bytes.
+ *
+ * This does not add a null-terminator! Use mpack_expect_utf8_cstr() to
+ * add a null-terminator.
+ *
+ * This does not accept any UTF-8 variant such as Modified UTF-8, CESU-8 or
+ * WTF-8. Only pure UTF-8 is allowed.
+ *
+ * NUL bytes are allowed in the string (as they are in UTF-8.)
+ *
+ * Raises mpack_error_too_big if there is not enough room for the string.
+ * Raises mpack_error_type if the value is not a string or is not a valid UTF-8 string.
+ */
+size_t mpack_expect_utf8(mpack_reader_t* reader, char* buf, size_t bufsize);
+
+/**
+ * Reads the start of a string of at most @a maxsize bytes (not counting any
+ * null-terminator), returning its length, or 0 if the limit is exceeded.
+ *
+ * The bytes follow and must be read separately with mpack_read_bytes()
+ * or mpack_read_bytes_inplace(). @ref mpack_done_str() must be called
+ * once all bytes have been read.
+ *
+ * @throws mpack_error_type If the value is not a string.
+ * @throws mpack_error_too_big If the string's length in bytes is larger than the given maximum size.
+ */
+MPACK_INLINE uint32_t mpack_expect_str_max(mpack_reader_t* reader, uint32_t maxsize) {
+    const uint32_t byte_count = mpack_expect_str(reader);
+    if (byte_count <= maxsize)
+        return byte_count;
+    // Over the caller's limit: flag the error and report zero bytes.
+    mpack_reader_flag_error(reader, mpack_error_too_big);
+    return 0;
+}
+
+/**
+ * Reads the start of a string of exactly @a count bytes (excluding any null-terminator.)
+ *
+ * The bytes follow and must be read separately with mpack_read_bytes()
+ * or mpack_read_bytes_inplace(). @ref mpack_done_str() must be called
+ * once all bytes have been read.
+ *
+ * @throws mpack_error_type if the value is not a string or its length differs.
+ */
+MPACK_INLINE void mpack_expect_str_length(mpack_reader_t* reader, uint32_t count) {
+    const uint32_t byte_count = mpack_expect_str(reader);
+    if (byte_count == count)
+        return;
+    mpack_reader_flag_error(reader, mpack_error_type);
+}
+
+/**
+ * Reads a string, ensuring it exactly matches the given string.
+ *
+ * Remember that maps are unordered in JSON. Don't use this for map keys
+ * unless the map has only a single key!
+ */
+void mpack_expect_str_match(mpack_reader_t* reader, const char* str, size_t length);
+
+/**
+ * Reads a string into the given buffer, ensures it has no null bytes,
+ * and adds a null-terminator at the end.
+ *
+ * Raises mpack_error_too_big if there is not enough room for the string and null-terminator.
+ * Raises mpack_error_type if the value is not a string or contains a null byte.
+ */
+void mpack_expect_cstr(mpack_reader_t* reader, char* buf, size_t size);
+
+/**
+ * Reads a string into the given buffer, ensures it is a valid UTF-8 string
+ * without NUL characters, and adds a null-terminator at the end.
+ *
+ * This does not accept any UTF-8 variant such as Modified UTF-8, CESU-8 or
+ * WTF-8. Only pure UTF-8 is allowed, but without the NUL character, since
+ * it cannot be represented in a null-terminated string.
+ *
+ * Raises mpack_error_too_big if there is not enough room for the string and null-terminator.
+ * Raises mpack_error_type if the value is not a string or is not a valid UTF-8 string.
+ */
+void mpack_expect_utf8_cstr(mpack_reader_t* reader, char* buf, size_t size);
+
+#ifdef MPACK_MALLOC
+/**
+ * Reads a string with the given total maximum size (including space for a
+ * null-terminator), allocates storage for it, ensures it has no null-bytes,
+ * and adds a null-terminator at the end. You assume ownership of the
+ * returned pointer if reading succeeds.
+ *
+ * The allocated string must be freed with MPACK_FREE() (or simply free()
+ * if MPack's allocator hasn't been customized.)
+ *
+ * @throws mpack_error_too_big If the string plus null-terminator is larger than the given maxsize.
+ * @throws mpack_error_type If the value is not a string or contains a null byte.
+ */
+char* mpack_expect_cstr_alloc(mpack_reader_t* reader, size_t maxsize);
+
+/**
+ * Reads a string with the given total maximum size (including space for a
+ * null-terminator), allocates storage for it, ensures it is valid UTF-8
+ * with no null-bytes, and adds a null-terminator at the end. You assume
+ * ownership of the returned pointer if reading succeeds.
+ *
+ * The string's length is not returned separately; since the result is
+ * null-terminated and contains no null bytes, it can be measured with strlen().
+ *
+ * This does not accept any UTF-8 variant such as Modified UTF-8, CESU-8 or
+ * WTF-8. Only pure UTF-8 is allowed, but without the NUL character, since
+ * it cannot be represented in a null-terminated string.
+ *
+ * The allocated string must be freed with MPACK_FREE() (or simply
+ * free() if MPack's allocator hasn't been customized) once it is no
+ * longer needed.
+ *
+ * @throws mpack_error_too_big If the string plus null-terminator is larger
+ *     than the given maxsize.
+ * @throws mpack_error_type If the value is not a string or contains
+ *     invalid UTF-8 or a null byte.
+ */
+char* mpack_expect_utf8_cstr_alloc(mpack_reader_t* reader, size_t maxsize);
+#endif
+
+/**
+ * Reads a string, ensuring it exactly matches the given null-terminated string.
+ *
+ * Maps are unordered in JSON: only use this for the key of a single-key map!
+ */
+MPACK_INLINE void mpack_expect_cstr_match(mpack_reader_t* reader, const char* cstr) {
+    size_t length;
+    mpack_assert(cstr != NULL, "cstr pointer is NULL");
+    length = mpack_strlen(cstr);
+    mpack_expect_str_match(reader, cstr, length);
+}
+
+/**
+ * @}
+ */
+
+/**
+ * @name Binary Data
+ * @{
+ */
+
+/**
+ * Reads the start of a binary blob, returning its size in bytes.
+ *
+ * The bytes follow and must be read separately with mpack_read_bytes()
+ * or mpack_read_bytes_inplace(). @ref mpack_done_bin() must be called
+ * once all bytes have been read.
+ *
+ * @param reader The reader
+ * @return The size of the binary blob in bytes
+ *
+ * @throws mpack_error_type if the value is not a binary blob.
+ */
+uint32_t mpack_expect_bin(mpack_reader_t* reader);
+
+/**
+ * Reads the start of a binary blob whose length must be at most the given
+ * number of bytes.
+ *
+ * The bytes follow and must be read separately with mpack_read_bytes()
+ * or mpack_read_bytes_inplace(). @ref mpack_done_bin() must be called
+ * once all bytes have been read.
+ *
+ * @param reader The reader
+ * @param maxsize The largest blob length, in bytes, that is accepted
+ * @return The length of the blob in bytes, or 0 if it is longer than
+ *     @p maxsize
+ *
+ * @throws mpack_error_type if the value is not a binary blob or if its
+ *     length is greater than @p maxsize.
+ */
+MPACK_INLINE uint32_t mpack_expect_bin_max(mpack_reader_t* reader, uint32_t maxsize) {
+    const uint32_t size = mpack_expect_bin(reader);
+
+    if (size <= maxsize)
+        return size;
+
+    /* The blob is larger than the caller is prepared to accept. */
+    mpack_reader_flag_error(reader, mpack_error_type);
+    return 0;
+}
+
+/**
+ * Reads the start of a binary blob whose length must be exactly the given
+ * number of bytes.
+ *
+ * The bytes follow and must be read separately with mpack_read_bytes()
+ * or mpack_read_bytes_inplace(). @ref mpack_done_bin() must be called
+ * once all bytes have been read.
+ *
+ * @param reader The reader
+ * @param count The required length of the blob in bytes
+ *
+ * @throws mpack_error_type if the value is not a binary blob or if its size
+ *     is not @p count.
+ */
+MPACK_INLINE void mpack_expect_bin_size(mpack_reader_t* reader, uint32_t count) {
+    const uint32_t actual = mpack_expect_bin(reader);
+
+    if (actual == count)
+        return;
+
+    mpack_reader_flag_error(reader, mpack_error_type);
+}
+
+/**
+ * Reads a binary blob into the given buffer, returning its size in bytes.
+ *
+ * For compatibility, this will accept if the underlying type is string or
+ * binary (since in MessagePack 1.0, strings and binary data were combined
+ * under the "raw" type which became string in 1.1.)
+ *
+ * @param reader The reader
+ * @param buf The buffer to read the blob into
+ * @param size NOTE(review): presumably the capacity of @p buf in bytes;
+ *     confirm against the implementation
+ * @return The size of the blob in bytes
+ */
+size_t mpack_expect_bin_buf(mpack_reader_t* reader, char* buf, size_t size);
+
+/**
+ * Reads a binary blob with the exact given size into the given buffer.
+ *
+ * For compatibility, this will accept if the underlying type is string or
+ * binary (since in MessagePack 1.0, strings and binary data were combined
+ * under the "raw" type which became string in 1.1.)
+ *
+ * @param reader The reader
+ * @param buf The buffer to read the blob into
+ * @param size The exact size in bytes that the blob must have
+ *
+ * @throws mpack_error_type if the value is not a binary blob or if its size
+ * does not match.
+ */
+void mpack_expect_bin_size_buf(mpack_reader_t* reader, char* buf, uint32_t size);
+
+/**
+ * Reads a binary blob with the given total maximum size, allocating storage for it.
+ */
+char* mpack_expect_bin_alloc(mpack_reader_t* reader, size_t maxsize, size_t* size);
+
+/**
+ * @}
+ */
+
+/**
+ * @name Extension Functions
+ * @{
+ */
+
+#if MPACK_EXTENSIONS
+/**
+ * Reads the start of an extension blob, returning its size in bytes and
+ * placing the type into @p type.
+ *
+ * The bytes follow and must be read separately with mpack_read_bytes()
+ * or mpack_read_bytes_inplace(). @ref mpack_done_ext() must be called
+ * once all bytes have been read.
+ *
+ * @p type will be a user-defined type in the range [0,127] or a reserved type
+ * in the range [-128,-2].
+ *
+ * mpack_error_type is raised if the value is not an extension blob. The @p
+ * type value is zero if an error occurs.
+ *
+ * @note This cannot be used to match a timestamp. @ref mpack_error_type will
+ * be flagged if the value is a timestamp. Use mpack_expect_timestamp() or
+ * mpack_expect_timestamp_truncate() instead.
+ *
+ * @note This requires @ref MPACK_EXTENSIONS.
+ *
+ * @warning Be careful when using reserved types. They may no longer be ext
+ * types in the future, and previously valid data containing reserved types may
+ * become invalid in the future.
+ *
+ * @param reader The reader
+ * @param type Receives the extension type, or zero if an error occurs
+ * @return The size of the extension blob in bytes
+ */
+uint32_t mpack_expect_ext(mpack_reader_t* reader, int8_t* type);
+
+/**
+ * Reads the start of an extension blob, raising an error if its length is not
+ * at most the given number of bytes and placing the type into @p type.
+ *
+ * The bytes follow and must be read separately with mpack_read_bytes()
+ * or mpack_read_bytes_inplace(). @ref mpack_done_ext() must be called
+ * once all bytes have been read.
+ *
+ * mpack_error_type is raised if the value is not an extension blob or if its
+ * length is greater than @p maxsize. The @p type value is zero if an error is
+ * raised.
+ *
+ * @p type will be a user-defined type in the range [0,127] or a reserved type
+ * in the range [-128,-2].
+ *
+ * @note This cannot be used to match a timestamp. @ref mpack_error_type will
+ * be flagged if the value is a timestamp. Use mpack_expect_timestamp() or
+ * mpack_expect_timestamp_truncate() instead.
+ *
+ * @note This requires @ref MPACK_EXTENSIONS.
+ *
+ * @warning Be careful when using reserved types. They may no longer be ext
+ * types in the future, and previously valid data containing reserved types may
+ * become invalid in the future.
+ *
+ * @param reader The reader
+ * @param type Receives the extension type, or zero if an error is raised
+ * @param maxsize The largest blob length, in bytes, that is accepted
+ * @return The length of the blob in bytes, or 0 if it is longer than
+ *     @p maxsize
+ *
+ * @see mpack_expect_ext()
+ */
+MPACK_INLINE uint32_t mpack_expect_ext_max(mpack_reader_t* reader, int8_t* type, uint32_t maxsize) {
+    uint32_t length = mpack_expect_ext(reader, type);
+    if (length > maxsize) {
+        /*
+         * Honour the documented contract that type is zero whenever an error
+         * is raised. It is cleared before flagging, matching
+         * mpack_expect_ext_size(), so the caller never sees the type of a
+         * rejected blob.
+         */
+        *type = 0;
+        mpack_reader_flag_error(reader, mpack_error_type);
+        return 0;
+    }
+    return length;
+}
+
+/**
+ * Reads the start of an extension blob whose length must be exactly the given
+ * number of bytes, placing the type into @p type.
+ *
+ * The bytes follow and must be read separately with mpack_read_bytes()
+ * or mpack_read_bytes_inplace(). @ref mpack_done_ext() must be called
+ * once all bytes have been read.
+ *
+ * mpack_error_type is raised if the value is not an extension blob or if its
+ * length does not match. The @p type value is zero if an error is raised.
+ *
+ * @p type will be a user-defined type in the range [0,127] or a reserved type
+ * in the range [-128,-2].
+ *
+ * @note This cannot be used to match a timestamp. @ref mpack_error_type will
+ * be flagged if the value is a timestamp. Use mpack_expect_timestamp() or
+ * mpack_expect_timestamp_truncate() instead.
+ *
+ * @note This requires @ref MPACK_EXTENSIONS.
+ *
+ * @warning Be careful when using reserved types. They may no longer be ext
+ * types in the future, and previously valid data containing reserved types may
+ * become invalid in the future.
+ *
+ * @param reader The reader
+ * @param type Receives the extension type, or zero if an error is raised
+ * @param count The required length of the blob in bytes
+ *
+ * @see mpack_expect_ext()
+ */
+MPACK_INLINE void mpack_expect_ext_size(mpack_reader_t* reader, int8_t* type, uint32_t count) {
+    const uint32_t actual = mpack_expect_ext(reader, type);
+
+    if (actual == count)
+        return;
+
+    /* Length mismatch: clear the type first, then flag the error. */
+    *type = 0;
+    mpack_reader_flag_error(reader, mpack_error_type);
+}
+
+/**
+ * Reads an extension blob into the given buffer, returning its size in bytes
+ * and placing the type into @p type.
+ *
+ * mpack_error_type is raised if the value is not an extension blob or if its
+ * length does not match. The @p type value is zero if an error is raised.
+ *
+ * @p type will be a user-defined type in the range [0,127] or a reserved type
+ * in the range [-128,-2].
+ *
+ * @note This cannot be used to match a timestamp. @ref mpack_error_type will
+ * be flagged if the value is a timestamp. Use mpack_expect_timestamp() or
+ * mpack_expect_timestamp_truncate() instead.
+ *
+ * @warning Be careful when using reserved types. They may no longer be ext
+ * types in the future, and previously valid data containing reserved types may
+ * become invalid in the future.
+ *
+ * @note This requires @ref MPACK_EXTENSIONS.
+ *
+ * @param reader The reader
+ * @param type Receives the extension type, or zero if an error is raised
+ * @param buf The buffer to read the blob into
+ * @param size NOTE(review): presumably the capacity of @p buf in bytes;
+ *     confirm against the implementation
+ * @return The size of the extension blob in bytes
+ *
+ * @see mpack_expect_ext()
+ */
+size_t mpack_expect_ext_buf(mpack_reader_t* reader, int8_t* type, char* buf, size_t size);
+#endif
+
+#if MPACK_EXTENSIONS && defined(MPACK_MALLOC)
+/**
+ * Reads an extension blob with the given total maximum size, allocating
+ * storage for it, and placing the type into @p type.
+ *
+ * mpack_error_type is raised if the value is not an extension blob or if its
+ * length does not match. The @p type value is zero if an error is raised.
+ *
+ * @p type will be a user-defined type in the range [0,127] or a reserved type
+ * in the range [-128,-2].
+ *
+ * @note This cannot be used to match a timestamp. @ref mpack_error_type will
+ * be flagged if the value is a timestamp. Use mpack_expect_timestamp() or
+ * mpack_expect_timestamp_truncate() instead.
+ *
+ * @warning Be careful when using reserved types. They may no longer be ext
+ * types in the future, and previously valid data containing reserved types may
+ * become invalid in the future.
+ *
+ * @note This requires @ref MPACK_EXTENSIONS and @ref MPACK_MALLOC.
+ *
+ * @param reader The reader
+ * @param type Receives the extension type, or zero if an error is raised
+ * @param maxsize The maximum size of the blob in bytes
+ * @param size NOTE(review): presumably receives the size of the blob in
+ *     bytes; confirm against the implementation
+ *
+ * @see mpack_expect_ext()
+ */
+char* mpack_expect_ext_alloc(mpack_reader_t* reader, int8_t* type, size_t maxsize, size_t* size);
+#endif
+
+/**
+ * @}
+ */
+
+/**
+ * @name Special Functions
+ * @{
+ */
+
+/**
+ * Reads a MessagePack object header (an MPack tag), expecting it to exactly
+ * match the given tag.
+ *
+ * If the type is compound (i.e. is a map, array, string, binary or
+ * extension type), additional reads are required to get the contained
+ * data, and the corresponding done function must be called when done.
+ *
+ * @param reader The reader
+ * @param tag The tag that the next object header must match
+ *
+ * @throws mpack_error_type if the tag does not match
+ *
+ * @see mpack_read_bytes()
+ * @see mpack_done_array()
+ * @see mpack_done_map()
+ * @see mpack_done_str()
+ * @see mpack_done_bin()
+ * @see mpack_done_ext()
+ */
+void mpack_expect_tag(mpack_reader_t* reader, mpack_tag_t tag);
+
+/**
+ * Expects a string matching one of the strings in the given array,
+ * returning its array index.
+ *
+ * If the value does not match any of the given strings,
+ * @ref mpack_error_type is flagged. Use mpack_expect_enum_optional()
+ * if you want to allow other values than the given strings.
+ *
+ * If any error occurs or the reader is in an error state, @a count
+ * is returned.
+ *
+ * This can be used to quickly parse a string into an enum when the
+ * enum values range from 0 to @a count-1. If the last value in the
+ * enum is a special "count" value, it can be passed as the count,
+ * and the return value can be cast directly to the enum type.
+ *
+ * @code{.c}
+ * typedef enum           { APPLE ,  BANANA ,  ORANGE , COUNT} fruit_t;
+ * const char* fruits[] = {"apple", "banana", "orange"};
+ *
+ * fruit_t fruit = (fruit_t)mpack_expect_enum(reader, fruits, COUNT);
+ * @endcode
+ *
+ * See @ref docs/expect.md for more examples.
+ *
+ * The maximum string length is the size of the buffer (strings are read in-place.)
+ *
+ * @param reader The reader
+ * @param strings An array of @a count expected strings
+ * @param count The number of strings in the @a strings array
+ * @return The index of the matched string, or @a count in case of error
+ *
+ * @see mpack_expect_enum_optional()
+ */
+size_t mpack_expect_enum(mpack_reader_t* reader, const char* strings[], size_t count);
+
+/**
+ * Expects a string matching one of the strings in the given array
+ * returning its array index, or @a count if no strings match.
+ *
+ * If the value is not a string, or it does not match any of the
+ * given strings, @a count is returned and no error is flagged.
+ *
+ * If any error occurs or the reader is in an error state, @a count
+ * is returned.
+ *
+ * This can be used to quickly parse a string into an enum when the
+ * enum values range from 0 to @a count-1. If the last value in the
+ * enum is a special "count" value, it can be passed as the count,
+ * and the return value can be cast directly to the enum type.
+ *
+ * @code{.c}
+ * typedef enum           { APPLE ,  BANANA ,  ORANGE , COUNT} fruit_t;
+ * const char* fruits[] = {"apple", "banana", "orange"};
+ *
+ * fruit_t fruit = (fruit_t)mpack_expect_enum_optional(reader, fruits, COUNT);
+ * @endcode
+ *
+ * See @ref docs/expect.md for more examples.
+ *
+ * The maximum string length is the size of the buffer (strings are read in-place.)
+ *
+ * @param reader The reader
+ * @param strings An array of @a count expected strings
+ * @param count The number of strings in the @a strings array
+ *
+ * @return The index of the matched string, or @a count if it does not
+ * match or an error occurs
+ *
+ * @see mpack_expect_enum()
+ */
+size_t mpack_expect_enum_optional(mpack_reader_t* reader, const char* strings[], size_t count);
+
+/**
+ * Expects an unsigned integer map key between 0 and count-1, marking it
+ * as found in the given bool array and returning it.
+ *
+ * This is a helper for switching among int keys in a map. It is
+ * typically used with an enum to define the key values. It should
+ * be called in the expression of a switch() statement. See @ref
+ * docs/expect.md for an example.
+ *
+ * The found array must be cleared before expecting the first key. If the
+ * flag for a given key is already set when found (i.e. the map contains a
+ * duplicate key), mpack_error_invalid is flagged.
+ *
+ * If the key is not a non-negative integer, or if the key is @a count or
+ * larger, @a count is returned and no error is flagged. If you want an error
+ * on unrecognized keys, flag an error in the default case in your switch;
+ * otherwise you must call mpack_discard() to discard its content.
+ *
+ * @param reader The reader
+ * @param found An array of bool flags of length count
+ * @param count The number of values in the found array, and one more than the
+ *              maximum allowed key
+ *
+ * @return The key, or @a count if the key is not a non-negative integer or
+ *         is @a count or larger
+ *
+ * @see @ref docs/expect.md
+ */
+size_t mpack_expect_key_uint(mpack_reader_t* reader, bool found[], size_t count);
+
+/**
+ * Expects a string map key matching one of the strings in the given key list,
+ * marking it as found in the given bool array and returning its index.
+ *
+ * This is a helper for switching among string keys in a map. It is
+ * typically used with an enum with names matching the strings in the
+ * array to define the key indices. It should be called in the expression
+ * of a switch() statement. See @ref docs/expect.md for an example.
+ *
+ * The found array must be cleared before expecting the first key. If the
+ * flag for a given key is already set when found (i.e. the map contains a
+ * duplicate key), mpack_error_invalid is flagged.
+ *
+ * If the key is unrecognized, count is returned and no error is flagged. If
+ * you want an error on unrecognized keys, flag an error in the default case
+ * in your switch; otherwise you must call mpack_discard() to discard its content.
+ *
+ * The maximum key length is the size of the buffer (keys are read in-place.)
+ *
+ * @param reader The reader
+ * @param keys An array of expected string keys of length count
+ * @param found An array of bool flags of length count
+ * @param count The number of values in the keys and found arrays
+ *
+ * @return The index of the matched key, or @a count if the key is
+ *         unrecognized
+ *
+ * @see @ref docs/expect.md
+ */
+size_t mpack_expect_key_cstr(mpack_reader_t* reader, const char* keys[],
+        bool found[], size_t count);
+
+/**
+ * @}
+ */
+
+/**
+ * @}
+ */
+
+#endif
+
+MPACK_EXTERN_C_END
+MPACK_SILENCE_WARNINGS_END
+
+#endif
+
+
+
+/* mpack/mpack-node.h.h */
+
+/**
+ * @file
+ *
+ * Declares the MPack dynamic Node API.
+ */
+
+#ifndef MPACK_NODE_H
+#define MPACK_NODE_H 1
+
+/* #include "mpack-reader.h" */
+
+MPACK_SILENCE_WARNINGS_BEGIN
+MPACK_EXTERN_C_BEGIN
+
+#if MPACK_NODE
+
+/**
+ * @defgroup node Node API
+ *
+ * The MPack Node API allows you to parse a chunk of MessagePack into a
+ * dynamically typed data structure, providing random access to the parsed
+ * data.
+ *
+ * See @ref docs/node.md for examples.
+ *
+ * @{
+ */
+
+/**
+ * A handle to node data in a parsed MPack tree.
+ *
+ * Nodes represent either primitive values or compound types. If a
+ * node is a compound type, it contains a pointer to its child nodes,
+ * or a pointer to its underlying data.
+ *
+ * Nodes are immutable.
+ *
+ * @note @ref mpack_node_t is an opaque reference to the node data, not the
+ * node data itself. (It contains pointers to both the node data and the tree.)
+ * It is passed by value in the Node API.
+ *
+ * @see mpack_tree_root()
+ */
+typedef struct mpack_node_t mpack_node_t;
+
+/**
+ * The storage for nodes in an MPack tree.
+ *
+ * You only need to use this if you intend to provide your own storage
+ * for nodes instead of letting the tree allocate it.
+ *
+ * @ref mpack_node_data_t is 16 bytes on most common architectures (32-bit
+ * and 64-bit.)
+ *
+ * @see mpack_tree_init_pool()
+ */
+typedef struct mpack_node_data_t mpack_node_data_t;
+
+/**
+ * An MPack tree parser to parse a blob or stream of MessagePack.
+ *
+ * When a message is parsed, the tree contains a single root node which
+ * contains all parsed data. The tree and its nodes are immutable.
+ *
+ * @see mpack_tree_parse()
+ * @see mpack_tree_root()
+ */
+typedef struct mpack_tree_t mpack_tree_t;
+
+/**
+ * An error handler function to be called when an error is flagged on
+ * the tree.
+ *
+ * The error handler will only be called once on the first error flagged;
+ * any subsequent node reads and errors are ignored, and the tree is
+ * permanently in that error state.
+ *
+ * MPack is safe against non-local jumps out of error handler callbacks.
+ * This means you are allowed to longjmp or throw an exception (in C++,
+ * Objective-C, or with SEH) out of this callback.
+ *
+ * Bear in mind when using longjmp that local non-volatile variables that
+ * have changed are undefined when setjmp() returns, so you can't put the
+ * tree on the stack in the same activation frame as the setjmp without
+ * declaring it volatile.
+ *
+ * You must still eventually destroy the tree. It is not destroyed
+ * automatically when an error is flagged. It is safe to destroy the
+ * tree within this error callback, but you will either need to perform
+ * a non-local jump, or store something in your context to identify
+ * that the tree is destroyed since any future accesses to it cause
+ * undefined behavior.
+ *
+ * @param tree The tree on which the error was flagged
+ * @param error The error that was flagged
+ *
+ * @see mpack_tree_set_error_handler()
+ */
+typedef void (*mpack_tree_error_t)(mpack_tree_t* tree, mpack_error_t error);
+
+/**
+ * The MPack tree's read function. It should fill the buffer with as many bytes
+ * as are immediately available up to the given @c count, returning the number
+ * of bytes written to the buffer.
+ *
+ * In case of error, it should flag an appropriate error on the tree
+ * (usually @ref mpack_error_io.)
+ *
+ * The blocking or non-blocking behaviour of the read should match whether you
+ * are using mpack_tree_parse() or mpack_tree_try_parse().
+ *
+ * If you are using mpack_tree_parse(), the read should block until at least
+ * one byte is read. If you return 0, mpack_tree_parse() will raise @ref
+ * mpack_error_io.
+ *
+ * If you are using mpack_tree_try_parse(), the read function can always
+ * return 0, and must never block waiting for data (otherwise
+ * mpack_tree_try_parse() would be equivalent to mpack_tree_parse().)
+ * When you return 0, mpack_tree_try_parse() will return false without flagging
+ * an error.
+ *
+ * @param tree The tree requesting more data
+ * @param buffer The buffer to fill
+ * @param count The maximum number of bytes to write to the buffer
+ * @return The number of bytes written to the buffer
+ *
+ * @see mpack_tree_init_stream()
+ */
+typedef size_t (*mpack_tree_read_t)(mpack_tree_t* tree, char* buffer, size_t count);
+
+/**
+ * A teardown function to be called when the tree is destroyed.
+ *
+ * @param tree The tree being destroyed
+ */
+typedef void (*mpack_tree_teardown_t)(mpack_tree_t* tree);
+
+
+
+/* Hide internals from documentation */
+/** @cond */
+
+/* A node handle: a pointer to the node's data paired with a pointer to its tree. */
+struct mpack_node_t {
+    mpack_node_data_t* data; /* The node data this handle refers to */
+    mpack_tree_t* tree;      /* The tree this handle refers to */
+};
+
+/*
+ * The data of a single node: its type, a length for compound types, and a
+ * union holding the value (or the location of the value) for that type.
+ */
+struct mpack_node_data_t {
+    mpack_type_t type;
+
+    /*
+     * The element count if the type is an array;
+     * the number of key/value pairs if the type is map;
+     * or the number of bytes if the type is str, bin or ext.
+     */
+    uint32_t len;
+
+    /* Which member is meaningful depends on type, as noted on each member. */
+    union {
+        bool     b; /* The value if the type is bool. */
+
+        #if MPACK_FLOAT
+        float    f; /* The value if the type is float. */
+        #else
+        uint32_t f; /* The raw value if the type is float. */
+        #endif
+
+        #if MPACK_DOUBLE
+        double   d; /* The value if the type is double. */
+        #else
+        uint64_t d; /* The raw value if the type is double. */
+        #endif
+
+        int64_t  i; /* The value if the type is signed int. */
+        uint64_t u; /* The value if the type is unsigned int. */
+        size_t offset; /* The byte offset for str, bin and ext */
+
+        mpack_node_data_t* children; /* The children for map or array */
+    } value;
+};
+
+/*
+ * A page of node storage, linked to the next page through the next pointer.
+ *
+ * NOTE(review): nodes is declared with one element but marked as variable
+ * size; presumably pages are over-allocated to hold more nodes. Confirm
+ * against the allocation code before changing this declaration.
+ */
+typedef struct mpack_tree_page_t {
+    struct mpack_tree_page_t* next;
+    mpack_node_data_t nodes[1]; // variable size
+} mpack_tree_page_t;
+
+/* The parse progress recorded in the state member of mpack_tree_parser_t. */
+typedef enum mpack_tree_parse_state_t {
+    mpack_tree_parse_state_not_started,
+    mpack_tree_parse_state_in_progress,
+    mpack_tree_parse_state_parsed,
+} mpack_tree_parse_state_t;
+
+/* One entry of the parsing stack kept in mpack_tree_parser_t. */
+typedef struct mpack_level_t {
+    mpack_node_data_t* child; // NOTE(review): presumably the next child to fill in at this level -- confirm
+    size_t left; // children left in level
+} mpack_level_t;
+
+/*
+ * The state of an in-progress or completed parse, embedded in mpack_tree_t
+ * as its parser member.
+ */
+typedef struct mpack_tree_parser_t {
+    mpack_tree_parse_state_t state;
+
+    // We keep track of the number of "possible nodes" left in the data rather
+    // than the number of bytes.
+    //
+    // When a map or array is parsed, we ensure at least one byte for each child
+    // exists and subtract them right away. This ensures that if ever a map or
+    // array declares more elements than could possibly be contained in the data,
+    // we will error out immediately rather than allocating storage for them.
+    //
+    // For example malicious data that repeats 0xDE 0xFF 0xFF (start of a map
+    // with 65536 key-value pairs) would otherwise cause us to run out of
+    // memory. With this, the parser can allocate at most as many nodes as
+    // there are bytes in the data (plus the paging overhead, 12%.) An error
+    // will be flagged immediately if and when there isn't enough data left to
+    // fully read all children of all open compound types on the parsing stack.
+    //
+    // Once an entire message has been parsed (and there are no nodes left to
+    // parse whose bytes have been subtracted), this matches the number of left
+    // over bytes in the data.
+    size_t possible_nodes_left;
+
+    mpack_node_data_t* nodes; // next node in current page/pool
+    size_t nodes_left; // nodes left in current page/pool
+
+    // NOTE(review): the meaning of these two counters is not visible from
+    // this header; level is presumably the current depth in the parsing
+    // stack -- confirm against the parser implementation.
+    size_t current_node_reserved;
+    size_t level;
+
+    #ifdef MPACK_MALLOC
+    // It's much faster to allocate the initial parsing stack inline within the
+    // parser. We replace it with a heap allocation if we need to grow it.
+    mpack_level_t* stack;
+    size_t stack_capacity;
+    bool stack_owned;
+    mpack_level_t stack_local[MPACK_NODE_INITIAL_DEPTH];
+    #else
+    // Without malloc(), we have to reserve a parsing stack the maximum allowed
+    // parsing depth.
+    mpack_level_t stack[MPACK_NODE_MAX_DEPTH_WITHOUT_MALLOC];
+    #endif
+} mpack_tree_parser_t;
+
+/*
+ * The tree itself: user callbacks and context, the error state, the source
+ * data, the configured limits, the parser state and the node storage.
+ */
+struct mpack_tree_t {
+    mpack_tree_error_t error_fn;    /* Function to call on error */
+    mpack_tree_read_t read_fn;      /* Function to call to read more data */
+    mpack_tree_teardown_t teardown; /* Function to teardown the context on destroy */
+    void* context;                  /* Context for tree callbacks */
+
+    mpack_node_data_t nil_node;     /* a nil node to be returned in case of error */
+    mpack_node_data_t missing_node; /* a missing node to be returned in optional lookups */
+    mpack_error_t error;            /* the error state, as returned by mpack_tree_error() */
+
+    #ifdef MPACK_MALLOC
+    char* buffer;
+    size_t buffer_capacity;
+    #endif
+
+    const char* data;
+    size_t data_length; // length of data (and content of buffer, if used)
+
+    size_t size; // size in bytes of tree (usually matches data_length, but not if tree has trailing data)
+    size_t node_count; // total number of nodes in tree (across all pages)
+
+    size_t max_size;  // maximum message size
+    size_t max_nodes; // maximum nodes in a message
+
+    mpack_tree_parser_t parser;
+    mpack_node_data_t* root;
+
+    mpack_node_data_t* pool; // pool, or NULL if no pool provided
+    size_t pool_count;
+
+    #ifdef MPACK_MALLOC
+    mpack_tree_page_t* next;
+    #endif
+};
+
+// internal functions
+
+/* Builds a node handle from a tree pointer and a node data pointer. */
+MPACK_INLINE mpack_node_t mpack_node(mpack_tree_t* tree, mpack_node_data_t* data) {
+    mpack_node_t handle;
+
+    handle.tree = tree;
+    handle.data = data;
+
+    return handle;
+}
+
+/* Returns a pointer to the entry at the given index of the node's children array. */
+MPACK_INLINE mpack_node_data_t* mpack_node_child(mpack_node_t node, size_t child) {
+    mpack_node_data_t* first = node.data->value.children;
+    return &first[child];
+}
+
+MPACK_INLINE mpack_node_t mpack_tree_nil_node(mpack_tree_t* tree) {
+    return mpack_node(tree, &tree->nil_node);
+}
+
+/* Returns a handle to the tree's built-in missing node (the one returned in optional lookups). */
+MPACK_INLINE mpack_node_t mpack_tree_missing_node(mpack_tree_t* tree) {
+    mpack_node_data_t* missing_data = &tree->missing_node;
+    return mpack_node(tree, missing_data);
+}
+
+/** @endcond */
+
+
+
+/**
+ * @name Tree Initialization
+ * @{
+ */
+
+#ifdef MPACK_MALLOC
+/**
+ * Initializes a tree parser with the given data.
+ *
+ * Configure the tree if desired, then call mpack_tree_parse() to parse it. The
+ * tree will allocate pages of nodes as needed and will free them when
+ * destroyed.
+ *
+ * The tree must be destroyed with mpack_tree_destroy().
+ *
+ * Any string or blob data types reference the original data, so the given data
+ * pointer must remain valid until after the tree is destroyed.
+ *
+ * @param tree The tree to initialize
+ * @param data The MessagePack data to parse
+ * @param length The length of the data in bytes
+ */
+void mpack_tree_init_data(mpack_tree_t* tree, const char* data, size_t length);
+
+/**
+ * Deprecated alias that simply forwards to mpack_tree_init_data().
+ *
+ * \deprecated Renamed to mpack_tree_init_data().
+ */
+MPACK_INLINE void mpack_tree_init(mpack_tree_t* tree, const char* data, size_t length) {
+    mpack_tree_init_data(
+            tree,
+            data,
+            length);
+}
+
+/**
+ * Initializes a tree parser from an unbounded stream, or a stream of
+ * unknown length.
+ *
+ * The parser can be used to read a single message from a stream of unknown
+ * length, or multiple messages from an unbounded stream, allowing it to
+ * be used for RPC communication. Call @ref mpack_tree_parse() to parse
+ * a message from a blocking stream, or @ref mpack_tree_try_parse() for a
+ * non-blocking stream.
+ *
+ * The stream will use a growable internal buffer to store the most recent
+ * message, as well as allocated pages of nodes for the parse tree.
+ *
+ * Maximum allowances for message size and node count must be specified in this
+ * function (since the stream is unbounded.) They can be changed later with
+ * @ref mpack_tree_set_limits().
+ *
+ * @param tree The tree parser
+ * @param read_fn The read function
+ * @param context The context for the read function
+ * @param max_message_size The maximum size of a message in bytes
+ * @param max_message_nodes The maximum number of nodes per message. See
+ *        @ref mpack_node_data_t for the size of nodes.
+ *
+ * @see mpack_tree_read_t
+ * @see mpack_tree_context()
+ */
+void mpack_tree_init_stream(mpack_tree_t* tree, mpack_tree_read_t read_fn, void* context,
+        size_t max_message_size, size_t max_message_nodes);
+#endif
+
+/**
+ * Initializes a tree parser with the given data, using the given node data
+ * pool to store the results.
+ *
+ * Configure the tree if desired, then call mpack_tree_parse() to parse it.
+ *
+ * If the data does not fit in the pool, @ref mpack_error_too_big will be flagged
+ * on the tree.
+ *
+ * The tree must be destroyed with mpack_tree_destroy(), even if parsing fails.
+ *
+ * @param tree The tree to initialize
+ * @param data The MessagePack data to parse
+ * @param length The length of the data in bytes
+ * @param node_pool The node data pool in which to store the results
+ * @param node_pool_count The number of nodes in the pool
+ */
+void mpack_tree_init_pool(mpack_tree_t* tree, const char* data, size_t length,
+        mpack_node_data_t* node_pool, size_t node_pool_count);
+
+/**
+ * Initializes an MPack tree directly into an error state. Use this if you
+ * are writing a wrapper to another <tt>mpack_tree_init*()</tt> function which
+ * can fail its setup.
+ *
+ * @param tree The tree to initialize
+ * @param error The error state to put the tree into
+ */
+void mpack_tree_init_error(mpack_tree_t* tree, mpack_error_t error);
+
+#if MPACK_STDIO
+/**
+ * Initializes a tree to parse the given file. The tree must be destroyed with
+ * mpack_tree_destroy(), even if parsing fails.
+ *
+ * The file is opened, loaded fully into memory, and closed before this call
+ * returns.
+ *
+ * This declaration is only available when MPACK_STDIO is enabled.
+ *
+ * @param tree The tree to initialize
+ * @param filename The filename passed to fopen() to read the file
+ * @param max_bytes The maximum size of file to load, or 0 for unlimited size.
+ */
+void mpack_tree_init_filename(mpack_tree_t* tree, const char* filename, size_t max_bytes);
+
+/**
+ * Deprecated alias that simply forwards to mpack_tree_init_filename().
+ *
+ * \deprecated Renamed to mpack_tree_init_filename().
+ */
+MPACK_INLINE void mpack_tree_init_file(mpack_tree_t* tree, const char* filename, size_t max_bytes) {
+    mpack_tree_init_filename(
+            tree,
+            filename,
+            max_bytes);
+}
+
+/**
+ * Initializes a tree to parse the given libc FILE. This can be used to
+ * read from stdin, or from a file opened separately.
+ *
+ * The tree must be destroyed with mpack_tree_destroy(), even if parsing fails.
+ *
+ * The FILE is loaded fully into memory (and closed if requested) before
+ * this call returns.
+ *
+ * @param tree The tree to initialize.
+ * @param stdfile The FILE.
+ * @param max_bytes The maximum size of file to load, or 0 for unlimited size.
+ * @param close_when_done If true, fclose() will be called on the FILE when it
+ *         is no longer needed. If false, the file will not be closed when
+ *         reading is done.
+ *
+ * @warning The tree will read all data in the FILE before parsing it. If this
+ *          is used on stdin, the parser will block until it is closed, even if
+ *          a complete message has been written to it!
+ */
+void mpack_tree_init_stdfile(mpack_tree_t* tree, FILE* stdfile, size_t max_bytes, bool close_when_done);
+#endif
+
+/**
+ * @}
+ */
+
+/**
+ * @name Tree Functions
+ * @{
+ */
+
+/**
+ * Sets the maximum byte size and maximum number of nodes allowed per message.
+ *
+ * The default is SIZE_MAX (no limit) unless @ref mpack_tree_init_stream() is
+ * called (where maximums are required.)
+ *
+ * If a pool of nodes is used, the node limit is the lesser of this limit and
+ * the pool size.
+ *
+ * @param tree The tree parser
+ * @param max_message_size The maximum size of a message in bytes
+ * @param max_message_nodes The maximum number of nodes per message. See
+ *        @ref mpack_node_data_t for the size of nodes.
+ *
+ * @see mpack_tree_init_stream()
+ * @see mpack_tree_init_pool()
+ */
+void mpack_tree_set_limits(mpack_tree_t* tree, size_t max_message_size,
+        size_t max_message_nodes);
+
+/**
+ * Parses a MessagePack message into a tree of immutable nodes.
+ *
+ * If successful, the root node will be available under @ref mpack_tree_root().
+ * If not, an appropriate error will be flagged.
+ *
+ * This can be called repeatedly to parse a series of messages from a data
+ * source. When this is called, all previous nodes from this tree and their
+ * contents (including the root node) are invalidated.
+ *
+ * If this is called with a stream (see @ref mpack_tree_init_stream()), the
+ * stream must block until data is available. (Otherwise, if this is called on
+ * a non-blocking stream, parsing will fail with @ref mpack_error_io when the
+ * read function returns 0.)
+ *
+ * There is no way to recover a tree in an error state. It must be destroyed.
+ *
+ * @param tree The tree parser
+ *
+ * @see mpack_tree_try_parse()
+ */
+void mpack_tree_parse(mpack_tree_t* tree);
+
+/**
+ * Attempts to parse a MessagePack message from a non-blocking stream into a
+ * tree of immutable nodes.
+ *
+ * A non-blocking read function must have been passed to the tree in
+ * mpack_tree_init_stream().
+ *
+ * If this returns true, a message is available under
+ * @ref mpack_tree_root(). The tree nodes and data will be valid until
+ * the next time a parse is started.
+ *
+ * If this returns false, no message is available, because either not enough
+ * data is available yet or an error has occurred. You must check the tree for
+ * errors whenever this returns false. If there is no error, you should try
+ * again later when more data is available. (You will want to select()/poll()
+ * on the underlying socket or use some other asynchronous mechanism to
+ * determine when it has data.)
+ *
+ * There is no way to recover a tree in an error state. It must be destroyed.
+ *
+ * @param tree The tree parser
+ * @return true if a message is available, false if not enough data is
+ *         available yet or an error has occurred
+ *
+ * @see mpack_tree_init_stream()
+ * @see mpack_tree_error()
+ */
+bool mpack_tree_try_parse(mpack_tree_t* tree);
+
+/**
+ * Returns the root node of the tree, if the tree is not in an error state.
+ * Returns a nil node otherwise.
+ *
+ * @warning You must call mpack_tree_parse() before calling this. If
+ * @ref mpack_tree_parse() was never called, the tree will assert.
+ *
+ * @param tree The tree
+ * @return The root node, or a nil node if the tree is in an error state
+ */
+mpack_node_t mpack_tree_root(mpack_tree_t* tree);
+
+/**
+ * Returns the current error state of the tree, as stored in @c tree->error.
+ */
+MPACK_INLINE mpack_error_t mpack_tree_error(mpack_tree_t* tree) {
+    return tree->error;
+}
+
+/**
+ * Returns the size in bytes of the current parsed message (@c tree->size).
+ *
+ * If there is something in the buffer after the MessagePack object, this can
+ * be used to find it.
+ *
+ * This is zero if an error occurred during tree parsing (since the
+ * portion of the data that the first complete object occupies cannot
+ * be determined if the data is invalid or corrupted).
+ */
+MPACK_INLINE size_t mpack_tree_size(mpack_tree_t* tree) {
+    return tree->size;
+}
+
+/**
+ * Destroys the tree.
+ */
+mpack_error_t mpack_tree_destroy(mpack_tree_t* tree);
+
+/**
+ * Sets the custom pointer to pass to the tree callbacks, such as teardown.
+ *
+ * @param tree The MPack tree.
+ * @param context User data to pass to the tree callbacks.
+ *
+ * @see mpack_tree_context()
+ */
+MPACK_INLINE void mpack_tree_set_context(mpack_tree_t* tree, void* context) {
+    tree->context = context;
+}
+
+/**
+ * Returns the custom context pointer (@c tree->context) for tree callbacks.
+ *
+ * @see mpack_tree_set_context()
+ * @see mpack_tree_init_stream()
+ */
+MPACK_INLINE void* mpack_tree_context(mpack_tree_t* tree) {
+    return tree->context;
+}
+
+/**
+ * Sets the error function to call when an error is flagged on the tree,
+ * replacing any handler stored earlier.
+ *
+ * This should normally be used with mpack_tree_set_context() to register
+ * a custom pointer to pass to the error function.
+ *
+ * See mpack_tree_error_t for what you can do from an error callback.
+ *
+ * @param tree The MPack tree.
+ * @param error_fn The function to call when an error is flagged on the tree.
+ * @see mpack_tree_error_t
+ */
+MPACK_INLINE void mpack_tree_set_error_handler(mpack_tree_t* tree, mpack_tree_error_t error_fn) {
+    tree->error_fn = error_fn;
+}
+
+/**
+ * Sets the teardown function to call when the tree is destroyed.
+ *
+ * This should normally be used with mpack_tree_set_context() to register
+ * a custom pointer to pass to the teardown function.
+ *
+ * @param tree The MPack tree.
+ * @param teardown The function to call on destroy; replaces any earlier one.
+ */
+MPACK_INLINE void mpack_tree_set_teardown(mpack_tree_t* tree, mpack_tree_teardown_t teardown) {
+    tree->teardown = teardown;
+}
+
+/**
+ * Places the tree in the given error state, calling the error callback if one
+ * is set.
+ *
+ * This allows you to externally flag errors, for example if you are validating
+ * data as you read it.
+ *
+ * If the tree is already in an error state, this call is ignored and no
+ * error callback is called.
+ */
+void mpack_tree_flag_error(mpack_tree_t* tree, mpack_error_t error);
+
+/**
+ * @}
+ */
+
+/**
+ * @name Node Core Functions
+ * @{
+ */
+
+/**
+ * Places the node's tree in the given error state, calling the error callback
+ * if one is set.
+ *
+ * This allows you to externally flag errors, for example if you are validating
+ * data as you read it.
+ *
+ * If the tree is already in an error state, this call is ignored and no
+ * error callback is called.
+ */
+void mpack_node_flag_error(mpack_node_t node, mpack_error_t error);
+
+/**
+ * Returns the error state of the node's tree (see mpack_tree_error()).
+ */
+MPACK_INLINE mpack_error_t mpack_node_error(mpack_node_t node) {
+    return mpack_tree_error(node.tree);
+}
+
+/**
+ * Returns a tag describing the given node, or a nil tag if the
+ * tree is in an error state.
+ */
+mpack_tag_t mpack_node_tag(mpack_node_t node);
+
+/** @cond */
+
+#if MPACK_DEBUG && MPACK_STDIO
+/*
+ * Converts a node to a pseudo-JSON string for debugging purposes, placing the
+ * result in the given buffer with a null-terminator.
+ *
+ * If the buffer does not have enough space, the result will be truncated (but
+ * it is guaranteed to be null-terminated.)
+ *
+ * This is only available in debug mode, and only if stdio is available (since
+ * it uses snprintf().) It's strictly for debugging purposes.
+ */
+void mpack_node_print_to_buffer(mpack_node_t node, char* buffer, size_t buffer_size);
+
+/*
+ * Converts a node to pseudo-JSON for debugging purposes, calling the given
+ * callback as many times as is necessary to output the character data.
+ *
+ * No null-terminator or trailing newline will be written.
+ *
+ * This is only available in debug mode, and only if stdio is available (since
+ * it uses snprintf().) It's strictly for debugging purposes.
+ */
+void mpack_node_print_to_callback(mpack_node_t node, mpack_print_callback_t callback, void* context);
+
+/*
+ * Converts a node to pseudo-JSON for debugging purposes
+ * and pretty-prints it to the given file.
+ *
+ * This is only available in debug mode, and only if stdio is available (since
+ * it uses snprintf().) It's strictly for debugging purposes.
+ */
+void mpack_node_print_to_file(mpack_node_t node, FILE* file);
+
+/*
+ * Converts a node to pseudo-JSON for debugging purposes and pretty-prints it
+ * to stdout by forwarding to mpack_node_print_to_file().
+ *
+ * This is only available in debug mode, and only if stdio is available (since
+ * it uses snprintf().) It's strictly for debugging purposes.
+ */
+MPACK_INLINE void mpack_node_print_to_stdout(mpack_node_t node) {
+    mpack_node_print_to_file(node, stdout);
+}
+
+/*
+ * Deprecated alias that simply calls mpack_node_print_to_stdout().
+ *
+ * \deprecated Renamed to mpack_node_print_to_stdout().
+ */
+MPACK_INLINE void mpack_node_print(mpack_node_t node) {
+    mpack_node_print_to_stdout(node);
+}
+#endif
+
+/** @endcond */
+
+/**
+ * @}
+ */
+
+/**
+ * @name Node Primitive Value Functions
+ * @{
+ */
+
+/**
+ * Returns the type of the node.
+ */
+mpack_type_t mpack_node_type(mpack_node_t node);
+
+/**
+ * Returns true if the given node is a nil node; false otherwise.
+ *
+ * To ensure that a node is nil and flag an error otherwise, use
+ * mpack_node_nil().
+ */
+bool mpack_node_is_nil(mpack_node_t node);
+
+/**
+ * Returns true if the given node handle indicates a missing node; false otherwise.
+ *
+ * To ensure that a node is missing and flag an error otherwise, use
+ * mpack_node_missing().
+ */
+bool mpack_node_is_missing(mpack_node_t node);
+
+/**
+ * Checks that the given node is of nil type, raising @ref mpack_error_type
+ * otherwise.
+ *
+ * Use mpack_node_is_nil() to return whether the node is nil.
+ */
+void mpack_node_nil(mpack_node_t node);
+
+/**
+ * Checks that the given node indicates a missing node, raising @ref
+ * mpack_error_type otherwise.
+ *
+ * Use mpack_node_is_missing() to return whether the node is missing.
+ */
+void mpack_node_missing(mpack_node_t node);
+
+/**
+ * Returns the bool value of the node. If this node is not of the correct
+ * type, false is returned and mpack_error_type is raised.
+ */
+bool mpack_node_bool(mpack_node_t node);
+
+/**
+ * Checks if the given node is of bool type with value true, raising
+ * mpack_error_type otherwise.
+ */
+void mpack_node_true(mpack_node_t node);
+
+/**
+ * Checks if the given node is of bool type with value false, raising
+ * mpack_error_type otherwise.
+ */
+void mpack_node_false(mpack_node_t node);
+
+/**
+ * Returns the 8-bit unsigned value of the node. If this node is not
+ * of a compatible type, @ref mpack_error_type is raised and zero is returned.
+ */
+uint8_t mpack_node_u8(mpack_node_t node);
+
+/**
+ * Returns the 8-bit signed value of the node. If this node is not
+ * of a compatible type, @ref mpack_error_type is raised and zero is returned.
+ */
+int8_t mpack_node_i8(mpack_node_t node);
+
+/**
+ * Returns the 16-bit unsigned value of the node. If this node is not
+ * of a compatible type, @ref mpack_error_type is raised and zero is returned.
+ */
+uint16_t mpack_node_u16(mpack_node_t node);
+
+/**
+ * Returns the 16-bit signed value of the node. If this node is not
+ * of a compatible type, @ref mpack_error_type is raised and zero is returned.
+ */
+int16_t mpack_node_i16(mpack_node_t node);
+
+/**
+ * Returns the 32-bit unsigned value of the node. If this node is not
+ * of a compatible type, @ref mpack_error_type is raised and zero is returned.
+ */
+uint32_t mpack_node_u32(mpack_node_t node);
+
+/**
+ * Returns the 32-bit signed value of the node. If this node is not
+ * of a compatible type, @ref mpack_error_type is raised and zero is returned.
+ */
+int32_t mpack_node_i32(mpack_node_t node);
+
+/**
+ * Returns the 64-bit unsigned value of the node. If this node is not
+ * of a compatible type, @ref mpack_error_type is raised, and zero is returned.
+ */
+uint64_t mpack_node_u64(mpack_node_t node);
+
+/**
+ * Returns the 64-bit signed value of the node. If this node is not
+ * of a compatible type, @ref mpack_error_type is raised and zero is returned.
+ */
+int64_t mpack_node_i64(mpack_node_t node);
+
+/**
+ * Returns the unsigned int value of the node.
+ *
+ * Returns zero if an error occurs.
+ *
+ * @throws mpack_error_type If the node is not an integer type or does not fit in the range of an unsigned int
+ */
+unsigned int mpack_node_uint(mpack_node_t node);
+
+/**
+ * Returns the int value of the node.
+ *
+ * Returns zero if an error occurs.
+ *
+ * @throws mpack_error_type If the node is not an integer type or does not fit in the range of an int
+ */
+int mpack_node_int(mpack_node_t node);
+
+#if MPACK_FLOAT
+/**
+ * Returns the float value of the node. The underlying value can be an
+ * integer, float or double; the value is converted to a float.
+ *
+ * @note Reading a double or a large integer with this function can incur a
+ * loss of precision.
+ *
+ * @throws mpack_error_type if the underlying value is not a float, double or integer.
+ */
+float mpack_node_float(mpack_node_t node);
+#endif
+
+#if MPACK_DOUBLE
+/**
+ * Returns the double value of the node. The underlying value can be an
+ * integer, float or double; the value is converted to a double.
+ *
+ * @note Reading a very large integer with this function can incur a
+ * loss of precision.
+ *
+ * @throws mpack_error_type if the underlying value is not a float, double or integer.
+ */
+double mpack_node_double(mpack_node_t node);
+#endif
+
+#if MPACK_FLOAT
+/**
+ * Returns the float value of the node. The underlying value must be a float,
+ * not a double or an integer. This ensures no loss of precision can occur.
+ *
+ * @throws mpack_error_type if the underlying value is not a float.
+ */
+float mpack_node_float_strict(mpack_node_t node);
+#endif
+
+#if MPACK_DOUBLE
+/**
+ * Returns the double value of the node. The underlying value must be a float
+ * or double, not an integer. This ensures no loss of precision can occur.
+ *
+ * @throws mpack_error_type if the underlying value is not a float or double.
+ */
+double mpack_node_double_strict(mpack_node_t node);
+#endif
+
+#if !MPACK_FLOAT
+/**
+ * Returns the float value of the node as a raw uint32_t. The underlying value
+ * must be a float, not a double or an integer.
+ *
+ * @throws mpack_error_type if the underlying value is not a float.
+ */
+uint32_t mpack_node_raw_float(mpack_node_t node);
+#endif
+
+#if !MPACK_DOUBLE
+/**
+ * Returns the double value of the node as a raw uint64_t. The underlying value
+ * must be a double, not a float or an integer.
+ *
+ * @throws mpack_error_type if the underlying value is not a double.
+ */
+uint64_t mpack_node_raw_double(mpack_node_t node);
+#endif
+
+
+#if MPACK_EXTENSIONS
+/**
+ * Returns a timestamp.
+ *
+ * @note This requires @ref MPACK_EXTENSIONS.
+ *
+ * @throws mpack_error_type if the underlying value is not a timestamp.
+ */
+mpack_timestamp_t mpack_node_timestamp(mpack_node_t node);
+
+/**
+ * Returns a timestamp's (signed) seconds since 1970-01-01T00:00:00Z.
+ *
+ * @note This requires @ref MPACK_EXTENSIONS.
+ *
+ * @throws mpack_error_type if the underlying value is not a timestamp.
+ */
+int64_t mpack_node_timestamp_seconds(mpack_node_t node);
+
+/**
+ * Returns a timestamp's additional nanoseconds.
+ *
+ * @note This requires @ref MPACK_EXTENSIONS.
+ *
+ * @return A nanosecond count between 0 and 999,999,999 inclusive.
+ * @throws mpack_error_type if the underlying value is not a timestamp.
+ */
+uint32_t mpack_node_timestamp_nanoseconds(mpack_node_t node);
+#endif
+
+/**
+ * @}
+ */
+
+/**
+ * @name Node String and Data Functions
+ * @{
+ */
+
+/**
+ * Checks that the given node contains a valid UTF-8 string.
+ *
+ * If the string is invalid, this flags an error, which would cause subsequent calls
+ * to mpack_node_str() to return NULL and mpack_node_strlen() to return zero. So you
+ * can check the node for error immediately after calling this, or you can call those
+ * functions to use the data anyway and check for errors later.
+ *
+ * @throws mpack_error_type If this node is not a string or does not contain valid UTF-8.
+ *
+ * @param node The string node to test
+ *
+ * @see mpack_node_str()
+ * @see mpack_node_strlen()
+ */
+void mpack_node_check_utf8(mpack_node_t node);
+
+/**
+ * Checks that the given node contains a valid UTF-8 string with no NUL bytes.
+ *
+ * This does not check that the string has a null-terminator! It only checks whether
+ * the string could safely be represented as a C-string by appending a null-terminator.
+ * (If the string does already contain a null-terminator, this will flag an error.)
+ *
+ * This is performed automatically by other UTF-8 cstr helper functions. Only
+ * call this if you will do something else with the data directly, but you still
+ * want to ensure it will be valid as a UTF-8 C-string.
+ *
+ * @throws mpack_error_type If this node is not a string, does not contain valid UTF-8,
+ *     or contains a NUL byte.
+ *
+ * @param node The string node to test
+ *
+ * @see mpack_node_str()
+ * @see mpack_node_strlen()
+ * @see mpack_node_copy_utf8_cstr()
+ * @see mpack_node_utf8_cstr_alloc()
+ */
+void mpack_node_check_utf8_cstr(mpack_node_t node);
+
+#if MPACK_EXTENSIONS
+/**
+ * Returns the extension type of the given ext node.
+ *
+ * This returns zero if the tree is in an error state.
+ *
+ * @note This requires @ref MPACK_EXTENSIONS.
+ */
+int8_t mpack_node_exttype(mpack_node_t node);
+#endif
+
+/**
+ * Returns the number of bytes in the given bin node.
+ *
+ * This returns zero if the tree is in an error state.
+ *
+ * If this node is not a bin, @ref mpack_error_type is raised and zero is returned.
+ */
+size_t mpack_node_bin_size(mpack_node_t node);
+
+/**
+ * Returns the length of the given str, bin or ext node.
+ *
+ * This returns zero if the tree is in an error state.
+ *
+ * If this node is not a str, bin or ext, @ref mpack_error_type is raised and zero
+ * is returned.
+ */
+uint32_t mpack_node_data_len(mpack_node_t node);
+
+/**
+ * Returns the length in bytes of the given string node. This does not
+ * include any null-terminator.
+ *
+ * This returns zero if the tree is in an error state.
+ *
+ * If this node is not a str, @ref mpack_error_type is raised and zero is returned.
+ */
+size_t mpack_node_strlen(mpack_node_t node);
+
+/**
+ * Returns a pointer to the data contained by this node, ensuring the node is a
+ * string.
+ *
+ * @warning Strings are not null-terminated! Use one of the cstr functions
+ * to get a null-terminated string.
+ *
+ * The pointer is valid as long as the data backing the tree is valid.
+ *
+ * If this node is not a string, @ref mpack_error_type is raised and @c NULL is returned.
+ *
+ * @see mpack_node_copy_cstr()
+ * @see mpack_node_cstr_alloc()
+ * @see mpack_node_utf8_cstr_alloc()
+ */
+const char* mpack_node_str(mpack_node_t node);
+
+/**
+ * Returns a pointer to the data contained by this node.
+ *
+ * @note Strings are not null-terminated! Use one of the cstr functions
+ * to get a null-terminated string.
+ *
+ * The pointer is valid as long as the data backing the tree is valid.
+ *
+ * If this node is not a str, bin or ext, @ref mpack_error_type is raised, and
+ * @c NULL is returned.
+ *
+ * @see mpack_node_copy_cstr()
+ * @see mpack_node_cstr_alloc()
+ * @see mpack_node_utf8_cstr_alloc()
+ */
+const char* mpack_node_data(mpack_node_t node);
+
+/**
+ * Returns a pointer to the data contained by this bin node.
+ *
+ * The pointer is valid as long as the data backing the tree is valid.
+ *
+ * If this node is not a bin, @ref mpack_error_type is raised and @c NULL is
+ * returned.
+ */
+const char* mpack_node_bin_data(mpack_node_t node);
+
+/**
+ * Copies the bytes contained by this node into the given buffer, returning the
+ * number of bytes in the node.
+ *
+ * @throws mpack_error_type If this node is not a str, bin or ext type
+ * @throws mpack_error_too_big If the data does not fit in the given buffer
+ *
+ * @param node The node from which to copy data
+ * @param buffer A buffer in which to copy the node's bytes
+ * @param bufsize The size of the given buffer
+ *
+ * @return The number of bytes in the node, or zero if an error occurs.
+ */
+size_t mpack_node_copy_data(mpack_node_t node, char* buffer, size_t bufsize);
+
+/**
+ * Checks that the given node contains a valid UTF-8 string and copies the
+ * string into the given buffer, returning the number of bytes in the string.
+ *
+ * @throws mpack_error_type If this node is not a string or does not contain valid UTF-8
+ * @throws mpack_error_too_big If the string does not fit in the given buffer
+ *
+ * @param node The string node from which to copy data
+ * @param buffer A buffer in which to copy the node's bytes
+ * @param bufsize The size of the given buffer
+ *
+ * @return The number of bytes in the node, or zero if an error occurs.
+ */
+size_t mpack_node_copy_utf8(mpack_node_t node, char* buffer, size_t bufsize);
+
+/**
+ * Checks that the given node contains a string with no NUL bytes, copies the string
+ * into the given buffer, and adds a null terminator.
+ *
+ * If this node is not of a string type, @ref mpack_error_type is raised. If the string
+ * does not fit, @ref mpack_error_too_big is raised.
+ *
+ * If any error occurs, the buffer will contain an empty null-terminated string.
+ *
+ * @param node The string node from which to copy data
+ * @param buffer A buffer in which to copy the node's string
+ * @param size The size of the given buffer
+ */
+void mpack_node_copy_cstr(mpack_node_t node, char* buffer, size_t size);
+
+/**
+ * Checks that the given node contains a valid UTF-8 string with no NUL bytes,
+ * copies the string into the given buffer, and adds a null terminator.
+ *
+ * If this node is not of a string type, @ref mpack_error_type is raised. If the string
+ * does not fit, @ref mpack_error_too_big is raised.
+ *
+ * If any error occurs, the buffer will contain an empty null-terminated string.
+ *
+ * @param node The string node from which to copy data
+ * @param buffer A buffer in which to copy the node's string
+ * @param size The size of the given buffer
+ */
+void mpack_node_copy_utf8_cstr(mpack_node_t node, char* buffer, size_t size);
+
+#ifdef MPACK_MALLOC
+/**
+ * Allocates a new chunk of data using MPACK_MALLOC() with the bytes
+ * contained by this node.
+ *
+ * The allocated data must be freed with MPACK_FREE() (or simply free()
+ * if MPack's allocator hasn't been customized.)
+ *
+ * @throws mpack_error_type If this node is not a str, bin or ext type
+ * @throws mpack_error_too_big If the size of the data is larger than the
+ *     given maximum size
+ * @throws mpack_error_memory If an allocation failure occurs
+ *
+ * @param node The node from which to allocate and copy data
+ * @param maxsize The maximum size to allocate
+ *
+ * @return The allocated data, or NULL if any error occurs.
+ */
+char* mpack_node_data_alloc(mpack_node_t node, size_t maxsize);
+
+/**
+ * Allocates a new null-terminated string using MPACK_MALLOC() with the string
+ * contained by this node.
+ *
+ * The allocated string must be freed with MPACK_FREE() (or simply free()
+ * if MPack's allocator hasn't been customized.)
+ *
+ * @throws mpack_error_type If this node is not a string or contains NUL bytes
+ * @throws mpack_error_too_big If the size of the string plus null-terminator
+ *     is larger than the given maximum size
+ * @throws mpack_error_memory If an allocation failure occurs
+ *
+ * @param node The node from which to allocate and copy string data
+ * @param maxsize The maximum size to allocate, including the null-terminator
+ *
+ * @return The allocated string, or NULL if any error occurs.
+ */
+char* mpack_node_cstr_alloc(mpack_node_t node, size_t maxsize);
+
+/**
+ * Allocates a new null-terminated string using MPACK_MALLOC() with the UTF-8
+ * string contained by this node.
+ *
+ * The allocated string must be freed with MPACK_FREE() (or simply free()
+ * if MPack's allocator hasn't been customized.)
+ *
+ * @throws mpack_error_type If this node is not a string, is not valid UTF-8,
+ *     or contains NUL bytes
+ * @throws mpack_error_too_big If the size of the string plus null-terminator
+ *     is larger than the given maximum size
+ * @throws mpack_error_memory If an allocation failure occurs
+ *
+ * @param node The node from which to allocate and copy string data
+ * @param maxsize The maximum size to allocate, including the null-terminator
+ *
+ * @return The allocated string, or NULL if any error occurs.
+ */
+char* mpack_node_utf8_cstr_alloc(mpack_node_t node, size_t maxsize);
+#endif
+
+/**
+ * Searches the given string array for a string matching the given
+ * node and returns its index.
+ *
+ * If the node does not match any of the given strings,
+ * @ref mpack_error_type is flagged. Use mpack_node_enum_optional()
+ * if you want to allow values other than the given strings.
+ *
+ * If any error occurs or if the tree is in an error state, @a count
+ * is returned.
+ *
+ * This can be used to quickly parse a string into an enum when the
+ * enum values range from 0 to @a count-1. If the last value in the
+ * enum is a special "count" value, it can be passed as the count,
+ * and the return value can be cast directly to the enum type.
+ *
+ * @code{.c}
+ * typedef enum           { APPLE ,  BANANA ,  ORANGE , COUNT} fruit_t;
+ * const char* fruits[] = {"apple", "banana", "orange"};
+ *
+ * fruit_t fruit = (fruit_t)mpack_node_enum(node, fruits, COUNT);
+ * @endcode
+ *
+ * @param node The node
+ * @param strings An array of expected strings of length count
+ * @param count The number of strings
+ * @return The index of the matched string, or @a count in case of error
+ */
+size_t mpack_node_enum(mpack_node_t node, const char* strings[], size_t count);
+
+/**
+ * Searches the given string array for a string matching the given node,
+ * returning its index or @a count if no strings match.
+ *
+ * If the value is not a string, or it does not match any of the
+ * given strings, @a count is returned and no error is flagged.
+ *
+ * If any error occurs or if the tree is in an error state, @a count
+ * is returned.
+ *
+ * This can be used to quickly parse a string into an enum when the
+ * enum values range from 0 to @a count-1. If the last value in the
+ * enum is a special "count" value, it can be passed as the count,
+ * and the return value can be cast directly to the enum type.
+ *
+ * @code{.c}
+ * typedef enum           { APPLE ,  BANANA ,  ORANGE , COUNT} fruit_t;
+ * const char* fruits[] = {"apple", "banana", "orange"};
+ *
+ * fruit_t fruit = (fruit_t)mpack_node_enum_optional(node, fruits, COUNT);
+ * @endcode
+ *
+ * @param node The node
+ * @param strings An array of expected strings of length count
+ * @param count The number of strings
+ * @return The index of the matched string, or @a count if no string matches or in case of error
+ */
+size_t mpack_node_enum_optional(mpack_node_t node, const char* strings[], size_t count);
+
+/**
+ * @}
+ */
+
+/**
+ * @name Compound Node Functions
+ * @{
+ */
+
+/**
+ * Returns the length of the given array node. Raises mpack_error_type
+ * and returns 0 if the given node is not an array.
+ */
+size_t mpack_node_array_length(mpack_node_t node);
+
+/**
+ * Returns the node in the given array at the given index. If the node
+ * is not an array, @ref mpack_error_type is raised and a nil node is returned.
+ * If the given index is out of bounds, @ref mpack_error_data is raised and
+ * a nil node is returned.
+ */
+mpack_node_t mpack_node_array_at(mpack_node_t node, size_t index);
+
+/**
+ * Returns the number of key/value pairs in the given map node. Raises
+ * mpack_error_type and returns 0 if the given node is not a map.
+ */
+size_t mpack_node_map_count(mpack_node_t node);
+
+/**
+ * Returns the key node in the given map at the given index.
+ *
+ * A nil node is returned in case of error.
+ *
+ * @throws mpack_error_type if the node is not a map
+ * @throws mpack_error_data if the given index is out of bounds
+ */
+mpack_node_t mpack_node_map_key_at(mpack_node_t node, size_t index);
+
+/**
+ * Returns the value node in the given map at the given index.
+ *
+ * A nil node is returned in case of error.
+ *
+ * @throws mpack_error_type if the node is not a map
+ * @throws mpack_error_data if the given index is out of bounds
+ */
+mpack_node_t mpack_node_map_value_at(mpack_node_t node, size_t index);
+
+/**
+ * Returns the value node in the given map for the given integer key.
+ *
+ * The key must exist within the map. Use mpack_node_map_int_optional() to
+ * check for optional keys.
+ *
+ * The key must be unique. An error is flagged if the node has multiple
+ * entries with the given key.
+ *
+ * @throws mpack_error_type If the node is not a map
+ * @throws mpack_error_data If the node does not contain exactly one entry with the given key
+ *
+ * @return The value node for the given key, or a nil node in case of error
+ */
+mpack_node_t mpack_node_map_int(mpack_node_t node, int64_t num);
+
+/**
+ * Returns the value node in the given map for the given integer key, or a
+ * missing node if the map does not contain the given key.
+ *
+ * The key must be unique. An error is flagged if the node has multiple
+ * entries with the given key.
+ *
+ * @throws mpack_error_type If the node is not a map
+ * @throws mpack_error_data If the node contains more than one entry with the given key
+ *
+ * @return The value node for the given key, or a missing node if the key does
+ *         not exist, or a nil node in case of error
+ *
+ * @see mpack_node_is_missing()
+ */
+mpack_node_t mpack_node_map_int_optional(mpack_node_t node, int64_t num);
+
+/**
+ * Returns the value node in the given map for the given unsigned integer key.
+ *
+ * The key must exist within the map. Use mpack_node_map_uint_optional() to
+ * check for optional keys.
+ *
+ * The key must be unique. An error is flagged if the node has multiple
+ * entries with the given key.
+ *
+ * @throws mpack_error_type If the node is not a map
+ * @throws mpack_error_data If the node does not contain exactly one entry with the given key
+ *
+ * @return The value node for the given key, or a nil node in case of error
+ */
+mpack_node_t mpack_node_map_uint(mpack_node_t node, uint64_t num);
+
+/**
+ * Returns the value node in the given map for the given unsigned integer
+ * key, or a missing node if the map does not contain the given key.
+ *
+ * The key must be unique. An error is flagged if the node has multiple
+ * entries with the given key.
+ *
+ * @throws mpack_error_type If the node is not a map
+ * @throws mpack_error_data If the node contains more than one entry with the given key
+ *
+ * @return The value node for the given key, or a missing node if the key does
+ *         not exist, or a nil node in case of error
+ *
+ * @see mpack_node_is_missing()
+ */
+mpack_node_t mpack_node_map_uint_optional(mpack_node_t node, uint64_t num);
+
+/**
+ * Returns the value node in the given map for the given string key.
+ *
+ * The key must exist within the map. Use mpack_node_map_str_optional() to
+ * check for optional keys.
+ *
+ * The key must be unique. An error is flagged if the node has multiple
+ * entries with the given key.
+ *
+ * @throws mpack_error_type If the node is not a map
+ * @throws mpack_error_data If the node does not contain exactly one entry with the given key
+ *
+ * @return The value node for the given key, or a nil node in case of error
+ */
+mpack_node_t mpack_node_map_str(mpack_node_t node, const char* str, size_t length);
+
+/**
+ * Returns the value node in the given map for the given string key, or a missing
+ * node if the map does not contain the given key.
+ *
+ * The key must be unique. An error is flagged if the node has multiple
+ * entries with the given key.
+ *
+ * @throws mpack_error_type If the node is not a map
+ * @throws mpack_error_data If the node contains more than one entry with the given key
+ *
+ * @return The value node for the given key, or a missing node if the key does
+ *         not exist, or a nil node in case of error
+ *
+ * @see mpack_node_is_missing()
+ */
+mpack_node_t mpack_node_map_str_optional(mpack_node_t node, const char* str, size_t length);
+
+/**
+ * Returns the value node in the given map for the given null-terminated
+ * string key.
+ *
+ * The key must exist within the map. Use mpack_node_map_cstr_optional() to
+ * check for optional keys.
+ *
+ * The key must be unique. An error is flagged if the node has multiple
+ * entries with the given key.
+ *
+ * @throws mpack_error_type If the node is not a map
+ * @throws mpack_error_data If the node does not contain exactly one entry with the given key
+ *
+ * @return The value node for the given key, or a nil node in case of error
+ */
+mpack_node_t mpack_node_map_cstr(mpack_node_t node, const char* cstr);
+
+/**
+ * Returns the value node in the given map for the given null-terminated
+ * string key, or a missing node if the map does not contain the given key.
+ *
+ * The key must be unique. An error is flagged if the node has multiple
+ * entries with the given key.
+ *
+ * @throws mpack_error_type If the node is not a map
+ * @throws mpack_error_data If the node contains more than one entry with the given key
+ *
+ * @return The value node for the given key, or a missing node if the key does
+ *         not exist, or a nil node in case of error
+ *
+ * @see mpack_node_is_missing()
+ */
+mpack_node_t mpack_node_map_cstr_optional(mpack_node_t node, const char* cstr);
+
+/**
+ * Returns true if the given node map contains exactly one entry with the
+ * given integer key.
+ *
+ * The key must be unique. An error is flagged if the node has multiple
+ * entries with the given key.
+ *
+ * @throws mpack_error_type If the node is not a map
+ * @throws mpack_error_data If the node contains more than one entry with the given key
+ */
+bool mpack_node_map_contains_int(mpack_node_t node, int64_t num);
+
+/**
+ * Returns true if the given node map contains exactly one entry with the
+ * given unsigned integer key.
+ *
+ * The key must be unique. An error is flagged if the node has multiple
+ * entries with the given key.
+ *
+ * @throws mpack_error_type If the node is not a map
+ * @throws mpack_error_data If the node contains more than one entry with the given key
+ */
+bool mpack_node_map_contains_uint(mpack_node_t node, uint64_t num);
+
+/**
+ * Returns true if the given node map contains exactly one entry with the
+ * given string key.
+ *
+ * The key must be unique. An error is flagged if the node has multiple
+ * entries with the given key.
+ *
+ * @throws mpack_error_type If the node is not a map
+ * @throws mpack_error_data If the node contains more than one entry with the given key
+ */
+bool mpack_node_map_contains_str(mpack_node_t node, const char* str, size_t length);
+
+/**
+ * Returns true if the given node map contains exactly one entry with the
+ * given null-terminated string key.
+ *
+ * The key must be unique. An error is flagged if the node has multiple
+ * entries with the given key.
+ *
+ * @throws mpack_error_type If the node is not a map
+ * @throws mpack_error_data If the node contains more than one entry with the given key
+ */
+bool mpack_node_map_contains_cstr(mpack_node_t node, const char* cstr);
+
+/**
+ * @}
+ */
+
+/**
+ * @}
+ */
+
+#endif
+
+MPACK_EXTERN_C_END
+MPACK_SILENCE_WARNINGS_END
+
+#endif
+
+
+#endif
+
diff --git a/vendors/uthash.h b/vendors/uthash.h
new file mode 100644
index 0000000..68693bf
--- /dev/null
+++ b/vendors/uthash.h
@@ -0,0 +1,1140 @@
+/*
+Copyright (c) 2003-2022, Troy D. Hanson  https://troydhanson.github.io/uthash/
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef UTHASH_H
+#define UTHASH_H
+
+#define UTHASH_VERSION 2.3.0
+
+#include <string.h>   /* memcmp, memset, strlen */
+#include <stddef.h>   /* ptrdiff_t */
+#include <stdlib.h>   /* exit */
+
+#if defined(HASH_DEFINE_OWN_STDINT) && HASH_DEFINE_OWN_STDINT
+/* This codepath is provided for backward compatibility, but I plan to remove it. */
+#warning "HASH_DEFINE_OWN_STDINT is deprecated; please use HASH_NO_STDINT instead"
+typedef unsigned int uint32_t;
+typedef unsigned char uint8_t;
+#elif defined(HASH_NO_STDINT) && HASH_NO_STDINT
+#else
+#include <stdint.h>   /* uint8_t, uint32_t */
+#endif
+
+/* These macros use decltype or the earlier __typeof GNU extension.
+   As decltype is only available in newer compilers (VS2010 or gcc 4.3+
+   when compiling c++ source) this code uses whatever method is needed
+   or, for VS2008 where neither is available, uses casting workarounds. */
+#if !defined(DECLTYPE) && !defined(NO_DECLTYPE)
+#if defined(_MSC_VER)   /* MS compiler */
+#if _MSC_VER >= 1600 && defined(__cplusplus)  /* VS2010 or newer in C++ mode */
+#define DECLTYPE(x) (decltype(x))
+#else                   /* VS2008 or older (or VS2010 in C mode) */
+#define NO_DECLTYPE
+#endif
+#elif defined(__MCST__)  /* Elbrus C Compiler */
+#define DECLTYPE(x) (__typeof(x))
+#elif defined(__BORLANDC__) || defined(__ICCARM__) || defined(__LCC__) || defined(__WATCOMC__)
+#define NO_DECLTYPE
+#else                   /* GNU, Sun and other compilers */
+#define DECLTYPE(x) (__typeof(x))
+#endif
+#endif
+
+#ifdef NO_DECLTYPE
+#define DECLTYPE(x) /* expands to nothing: no cast is applied */
+#define DECLTYPE_ASSIGN(dst,src)                                                 \
+do { /* no typeof: store src into dst via a char** alias */                      \
+  char **_da_dst = (char**)(&(dst));                                             \
+  *_da_dst = (char*)(src);                                                       \
+} while (0)
+#else
+#define DECLTYPE_ASSIGN(dst,src)                                                 \
+do { /* assign src to dst, cast to the type of dst */                            \
+  (dst) = DECLTYPE(dst)(src);                                                    \
+} while (0)
+#endif
+
+#ifndef uthash_malloc
+#define uthash_malloc(sz) malloc(sz)      /* malloc fcn                      */
+#endif
+#ifndef uthash_free
+#define uthash_free(ptr,sz) free(ptr)     /* free fcn                        */
+#endif
+#ifndef uthash_bzero
+#define uthash_bzero(a,n) memset(a,'\0',n)
+#endif
+#ifndef uthash_strlen
+#define uthash_strlen(s) strlen(s)
+#endif
+
+#ifndef HASH_FUNCTION
+#define HASH_FUNCTION(keyptr,keylen,hashv) HASH_JEN(keyptr, keylen, hashv)
+#endif
+
+#ifndef HASH_KEYCMP
+#define HASH_KEYCMP(a,b,n) memcmp(a,b,n)
+#endif
+
+#ifndef uthash_noexpand_fyi
+#define uthash_noexpand_fyi(tbl)          /* can be defined to log noexpand  */
+#endif
+#ifndef uthash_expand_fyi
+#define uthash_expand_fyi(tbl)            /* can be defined to log expands   */
+#endif
+
+#ifndef HASH_NONFATAL_OOM
+#define HASH_NONFATAL_OOM 0
+#endif
+
+#if HASH_NONFATAL_OOM
+/* malloc failures can be recovered from */
+
+#ifndef uthash_nonfatal_oom
+#define uthash_nonfatal_oom(obj) do {} while (0)    /* non-fatal OOM error */
+#endif
+
+#define HASH_RECORD_OOM(oomed) do { (oomed) = 1; } while (0)
+#define IF_HASH_NONFATAL_OOM(x) x
+
+#else
+/* malloc failures result in lost memory, hash tables are unusable */
+
+#ifndef uthash_fatal
+#define uthash_fatal(msg) exit(-1)        /* fatal OOM error */
+#endif
+
+#define HASH_RECORD_OOM(oomed) uthash_fatal("out of memory")
+#define IF_HASH_NONFATAL_OOM(x)
+
+#endif
+
+/* bucket sizing; the LOG2 value must equal log2 of the initial bucket count */
+#define HASH_INITIAL_NUM_BUCKETS 32U     /* initial number of buckets        */
+#define HASH_INITIAL_NUM_BUCKETS_LOG2 5U /* lg2 of initial number of buckets */
+#define HASH_BKT_CAPACITY_THRESH 10U     /* expand when bucket count reaches */
+
+/* calculate the element whose hash handle address is hhp */
+#define ELMT_FROM_HH(tbl,hhp) ((void*)(((char*)(hhp)) - ((tbl)->hho)))
+/* calculate the hash handle from element address elp */
+#define HH_FROM_ELMT(tbl,elp) ((UT_hash_handle*)(void*)(((char*)(elp)) + ((tbl)->hho)))
+
+#define HASH_ROLLBACK_BKT(hh, head, itemptrhh)                                   \
+do { /* used by HASH_ADD_TO_TABLE when the bucket insert hits OOM */             \
+  struct UT_hash_handle *_hd_hh_item = (itemptrhh);                              \
+  unsigned _hd_bkt;                                                              \
+  HASH_TO_BKT(_hd_hh_item->hashv, (head)->hh.tbl->num_buckets, _hd_bkt);         \
+  (head)->hh.tbl->buckets[_hd_bkt].count++; /* NOTE(review): presumably pre-compensates the HASH_DELETE_HH that follows - confirm */ \
+  _hd_hh_item->hh_next = NULL; /* clear the bucket-chain links */                \
+  _hd_hh_item->hh_prev = NULL;                                                   \
+} while (0)
+
+#define HASH_VALUE(keyptr,keylen,hashv)                                          \
+do { /* compute the hash of the keylen-byte key at keyptr into hashv */          \
+  HASH_FUNCTION(keyptr, keylen, hashv); /* HASH_JEN unless overridden */         \
+} while (0)
+
+#define HASH_FIND_BYHASHVALUE(hh,head,keyptr,keylen,hashval,out)                 \
+do { /* key lookup using a precomputed hash; out is reset to NULL first */       \
+  (out) = NULL;                                                                  \
+  if (head) {                                                                    \
+    unsigned _hf_bkt;                                                            \
+    HASH_TO_BKT(hashval, (head)->hh.tbl->num_buckets, _hf_bkt);                  \
+    if (HASH_BLOOM_TEST((head)->hh.tbl, hashval) != 0) { /* always true without HASH_BLOOM */ \
+      HASH_FIND_IN_BKT((head)->hh.tbl, hh, (head)->hh.tbl->buckets[ _hf_bkt ], keyptr, keylen, hashval, out); \
+    }                                                                            \
+  }                                                                              \
+} while (0)
+
+#define HASH_FIND(hh,head,keyptr,keylen,out)                                     \
+do { /* hash the key, then look it up; out is NULL when head is NULL */          \
+  (out) = NULL;                                                                  \
+  if (head) {                                                                    \
+    unsigned _hf_hashv;                                                          \
+    HASH_VALUE(keyptr, keylen, _hf_hashv);                                       \
+    HASH_FIND_BYHASHVALUE(hh, head, keyptr, keylen, _hf_hashv, out);             \
+  }                                                                              \
+} while (0)
+
+#ifdef HASH_BLOOM
+#define HASH_BLOOM_BITLEN (1UL << HASH_BLOOM) /* filter size in bits; HASH_BLOOM is its log2 */
+#define HASH_BLOOM_BYTELEN ((HASH_BLOOM_BITLEN/8UL) + (((HASH_BLOOM_BITLEN%8UL)!=0UL) ? 1UL : 0UL)) /* bits rounded up to whole bytes; outer parentheses keep it safe inside larger expressions */
+#define HASH_BLOOM_MAKE(tbl,oomed)                                               \
+do { /* allocate and zero the bloom bit vector for tbl */                        \
+  (tbl)->bloom_nbits = HASH_BLOOM;                                               \
+  (tbl)->bloom_bv = (uint8_t*)uthash_malloc(HASH_BLOOM_BYTELEN);                 \
+  if (!(tbl)->bloom_bv) {                                                        \
+    HASH_RECORD_OOM(oomed); /* flags oomed, or is fatal by default */            \
+  } else {                                                                       \
+    uthash_bzero((tbl)->bloom_bv, HASH_BLOOM_BYTELEN);                           \
+    (tbl)->bloom_sig = HASH_BLOOM_SIGNATURE;                                     \
+  }                                                                              \
+} while (0)
+
+#define HASH_BLOOM_FREE(tbl)                                                     \
+do { /* release the bit vector allocated by HASH_BLOOM_MAKE */                   \
+  uthash_free((tbl)->bloom_bv, HASH_BLOOM_BYTELEN);                              \
+} while (0)
+
+#define HASH_BLOOM_BITSET(bv,idx) ((bv)[(idx)/8U] |= (1U << ((idx)%8U))) /* set bit idx of byte array bv; bv is parenthesized so pointer expressions work */
+#define HASH_BLOOM_BITTEST(bv,idx) ((bv)[(idx)/8U] & (1U << ((idx)%8U))) /* nonzero when bit idx of byte array bv is set */
+
+#define HASH_BLOOM_ADD(tbl,hashv) /* set the bit indexed by the low bloom_nbits bits of hashv */ \
+  HASH_BLOOM_BITSET((tbl)->bloom_bv, ((hashv) & (uint32_t)((1UL << (tbl)->bloom_nbits) - 1U)))
+
+#define HASH_BLOOM_TEST(tbl,hashv) /* nonzero when the bit indexed by hashv is set */ \
+  HASH_BLOOM_BITTEST((tbl)->bloom_bv, ((hashv) & (uint32_t)((1UL << (tbl)->bloom_nbits) - 1U)))
+
+#else
+#define HASH_BLOOM_MAKE(tbl,oomed)      /* no-op: HASH_BLOOM is not defined */
+#define HASH_BLOOM_FREE(tbl)            /* no-op */
+#define HASH_BLOOM_ADD(tbl,hashv)       /* no-op */
+#define HASH_BLOOM_TEST(tbl,hashv) (1)  /* always passes, so every lookup scans its bucket */
+#define HASH_BLOOM_BYTELEN 0U           /* no filter storage */
+#endif
+
+#define HASH_MAKE_TABLE(hh,head,oomed)                                           \
+do { /* allocate and initialise head's UT_hash_table and buckets */              \
+  (head)->hh.tbl = (UT_hash_table*)uthash_malloc(sizeof(UT_hash_table));         \
+  if (!(head)->hh.tbl) {                                                         \
+    HASH_RECORD_OOM(oomed);                                                      \
+  } else {                                                                       \
+    uthash_bzero((head)->hh.tbl, sizeof(UT_hash_table));                         \
+    (head)->hh.tbl->tail = &((head)->hh);                                        \
+    (head)->hh.tbl->num_buckets = HASH_INITIAL_NUM_BUCKETS;                      \
+    (head)->hh.tbl->log2_num_buckets = HASH_INITIAL_NUM_BUCKETS_LOG2;            \
+    (head)->hh.tbl->hho = (char*)(&(head)->hh) - (char*)(head); /* hh offset */  \
+    (head)->hh.tbl->buckets = (UT_hash_bucket*)uthash_malloc(                    \
+        HASH_INITIAL_NUM_BUCKETS * sizeof(struct UT_hash_bucket));               \
+    (head)->hh.tbl->signature = HASH_SIGNATURE;                                  \
+    if (!(head)->hh.tbl->buckets) {                                              \
+      HASH_RECORD_OOM(oomed);                                                    \
+      uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); /* undo table alloc */ \
+    } else {                                                                     \
+      uthash_bzero((head)->hh.tbl->buckets,                                      \
+          HASH_INITIAL_NUM_BUCKETS * sizeof(struct UT_hash_bucket));             \
+      HASH_BLOOM_MAKE((head)->hh.tbl, oomed);                                    \
+      IF_HASH_NONFATAL_OOM( /* bloom alloc failed: free both */                  \
+        if (oomed) {                                                             \
+          uthash_free((head)->hh.tbl->buckets,                                   \
+              HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket));           \
+          uthash_free((head)->hh.tbl, sizeof(UT_hash_table));                    \
+        }                                                                        \
+      )                                                                          \
+    }                                                                            \
+  }                                                                              \
+} while (0)
+
+#define HASH_REPLACE_BYHASHVALUE_INORDER(hh,head,fieldname,keylen_in,hashval,add,replaced,cmpfcn) \
+do { /* delete any element with the same key, then add in cmpfcn order */        \
+  (replaced) = NULL;                                                             \
+  HASH_FIND_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, hashval, replaced); \
+  if (replaced) { /* caller gets the removed element in replaced */              \
+    HASH_DELETE(hh, head, replaced);                                             \
+  }                                                                              \
+  HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh, head, &((add)->fieldname), keylen_in, hashval, add, cmpfcn); \
+} while (0)
+
+#define HASH_REPLACE_BYHASHVALUE(hh,head,fieldname,keylen_in,hashval,add,replaced) \
+do { /* delete any element with the same key, then append add */                 \
+  (replaced) = NULL;                                                             \
+  HASH_FIND_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, hashval, replaced); \
+  if (replaced) { /* caller gets the removed element in replaced */              \
+    HASH_DELETE(hh, head, replaced);                                             \
+  }                                                                              \
+  HASH_ADD_KEYPTR_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, hashval, add); \
+} while (0)
+
+#define HASH_REPLACE(hh,head,fieldname,keylen_in,add,replaced)                   \
+do { /* hash the key field of add, then replace by that hash */                  \
+  unsigned _hr_hashv;                                                            \
+  HASH_VALUE(&((add)->fieldname), keylen_in, _hr_hashv);                         \
+  HASH_REPLACE_BYHASHVALUE(hh, head, fieldname, keylen_in, _hr_hashv, add, replaced); \
+} while (0)
+
+#define HASH_REPLACE_INORDER(hh,head,fieldname,keylen_in,add,replaced,cmpfcn)    \
+do { /* as HASH_REPLACE, but add is inserted in cmpfcn order */                  \
+  unsigned _hr_hashv;                                                            \
+  HASH_VALUE(&((add)->fieldname), keylen_in, _hr_hashv);                         \
+  HASH_REPLACE_BYHASHVALUE_INORDER(hh, head, fieldname, keylen_in, _hr_hashv, add, replaced, cmpfcn); \
+} while (0)
+
+#define HASH_APPEND_LIST(hh, head, add)                                          \
+do { /* link add after the current tail of the app-order list */                 \
+  (add)->hh.next = NULL;                                                         \
+  (add)->hh.prev = ELMT_FROM_HH((head)->hh.tbl, (head)->hh.tbl->tail);           \
+  (head)->hh.tbl->tail->next = (add);                                            \
+  (head)->hh.tbl->tail = &((add)->hh); /* tail is a handle, not an element */    \
+} while (0)
+
+#define HASH_AKBI_INNER_LOOP(hh,head,add,cmpfcn)                                 \
+do { /* advance caller's _hs_iter to the first element sorting after add */      \
+  do {                                                                           \
+    if (cmpfcn(DECLTYPE(head)(_hs_iter), add) > 0) {                             \
+      break;                                                                     \
+    }                                                                            \
+  } while ((_hs_iter = HH_FROM_ELMT((head)->hh.tbl, _hs_iter)->next));           \
+} while (0)
+
+#ifdef NO_DECLTYPE /* no cast available: lend head to cmpfcn as the typed pointer */
+#undef HASH_AKBI_INNER_LOOP
+#define HASH_AKBI_INNER_LOOP(hh,head,add,cmpfcn)                                 \
+do {                                                                             \
+  char *_hs_saved_head = (char*)(head); /* restored after each compare */        \
+  do {                                                                           \
+    DECLTYPE_ASSIGN(head, _hs_iter);                                             \
+    if (cmpfcn(head, add) > 0) {                                                 \
+      DECLTYPE_ASSIGN(head, _hs_saved_head);                                     \
+      break;                                                                     \
+    }                                                                            \
+    DECLTYPE_ASSIGN(head, _hs_saved_head);                                       \
+  } while ((_hs_iter = HH_FROM_ELMT((head)->hh.tbl, _hs_iter)->next));           \
+} while (0)
+#endif
+
+#if HASH_NONFATAL_OOM
+
+#define HASH_ADD_TO_TABLE(hh,head,keyptr,keylen_in,hashval,add,oomed)            \
+do { /* index add in its hash bucket; on OOM unlink it and report */             \
+  if (!(oomed)) { /* no OOM while creating the table */                          \
+    unsigned _ha_bkt;                                                            \
+    (head)->hh.tbl->num_items++;                                                 \
+    HASH_TO_BKT(hashval, (head)->hh.tbl->num_buckets, _ha_bkt);                  \
+    HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt], hh, &(add)->hh, oomed);    \
+    if (oomed) {                                                                 \
+      HASH_ROLLBACK_BKT(hh, head, &(add)->hh);                                   \
+      HASH_DELETE_HH(hh, head, &(add)->hh);                                      \
+      (add)->hh.tbl = NULL;                                                      \
+      uthash_nonfatal_oom(add);                                                  \
+    } else {                                                                     \
+      HASH_BLOOM_ADD((head)->hh.tbl, hashval);                                   \
+      HASH_EMIT_KEY(hh, head, keyptr, keylen_in);                                \
+    }                                                                            \
+  } else {                                                                       \
+    (add)->hh.tbl = NULL;                                                        \
+    uthash_nonfatal_oom(add);                                                    \
+  }                                                                              \
+} while (0)
+
+#else
+
+#define HASH_ADD_TO_TABLE(hh,head,keyptr,keylen_in,hashval,add,oomed)            \
+do { /* index add in its hash bucket (OOM goes to uthash_fatal) */               \
+  unsigned _ha_bkt;                                                              \
+  (head)->hh.tbl->num_items++;                                                   \
+  HASH_TO_BKT(hashval, (head)->hh.tbl->num_buckets, _ha_bkt);                    \
+  HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt], hh, &(add)->hh, oomed);      \
+  HASH_BLOOM_ADD((head)->hh.tbl, hashval);                                       \
+  HASH_EMIT_KEY(hh, head, keyptr, keylen_in);                                    \
+} while (0)
+
+#endif
+
+
+#define HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh,head,keyptr,keylen_in,hashval,add,cmpfcn) \
+do { /* insert add before the first element for which cmpfcn(elt, add) > 0 */    \
+  IF_HASH_NONFATAL_OOM( int _ha_oomed = 0; )                                     \
+  (add)->hh.hashv = (hashval);                                                   \
+  (add)->hh.key = (const void*) (keyptr);                                        \
+  (add)->hh.keylen = (unsigned) (keylen_in);                                     \
+  if (!(head)) { /* empty hash: add becomes head and gets a new table */         \
+    (add)->hh.next = NULL;                                                       \
+    (add)->hh.prev = NULL;                                                       \
+    HASH_MAKE_TABLE(hh, add, _ha_oomed);                                         \
+    IF_HASH_NONFATAL_OOM( if (!_ha_oomed) { )                                    \
+      (head) = (add);                                                            \
+    IF_HASH_NONFATAL_OOM( } )                                                    \
+  } else {                                                                       \
+    void *_hs_iter = (head);                                                     \
+    (add)->hh.tbl = (head)->hh.tbl;                                              \
+    HASH_AKBI_INNER_LOOP(hh, head, add, cmpfcn);                                 \
+    if (_hs_iter) { /* splice add in just before _hs_iter */                     \
+      (add)->hh.next = _hs_iter;                                                 \
+      if (((add)->hh.prev = HH_FROM_ELMT((head)->hh.tbl, _hs_iter)->prev)) {     \
+        HH_FROM_ELMT((head)->hh.tbl, (add)->hh.prev)->next = (add);              \
+      } else { /* no predecessor: add is the new head */                         \
+        (head) = (add);                                                          \
+      }                                                                          \
+      HH_FROM_ELMT((head)->hh.tbl, _hs_iter)->prev = (add);                      \
+    } else { /* nothing sorts after add: append */                               \
+      HASH_APPEND_LIST(hh, head, add);                                           \
+    }                                                                            \
+  }                                                                              \
+  HASH_ADD_TO_TABLE(hh, head, keyptr, keylen_in, hashval, add, _ha_oomed);       \
+  HASH_FSCK(hh, head, "HASH_ADD_KEYPTR_BYHASHVALUE_INORDER");                    \
+} while (0)
+
+#define HASH_ADD_KEYPTR_INORDER(hh,head,keyptr,keylen_in,add,cmpfcn)             \
+do { /* hash the key, then insert add in cmpfcn order */                         \
+  unsigned _hs_hashv;                                                            \
+  HASH_VALUE(keyptr, keylen_in, _hs_hashv);                                      \
+  HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh, head, keyptr, keylen_in, _hs_hashv, add, cmpfcn); \
+} while (0)
+
+#define HASH_ADD_BYHASHVALUE_INORDER(hh,head,fieldname,keylen_in,hashval,add,cmpfcn) \
+  HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh, head, &((add)->fieldname), keylen_in, hashval, add, cmpfcn) /* key is the fieldname member of add */
+
+#define HASH_ADD_INORDER(hh,head,fieldname,keylen_in,add,cmpfcn)                 \
+  HASH_ADD_KEYPTR_INORDER(hh, head, &((add)->fieldname), keylen_in, add, cmpfcn) /* key is the fieldname member of add */
+
+#define HASH_ADD_KEYPTR_BYHASHVALUE(hh,head,keyptr,keylen_in,hashval,add)        \
+do { /* add at the end of the app-order list, using a precomputed hash */        \
+  IF_HASH_NONFATAL_OOM( int _ha_oomed = 0; ) /* only in non-fatal-OOM builds */  \
+  (add)->hh.hashv = (hashval);                                                   \
+  (add)->hh.key = (const void*) (keyptr);                                        \
+  (add)->hh.keylen = (unsigned) (keylen_in);                                     \
+  if (!(head)) { /* empty hash: add becomes head and gets a new table */         \
+    (add)->hh.next = NULL;                                                       \
+    (add)->hh.prev = NULL;                                                       \
+    HASH_MAKE_TABLE(hh, add, _ha_oomed);                                         \
+    IF_HASH_NONFATAL_OOM( if (!_ha_oomed) { )                                    \
+      (head) = (add);                                                            \
+    IF_HASH_NONFATAL_OOM( } )                                                    \
+  } else {                                                                       \
+    (add)->hh.tbl = (head)->hh.tbl;                                              \
+    HASH_APPEND_LIST(hh, head, add);                                             \
+  }                                                                              \
+  HASH_ADD_TO_TABLE(hh, head, keyptr, keylen_in, hashval, add, _ha_oomed);       \
+  HASH_FSCK(hh, head, "HASH_ADD_KEYPTR_BYHASHVALUE");                            \
+} while (0)
+
+#define HASH_ADD_KEYPTR(hh,head,keyptr,keylen_in,add)                            \
+do { /* hash the key, then append add */                                         \
+  unsigned _ha_hashv;                                                            \
+  HASH_VALUE(keyptr, keylen_in, _ha_hashv);                                      \
+  HASH_ADD_KEYPTR_BYHASHVALUE(hh, head, keyptr, keylen_in, _ha_hashv, add);      \
+} while (0)
+
+#define HASH_ADD_BYHASHVALUE(hh,head,fieldname,keylen_in,hashval,add)            \
+  HASH_ADD_KEYPTR_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, hashval, add) /* key is the fieldname member of add */
+
+#define HASH_ADD(hh,head,fieldname,keylen_in,add)                                \
+  HASH_ADD_KEYPTR(hh, head, &((add)->fieldname), keylen_in, add) /* key is the fieldname member of add */
+
+#define HASH_TO_BKT(hashv,num_bkts,bkt)                                          \
+do { /* bkt = hashv mod num_bkts; the mask needs num_bkts to be a power of 2 */  \
+  bkt = ((hashv) & ((num_bkts) - 1U));                                           \
+} while (0)
+
+/* Delete "delptr" from the hash table, then do the usual patch-up of the
+ * app-order doubly-linked list around it.
+ * The use of _hd_hh_del in HASH_DELETE_HH deserves special explanation.
+ * The references to the deletee used to be written as (delptr), but that
+ * led to a bug when the same symbol was used for the head and deletee, like
+ *  HASH_DELETE(hh,users,users);
+ * We want that to work, but by changing the head (users) during deletion
+ * we were forfeiting our ability to further refer to the deletee (users)
+ * in the patch-up process. Solution: copy the deletee's hash-handle
+ * pointer into a scratch variable first, so that the later references go
+ * through that scratch pointer rather than the repointed (users) symbol.
+ */
+#define HASH_DELETE(hh,head,delptr) /* delete by element pointer */              \
+    HASH_DELETE_HH(hh, head, &(delptr)->hh)
+
+#define HASH_DELETE_HH(hh,head,delptrhh)                                         \
+do { /* unlink *delptrhh from the app-order list and its bucket */               \
+  const struct UT_hash_handle *_hd_hh_del = (delptrhh);                          \
+  if ((_hd_hh_del->prev == NULL) && (_hd_hh_del->next == NULL)) {                \
+    HASH_BLOOM_FREE((head)->hh.tbl);                                             \
+    uthash_free((head)->hh.tbl->buckets,                                         \
+                (head)->hh.tbl->num_buckets * sizeof(struct UT_hash_bucket));    \
+    uthash_free((head)->hh.tbl, sizeof(UT_hash_table));                          \
+    (head) = NULL; /* that was the only element: hash is now empty */            \
+  } else {                                                                       \
+    unsigned _hd_bkt;                                                            \
+    if (_hd_hh_del == (head)->hh.tbl->tail) {                                    \
+      (head)->hh.tbl->tail = HH_FROM_ELMT((head)->hh.tbl, _hd_hh_del->prev);     \
+    }                                                                            \
+    if (_hd_hh_del->prev != NULL) {                                              \
+      HH_FROM_ELMT((head)->hh.tbl, _hd_hh_del->prev)->next = _hd_hh_del->next;   \
+    } else {                                                                     \
+      DECLTYPE_ASSIGN(head, _hd_hh_del->next); /* deleting the head */           \
+    }                                                                            \
+    if (_hd_hh_del->next != NULL) {                                              \
+      HH_FROM_ELMT((head)->hh.tbl, _hd_hh_del->next)->prev = _hd_hh_del->prev;   \
+    }                                                                            \
+    HASH_TO_BKT(_hd_hh_del->hashv, (head)->hh.tbl->num_buckets, _hd_bkt);        \
+    HASH_DEL_IN_BKT((head)->hh.tbl->buckets[_hd_bkt], _hd_hh_del);               \
+    (head)->hh.tbl->num_items--;                                                 \
+  }                                                                              \
+  HASH_FSCK(hh, head, "HASH_DELETE_HH");                                         \
+} while (0)
+
+/* convenience forms of HASH_FIND/HASH_ADD/HASH_DEL */
+#define HASH_FIND_STR(head,findstr,out)                                          \
+do { /* key is the string findstr, without its terminating NUL */                \
+    unsigned _uthash_hfstr_keylen = (unsigned)uthash_strlen(findstr);            \
+    HASH_FIND(hh, head, findstr, _uthash_hfstr_keylen, out);                     \
+} while (0)
+#define HASH_ADD_STR(head,strfield,add)                                          \
+do { /* strfield[0] makes HASH_ADD take the address of the first character */    \
+    unsigned _uthash_hastr_keylen = (unsigned)uthash_strlen((add)->strfield);    \
+    HASH_ADD(hh, head, strfield[0], _uthash_hastr_keylen, add);                  \
+} while (0)
+#define HASH_REPLACE_STR(head,strfield,add,replaced)                             \
+do { /* keyed like HASH_ADD_STR; any old element is returned in replaced */      \
+    unsigned _uthash_hrstr_keylen = (unsigned)uthash_strlen((add)->strfield);    \
+    HASH_REPLACE(hh, head, strfield[0], _uthash_hrstr_keylen, add, replaced);    \
+} while (0)
+#define HASH_FIND_INT(head,findint,out) /* findint points to the int key */      \
+    HASH_FIND(hh,head,findint,sizeof(int),out)
+#define HASH_ADD_INT(head,intfield,add) /* intfield is an int member of add */   \
+    HASH_ADD(hh,head,intfield,sizeof(int),add)
+#define HASH_REPLACE_INT(head,intfield,add,replaced) /* int key */               \
+    HASH_REPLACE(hh,head,intfield,sizeof(int),add,replaced)
+#define HASH_FIND_PTR(head,findptr,out) /* findptr points to the pointer key */  \
+    HASH_FIND(hh,head,findptr,sizeof(void *),out)
+#define HASH_ADD_PTR(head,ptrfield,add) /* key is the pointer value itself */    \
+    HASH_ADD(hh,head,ptrfield,sizeof(void *),add)
+#define HASH_REPLACE_PTR(head,ptrfield,add,replaced) /* pointer key */           \
+    HASH_REPLACE(hh,head,ptrfield,sizeof(void *),add,replaced)
+#define HASH_DEL(head,delptr) /* HASH_DELETE on the hh handle */                 \
+    HASH_DELETE(hh,head,delptr)
+
+/* HASH_FSCK checks hash integrity on every add/delete when HASH_DEBUG is defined.
+ * This is for uthash developers only; it compiles away if HASH_DEBUG isn't defined.
+ */
+#ifdef HASH_DEBUG
+#include <stdio.h>   /* fprintf, stderr */
+#define HASH_OOPS(...) do { fprintf(stderr, __VA_ARGS__); exit(-1); } while (0)
+#define HASH_FSCK(hh,head,where)                                                 \
+do {                                                                             \
+  struct UT_hash_handle *_thh;                                                   \
+  if (head) {                                                                    \
+    unsigned _bkt_i;                                                             \
+    unsigned _count = 0;                                                         \
+    char *_prev;                                                                 \
+    for (_bkt_i = 0; _bkt_i < (head)->hh.tbl->num_buckets; ++_bkt_i) {           \
+      unsigned _bkt_count = 0;                                                   \
+      _thh = (head)->hh.tbl->buckets[_bkt_i].hh_head;                            \
+      _prev = NULL;                                                              \
+      while (_thh) {                                                             \
+        if (_prev != (char*)(_thh->hh_prev)) {                                   \
+          HASH_OOPS("%s: invalid hh_prev %p, actual %p\n",                       \
+              (where), (void*)_thh->hh_prev, (void*)_prev);                      \
+        }                                                                        \
+        _bkt_count++;                                                            \
+        _prev = (char*)(_thh);                                                   \
+        _thh = _thh->hh_next;                                                    \
+      }                                                                          \
+      _count += _bkt_count;                                                      \
+      if ((head)->hh.tbl->buckets[_bkt_i].count !=  _bkt_count) {                \
+        HASH_OOPS("%s: invalid bucket count %u, actual %u\n",                    \
+            (where), (head)->hh.tbl->buckets[_bkt_i].count, _bkt_count);         \
+      }                                                                          \
+    }                                                                            \
+    if (_count != (head)->hh.tbl->num_items) {                                   \
+      HASH_OOPS("%s: invalid hh item count %u, actual %u\n",                     \
+          (where), (head)->hh.tbl->num_items, _count);                           \
+    }                                                                            \
+    _count = 0;                                                                  \
+    _prev = NULL;                                                                \
+    _thh =  &(head)->hh;                                                         \
+    while (_thh) {                                                               \
+      _count++;                                                                  \
+      if (_prev != (char*)_thh->prev) {                                          \
+        HASH_OOPS("%s: invalid prev %p, actual %p\n",                            \
+            (where), (void*)_thh->prev, (void*)_prev);                           \
+      }                                                                          \
+      _prev = (char*)ELMT_FROM_HH((head)->hh.tbl, _thh);                         \
+      _thh = (_thh->next ? HH_FROM_ELMT((head)->hh.tbl, _thh->next) : NULL);     \
+    }                                                                            \
+    if (_count != (head)->hh.tbl->num_items) {                                   \
+      HASH_OOPS("%s: invalid app item count %u, actual %u\n",                    \
+          (where), (head)->hh.tbl->num_items, _count);                           \
+    }                                                                            \
+  }                                                                              \
+} while (0)
+#else
+#define HASH_FSCK(hh,head,where)
+#endif
+
+/* When compiled with -DHASH_EMIT_KEYS=<fd>, HASH_EMIT_KEY writes each key, prefixed by
+ * its length, to descriptor <fd>; the output is meant for tuning the hash function.
+ * The app can #include <unistd.h> to get the prototype for write(2). */
+#ifdef HASH_EMIT_KEYS
+#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen) /* hh and head are accepted but not used */ \
+do {                                                                             \
+  unsigned _klen = fieldlen;                                                     \
+  write(HASH_EMIT_KEYS, &_klen, sizeof(_klen)); /* length prefix: the raw bytes of an unsigned */ \
+  write(HASH_EMIT_KEYS, keyptr, (unsigned long)fieldlen); /* then the key bytes; both write() results are ignored */ \
+} while (0)
+#else
+#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen)
+#endif
+
+/* The Bernstein hash function, used in Perl prior to v5.6. Note ((x<<5)+x) = x*33. */
+#define HASH_BER(key,keylen,hashv) /* hashv must be an lvalue; it receives the result */ \
+do {                                                                             \
+  unsigned _hb_keylen = (unsigned)keylen; /* NOTE(review): the cast binds to an unparenthesized keylen */ \
+  const unsigned char *_hb_key = (const unsigned char*)(key);                    \
+  (hashv) = 0;                                                                   \
+  while (_hb_keylen-- != 0U) {                                                   \
+    (hashv) = (((hashv) << 5) + (hashv)) + *_hb_key++; /* hashv = hashv*33 + next key byte */ \
+  }                                                                              \
+} while (0)
+
+
+/* SAX/FNV/OAT/JEN hash functions are macro variants of those listed at
+ * http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx
+ * (archive link: https://archive.is/Ivcan )
+ */
+#define HASH_SAX(key,keylen,hashv) /* hashes keylen bytes at key; hashv must be an lvalue and receives the result */ \
+do {                                                                             \
+  unsigned _sx_i;                                                                \
+  const unsigned char *_hs_key = (const unsigned char*)(key);                    \
+  hashv = 0;                                                                     \
+  for (_sx_i=0; _sx_i < keylen; _sx_i++) { /* NOTE(review): keylen and hashv are expanded without parentheses */ \
+    hashv ^= (hashv << 5) + (hashv >> 2) + _hs_key[_sx_i]; /* two shifts plus the byte, XORed into hashv */ \
+  }                                                                              \
+} while (0)
+/* FNV-1a variation: for each key byte, XOR it into hashv and then multiply. */
+#define HASH_FNV(key,keylen,hashv)                                               \
+do {                                                                             \
+  unsigned _fn_i;                                                                \
+  const unsigned char *_hf_key = (const unsigned char*)(key);                    \
+  (hashv) = 2166136261U; /* starting value, 0x811C9DC5 */                        \
+  for (_fn_i=0; _fn_i < keylen; _fn_i++) { /* NOTE(review): keylen is expanded without parentheses */ \
+    hashv = hashv ^ _hf_key[_fn_i];                                              \
+    hashv = hashv * 16777619U; /* multiplier, 0x01000193 */                      \
+  }                                                                              \
+} while (0)
+
+#define HASH_OAT(key,keylen,hashv) /* byte-at-a-time hash: mix after every key byte, then three final mixing steps */ \
+do {                                                                             \
+  unsigned _ho_i;                                                                \
+  const unsigned char *_ho_key=(const unsigned char*)(key);                      \
+  hashv = 0;                                                                     \
+  for(_ho_i=0; _ho_i < keylen; _ho_i++) { /* NOTE(review): keylen and hashv are expanded without parentheses */ \
+      hashv += _ho_key[_ho_i];                                                   \
+      hashv += (hashv << 10);                                                    \
+      hashv ^= (hashv >> 6);                                                     \
+  }                                                                              \
+  hashv += (hashv << 3); /* final mixing, applied once after all bytes are consumed */ \
+  hashv ^= (hashv >> 11);                                                        \
+  hashv += (hashv << 15);                                                        \
+} while (0)
+
+#define HASH_JEN_MIX(a,b,c) /* mixes a, b and c in place; each argument is expanded many times, so pass side-effect-free lvalues */ \
+do {                                                                             \
+  a -= b; a -= c; a ^= ( c >> 13 ); /* each row: subtract the other two, then XOR in a shifted neighbour */ \
+  b -= c; b -= a; b ^= ( a << 8 );                                               \
+  c -= a; c -= b; c ^= ( b >> 13 );                                              \
+  a -= b; a -= c; a ^= ( c >> 12 ); /* second round, different shift amounts */  \
+  b -= c; b -= a; b ^= ( a << 16 );                                              \
+  c -= a; c -= b; c ^= ( b >> 5 );                                               \
+  a -= b; a -= c; a ^= ( c >> 3 ); /* third round */                             \
+  b -= c; b -= a; b ^= ( a << 10 );                                              \
+  c -= a; c -= b; c ^= ( b >> 15 );                                              \
+} while (0)
+
+#define HASH_JEN(key,keylen,hashv) /* hashes keylen bytes at key into the lvalue hashv, using HASH_JEN_MIX */ \
+do {                                                                             \
+  unsigned _hj_i,_hj_j,_hj_k;                                                    \
+  unsigned const char *_hj_key=(unsigned const char*)(key);                      \
+  hashv = 0xfeedbeefu;                                                           \
+  _hj_i = _hj_j = 0x9e3779b9u;                                                   \
+  _hj_k = (unsigned)(keylen); /* bytes still to be consumed */                   \
+  while (_hj_k >= 12U) { /* full rounds: 12 bytes become three 32-bit words, lowest-index byte least significant */ \
+    _hj_i +=    (_hj_key[0] + ( (unsigned)_hj_key[1] << 8 )                      \
+        + ( (unsigned)_hj_key[2] << 16 )                                         \
+        + ( (unsigned)_hj_key[3] << 24 ) );                                      \
+    _hj_j +=    (_hj_key[4] + ( (unsigned)_hj_key[5] << 8 )                      \
+        + ( (unsigned)_hj_key[6] << 16 )                                         \
+        + ( (unsigned)_hj_key[7] << 24 ) );                                      \
+    hashv += (_hj_key[8] + ( (unsigned)_hj_key[9] << 8 )                         \
+        + ( (unsigned)_hj_key[10] << 16 )                                        \
+        + ( (unsigned)_hj_key[11] << 24 ) );                                     \
+                                                                                 \
+     HASH_JEN_MIX(_hj_i, _hj_j, hashv);                                          \
+                                                                                 \
+     _hj_key += 12;                                                              \
+     _hj_k -= 12U;                                                               \
+  }                                                                              \
+  hashv += (unsigned)(keylen); /* fold the total key length into hashv */        \
+  switch ( _hj_k ) { /* 0..11 leftover bytes; each case falls through to all cases below it */ \
+    case 11: hashv += ( (unsigned)_hj_key[10] << 24 ); /* FALLTHROUGH */         \
+    case 10: hashv += ( (unsigned)_hj_key[9] << 16 );  /* FALLTHROUGH */         \
+    case 9:  hashv += ( (unsigned)_hj_key[8] << 8 );   /* FALLTHROUGH */         \
+    case 8:  _hj_j += ( (unsigned)_hj_key[7] << 24 );  /* FALLTHROUGH */         \
+    case 7:  _hj_j += ( (unsigned)_hj_key[6] << 16 );  /* FALLTHROUGH */         \
+    case 6:  _hj_j += ( (unsigned)_hj_key[5] << 8 );   /* FALLTHROUGH */         \
+    case 5:  _hj_j += _hj_key[4];                      /* FALLTHROUGH */         \
+    case 4:  _hj_i += ( (unsigned)_hj_key[3] << 24 );  /* FALLTHROUGH */         \
+    case 3:  _hj_i += ( (unsigned)_hj_key[2] << 16 );  /* FALLTHROUGH */         \
+    case 2:  _hj_i += ( (unsigned)_hj_key[1] << 8 );   /* FALLTHROUGH */         \
+    case 1:  _hj_i += _hj_key[0];                      /* FALLTHROUGH */         \
+    default: ;                                                                   \
+  }                                                                              \
+  HASH_JEN_MIX(_hj_i, _hj_j, hashv); /* final mix, run even when there were no leftover bytes */ \
+} while (0)
+
+/* The Paul Hsieh hash function (HASH_SFH), preceded by its 16-bit read helper get16bits */
+#undef get16bits /* discard any definition that was in effect before this point */
+#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__)             \
+  || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__)
+#define get16bits(d) (*((const uint16_t *) (d))) /* on the compilers/targets above: one 16-bit load through a cast pointer */
+#endif
+
+#if !defined (get16bits)
+#define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8)             \
+                       +(uint32_t)(((const uint8_t *)(d))[0]) ) /* everywhere else: assemble from two bytes, d[0] in the low 8 bits */
+#endif
+#define HASH_SFH(key,keylen,hashv) /* hashes keylen bytes at key into the lvalue hashv, reading the key via get16bits */ \
+do {                                                                             \
+  unsigned const char *_sfh_key=(unsigned const char*)(key);                     \
+  uint32_t _sfh_tmp, _sfh_len = (uint32_t)keylen;                                \
+                                                                                 \
+  unsigned _sfh_rem = _sfh_len & 3U; /* 0..3 bytes left over after the 4-byte rounds */ \
+  _sfh_len >>= 2; /* from here on: the number of 4-byte rounds */                \
+  hashv = 0xcafebabeu;                                                           \
+                                                                                 \
+  /* Main loop */                                                                \
+  for (;_sfh_len > 0U; _sfh_len--) {                                             \
+    hashv    += get16bits (_sfh_key);                                            \
+    _sfh_tmp  = ((uint32_t)(get16bits (_sfh_key+2)) << 11) ^ hashv;              \
+    hashv     = (hashv << 16) ^ _sfh_tmp;                                        \
+    _sfh_key += 2U*sizeof (uint16_t); /* step past both 16-bit halves just read */ \
+    hashv    += hashv >> 11;                                                     \
+  }                                                                              \
+                                                                                 \
+  /* Handle end cases */                                                         \
+  switch (_sfh_rem) {                                                            \
+    case 3: hashv += get16bits (_sfh_key); /* two bytes as a 16-bit value ... */ \
+            hashv ^= hashv << 16;                                                \
+            hashv ^= (uint32_t)(_sfh_key[sizeof (uint16_t)]) << 18; /* ... plus the third byte on its own */ \
+            hashv += hashv >> 11;                                                \
+            break;                                                               \
+    case 2: hashv += get16bits (_sfh_key);                                       \
+            hashv ^= hashv << 11;                                                \
+            hashv += hashv >> 17;                                                \
+            break;                                                               \
+    case 1: hashv += *_sfh_key;                                                  \
+            hashv ^= hashv << 10;                                                \
+            hashv += hashv >> 1;                                                 \
+            break;                                                               \
+    default: ; /* remainder 0: nothing left to read */                           \
+  }                                                                              \
+                                                                                 \
+  /* Force "avalanching" of final 127 bits */                                    \
+  hashv ^= hashv << 3;                                                           \
+  hashv += hashv >> 5;                                                           \
+  hashv ^= hashv << 4;                                                           \
+  hashv += hashv >> 17;                                                          \
+  hashv ^= hashv << 25;                                                          \
+  hashv += hashv >> 6;                                                           \
+} while (0)
+
+/* iterate over items in a known bucket to find desired item; out ends up as the match, or NULL if there is none */
+#define HASH_FIND_IN_BKT(tbl,hh,head,keyptr,keylen_in,hashval,out) /* head is the bucket itself (accessed as (head).hh_head) */ \
+do {                                                                             \
+  if ((head).hh_head != NULL) {                                                  \
+    DECLTYPE_ASSIGN(out, ELMT_FROM_HH(tbl, (head).hh_head));                     \
+  } else {                                                                       \
+    (out) = NULL;                                                                \
+  }                                                                              \
+  while ((out) != NULL) {                                                        \
+    if ((out)->hh.hashv == (hashval) && (out)->hh.keylen == (keylen_in)) { /* hash value and length are checked before the key comparison */ \
+      if (HASH_KEYCMP((out)->hh.key, keyptr, keylen_in) == 0) {                  \
+        break;                                                                   \
+      }                                                                          \
+    }                                                                            \
+    if ((out)->hh.hh_next != NULL) { /* advance along the bucket chain */        \
+      DECLTYPE_ASSIGN(out, ELMT_FROM_HH(tbl, (out)->hh.hh_next));                \
+    } else {                                                                     \
+      (out) = NULL;                                                              \
+    }                                                                            \
+  }                                                                              \
+} while (0)
+
+/* add an item to the front of a bucket's chain; may trigger HASH_EXPAND_BUCKETS when the chain gets long */
+#define HASH_ADD_TO_BKT(head,hh,addhh,oomed)                                     \
+do {                                                                             \
+  UT_hash_bucket *_ha_head = &(head);                                            \
+  _ha_head->count++;                                                             \
+  (addhh)->hh_next = _ha_head->hh_head; /* the old first handle becomes the successor of addhh */ \
+  (addhh)->hh_prev = NULL;                                                       \
+  if (_ha_head->hh_head != NULL) {                                               \
+    _ha_head->hh_head->hh_prev = (addhh);                                        \
+  }                                                                              \
+  _ha_head->hh_head = (addhh);                                                   \
+  if ((_ha_head->count >= ((_ha_head->expand_mult + 1U) * HASH_BKT_CAPACITY_THRESH)) \
+      && !(addhh)->tbl->noexpand) { /* chain reached its threshold and expansion is not disabled */ \
+    HASH_EXPAND_BUCKETS(addhh,(addhh)->tbl, oomed);                              \
+    IF_HASH_NONFATAL_OOM(                                                        \
+      if (oomed) {                                                               \
+        HASH_DEL_IN_BKT(head,addhh); /* oomed was set: take the handle back out of the bucket */ \
+      }                                                                          \
+    )                                                                            \
+  }                                                                              \
+} while (0)
+
+/* remove an item from a given bucket: decrement the count and unlink delhh from the hh_prev/hh_next chain */
+#define HASH_DEL_IN_BKT(head,delhh)                                              \
+do {                                                                             \
+  UT_hash_bucket *_hd_head = &(head);                                            \
+  _hd_head->count--;                                                             \
+  if (_hd_head->hh_head == (delhh)) { /* delhh was first: its successor becomes the chain head */ \
+    _hd_head->hh_head = (delhh)->hh_next;                                        \
+  }                                                                              \
+  if ((delhh)->hh_prev) {                                                        \
+    (delhh)->hh_prev->hh_next = (delhh)->hh_next;                                \
+  }                                                                              \
+  if ((delhh)->hh_next) {                                                        \
+    (delhh)->hh_next->hh_prev = (delhh)->hh_prev;                                \
+  }                                                                              \
+} while (0) /* note: delhh's own hh_prev/hh_next fields are left as they were */
+
+/* Bucket expansion has the effect of doubling the number of buckets
+ * and redistributing the items into the new buckets. Ideally the
+ * items will distribute more or less evenly into the new buckets
+ * (the extent to which this is true is a measure of the quality of
+ * the hash function as it applies to the key domain).
+ *
+ * With the items distributed into more buckets, the chain length
+ * (item count) in each bucket is reduced. Thus by expanding buckets
+ * the hash keeps a bound on the chain length. This bounded chain
+ * length is the essence of how a hash provides constant time lookup.
+ *
+ * The calculation of tbl->ideal_chain_maxlen below deserves some
+ * explanation. First, keep in mind that we're calculating the ideal
+ * maximum chain length based on the *new* (doubled) bucket count.
+ * In fractions this is just n/b (n=number of items,b=new num buckets).
+ * Since the ideal chain length is an integer, we want to calculate
+ * ceil(n/b). We don't depend on floating point arithmetic in this
+ * hash, so to calculate ceil(n/b) with integers we could write
+ *
+ *      ceil(n/b) = (n/b) + ((n%b)?1:0)
+ *
+ * and in fact a previous version of this hash did just that.
+ * But now we have improved things a bit by recognizing that b is
+ * always a power of two. We keep its base 2 log handy (call it lb),
+ * so now we can write this with a bit shift and logical AND:
+ *
+ *      ceil(n/b) = (n>>lb) + ( (n & (b-1)) ? 1:0)
+ *
+ */
+#define HASH_EXPAND_BUCKETS(hh,tbl,oomed) /* allocate twice as many buckets and move every handle into the new array */ \
+do {                                                                             \
+  unsigned _he_bkt;                                                              \
+  unsigned _he_bkt_i;                                                            \
+  struct UT_hash_handle *_he_thh, *_he_hh_nxt;                                   \
+  UT_hash_bucket *_he_new_buckets, *_he_newbkt;                                  \
+  _he_new_buckets = (UT_hash_bucket*)uthash_malloc(                              \
+           sizeof(struct UT_hash_bucket) * (tbl)->num_buckets * 2U);             \
+  if (!_he_new_buckets) {                                                        \
+    HASH_RECORD_OOM(oomed); /* allocation failed: apart from this call, the macro touches nothing in tbl */ \
+  } else {                                                                       \
+    uthash_bzero(_he_new_buckets,                                                \
+        sizeof(struct UT_hash_bucket) * (tbl)->num_buckets * 2U);                \
+    (tbl)->ideal_chain_maxlen = /* ceil(num_items / doubled bucket count), via shift and mask */ \
+       ((tbl)->num_items >> ((tbl)->log2_num_buckets+1U)) +                      \
+       ((((tbl)->num_items & (((tbl)->num_buckets*2U)-1U)) != 0U) ? 1U : 0U);    \
+    (tbl)->nonideal_items = 0;                                                   \
+    for (_he_bkt_i = 0; _he_bkt_i < (tbl)->num_buckets; _he_bkt_i++) {           \
+      _he_thh = (tbl)->buckets[ _he_bkt_i ].hh_head;                             \
+      while (_he_thh != NULL) {                                                  \
+        _he_hh_nxt = _he_thh->hh_next; /* saved now because hh_next is overwritten below */ \
+        HASH_TO_BKT(_he_thh->hashv, (tbl)->num_buckets * 2U, _he_bkt);           \
+        _he_newbkt = &(_he_new_buckets[_he_bkt]);                                \
+        if (++(_he_newbkt->count) > (tbl)->ideal_chain_maxlen) { /* this handle lands beyond the ideal chain length */ \
+          (tbl)->nonideal_items++;                                               \
+          if (_he_newbkt->count > _he_newbkt->expand_mult * (tbl)->ideal_chain_maxlen) { \
+            _he_newbkt->expand_mult++;                                           \
+          }                                                                      \
+        }                                                                        \
+        _he_thh->hh_prev = NULL; /* push the handle onto the front of the new bucket's chain */ \
+        _he_thh->hh_next = _he_newbkt->hh_head;                                  \
+        if (_he_newbkt->hh_head != NULL) {                                       \
+          _he_newbkt->hh_head->hh_prev = _he_thh;                                \
+        }                                                                        \
+        _he_newbkt->hh_head = _he_thh;                                           \
+        _he_thh = _he_hh_nxt;                                                    \
+      }                                                                          \
+    }                                                                            \
+    uthash_free((tbl)->buckets, (tbl)->num_buckets * sizeof(struct UT_hash_bucket)); \
+    (tbl)->num_buckets *= 2U;                                                    \
+    (tbl)->log2_num_buckets++;                                                   \
+    (tbl)->buckets = _he_new_buckets;                                            \
+    (tbl)->ineff_expands = ((tbl)->nonideal_items > ((tbl)->num_items >> 1)) ?   \
+        ((tbl)->ineff_expands+1U) : 0U;                                          \
+    if ((tbl)->ineff_expands > 1U) { /* two expansions in a row left more than half the items non-ideal */ \
+      (tbl)->noexpand = 1;                                                       \
+      uthash_noexpand_fyi(tbl);                                                  \
+    }                                                                            \
+    uthash_expand_fyi(tbl);                                                      \
+  }                                                                              \
+} while (0)
+
+
+/* This is an adaptation of Simon Tatham's O(n log(n)) mergesort */
+/* Note that HASH_SORT assumes the hash handle name to be hh.
+ * HASH_SRT was added to allow the hash handle name to be passed in. */
+#define HASH_SORT(head,cmpfcn) HASH_SRT(hh,head,cmpfcn)
+#define HASH_SRT(hh,head,cmpfcn) /* cmpfcn(a,b) gets two element pointers; a result <= 0 places a first. head is reassigned. */ \
+do {                                                                             \
+  unsigned _hs_i;                                                                \
+  unsigned _hs_looping,_hs_nmerges,_hs_insize,_hs_psize,_hs_qsize;               \
+  struct UT_hash_handle *_hs_p, *_hs_q, *_hs_e, *_hs_list, *_hs_tail;            \
+  if (head != NULL) {                                                            \
+    _hs_insize = 1; /* length of the runs merged in the current pass; doubled after each pass */ \
+    _hs_looping = 1;                                                             \
+    _hs_list = &((head)->hh);                                                    \
+    while (_hs_looping != 0U) { /* one pass over the whole list per iteration */ \
+      _hs_p = _hs_list;                                                          \
+      _hs_list = NULL;                                                           \
+      _hs_tail = NULL;                                                           \
+      _hs_nmerges = 0;                                                           \
+      while (_hs_p != NULL) {                                                    \
+        _hs_nmerges++;                                                           \
+        _hs_q = _hs_p;                                                           \
+        _hs_psize = 0;                                                           \
+        for (_hs_i = 0; _hs_i < _hs_insize; ++_hs_i) { /* move q up to insize elements past p, counting p's run */ \
+          _hs_psize++;                                                           \
+          _hs_q = ((_hs_q->next != NULL) ?                                       \
+            HH_FROM_ELMT((head)->hh.tbl, _hs_q->next) : NULL);                   \
+          if (_hs_q == NULL) {                                                   \
+            break;                                                               \
+          }                                                                      \
+        }                                                                        \
+        _hs_qsize = _hs_insize;                                                  \
+        while ((_hs_psize != 0U) || ((_hs_qsize != 0U) && (_hs_q != NULL))) {    \
+          if (_hs_psize == 0U) { /* p's run is used up: take from q */           \
+            _hs_e = _hs_q;                                                       \
+            _hs_q = ((_hs_q->next != NULL) ?                                     \
+              HH_FROM_ELMT((head)->hh.tbl, _hs_q->next) : NULL);                 \
+            _hs_qsize--;                                                         \
+          } else if ((_hs_qsize == 0U) || (_hs_q == NULL)) { /* q's run is used up: take from p */ \
+            _hs_e = _hs_p;                                                       \
+            if (_hs_p != NULL) {                                                 \
+              _hs_p = ((_hs_p->next != NULL) ?                                   \
+                HH_FROM_ELMT((head)->hh.tbl, _hs_p->next) : NULL);               \
+            }                                                                    \
+            _hs_psize--;                                                         \
+          } else if ((cmpfcn(                                                    \
+                DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl, _hs_p)),             \
+                DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl, _hs_q))              \
+                )) <= 0) { /* both runs non-empty: on <= 0 (ties included) take from p */ \
+            _hs_e = _hs_p;                                                       \
+            if (_hs_p != NULL) {                                                 \
+              _hs_p = ((_hs_p->next != NULL) ?                                   \
+                HH_FROM_ELMT((head)->hh.tbl, _hs_p->next) : NULL);               \
+            }                                                                    \
+            _hs_psize--;                                                         \
+          } else {                                                               \
+            _hs_e = _hs_q;                                                       \
+            _hs_q = ((_hs_q->next != NULL) ?                                     \
+              HH_FROM_ELMT((head)->hh.tbl, _hs_q->next) : NULL);                 \
+            _hs_qsize--;                                                         \
+          }                                                                      \
+          if ( _hs_tail != NULL ) { /* append the chosen handle e to the output list */ \
+            _hs_tail->next = ((_hs_e != NULL) ?                                  \
+              ELMT_FROM_HH((head)->hh.tbl, _hs_e) : NULL);                       \
+          } else {                                                               \
+            _hs_list = _hs_e;                                                    \
+          }                                                                      \
+          if (_hs_e != NULL) {                                                   \
+            _hs_e->prev = ((_hs_tail != NULL) ?                                  \
+              ELMT_FROM_HH((head)->hh.tbl, _hs_tail) : NULL);                    \
+          }                                                                      \
+          _hs_tail = _hs_e;                                                      \
+        }                                                                        \
+        _hs_p = _hs_q; /* the next pair of runs starts where q stopped */        \
+      }                                                                          \
+      if (_hs_tail != NULL) {                                                    \
+        _hs_tail->next = NULL;                                                   \
+      }                                                                          \
+      if (_hs_nmerges <= 1U) { /* at most one merge in this pass: stop, then publish the new tail and head */ \
+        _hs_looping = 0;                                                         \
+        (head)->hh.tbl->tail = _hs_tail;                                         \
+        DECLTYPE_ASSIGN(head, ELMT_FROM_HH((head)->hh.tbl, _hs_list));           \
+      }                                                                          \
+      _hs_insize *= 2U;                                                          \
+    }                                                                            \
+    HASH_FSCK(hh, head, "HASH_SRT");                                             \
+  }                                                                              \
+} while (0)
+
+/* HASH_SELECT(hh_dst, dst, hh_src, src, cond): this macro walks every bucket
+ * chain of the hash headed by src and, for each element where cond(elt) is
+ * nonzero (elt is passed as a void*), also links it into the hash headed by
+ * dst. No copy of the items is made; the selected items gain dual presence
+ * through a secondary hash handle (hh_dst) that must be present in the structure. */
+#define HASH_SELECT(hh_dst, dst, hh_src, src, cond)                              \
+do {                                                                             \
+  unsigned _src_bkt, _dst_bkt;                                                   \
+  void *_last_elt = NULL, *_elt;                                                 \
+  UT_hash_handle *_src_hh, *_dst_hh, *_last_elt_hh=NULL;                         \
+  ptrdiff_t _dst_hho = ((char*)(&(dst)->hh_dst) - (char*)(dst));                 \
+  if ((src) != NULL) {                                                           \
+    for (_src_bkt=0; _src_bkt < (src)->hh_src.tbl->num_buckets; _src_bkt++) {    \
+      for (_src_hh = (src)->hh_src.tbl->buckets[_src_bkt].hh_head;               \
+        _src_hh != NULL;                                                         \
+        _src_hh = _src_hh->hh_next) {                                            \
+        _elt = ELMT_FROM_HH((src)->hh_src.tbl, _src_hh);                         \
+        if (cond(_elt)) {                                                        \
+          IF_HASH_NONFATAL_OOM( int _hs_oomed = 0; )                             \
+          _dst_hh = (UT_hash_handle*)(void*)(((char*)_elt) + _dst_hho);          \
+          _dst_hh->key = _src_hh->key;                                           \
+          _dst_hh->keylen = _src_hh->keylen;                                     \
+          _dst_hh->hashv = _src_hh->hashv;                                       \
+          _dst_hh->prev = _last_elt; /* append to dst app-order list */         \
+          _dst_hh->next = NULL;                                                  \
+          if (_last_elt_hh != NULL) {                                            \
+            _last_elt_hh->next = _elt;                                           \
+          }                                                                      \
+          if ((dst) == NULL) {                                                   \
+            DECLTYPE_ASSIGN(dst, _elt); /* first match becomes dst head */      \
+            HASH_MAKE_TABLE(hh_dst, dst, _hs_oomed);                             \
+            IF_HASH_NONFATAL_OOM(                                                \
+              if (_hs_oomed) {                                                   \
+                uthash_nonfatal_oom(_elt);                                       \
+                (dst) = NULL;                                                    \
+                continue;                                                        \
+              }                                                                  \
+            )                                                                    \
+          } else {                                                               \
+            _dst_hh->tbl = (dst)->hh_dst.tbl; /* reuse dst's table */           \
+          }                                                                      \
+          HASH_TO_BKT(_dst_hh->hashv, _dst_hh->tbl->num_buckets, _dst_bkt);      \
+          HASH_ADD_TO_BKT(_dst_hh->tbl->buckets[_dst_bkt], hh_dst, _dst_hh, _hs_oomed); \
+          (dst)->hh_dst.tbl->num_items++;                                        \
+          IF_HASH_NONFATAL_OOM(                                                  \
+            if (_hs_oomed) {                                                     \
+              HASH_ROLLBACK_BKT(hh_dst, dst, _dst_hh);                           \
+              HASH_DELETE_HH(hh_dst, dst, _dst_hh);                              \
+              _dst_hh->tbl = NULL;                                               \
+              uthash_nonfatal_oom(_elt);                                         \
+              continue;                                                          \
+            }                                                                    \
+          )                                                                      \
+          HASH_BLOOM_ADD(_dst_hh->tbl, _dst_hh->hashv);                          \
+          _last_elt = _elt;                                                      \
+          _last_elt_hh = _dst_hh;                                                \
+        }                                                                        \
+      }                                                                          \
+    }                                                                            \
+  }                                                                              \
+  HASH_FSCK(hh_dst, dst, "HASH_SELECT");                                         \
+} while (0)
+
+#define HASH_CLEAR(hh,head) /* frees table + buckets, not the items */         \
+do {                                                                             \
+  if ((head) != NULL) {                                                          \
+    HASH_BLOOM_FREE((head)->hh.tbl);                                             \
+    uthash_free((head)->hh.tbl->buckets,                                         \
+                (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket));      \
+    uthash_free((head)->hh.tbl, sizeof(UT_hash_table));                          \
+    (head) = NULL;                                                               \
+  }                                                                              \
+} while (0)
+
+#define HASH_OVERHEAD(hh,head) /* bookkeeping bytes; 0U when head is NULL */   \
+ (((head) != NULL) ? (                                                           \
+ (size_t)(((head)->hh.tbl->num_items   * sizeof(UT_hash_handle))   +             \
+          ((head)->hh.tbl->num_buckets * sizeof(UT_hash_bucket))   +             \
+           sizeof(UT_hash_table)                                   +             \
+           (HASH_BLOOM_BYTELEN))) : 0U)
+
+#ifdef NO_DECLTYPE
+#define HASH_ITER(hh,head,el,tmp) /* tmp = el's successor, read ahead */        \
+for(((el)=(head)), ((*(char**)(&(tmp)))=(char*)(((head)!=NULL)?(head)->hh.next:NULL)); \
+  (el) != NULL; ((el)=(tmp)), ((*(char**)(&(tmp)))=(char*)(((tmp)!=NULL)?(tmp)->hh.next:NULL)))
+#else
+#define HASH_ITER(hh,head,el,tmp) /* tmp = el's successor, read ahead */        \
+for(((el)=(head)), ((tmp)=DECLTYPE(el)(((head)!=NULL)?(head)->hh.next:NULL));    \
+  (el) != NULL; ((el)=(tmp)), ((tmp)=DECLTYPE(el)(((tmp)!=NULL)?(tmp)->hh.next:NULL)))
+#endif
+
+/* obtain a count of items in the hash (0U when head is NULL); HASH_COUNT uses the handle field named hh */
+#define HASH_COUNT(head) HASH_CNT(hh,head)
+#define HASH_CNT(hh,head) (((head) != NULL)?((head)->hh.tbl->num_items):0U)
+
+typedef struct UT_hash_bucket {
+   struct UT_hash_handle *hh_head;   /* first hh in this bucket's chain */
+   unsigned count;                   /* presumably the current chain length -- confirm in HASH_ADD_TO_BKT */
+
+   /* expand_mult is normally set to 0. In this situation, the max chain length
+    * threshold is enforced at its default value, HASH_BKT_CAPACITY_THRESH. (If
+    * the bucket's chain exceeds this length, bucket expansion is triggered).
+    * However, setting expand_mult to a non-zero value delays bucket expansion
+    * (that would be triggered by additions to this particular bucket)
+    * until its chain length reaches a *multiple* of HASH_BKT_CAPACITY_THRESH.
+    * (The multiplier is simply expand_mult+1). The whole idea of this
+    * multiplier is to reduce bucket expansions, since they are expensive, in
+    * situations where we know that a particular bucket tends to be overused.
+    * It is better to let its chain length grow to a longer yet-still-bounded
+    * value, than to do an O(n) bucket expansion too often.
+    */
+   unsigned expand_mult;
+
+} UT_hash_bucket;
+
+/* random signature used only to find hash tables in external analysis */
+#define HASH_SIGNATURE 0xa0111fe1u
+#define HASH_BLOOM_SIGNATURE 0xb12220f2u
+
+typedef struct UT_hash_table {
+   UT_hash_bucket *buckets;          /* array of num_buckets buckets */
+   unsigned num_buckets, log2_num_buckets; /* bucket count; presumably its base-2 log -- confirm */
+   unsigned num_items;               /* number of items in the hash */
+   struct UT_hash_handle *tail; /* tail hh in app order, for fast append    */
+   ptrdiff_t hho; /* hash handle offset (byte pos of hash handle in element) */
+
+   /* in an ideal situation (all buckets used equally), no bucket would have
+    * more than ceil(#items/#buckets) items. that's the ideal chain length. */
+   unsigned ideal_chain_maxlen;
+
+   /* nonideal_items is the number of items in the hash whose chain position
+    * exceeds the ideal chain maxlen. these items pay the penalty for an uneven
+    * hash distribution; reaching them in a chain traversal takes >ideal steps */
+   unsigned nonideal_items;
+
+   /* ineffective expands occur when a bucket doubling was performed, but
+    * afterward, more than half the items in the hash had nonideal chain
+    * positions. If this happens on two consecutive expansions we inhibit any
+    * further expansion, as it's not helping; this happens when the hash
+    * function isn't a good fit for the key domain. When expansion is inhibited
+    * the hash will still work, albeit no longer in constant time. */
+   unsigned ineff_expands, noexpand;
+
+   uint32_t signature; /* used only to find hash tables in external analysis */
+#ifdef HASH_BLOOM
+   uint32_t bloom_sig; /* used only to test bloom exists in external analysis */
+   uint8_t *bloom_bv;
+   uint8_t bloom_nbits;
+#endif
+
+} UT_hash_table;
+
+typedef struct UT_hash_handle {
+   struct UT_hash_table *tbl;        /* table this hh is linked into   */
+   void *prev;                       /* prev element in app order      */
+   void *next;                       /* next element in app order      */
+   struct UT_hash_handle *hh_prev;   /* previous hh in bucket order    */
+   struct UT_hash_handle *hh_next;   /* next hh in bucket order        */
+   const void *key;                  /* ptr to enclosing struct's key  */
+   unsigned keylen;                  /* enclosing struct's key len     */
+   unsigned hashv;                   /* result of hash-fcn(key)        */
+} UT_hash_handle;
+
+#endif /* UTHASH_H */
diff --git a/vendors/xxhash/LICENSE b/vendors/xxhash/LICENSE
new file mode 100644
index 0000000..6bc30a1
--- /dev/null
+++ b/vendors/xxhash/LICENSE
@@ -0,0 +1,26 @@
+xxHash Library
+Copyright (c) 2012-2020 Yann Collet
+All rights reserved.
+
+BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice, this
+  list of conditions and the following disclaimer in the documentation and/or
+  other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/vendors/xxhash/README.md b/vendors/xxhash/README.md
new file mode 100644
index 0000000..2406c8d
--- /dev/null
+++ b/vendors/xxhash/README.md
@@ -0,0 +1,236 @@
+
+xxHash - Extremely fast hash algorithm
+======================================
+
+xxHash is an Extremely fast Hash algorithm, running at RAM speed limits.
+It successfully completes the [SMHasher](https://code.google.com/p/smhasher/wiki/SMHasher) test suite
+which evaluates collision, dispersion and randomness qualities of hash functions.
+Code is highly portable, and hashes are identical across all platforms (little / big endian).
+
+|Branch      |Status   |
+|------------|---------|
+|dev         | [![Build Status](https://github.com/Cyan4973/xxHash/actions/workflows/ci.yml/badge.svg?branch=dev)](https://github.com/Cyan4973/xxHash/actions?query=branch%3Adev+) |
+
+
+Benchmarks
+-------------------------
+
+The reference system uses an Intel i7-9700K cpu, and runs Ubuntu x64 20.04.
+The [open source benchmark program] is compiled with `clang` v10.0 using `-O3` flag.
+
+| Hash Name     | Width | Bandwidth (GB/s) | Small Data Velocity | Quality | Comment |
+| ---------     | ----- | ---------------- | ----- | --- | --- |
+| __XXH3__ (SSE2) |  64 | 31.5 GB/s        | 133.1 | 10
+| __XXH128__ (SSE2) | 128 | 29.6 GB/s      | 118.1 | 10
+| _RAM sequential read_ | N/A | 28.0 GB/s  |   N/A | N/A | _for reference_
+| City64        |    64 | 22.0 GB/s        |  76.6 | 10
+| T1ha2         |    64 | 22.0 GB/s        |  99.0 |  9 | Slightly worse [collisions]
+| City128       |   128 | 21.7 GB/s        |  57.7 | 10
+| __XXH64__     |    64 | 19.4 GB/s        |  71.0 | 10
+| SpookyHash    |    64 | 19.3 GB/s        |  53.2 | 10
+| Mum           |    64 | 18.0 GB/s        |  67.0 |  9 | Slightly worse [collisions]
+| __XXH32__     |    32 |  9.7 GB/s        |  71.9 | 10
+| City32        |    32 |  9.1 GB/s        |  66.0 | 10
+| Murmur3       |    32 |  3.9 GB/s        |  56.1 | 10
+| SipHash       |    64 |  3.0 GB/s        |  43.2 | 10
+| FNV64         |    64 |  1.2 GB/s        |  62.7 |  5 | Poor avalanche properties
+| Blake2        |   256 |  1.1 GB/s        |   5.1 | 10 | Cryptographic
+| SHA1          |   160 |  0.8 GB/s        |   5.6 | 10 | Cryptographic but broken
+| MD5           |   128 |  0.6 GB/s        |   7.8 | 10 | Cryptographic but broken
+
+[open source benchmark program]: https://github.com/Cyan4973/xxHash/tree/release/tests/bench
+[collisions]: https://github.com/Cyan4973/xxHash/wiki/Collision-ratio-comparison#collision-study
+
+note 1: Small data velocity is a _rough_ evaluation of algorithm's efficiency on small data. For more detailed analysis, please refer to next paragraph.
+
+note 2: some algorithms feature _faster than RAM_ speed. In that case, they can only reach their full speed when input data is already in CPU cache (L3 or better). Otherwise, they max out at the RAM speed limit.
+
+### Small data
+
+Performance on large data is only one part of the picture.
+Hashing is also very useful in constructions like hash tables and bloom filters.
+In these use cases, it's frequent to hash a lot of small data (starting at a few bytes).
+Algorithm's performance can be very different for such scenarios, since parts of the algorithm,
+such as initialization or finalization, become fixed cost.
+The impact of branch mis-prediction also becomes much more present.
+
+XXH3 has been designed for excellent performance on both long and small inputs,
+which can be observed in the following graph:
+
+![XXH3, latency, random size](https://user-images.githubusercontent.com/750081/61976089-aedeab00-af9f-11e9-9239-e5375d6c080f.png)
+
+For a more detailed analysis, visit the wiki :
+https://github.com/Cyan4973/xxHash/wiki/Performance-comparison#benchmarks-concentrating-on-small-data-
+
+Quality
+-------------------------
+
+Speed is not the only property that matters.
+Produced hash values must respect excellent dispersion and randomness properties,
+so that any sub-section of it can be used to maximally spread out a table or index,
+as well as reduce the number of collisions to the minimal theoretical level, following the [birthday paradox].
+
+`xxHash` has been tested with Austin Appleby's excellent SMHasher test suite,
+and passes all tests, ensuring reasonable quality levels.
+It also passes extended tests from [newer forks of SMHasher], featuring additional scenarios and conditions.
+
+Finally, xxHash provides its own [massive collision tester](https://github.com/Cyan4973/xxHash/tree/dev/tests/collisions),
+able to generate and compare billions of hashes to test the limits of 64-bit hash algorithms.
+On this front too, xxHash features good results, in line with the [birthday paradox].
+A more detailed analysis is documented [in the wiki](https://github.com/Cyan4973/xxHash/wiki/Collision-ratio-comparison).
+
+[birthday paradox]: https://en.wikipedia.org/wiki/Birthday_problem
+[newer forks of SMHasher]: https://github.com/rurban/smhasher
+
+
+### Build modifiers
+
+The following macros can be set at compilation time to modify libxxhash's behavior. They are generally disabled by default.
+
+- `XXH_INLINE_ALL`: Make all functions `inline`, with implementations being directly included within `xxhash.h`.
+                    Inlining functions is beneficial for speed on small keys.
+                    It's _extremely effective_ when key length is expressed as _a compile time constant_,
+                    with performance improvements observed in the +200% range.
+                    See [this article](https://fastcompression.blogspot.com/2018/03/xxhash-for-small-keys-impressive-power.html) for details.
+- `XXH_PRIVATE_API`: same outcome as `XXH_INLINE_ALL`. Still available for legacy support.
+                     The name underlines that `XXH_*` symbols will not be exported.
+- `XXH_NAMESPACE`: Prefixes all symbols with the value of `XXH_NAMESPACE`.
+                   Its value may only contain characters that are valid in a C identifier.
+                   Useful to evade symbol naming collisions,
+                   in case of multiple inclusions of xxHash's source code.
+                   Client applications still use the regular function names,
+                   as symbols are automatically translated through `xxhash.h`.
+- `XXH_FORCE_MEMORY_ACCESS`: The default method `0` uses a portable `memcpy()` notation.
+                             Method `1` uses a gcc-specific `packed` attribute, which can provide better performance for some targets.
+                             Method `2` forces unaligned reads, which is not standards compliant, but might sometimes be the only way to extract better read performance.
+                             Method `3` uses a byteshift operation, which is best for old compilers which don't inline `memcpy()` or big-endian systems without a byteswap instruction.
+- `XXH_FORCE_ALIGN_CHECK`: Use a faster direct read path when input is aligned.
+                           This option can result in dramatic performance improvement when input to hash is aligned on 32 or 64-bit boundaries,
+                           when running on architectures unable to load memory from unaligned addresses, or suffering a performance penalty from it.
+                           It is (slightly) detrimental on platforms with good unaligned memory access performance (same instruction for both aligned and unaligned accesses).
+                           This option is automatically disabled on `x86`, `x64` and `aarch64`, and enabled on all other platforms.
+- `XXH_VECTOR` : manually select a vector instruction set (default: auto-selected at compilation time). Available instruction sets are `XXH_SCALAR`, `XXH_SSE2`, `XXH_AVX2`, `XXH_AVX512`, `XXH_NEON` and `XXH_VSX`. Compiler may require additional flags to ensure proper support (for example, `gcc` on linux will require `-mavx2` for AVX2, and `-mavx512f` for AVX512).
+- `XXH_NO_PREFETCH` : disable prefetching. Some platforms or situations may perform better without prefetching. XXH3 only.
+- `XXH_PREFETCH_DIST` : select prefetching distance. For close-to-metal adaptation to specific hardware platforms. XXH3 only.
+- `XXH_NO_INLINE_HINTS`: By default, xxHash uses `__attribute__((always_inline))` and `__forceinline` to improve performance at the cost of code size.
+                         Defining this macro to 1 will mark all internal functions as `static`, allowing the compiler to decide whether to inline a function or not.
+                         This is very useful when optimizing for smallest binary size,
+                         and is automatically defined when compiling with `-O0`, `-Os`, `-Oz`, or `-fno-inline` on GCC and Clang.
+                         This may also increase performance depending on compiler and architecture.
+- `XXH32_ENDJMP`: Replace the multi-branch finalization stage of XXH32 with a single jump.
+                  This is generally undesirable for performance, especially when hashing inputs of random sizes.
+                  But depending on exact architecture and compiler, a jump might provide slightly better performance on small inputs. Disabled by default.
+- `XXH_STATIC_LINKING_ONLY`: gives access to internal state declaration, required for static allocation.
+                             Incompatible with dynamic linking, due to risks of ABI changes.
+- `XXH_NO_XXH3` : removes symbols related to `XXH3` (both 64 & 128 bits) from generated binary.
+                  Useful to reduce binary size, notably for applications which do not use `XXH3`.
+- `XXH_NO_LONG_LONG`: removes compilation of algorithms relying on 64-bit types (XXH3 and XXH64). Only XXH32 will be compiled.
+                      Useful for targets (architectures and compilers) without 64-bit support.
+- `XXH_IMPORT`: MSVC specific: should only be defined for dynamic linking, as it prevents linkage errors.
+- `XXH_CPU_LITTLE_ENDIAN`: By default, endianness is determined by a runtime test resolved at compile time.
+                           If, for some reason, the compiler cannot simplify the runtime test, it can cost performance.
+                           It's possible to skip auto-detection and simply state that the architecture is little-endian by setting this macro to 1.
+                           Setting it to 0 states big-endian.
+- `XXH_DEBUGLEVEL` : When set to any value >= 1, enables `assert()` statements.
+                     This (slightly) slows down execution, but may help find bugs during debugging sessions.
+
+When compiling the Command Line Interface `xxhsum` with `make`, the following environment variables can also be set:
+- `DISPATCH=1` : use `xxh_x86dispatch.c`, to automatically select between `scalar`, `sse2`, `avx2` or `avx512` instruction set at runtime, depending on local host. This option is only valid for `x86`/`x64` systems.
+
+
+### Building xxHash - Using vcpkg
+
+You can download and install xxHash using the [vcpkg](https://github.com/Microsoft/vcpkg) dependency manager:
+
+    git clone https://github.com/Microsoft/vcpkg.git
+    cd vcpkg
+    ./bootstrap-vcpkg.sh
+    ./vcpkg integrate install
+    ./vcpkg install xxhash
+
+The xxHash port in vcpkg is kept up to date by Microsoft team members and community contributors. If the version is out of date, please [create an issue or pull request](https://github.com/Microsoft/vcpkg) on the vcpkg repository.
+
+
+### Example
+
+The simplest example calls the xxHash 64-bit variant as a one-shot function
+generating a hash value from a single buffer, and invoked from a C/C++ program:
+
+```C
+#include "xxhash.h"
+
+    (...)
+    XXH64_hash_t hash = XXH64(buffer, size, seed);
+}
+```
+
+Streaming variant is more involved, but makes it possible to provide data incrementally:
+
+```C
+#include "stdlib.h"   /* abort() */
+#include "xxhash.h"
+
+
+XXH64_hash_t calcul_hash_streaming(FileHandler fh)
+{
+    /* create a hash state */
+    XXH64_state_t* const state = XXH64_createState();
+    if (state==NULL) abort();
+
+    size_t const bufferSize = SOME_SIZE;
+    void* const buffer = malloc(bufferSize);
+    if (buffer==NULL) abort();
+
+    /* Initialize state with selected seed */
+    XXH64_hash_t const seed = 0;   /* or any other value */
+    if (XXH64_reset(state, seed) == XXH_ERROR) abort();
+
+    /* Feed the state with input data, any size, any number of times */
+    (...)
+    while ( /* some data left */ ) {
+        size_t const length = get_more_data(buffer, bufferSize, fh);
+        if (XXH64_update(state, buffer, length) == XXH_ERROR) abort();
+        (...)
+    }
+    (...)
+
+    /* Produce the final hash value */
+    XXH64_hash_t const hash = XXH64_digest(state);
+
+    /* State could be re-used; but in this example, it is simply freed  */
+    free(buffer);
+    XXH64_freeState(state);
+
+    return hash;
+}
+```
+
+
+### License
+
+The library files `xxhash.c` and `xxhash.h` are BSD licensed.
+The utility `xxhsum` is GPL licensed.
+
+
+### Other programming languages
+
+Beyond the C reference version,
+xxHash is also available from many different programming languages,
+thanks to great contributors.
+They are [listed here](http://www.xxhash.com/#other-languages).
+
+
+### Packaging status
+
+Many distributions bundle a package manager
+which allows easy xxhash installation as both a `libxxhash` library
+and `xxhsum` command line interface.
+
+[![Packaging status](https://repology.org/badge/vertical-allrepos/xxhash.svg)](https://repology.org/project/xxhash/versions)
+
+
+### Special Thanks
+
+- Takayuki Matsuoka, aka @t-mat, for creating `xxhsum -c` and great support during early xxh releases
+- Mathias Westerdahl, aka @JCash, for introducing the first version of `XXH64`
+- Devin Hussey, aka @easyaspi314, for incredible low-level optimizations on `XXH3` and `XXH128`
diff --git a/vendors/xxhash/xxhash.c b/vendors/xxhash/xxhash.c
new file mode 100644
index 0000000..0fae88c
--- /dev/null
+++ b/vendors/xxhash/xxhash.c
@@ -0,0 +1,43 @@
+/*
+ * xxHash - Extremely Fast Hash algorithm
+ * Copyright (C) 2012-2020 Yann Collet
+ *
+ * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above
+ *      copyright notice, this list of conditions and the following disclaimer
+ *      in the documentation and/or other materials provided with the
+ *      distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * You can contact the author at:
+ *   - xxHash homepage: https://www.xxhash.com
+ *   - xxHash source repository: https://github.com/Cyan4973/xxHash
+ */
+
+
+/*
+ * xxhash.c instantiates functions defined in xxhash.h
+ */
+
+#define XXH_STATIC_LINKING_ONLY   /* access advanced declarations */
+#define XXH_IMPLEMENTATION   /* access definitions */
+
+#include "xxhash.h"
diff --git a/vendors/xxhash/xxhash.h b/vendors/xxhash/xxhash.h
new file mode 100644
index 0000000..08ab794
--- /dev/null
+++ b/vendors/xxhash/xxhash.h
@@ -0,0 +1,5580 @@
+/*
+ * xxHash - Extremely Fast Hash algorithm
+ * Header File
+ * Copyright (C) 2012-2020 Yann Collet
+ *
+ * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above
+ *      copyright notice, this list of conditions and the following disclaimer
+ *      in the documentation and/or other materials provided with the
+ *      distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * You can contact the author at:
+ *   - xxHash homepage: https://www.xxhash.com
+ *   - xxHash source repository: https://github.com/Cyan4973/xxHash
+ */
+/*!
+ * @mainpage xxHash
+ *
+ * @file xxhash.h
+ * xxHash prototypes and implementation
+ */
+/* TODO: update */
+/* Notice extracted from xxHash homepage:
+
+xxHash is an extremely fast hash algorithm, running at RAM speed limits.
+It also successfully passes all tests from the SMHasher suite.
+
+Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz)
+
+Name            Speed       Q.Score   Author
+xxHash          5.4 GB/s     10
+CrapWow         3.2 GB/s      2       Andrew
+MurmurHash 3a   2.7 GB/s     10       Austin Appleby
+SpookyHash      2.0 GB/s     10       Bob Jenkins
+SBox            1.4 GB/s      9       Bret Mulvey
+Lookup3         1.2 GB/s      9       Bob Jenkins
+SuperFastHash   1.2 GB/s      1       Paul Hsieh
+CityHash64      1.05 GB/s    10       Pike & Alakuijala
+FNV             0.55 GB/s     5       Fowler, Noll, Vo
+CRC32           0.43 GB/s     9
+MD5-32          0.33 GB/s    10       Ronald L. Rivest
+SHA1-32         0.28 GB/s    10
+
+Q.Score is a measure of quality of the hash function.
+It depends on successfully passing SMHasher test set.
+10 is a perfect score.
+
+Note: SMHasher's CRC32 implementation is not the fastest one.
+Other speed-oriented implementations can be faster,
+especially in combination with PCLMUL instruction:
+https://fastcompression.blogspot.com/2019/03/presenting-xxh3.html?showComment=1552696407071#c3490092340461170735
+
+A 64-bit version, named XXH64, is available since r35.
+It offers much better speed, but for 64-bit applications only.
+Name     Speed on 64 bits    Speed on 32 bits
+XXH64       13.8 GB/s            1.9 GB/s
+XXH32        6.8 GB/s            6.0 GB/s
+*/
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* ****************************
+ *  INLINE mode
+ ******************************/
+/*!
+ * XXH_INLINE_ALL (and XXH_PRIVATE_API)
+ * Use these build macros to inline xxhash into the target unit.
+ * Inlining improves performance on small inputs, especially when the length is
+ * expressed as a compile-time constant:
+ *
+ *      https://fastcompression.blogspot.com/2018/03/xxhash-for-small-keys-impressive-power.html
+ *
+ * It also keeps xxHash symbols private to the unit, so they are not exported.
+ *
+ * Usage:
+ *     #define XXH_INLINE_ALL
+ *     #include "xxhash.h"
+ *
+ * Do not compile and link xxhash.o as a separate object, as it is not useful.
+ */
+#if (defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)) \
+    && !defined(XXH_INLINE_ALL_31684351384)
+   /* traverse this section only once: the marker defined next blocks re-entry */
+#  define XXH_INLINE_ALL_31684351384
+   /* give access to the advanced API, required to compile implementations */
+#  undef XXH_STATIC_LINKING_ONLY   /* avoid macro redef */
+#  define XXH_STATIC_LINKING_ONLY
+   /* make all functions private: every variant below declares them `static` */
+#  undef XXH_PUBLIC_API
+#  if defined(__GNUC__)
+#    define XXH_PUBLIC_API static __inline __attribute__((unused))
+#  elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+#    define XXH_PUBLIC_API static inline
+#  elif defined(_MSC_VER)
+#    define XXH_PUBLIC_API static __inline
+#  else
+     /* note: this version may generate warnings for unused static functions */
+#    define XXH_PUBLIC_API static
+#  endif
+
+   /*
+    * This part deals with the special case where a unit wants to inline xxHash,
+    * but "xxhash.h" has previously been included without XXH_INLINE_ALL,
+    * such as part of some previously included *.h header file.
+    * Without further action, the new include would just be ignored,
+    * and functions would effectively _not_ be inlined (silent failure).
+    * The following macros solve this situation by prefixing all inlined names,
+    * avoiding naming collision with previous inclusions.
+    */
+   /* First, unconditionally #undef every symbol that XXH_NAMESPACE may already have
+    * renamed; they are redefined for XXH_INLINE_ALL by the XXH_NAMESPACE section.
+    * NOTE(review): XXH3_64bits_reset_withSecretandSeed, XXH3_128bits_withSecretandSeed
+    * and XXH3_generateSecret_fromSeed are renamed there but have no #undef here. */
+#  undef XXH_versionNumber
+    /* XXH32 */
+#  undef XXH32
+#  undef XXH32_createState
+#  undef XXH32_freeState
+#  undef XXH32_reset
+#  undef XXH32_update
+#  undef XXH32_digest
+#  undef XXH32_copyState
+#  undef XXH32_canonicalFromHash
+#  undef XXH32_hashFromCanonical
+    /* XXH64 */
+#  undef XXH64
+#  undef XXH64_createState
+#  undef XXH64_freeState
+#  undef XXH64_reset
+#  undef XXH64_update
+#  undef XXH64_digest
+#  undef XXH64_copyState
+#  undef XXH64_canonicalFromHash
+#  undef XXH64_hashFromCanonical
+    /* XXH3_64bits */
+#  undef XXH3_64bits
+#  undef XXH3_64bits_withSecret
+#  undef XXH3_64bits_withSeed
+#  undef XXH3_64bits_withSecretandSeed
+#  undef XXH3_createState
+#  undef XXH3_freeState
+#  undef XXH3_copyState
+#  undef XXH3_64bits_reset
+#  undef XXH3_64bits_reset_withSeed
+#  undef XXH3_64bits_reset_withSecret
+#  undef XXH3_64bits_update
+#  undef XXH3_64bits_digest
+#  undef XXH3_generateSecret
+    /* XXH3_128bits */
+#  undef XXH128
+#  undef XXH3_128bits
+#  undef XXH3_128bits_withSeed
+#  undef XXH3_128bits_withSecret
+#  undef XXH3_128bits_reset
+#  undef XXH3_128bits_reset_withSeed
+#  undef XXH3_128bits_reset_withSecret
+#  undef XXH3_128bits_reset_withSecretandSeed
+#  undef XXH3_128bits_update
+#  undef XXH3_128bits_digest
+#  undef XXH128_isEqual
+#  undef XXH128_cmp
+#  undef XXH128_canonicalFromHash
+#  undef XXH128_hashFromCanonical
+    /* Finally, free the namespace itself */
+#  undef XXH_NAMESPACE
+
+    /* employ the namespace for XXH_INLINE_ALL */
+#  define XXH_NAMESPACE XXH_INLINE_
+   /*
+    * Some identifiers (enum constants, type names) are not linker symbols, yet
+    * they must still be renamed so a second inclusion does not redeclare them.
+    * Avoiding the redeclaration instead would scatter #ifdefs over the header.
+    * Note: `##` does not macro-expand its operands, so XXH_IPREF(Id) yields the
+    * literal identifier XXH_NAMESPACE<Id> rather than XXH_INLINE_<Id>.
+    */
+#  define XXH_IPREF(Id)   XXH_NAMESPACE ## Id
+#  define XXH_OK XXH_IPREF(XXH_OK)
+#  define XXH_ERROR XXH_IPREF(XXH_ERROR)
+#  define XXH_errorcode XXH_IPREF(XXH_errorcode)
+#  define XXH32_canonical_t  XXH_IPREF(XXH32_canonical_t)
+#  define XXH64_canonical_t  XXH_IPREF(XXH64_canonical_t)
+#  define XXH128_canonical_t XXH_IPREF(XXH128_canonical_t)
+#  define XXH32_state_s XXH_IPREF(XXH32_state_s)
+#  define XXH32_state_t XXH_IPREF(XXH32_state_t)
+#  define XXH64_state_s XXH_IPREF(XXH64_state_s)
+#  define XXH64_state_t XXH_IPREF(XXH64_state_t)
+#  define XXH3_state_s  XXH_IPREF(XXH3_state_s)
+#  define XXH3_state_t  XXH_IPREF(XXH3_state_t)
+#  define XXH128_hash_t XXH_IPREF(XXH128_hash_t)
+   /* Ensure the header is parsed again, even if it was previously included */
+#  undef XXHASH_H_5627135585666179
+#  undef XXHASH_H_STATIC_13879238742
+#endif /* XXH_INLINE_ALL || XXH_PRIVATE_API */
+
+
+
+/* ****************************************************************
+ *  Stable API
+ *****************************************************************/
+#ifndef XXHASH_H_5627135585666179
+#define XXHASH_H_5627135585666179 1
+
+
+/*!
+ * @defgroup public Public API
+ * Contains details on the public xxHash functions.
+ * @{
+ */
+/* Windows DLL builds export or import the public API; every other build needs no decoration. */
+#if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API)
+#  if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT))
+#    ifdef XXH_EXPORT
+#      define XXH_PUBLIC_API __declspec(dllexport)
+#    elif defined(XXH_IMPORT)   /* test definedness: an empty or 0-valued XXH_IMPORT must still select dllimport */
+#      define XXH_PUBLIC_API __declspec(dllimport)
+#    endif
+#  else
+#    define XXH_PUBLIC_API   /* do nothing */
+#  endif
+#endif
+
+#ifdef XXH_DOXYGEN
+/*!
+ * @brief Emulate a namespace by transparently prefixing all symbols.
+ *
+ * If you want to include _and expose_ xxHash functions from within your own
+ * library, but also want to avoid symbol collisions with other libraries which
+ * may also include xxHash, you can use XXH_NAMESPACE to automatically prefix
+ * any public symbol from the xxHash library with the value of XXH_NAMESPACE
+ * (therefore, avoid empty or numeric values).
+ *
+ * Note that no change is required within the calling program as long as it
+ * includes `xxhash.h`: regular symbol names will be automatically translated
+ * by this header.
+ */
+#  define XXH_NAMESPACE /* YOUR NAME HERE */
+#  undef XXH_NAMESPACE   /* removed again at once: the definition above exists for the XXH_DOXYGEN build only */
+#endif
+
+#ifdef XXH_NAMESPACE   /* rename every public symbol to <value of XXH_NAMESPACE><symbol> */
+#  define XXH_CAT(A,B) A##B   /* raw token paste, operands are not macro-expanded */
+#  define XXH_NAME2(A,B) XXH_CAT(A,B)   /* extra level so that XXH_NAMESPACE is expanded before pasting */
+#  define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber)
+/* XXH32 family */
+#  define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32)
+#  define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState)
+#  define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState)
+#  define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset)
+#  define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update)
+#  define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest)
+#  define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState)
+#  define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash)
+#  define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical)
+/* XXH64 family */
+#  define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64)
+#  define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState)
+#  define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState)
+#  define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset)
+#  define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update)
+#  define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest)
+#  define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState)
+#  define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash)
+#  define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical)
+/* XXH3, 64-bit variant (plus the state and secret helpers) */
+#  define XXH3_64bits XXH_NAME2(XXH_NAMESPACE, XXH3_64bits)
+#  define XXH3_64bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSecret)
+#  define XXH3_64bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSeed)
+#  define XXH3_64bits_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSecretandSeed)
+#  define XXH3_createState XXH_NAME2(XXH_NAMESPACE, XXH3_createState)
+#  define XXH3_freeState XXH_NAME2(XXH_NAMESPACE, XXH3_freeState)
+#  define XXH3_copyState XXH_NAME2(XXH_NAMESPACE, XXH3_copyState)
+#  define XXH3_64bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset)
+#  define XXH3_64bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSeed)
+#  define XXH3_64bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSecret)
+#  define XXH3_64bits_reset_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSecretandSeed)
+#  define XXH3_64bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_update)
+#  define XXH3_64bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_digest)
+#  define XXH3_generateSecret XXH_NAME2(XXH_NAMESPACE, XXH3_generateSecret)
+#  define XXH3_generateSecret_fromSeed XXH_NAME2(XXH_NAMESPACE, XXH3_generateSecret_fromSeed)
+/* XXH3, 128-bit variant */
+#  define XXH128 XXH_NAME2(XXH_NAMESPACE, XXH128)
+#  define XXH3_128bits XXH_NAME2(XXH_NAMESPACE, XXH3_128bits)
+#  define XXH3_128bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSeed)
+#  define XXH3_128bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSecret)
+#  define XXH3_128bits_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSecretandSeed)
+#  define XXH3_128bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset)
+#  define XXH3_128bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSeed)
+#  define XXH3_128bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSecret)
+#  define XXH3_128bits_reset_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSecretandSeed)
+#  define XXH3_128bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_update)
+#  define XXH3_128bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_digest)
+#  define XXH128_isEqual XXH_NAME2(XXH_NAMESPACE, XXH128_isEqual)
+#  define XXH128_cmp     XXH_NAME2(XXH_NAMESPACE, XXH128_cmp)
+#  define XXH128_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH128_canonicalFromHash)
+#  define XXH128_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH128_hashFromCanonical)
+#endif
+
+
+/* *************************************
+*  Version
+***************************************/
+#define XXH_VERSION_MAJOR   0   /* first component of the version triple */
+#define XXH_VERSION_MINOR   8   /* second component */
+#define XXH_VERSION_RELEASE 1   /* third component */
+#define XXH_VERSION_NUMBER  ((XXH_VERSION_MAJOR * 10000) + (XXH_VERSION_MINOR * 100) + XXH_VERSION_RELEASE)   /* 0.8.1 -> 801 */
+
+/*!
+ * @brief Obtains the xxHash version.
+ *
+ * This is mostly useful when xxHash is compiled as a shared library,
+ * since the returned value comes from the library, as opposed to header file.
+ *
+ * @return `XXH_VERSION_NUMBER` of the invoked library.
+ */
+XXH_PUBLIC_API unsigned XXH_versionNumber (void);
+
+
+/* ****************************
+*  Common basic types
+******************************/
+#include <stddef.h>   /* size_t */
+typedef enum { XXH_OK = 0, XXH_ERROR = 1 } XXH_errorcode;   /* status returned by the *_reset(), *_update() and *_freeState() functions */
+
+
+/*-**********************************************************************
+*  32-bit hash
+************************************************************************/
+#if defined(XXH_DOXYGEN) /* documentation build: <stdint.h> is deliberately not included */
+/*!
+ * @brief An unsigned 32-bit integer.
+ *
+ * Not necessarily defined to `uint32_t` but functionally equivalent.
+ */
+typedef uint32_t XXH32_hash_t;
+
+#elif !defined(__VMS) \
+  && (defined(__cplusplus) \
+      || (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */))
+    /* C++, or C99 and later (VMS excluded): take the type from <stdint.h> */
+#   include <stdint.h>
+    typedef uint32_t XXH32_hash_t;
+
+#else
+    /* otherwise use the first basic unsigned type whose maximum is 0xFFFFFFFF */
+#   include <limits.h>
+#   if UINT_MAX == 0xFFFFFFFFUL
+      typedef unsigned int XXH32_hash_t;
+#   elif ULONG_MAX == 0xFFFFFFFFUL
+      typedef unsigned long XXH32_hash_t;
+#   else
+#     error "unsupported platform: need a 32-bit type"
+#   endif
+#endif
+
+/*!
+ * @}
+ *
+ * @defgroup xxh32_family XXH32 family
+ * @ingroup public
+ * Contains functions used in the classic 32-bit xxHash algorithm.
+ *
+ * @note
+ *   XXH32 is useful for older platforms, with no or poor 64-bit performance.
+ *   Note that @ref xxh3_family provides competitive speed
+ *   for both 32-bit and 64-bit systems, and offers true 64/128 bit hash results.
+ *
+ * @see @ref xxh64_family, @ref xxh3_family : Other xxHash families
+ * @see @ref xxh32_impl for implementation details
+ * @{
+ */
+
+/*!
+ * @brief Calculates the 32-bit hash of @p input using xxHash32.
+ *
+ * Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark): 5.4 GB/s
+ *
+ * @param input The block of data to be hashed, at least @p length bytes in size.
+ * @param length The length of @p input, in bytes.
+ * @param seed The 32-bit seed to alter the hash's output predictably.
+ *
+ * @pre
+ *   The memory between @p input and @p input + @p length must be valid,
+ *   readable, contiguous memory. However, if @p length is `0`, @p input may be
+ *   `NULL`. In C++, this also must be *TriviallyCopyable*.
+ *
+ * @return The calculated 32-bit hash value.
+ *
+ * @see
+ *    XXH64(), XXH3_64bits_withSeed(), XXH3_128bits_withSeed(), XXH128():
+ *    Direct equivalents for the other variants of xxHash.
+ * @see
+ *    XXH32_createState(), XXH32_update(), XXH32_digest(): Streaming version.
+ */
+XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, XXH32_hash_t seed);
+
+/*!
+ * Streaming functions generate the xxHash value from an incremental input.
+ * This method is slower than single-call functions, due to state management.
+ * For small inputs, prefer `XXH32()` and `XXH64()`, which are better optimized.
+ *
+ * An XXH state must first be allocated using `XXH*_createState()`.
+ *
+ * Start a new hash by initializing the state with a seed using `XXH*_reset()`.
+ *
+ * Then, feed the hash state by calling `XXH*_update()` as many times as necessary.
+ *
+ * `XXH*_reset()` and `XXH*_update()` return an error code, with 0 (`XXH_OK`)
+ * meaning success and any other value meaning there is an error.
+ *
+ * Finally, a hash value can be produced anytime, by using `XXH*_digest()`.
+ * This function returns the nn-bit hash as an unsigned integer (`XXH32_hash_t` or `XXH64_hash_t`).
+ *
+ * It's still possible to continue inserting input into the hash state after a
+ * digest, and generate new hash values later on by invoking `XXH*_digest()`.
+ *
+ * When done, release the state using `XXH*_freeState()`.
+ *
+ * Example code for incrementally hashing a file:
+ * @code{.c}
+ *    #include <stdio.h>
+ *    #include <xxhash.h>
+ *    #define BUFFER_SIZE 256
+ *
+ *    // Note: XXH64 and XXH3 use the same interface.
+ *    XXH32_hash_t
+ *    hashFile(FILE* stream)
+ *    {
+ *        XXH32_state_t* state;
+ *        unsigned char buf[BUFFER_SIZE];
+ *        size_t amt;
+ *        XXH32_hash_t hash;
+ *
+ *        state = XXH32_createState();       // Create a state
+ *        assert(state != NULL);             // Error check here
+ *        XXH32_reset(state, 0xbaad5eed);    // Reset state with our seed
+ *        while ((amt = fread(buf, 1, sizeof(buf), stream)) != 0) {
+ *            XXH32_update(state, buf, amt); // Hash the file in chunks
+ *        }
+ *        hash = XXH32_digest(state);        // Finalize the hash
+ *        XXH32_freeState(state);            // Clean up
+ *        return hash;
+ *    }
+ * @endcode
+ */
+
+/*!
+ * @typedef struct XXH32_state_s XXH32_state_t
+ * @brief The opaque state struct for the XXH32 streaming API.
+ *
+ * @see XXH32_state_s for details.
+ */
+typedef struct XXH32_state_s XXH32_state_t;
+
+/*!
+ * @brief Allocates an @ref XXH32_state_t.
+ *
+ * Must be freed with XXH32_freeState().
+ * @return An allocated XXH32_state_t on success, `NULL` on failure.
+ */
+XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void);
+/*!
+ * @brief Frees an @ref XXH32_state_t.
+ *
+ * Must be allocated with XXH32_createState().
+ * @param statePtr A pointer to an @ref XXH32_state_t allocated with @ref XXH32_createState().
+ * @return XXH_OK.
+ */
+XXH_PUBLIC_API XXH_errorcode  XXH32_freeState(XXH32_state_t* statePtr);
+/*!
+ * @brief Copies one @ref XXH32_state_t to another.
+ *
+ * @param dst_state The state to copy to.
+ * @param src_state The state to copy from.
+ * @pre
+ *   @p dst_state and @p src_state must not be `NULL` and must not overlap.
+ */
+XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dst_state, const XXH32_state_t* src_state);
+
+/*!
+ * @brief Resets an @ref XXH32_state_t to begin a new hash.
+ *
+ * This function resets and seeds a state. Call it before @ref XXH32_update().
+ *
+ * @param statePtr The state struct to reset.
+ * @param seed The 32-bit seed to alter the hash result predictably.
+ *
+ * @pre
+ *   @p statePtr must not be `NULL`.
+ *
+ * @return @ref XXH_OK on success, @ref XXH_ERROR on failure.
+ */
+XXH_PUBLIC_API XXH_errorcode XXH32_reset  (XXH32_state_t* statePtr, XXH32_hash_t seed);
+
+/*!
+ * @brief Consumes a block of @p input to an @ref XXH32_state_t.
+ *
+ * Call this to incrementally consume blocks of data.
+ *
+ * @param statePtr The state struct to update.
+ * @param input The block of data to be hashed, at least @p length bytes in size.
+ * @param length The length of @p input, in bytes.
+ *
+ * @pre
+ *   @p statePtr must not be `NULL`.
+ * @pre
+ *   The memory between @p input and @p input + @p length must be valid,
+ *   readable, contiguous memory. However, if @p length is `0`, @p input may be
+ *   `NULL`. In C++, this also must be *TriviallyCopyable*.
+ *
+ * @return @ref XXH_OK on success, @ref XXH_ERROR on failure.
+ */
+XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
+
+/*!
+ * @brief Returns the calculated hash value from an @ref XXH32_state_t.
+ *
+ * @note
+ *   Calling XXH32_digest() will not affect @p statePtr, so you can update,
+ *   digest, and update again.
+ *
+ * @param statePtr The state struct to calculate the hash from.
+ *
+ * @pre
+ *  @p statePtr must not be `NULL`.
+ *
+ * @return The calculated xxHash32 value from that state.
+ */
+XXH_PUBLIC_API XXH32_hash_t  XXH32_digest (const XXH32_state_t* statePtr);
+
+/*******   Canonical representation   *******/
+
+/*
+ * The default return values from XXH functions are unsigned 32 and 64 bit
+ * integers.
+ * This is the simplest and fastest format for further post-processing.
+ *
+ * However, this leaves open the question of what is the order on the byte level,
+ * since little and big endian conventions will store the same number differently.
+ *
+ * The canonical representation settles this issue by mandating big-endian
+ * convention, the same convention as human-readable numbers (large digits first).
+ *
+ * When writing hash values to storage, sending them over a network, or printing
+ * them, it's highly recommended to use the canonical representation to ensure
+ * portability across a wider range of systems, present and future.
+ *
+ * The following functions allow transformation of hash values to and from
+ * canonical format.
+ */
+
+/*!
+ * @brief Canonical (big endian) representation of @ref XXH32_hash_t.
+ */
+typedef struct {
+    unsigned char digest[4]; /*!< Hash bytes, big endian */
+} XXH32_canonical_t;
+
+/*!
+ * @brief Converts an @ref XXH32_hash_t to a big endian @ref XXH32_canonical_t.
+ *
+ * @param dst The @ref XXH32_canonical_t pointer to be stored to.
+ * @param hash The @ref XXH32_hash_t to be converted.
+ *
+ * @pre
+ *   @p dst must not be `NULL`.
+ */
+XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash);
+
+/*!
+ * @brief Converts an @ref XXH32_canonical_t to a native @ref XXH32_hash_t.
+ *
+ * @param src The @ref XXH32_canonical_t to convert.
+ *
+ * @pre
+ *   @p src must not be `NULL`.
+ *
+ * @return The converted hash.
+ */
+XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
+
+
+#ifdef __has_attribute   /* probe for GNU-style __attribute__((x)); 0 when the compiler cannot be asked */
+# define XXH_HAS_ATTRIBUTE(x) __has_attribute(x)
+#else
+# define XXH_HAS_ATTRIBUTE(x) 0
+#endif
+
+/* C-language Attributes are added in C23. */
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ > 201710L) && defined(__has_c_attribute)
+# define XXH_HAS_C_ATTRIBUTE(x) __has_c_attribute(x)
+#else
+# define XXH_HAS_C_ATTRIBUTE(x) 0
+#endif
+
+#if defined(__cplusplus) && defined(__has_cpp_attribute)   /* probe for C++ [[x]] attributes */
+# define XXH_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x)
+#else
+# define XXH_HAS_CPP_ATTRIBUTE(x) 0
+#endif
+
+/*
+Define XXH_FALLTHROUGH to annotate intentional switch-case fall-through: the standard
+[[fallthrough]] attribute (C++17 / C23) if available, else the GNU attribute, else nothing.
+CPP17 : https://en.cppreference.com/w/cpp/language/attributes/fallthrough
+C23   : https://en.cppreference.com/w/c/language/attributes/fallthrough
+*/
+#if XXH_HAS_C_ATTRIBUTE(fallthrough)       /* C23: the probe must name the attribute itself */
+# define XXH_FALLTHROUGH [[fallthrough]]
+#elif XXH_HAS_CPP_ATTRIBUTE(fallthrough)   /* C++17 */
+# define XXH_FALLTHROUGH [[fallthrough]]
+#elif XXH_HAS_ATTRIBUTE(__fallthrough__)   /* GNU extension; reserved spelling cannot clash with a user macro named fallthrough */
+# define XXH_FALLTHROUGH __attribute__ ((__fallthrough__))
+#else
+# define XXH_FALLTHROUGH
+#endif
+
+/*!
+ * @}
+ * @ingroup public
+ * @{
+ */
+
+#ifndef XXH_NO_LONG_LONG
+/*-**********************************************************************
+*  64-bit hash
+************************************************************************/
+#if defined(XXH_DOXYGEN) /* documentation build: <stdint.h> is deliberately not included */
+/*!
+ * @brief An unsigned 64-bit integer.
+ *
+ * Not necessarily defined to `uint64_t` but functionally equivalent.
+ */
+typedef uint64_t XXH64_hash_t;
+#elif !defined (__VMS) \
+  && (defined (__cplusplus) \
+  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+#  include <stdint.h>
+   typedef uint64_t XXH64_hash_t;
+#else   /* VMS, or C older than C99: choose a basic type instead of using <stdint.h> */
+#  include <limits.h>
+#  if defined(__LP64__) && ULONG_MAX == 0xFFFFFFFFFFFFFFFFULL
+     /* LP64 ABI says uint64_t is unsigned long */
+     typedef unsigned long XXH64_hash_t;
+#  else
+     /* unsigned long long must be 64 bits wide here; NOTE(review): this branch does not verify it */
+     typedef unsigned long long XXH64_hash_t;
+#  endif
+#endif
+
+/*!
+ * @}
+ *
+ * @defgroup xxh64_family XXH64 family
+ * @ingroup public
+ * @{
+ * Contains functions used in the classic 64-bit xxHash algorithm.
+ *
+ * @note
+ *   XXH3 provides competitive speed for both 32-bit and 64-bit systems,
+ *   and offers true 64/128 bit hash results.
+ *   It provides better speed for systems with vector processing capabilities.
+ */
+
+
+/*!
+ * @brief Calculates the 64-bit hash of @p input using xxHash64.
+ *
+ * This function usually runs faster on 64-bit systems, but slower on 32-bit
+ * systems (see benchmark).
+ *
+ * @param input The block of data to be hashed, at least @p length bytes in size.
+ * @param length The length of @p input, in bytes.
+ * @param seed The 64-bit seed to alter the hash's output predictably.
+ *
+ * @pre
+ *   The memory between @p input and @p input + @p length must be valid,
+ *   readable, contiguous memory. However, if @p length is `0`, @p input may be
+ *   `NULL`. In C++, this also must be *TriviallyCopyable*.
+ *
+ * @return The calculated 64-bit hash.
+ *
+ * @see
+ *    XXH32(), XXH3_64bits_withSeed(), XXH3_128bits_withSeed(), XXH128():
+ *    Direct equivalents for the other variants of xxHash.
+ * @see
+ *    XXH64_createState(), XXH64_update(), XXH64_digest(): Streaming version.
+ */
+XXH_PUBLIC_API XXH64_hash_t XXH64(const void* input, size_t length, XXH64_hash_t seed);
+
+/*******   Streaming (same interface as the XXH32 streaming functions)   *******/
+/*!
+ * @brief The opaque state struct for the XXH64 streaming API.
+ *
+ * @see XXH64_state_s for details.
+ */
+typedef struct XXH64_state_s XXH64_state_t;   /* incomplete type */
+XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void);   /* 64-bit counterpart of XXH32_createState() */
+XXH_PUBLIC_API XXH_errorcode  XXH64_freeState(XXH64_state_t* statePtr);   /* counterpart of XXH32_freeState() */
+XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dst_state, const XXH64_state_t* src_state);   /* counterpart of XXH32_copyState() */
+
+XXH_PUBLIC_API XXH_errorcode XXH64_reset  (XXH64_state_t* statePtr, XXH64_hash_t seed);   /* counterpart of XXH32_reset(), with an XXH64_hash_t seed */
+XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length);   /* counterpart of XXH32_update() */
+XXH_PUBLIC_API XXH64_hash_t  XXH64_digest (const XXH64_state_t* statePtr);   /* counterpart of XXH32_digest(), returns an XXH64_hash_t */
+
+/*******   Canonical representation   *******/
+typedef struct { unsigned char digest[sizeof(XXH64_hash_t)]; } XXH64_canonical_t;   /* byte array sized to hold one XXH64_hash_t */
+XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash);   /* counterpart of XXH32_canonicalFromHash() */
+XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src);   /* counterpart of XXH32_hashFromCanonical() */
+
+/*!
+ * @}
+ * ************************************************************************
+ * @defgroup xxh3_family XXH3 family
+ * @ingroup public
+ * @{
+ *
+ * XXH3 is a more recent hash algorithm featuring:
+ *  - Improved speed for both small and large inputs
+ *  - True 64-bit and 128-bit outputs
+ *  - SIMD acceleration
+ *  - Improved 32-bit viability
+ *
+ * Speed analysis methodology is explained here:
+ *
+ *    https://fastcompression.blogspot.com/2019/03/presenting-xxh3.html
+ *
+ * Compared to XXH64, expect XXH3 to run approximately
+ * 2x faster on large inputs and more than 3x faster on small ones,
+ * exact differences vary depending on platform.
+ *
+ * XXH3's speed benefits greatly from SIMD and 64-bit arithmetic,
+ * but does not require it.
+ * Any 32-bit and 64-bit targets that can run XXH32 smoothly
+ * can run XXH3 at competitive speeds, even without vector support.
+ * Further details are explained in the implementation.
+ *
+ * Optimized implementations are provided for AVX512, AVX2, SSE2, NEON, POWER8,
+ * ZVector and scalar targets. This can be controlled via the XXH_VECTOR macro.
+ *
+ * XXH3 implementation is portable:
+ * it has a generic C90 formulation that can be compiled on any platform,
+ * and all implementations generate exactly the same hash value on all platforms.
+ * Starting from v0.8.0, it's also labelled "stable", meaning that
+ * any future version will also generate the same hash value.
+ *
+ * XXH3 offers 2 variants, _64bits and _128bits.
+ *
+ * When only 64 bits are needed, prefer invoking the _64bits variant, as it
+ * reduces the amount of mixing, resulting in faster speed on small inputs.
+ * It's also generally simpler to manipulate a scalar return type than a struct.
+ *
+ * The API supports one-shot hashing, streaming mode, and custom secrets.
+ */
+
+/*-**********************************************************************
+*  XXH3 64-bit variant
+************************************************************************/
+
+/* XXH3_64bits():
+ * default 64-bit variant, using default secret and default seed of 0.
+ * It's the fastest variant. */
+XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(const void* data, size_t len);
+
+/*
+ * XXH3_64bits_withSeed():
+ * This variant generates a custom secret on the fly
+ * based on default secret altered using the `seed` value.
+ * While this operation is decently fast, note that it's not completely free.
+ * Note: seed==0 produces the same results as XXH3_64bits().
+ */
+XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSeed(const void* data, size_t len, XXH64_hash_t seed);
+
+/*!
+ * The bare minimum size for a custom secret.
+ *
+ * @see
+ *  XXH3_64bits_withSecret(), XXH3_64bits_reset_withSecret(),
+ *  XXH3_128bits_withSecret(), XXH3_128bits_reset_withSecret().
+ */
+#define XXH3_SECRET_SIZE_MIN 136
+
+/*
+ * XXH3_64bits_withSecret():
+ * It's possible to provide any blob of bytes as a "secret" to generate the hash.
+ * This makes it more difficult for an external actor to prepare an intentional collision.
+ * The main condition is that secretSize *must* be large enough (>= XXH3_SECRET_SIZE_MIN).
+ * However, the quality of the secret impacts the dispersion of the hash algorithm.
+ * Therefore, the secret _must_ look like a bunch of random bytes.
+ * Avoid "trivial" or structured data such as repeated sequences or a text document.
+ * Whenever in doubt about the "randomness" of the blob of bytes,
+ * consider employing "XXH3_generateSecret()" instead (see below).
+ * It will generate a proper high entropy secret derived from the blob of bytes.
+ * Another advantage of using XXH3_generateSecret() is that
+ * it guarantees that all bits within the initial blob of bytes
+ * will impact every bit of the output.
+ * This is not necessarily the case when using the blob of bytes directly
+ * because, when hashing _small_ inputs, only a portion of the secret is employed.
+ */
+XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSecret(const void* data, size_t len, const void* secret, size_t secretSize);
+
+
+/*******   Streaming   *******/
+/*
+ * Streaming requires state maintenance.
+ * This operation costs memory and CPU.
+ * As a consequence, streaming is slower than one-shot hashing.
+ * For better performance, prefer one-shot functions whenever applicable.
+ */
+
+/*!
+ * @brief The state struct for the XXH3 streaming API.
+ *
+ * @see XXH3_state_s for details.
+ */
+typedef struct XXH3_state_s XXH3_state_t;
+XXH_PUBLIC_API XXH3_state_t* XXH3_createState(void);
+XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr);
+XXH_PUBLIC_API void XXH3_copyState(XXH3_state_t* dst_state, const XXH3_state_t* src_state);
+
+/*
+ * XXH3_64bits_reset():
+ * Initialize with default parameters.
+ * digest will be equivalent to `XXH3_64bits()`.
+ */
+XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset(XXH3_state_t* statePtr);
+/*
+ * XXH3_64bits_reset_withSeed():
+ * Generate a custom secret from `seed`, and store it into `statePtr`.
+ * digest will be equivalent to `XXH3_64bits_withSeed()`.
+ */
+XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed);
+/*
+ * XXH3_64bits_reset_withSecret():
+ * `secret` is referenced, it _must outlive_ the hash streaming session.
+ * Similar to one-shot API, `secretSize` must be >= `XXH3_SECRET_SIZE_MIN`,
+ * and the quality of produced hash values depends on secret's entropy
+ * (secret's content should look like a bunch of random bytes).
+ * When in doubt about the randomness of a candidate `secret`,
+ * consider employing `XXH3_generateSecret()` instead (see below).
+ */
+XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize);
+
+XXH_PUBLIC_API XXH_errorcode XXH3_64bits_update (XXH3_state_t* statePtr, const void* input, size_t length);
+XXH_PUBLIC_API XXH64_hash_t  XXH3_64bits_digest (const XXH3_state_t* statePtr);
+
+/* note : canonical representation of XXH3 is the same as XXH64
+ * since they both produce XXH64_hash_t values */
+
+
+/*-**********************************************************************
+*  XXH3 128-bit variant
+************************************************************************/
+
+/*!
+ * @brief The return value from 128-bit hashes.
+ *
+ * The halves are stored low word first (little endian word order), although
+ * each 64-bit field is itself held in native endianness.
+ */
+typedef struct {
+    XXH64_hash_t low64;   /*!< Low 64 bits: `value & 0xFFFFFFFFFFFFFFFF` */
+    XXH64_hash_t high64;  /*!< High 64 bits: `value >> 64` */
+} XXH128_hash_t;
+
+XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(const void* data, size_t len);
+XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_withSeed(const void* data, size_t len, XXH64_hash_t seed);
+XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_withSecret(const void* data, size_t len, const void* secret, size_t secretSize);
+
+/*******   Streaming   *******/
+/*
+ * Streaming requires state maintenance.
+ * This operation costs memory and CPU.
+ * As a consequence, streaming is slower than one-shot hashing.
+ * For better performance, prefer one-shot functions whenever applicable.
+ *
+ * XXH3_128bits uses the same XXH3_state_t as XXH3_64bits().
+ * Use already declared XXH3_createState() and XXH3_freeState().
+ *
+ * All reset and streaming functions have same meaning as their 64-bit counterpart.
+ */
+
+XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset(XXH3_state_t* statePtr);
+XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed);
+XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize);
+
+XXH_PUBLIC_API XXH_errorcode XXH3_128bits_update (XXH3_state_t* statePtr, const void* input, size_t length);
+XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (const XXH3_state_t* statePtr);
+
+/* The following helper functions make it possible to compare XXH128_hash_t values.
+ * Since XXH128_hash_t is a structure, this capability is not offered by the language.
+ * Note: For better performance, these functions can be inlined using XXH_INLINE_ALL */
+
+/*!
+ * XXH128_isEqual():
+ * Return: 1 if `h1` and `h2` are equal, 0 if they are not.
+ */
+XXH_PUBLIC_API int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2);
+
+/*!
+ * XXH128_cmp():
+ *
+ * This comparator is compatible with stdlib's `qsort()`/`bsearch()`.
+ *
+ * return: >0 if *h128_1  > *h128_2
+ *         =0 if *h128_1 == *h128_2
+ *         <0 if *h128_1  < *h128_2
+ */
+XXH_PUBLIC_API int XXH128_cmp(const void* h128_1, const void* h128_2);
+
+
+/*******   Canonical representation   *******/
+typedef struct { unsigned char digest[sizeof(XXH128_hash_t)]; } XXH128_canonical_t;
+XXH_PUBLIC_API void XXH128_canonicalFromHash(XXH128_canonical_t* dst, XXH128_hash_t hash);
+XXH_PUBLIC_API XXH128_hash_t XXH128_hashFromCanonical(const XXH128_canonical_t* src);
+
+
+#endif  /* XXH_NO_LONG_LONG */
+
+/*!
+ * @}
+ */
+#endif /* XXHASH_H_5627135585666179 */
+
+
+
+#if defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742)
+#define XXHASH_H_STATIC_13879238742
+/* ****************************************************************************
+ * This section contains declarations which are not guaranteed to remain stable.
+ * They may change in future versions, becoming incompatible with a different
+ * version of the library.
+ * These declarations should only be used with static linking.
+ * Never use them in association with dynamic linking!
+ ***************************************************************************** */
+
+/*
+ * These definitions are only present to allow static allocation
+ * of XXH states, on stack or in a struct, for example.
+ * Never **ever** access their members directly.
+ */
+
+/*!
+ * @internal
+ * @brief Structure for XXH32 streaming API.
+ *
+ * @note This is only defined when @ref XXH_STATIC_LINKING_ONLY,
+ * @ref XXH_INLINE_ALL, or @ref XXH_IMPLEMENTATION is defined. Otherwise it is
+ * an opaque type. This allows fields to safely be changed.
+ *
+ * Typedef'd to @ref XXH32_state_t.
+ * Do not access the members of this struct directly.
+ * @see XXH64_state_s, XXH3_state_s
+ */
+struct XXH32_state_s {
+   XXH32_hash_t total_len_32; /*!< Total length hashed, modulo 2^32 */
+   XXH32_hash_t large_len;    /*!< Whether the hashed length is >= 16 (handles @ref total_len_32 overflow) */
+   XXH32_hash_t v[4];         /*!< Accumulator lanes */
+   XXH32_hash_t mem32[4];     /*!< Internal buffer for partial reads. Treated as unsigned char[16]. */
+   XXH32_hash_t memsize;      /*!< Amount of data in @ref mem32 */
+   XXH32_hash_t reserved;     /*!< Reserved field. Do not read or write to it; it may be removed. */
+};   /* typedef'd to XXH32_state_t */
+
+
+#ifndef XXH_NO_LONG_LONG  /* defined when there is no 64-bit support */
+
+/*!
+ * @internal
+ * @brief Structure for XXH64 streaming API.
+ *
+ * @note This is only defined when @ref XXH_STATIC_LINKING_ONLY,
+ * @ref XXH_INLINE_ALL, or @ref XXH_IMPLEMENTATION is defined. Otherwise it is
+ * an opaque type. This allows fields to safely be changed.
+ *
+ * Typedef'd to @ref XXH64_state_t.
+ * Do not access the members of this struct directly.
+ * @see XXH32_state_s, XXH3_state_s
+ */
+struct XXH64_state_s {
+   XXH64_hash_t total_len;    /*!< Total length hashed. This is always 64-bit. */
+   XXH64_hash_t v[4];         /*!< Accumulator lanes */
+   XXH64_hash_t mem64[4];     /*!< Internal buffer for partial reads. Treated as unsigned char[32]. */
+   XXH32_hash_t memsize;      /*!< Amount of data in @ref mem64 */
+   XXH32_hash_t reserved32;   /*!< Reserved field, needed for padding anyway. */
+   XXH64_hash_t reserved64;   /*!< Reserved field. Do not read or write to it; it may be removed. */
+};   /* typedef'd to XXH64_state_t */
+
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* >= C11 */
+#  include <stdalign.h>
+#  define XXH_ALIGN(n)      alignas(n)
+#elif defined(__cplusplus) && (__cplusplus >= 201103L) /* >= C++11 */
+/* In C++ alignas() is a keyword */
+#  define XXH_ALIGN(n)      alignas(n)
+#elif defined(__GNUC__)
+#  define XXH_ALIGN(n)      __attribute__ ((aligned(n)))
+#elif defined(_MSC_VER)
+#  define XXH_ALIGN(n)      __declspec(align(n))
+#else
+#  define XXH_ALIGN(n)   /* disabled */
+#endif
+
+/* Old GCC versions only accept the attribute after the type in structures. */
+#if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L))   /* C11+ */ \
+    && ! (defined(__cplusplus) && (__cplusplus >= 201103L)) /* >= C++11 */ \
+    && defined(__GNUC__)
+#   define XXH_ALIGN_MEMBER(align, type) type XXH_ALIGN(align)
+#else
+#   define XXH_ALIGN_MEMBER(align, type) XXH_ALIGN(align) type
+#endif
+
+/*!
+ * @brief The size of the internal XXH3 buffer.
+ *
+ * This is the optimal update size for incremental hashing.
+ *
+ * @see XXH3_64bits_update(), XXH3_128bits_update().
+ */
+#define XXH3_INTERNALBUFFER_SIZE 256
+
+/*!
+ * @brief Default size of the secret buffer (and @ref XXH3_kSecret).
+ *
+ * This is the size used in @ref XXH3_kSecret and the seeded functions.
+ *
+ * Not to be confused with @ref XXH3_SECRET_SIZE_MIN.
+ */
+#define XXH3_SECRET_DEFAULT_SIZE 192
+
+/*!
+ * @internal
+ * @brief Structure for XXH3 streaming API.
+ *
+ * @note This is only defined when @ref XXH_STATIC_LINKING_ONLY,
+ * @ref XXH_INLINE_ALL, or @ref XXH_IMPLEMENTATION is defined.
+ * Otherwise it is an opaque type.
+ * Never use this definition in combination with dynamic library.
+ * This allows fields to safely be changed in the future.
+ *
+ * @note ** This structure has a strict alignment requirement of 64 bytes!! **
+ * Do not allocate this with `malloc()` or `new`,
+ * it will not be sufficiently aligned.
+ * Use @ref XXH3_createState() and @ref XXH3_freeState(), or stack allocation.
+ *
+ * Typedef'd to @ref XXH3_state_t.
+ * Never access the members of this struct directly.
+ *
+ * @see XXH3_INITSTATE() for stack initialization.
+ * @see XXH3_createState(), XXH3_freeState().
+ * @see XXH32_state_s, XXH64_state_s
+ */
+struct XXH3_state_s {
+   XXH_ALIGN_MEMBER(64, XXH64_hash_t acc[8]);
+       /*!< The 8 accumulators. Similar to `v` in @ref XXH32_state_s::v and @ref XXH64_state_s::v */
+   XXH_ALIGN_MEMBER(64, unsigned char customSecret[XXH3_SECRET_DEFAULT_SIZE]);
+       /*!< Used to store a custom secret generated from a seed. */
+   XXH_ALIGN_MEMBER(64, unsigned char buffer[XXH3_INTERNALBUFFER_SIZE]);
+       /*!< The internal buffer. @see XXH32_state_s::mem32 */
+   XXH32_hash_t bufferedSize;
+       /*!< The amount of memory in @ref buffer, @see XXH32_state_s::memsize */
+   XXH32_hash_t useSeed;
+       /*!< Presumably a flag for seeded operation (name-based guess; also fills padding on 64-bit) - TODO confirm. */
+   size_t nbStripesSoFar;
+       /*!< Number of stripes processed. */
+   XXH64_hash_t totalLen;
+       /*!< Total length hashed. 64-bit even on 32-bit targets. */
+   size_t nbStripesPerBlock;
+       /*!< Number of stripes per block. */
+   size_t secretLimit;
+       /*!< Described as the size of @ref customSecret or @ref extSecret; NOTE(review): name suggests a limit offset - confirm. */
+   XXH64_hash_t seed;
+       /*!< Seed for _withSeed variants. Must be zero otherwise, @see XXH3_INITSTATE() */
+   XXH64_hash_t reserved64;
+       /*!< Reserved field. */
+   const unsigned char* extSecret;
+       /*!< Reference to an external secret for the _withSecret variants, NULL
+        *   for other variants. */
+   /* note: there may be some padding at the end due to alignment on 64 bytes */
+}; /* typedef'd to XXH3_state_t */
+
+#undef XXH_ALIGN_MEMBER
+
+/*!
+ * @brief Initializes a stack-allocated `XXH3_state_s`.
+ *
+ * When the @ref XXH3_state_t structure is merely emplaced on stack,
+ * it should be initialized with XXH3_INITSTATE() or a memset()
+ * in case its first reset uses XXH3_NNbits_reset_withSeed().
+ * This init can be omitted if the first reset uses default or _withSecret mode.
+ * This operation isn't necessary when the state is created with XXH3_createState().
+ * Note that this doesn't prepare the state for a streaming operation,
+ * it's still necessary to use XXH3_NNbits_reset*() afterwards.
+ */
+#define XXH3_INITSTATE(XXH3_state_ptr)   { (XXH3_state_ptr)->seed = 0; }
+
+
+/* XXH128() :
+ * simple alias to pre-selected XXH3_128bits variant
+ */
+XXH_PUBLIC_API XXH128_hash_t XXH128(const void* data, size_t len, XXH64_hash_t seed);
+
+
+/* ===   Experimental API   === */
+/* Symbols defined below must be considered tied to a specific library version. */
+
+/*
+ * XXH3_generateSecret():
+ *
+ * Derive a high-entropy secret from any user-defined content, named customSeed.
+ * The generated secret can be used in combination with `*_withSecret()` functions.
+ * The `_withSecret()` variants are useful to provide a higher level of protection than 64-bit seed,
+ * as it becomes much more difficult for an external actor to guess how to impact the calculation logic.
+ *
+ * The function accepts as input a custom seed of any length and any content,
+ * and derives from it a high-entropy secret of length @secretSize
+ * into an already allocated buffer @secretBuffer.
+ * @secretSize must be >= XXH3_SECRET_SIZE_MIN
+ *
+ * The generated secret can then be used with any `*_withSecret()` variant.
+ * Functions `XXH3_128bits_withSecret()`, `XXH3_64bits_withSecret()`,
+ * `XXH3_128bits_reset_withSecret()` and `XXH3_64bits_reset_withSecret()`
+ * are part of this list. They all accept a `secret` parameter
+ * which must be large enough for implementation reasons (>= XXH3_SECRET_SIZE_MIN)
+ * _and_ feature very high entropy (consist of random-looking bytes).
+ * These conditions can be a high bar to meet, so
+ * XXH3_generateSecret() can be employed to ensure proper quality.
+ *
+ * customSeed can be anything. It can have any size, even small ones,
+ * and its content can be anything, even "poor entropy" sources such as a bunch of zeroes.
+ * The resulting `secret` will nonetheless provide all required qualities.
+ *
+ * When customSeedSize > 0, supplying NULL as customSeed is undefined behavior.
+ */
+XXH_PUBLIC_API XXH_errorcode XXH3_generateSecret(void* secretBuffer, size_t secretSize, const void* customSeed, size_t customSeedSize);
+
+
+/*
+ * XXH3_generateSecret_fromSeed():
+ *
+ * Generate the same secret as the _withSeed() variants.
+ *
+ * The resulting secret has a length of XXH3_SECRET_DEFAULT_SIZE (necessarily).
+ * @secretBuffer must be already allocated, of size at least XXH3_SECRET_DEFAULT_SIZE bytes.
+ *
+ * The generated secret can be used in combination with
+ * `*_withSecret()` and `_withSecretandSeed()` variants.
+ * This generator is notably useful in combination with `_withSecretandSeed()`,
+ * as a way to emulate a faster `_withSeed()` variant.
+ */
+XXH_PUBLIC_API void XXH3_generateSecret_fromSeed(void* secretBuffer, XXH64_hash_t seed);
+
+/*
+ * *_withSecretandSeed() :
+ * These variants generate hash values using either
+ * @seed for "short" keys (< XXH3_MIDSIZE_MAX = 240 bytes)
+ * or @secret for "large" keys (>= XXH3_MIDSIZE_MAX).
+ *
+ * This generally benefits speed, compared to `_withSeed()` or `_withSecret()`.
+ * `_withSeed()` has to generate the secret on the fly for "large" keys.
+ * It's fast, but can be perceptible for "not so large" keys (< 1 KB).
+ * `_withSecret()` has to generate the masks on the fly for "small" keys,
+ * which requires more instructions than _withSeed() variants.
+ * Therefore, _withSecretandSeed variant combines the best of both worlds.
+ *
+ * When @secret has been generated by XXH3_generateSecret_fromSeed(),
+ * this variant produces *exactly* the same results as `_withSeed()` variant,
+ * hence offering only a pure speed benefit on "large" input,
+ * by skipping the need to regenerate the secret for every large input.
+ *
+ * Another usage scenario is to hash the secret to a 64-bit hash value,
+ * for example with XXH3_64bits(), which then becomes the seed,
+ * and then employ both the seed and the secret in _withSecretandSeed().
+ * On top of speed, an added benefit is that each bit in the secret
+ * has a 50% chance to swap each bit in the output,
+ * via its impact to the seed.
+ * This is not guaranteed when using the secret directly in "small data" scenarios,
+ * because only portions of the secret are employed for small data.
+ */
+XXH_PUBLIC_API XXH64_hash_t
+XXH3_64bits_withSecretandSeed(const void* data, size_t len,
+                              const void* secret, size_t secretSize,
+                              XXH64_hash_t seed);
+
+XXH_PUBLIC_API XXH128_hash_t
+XXH3_128bits_withSecretandSeed(const void* data, size_t len,
+                               const void* secret, size_t secretSize,
+                               XXH64_hash_t seed64);
+
+XXH_PUBLIC_API XXH_errorcode
+XXH3_64bits_reset_withSecretandSeed(XXH3_state_t* statePtr,
+                                    const void* secret, size_t secretSize,
+                                    XXH64_hash_t seed64);
+
+XXH_PUBLIC_API XXH_errorcode
+XXH3_128bits_reset_withSecretandSeed(XXH3_state_t* statePtr,
+                                     const void* secret, size_t secretSize,
+                                     XXH64_hash_t seed64);
+
+
+#endif  /* XXH_NO_LONG_LONG */
+#if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
+#  define XXH_IMPLEMENTATION
+#endif
+
+#endif  /* defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742) */
+
+
+/* ======================================================================== */
+/* ======================================================================== */
+/* ======================================================================== */
+
+
+/*-**********************************************************************
+ * xxHash implementation
+ *-**********************************************************************
+ * xxHash's implementation used to be hosted inside xxhash.c.
+ *
+ * However, inlining requires implementation to be visible to the compiler,
+ * hence be included alongside the header.
+ * Previously, implementation was hosted inside xxhash.c,
+ * which was then #included when inlining was activated.
+ * This construction created issues with a few build and install systems,
+ * as it required xxhash.c to be stored in /include directory.
+ *
+ * xxHash implementation is now directly integrated within xxhash.h.
+ * As a consequence, xxhash.c is no longer needed in /include.
+ *
+ * xxhash.c is still available and is still useful.
+ * In a "normal" setup, when xxhash is not inlined,
+ * xxhash.h only exposes the prototypes and public symbols,
+ * while xxhash.c can be built into an object file xxhash.o
+ * which can then be linked into the final binary.
+ ************************************************************************/
+
+#if ( defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API) \
+   || defined(XXH_IMPLEMENTATION) ) && !defined(XXH_IMPLEM_13a8737387)
+#  define XXH_IMPLEM_13a8737387
+
+/* *************************************
+*  Tuning parameters
+***************************************/
+
+/*!
+ * @defgroup tuning Tuning parameters
+ * @{
+ *
+ * Various macros to control xxHash's behavior.
+ */
+#ifdef XXH_DOXYGEN
+/*!
+ * @brief Define this to disable 64-bit code.
+ *
+ * Useful if only using the @ref xxh32_family and you have a strict C90 compiler.
+ */
+#  define XXH_NO_LONG_LONG
+#  undef XXH_NO_LONG_LONG /* don't actually */
+/*!
+ * @brief Controls how unaligned memory is accessed.
+ *
+ * By default, access to unaligned memory is controlled by `memcpy()`, which is
+ * safe and portable.
+ *
+ * Unfortunately, on some target/compiler combinations, the generated assembly
+ * is sub-optimal.
+ *
+ * The switch below allows selection of a different access method
+ * in the search for improved performance.
+ *
+ * @par Possible options:
+ *
+ *  - `XXH_FORCE_MEMORY_ACCESS=0` (default): `memcpy`
+ *   @par
+ *     Use `memcpy()`. Safe and portable. Note that most modern compilers will
+ *     eliminate the function call and treat it as an unaligned access.
+ *
+ *  - `XXH_FORCE_MEMORY_ACCESS=1`: `__attribute__((packed))`
+ *   @par
+ *     Depends on compiler extensions and is therefore not portable.
+ *     This method is safe _if_ your compiler supports it,
+ *     and *generally* as fast or faster than `memcpy`.
+ *
+ *  - `XXH_FORCE_MEMORY_ACCESS=2`: Direct cast
+ *  @par
+ *     Casts directly and dereferences. This method doesn't depend on the
+ *     compiler, but it violates the C standard as it directly dereferences an
+ *     unaligned pointer. It can generate buggy code on targets which do not
+ *     support unaligned memory accesses, but in some circumstances, it's the
+ *     only known way to get the most performance.
+ *
+ *  - `XXH_FORCE_MEMORY_ACCESS=3`: Byteshift
+ *  @par
+ *     Also portable. This can generate the best code on old compilers which don't
+ *     inline small `memcpy()` calls, and it might also be faster on big-endian
+ *     systems which lack a native byteswap instruction. However, some compilers
+ *     will emit literal byteshifts even if the target supports unaligned access.
+ *  .
+ *
+ * @warning
+ *   Methods 1 and 2 rely on implementation-defined behavior. Use these with
+ *   care, as what works on one compiler/platform/optimization level may cause
+ *   another to read garbage data or even crash.
+ *
+ * See http://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html for details.
+ *
+ * Prefer these methods in priority order (0 > 3 > 1 > 2)
+ */
+#  define XXH_FORCE_MEMORY_ACCESS 0
+
+/*!
+ * @def XXH_FORCE_ALIGN_CHECK
+ * @brief If defined to non-zero, adds a special path for aligned inputs (XXH32()
+ * and XXH64() only).
+ *
+ * This is an important performance trick for architectures without decent
+ * unaligned memory access performance.
+ *
+ * It checks for input alignment, and when conditions are met, uses a "fast
+ * path" employing direct 32-bit/64-bit reads, resulting in _dramatically
+ * faster_ read speed.
+ *
+ * The check costs one initial branch per hash, which is generally negligible,
+ * but not zero.
+ *
+ * Moreover, it's not useful to generate an additional code path if memory
+ * access uses the same instruction for both aligned and unaligned
+ * addresses (e.g. x86 and aarch64).
+ *
+ * In these cases, the alignment check can be removed by setting this macro to 0.
+ * Then the code will always use unaligned memory access.
+ * Align check is automatically disabled on x86, x64 & arm64,
+ * which are platforms known to offer good unaligned memory access performance.
+ *
+ * This option does not affect XXH3 (only XXH32 and XXH64).
+ */
+#  define XXH_FORCE_ALIGN_CHECK 0
+
+/*!
+ * @def XXH_NO_INLINE_HINTS
+ * @brief When non-zero, sets all functions to `static`.
+ *
+ * By default, xxHash tries to force the compiler to inline almost all internal
+ * functions.
+ *
+ * This can usually improve performance due to reduced jumping and improved
+ * constant folding, but significantly increases the size of the binary which
+ * might not be favorable.
+ *
+ * Additionally, sometimes the forced inlining can be detrimental to performance,
+ * depending on the architecture.
+ *
+ * XXH_NO_INLINE_HINTS marks all internal functions as static, giving the
+ * compiler full control on whether to inline or not.
+ *
+ * When not optimizing (-O0), optimizing for size (-Os, -Oz), or using
+ * -fno-inline with GCC or Clang, this will automatically be defined.
+ */
+#  define XXH_NO_INLINE_HINTS 0
+
+/*!
+ * @def XXH32_ENDJMP
+ * @brief Whether to use a jump for `XXH32_finalize`.
+ *
+ * For performance, `XXH32_finalize` uses multiple branches in the finalizer.
+ * This is generally preferable for performance,
+ * but depending on exact architecture, a jmp may be preferable.
+ *
+ * This setting is only possibly making a difference for very small inputs.
+ */
+#  define XXH32_ENDJMP 0
+
+/*!
+ * @internal
+ * @brief Redefines old internal names.
+ *
+ * For compatibility with code that uses xxHash's internals before the names
+ * were changed to improve namespacing. There is no other reason to use this.
+ */
+#  define XXH_OLD_NAMES
+#  undef XXH_OLD_NAMES /* don't actually use, it is ugly. */
+#endif /* XXH_DOXYGEN */
+/*!
+ * @}
+ */
+
+#ifndef XXH_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
+   /* prefer __packed__ structures (method 1) for gcc on armv7+ and mips */
+#  if !defined(__clang__) && \
+( \
+    (defined(__INTEL_COMPILER) && !defined(_WIN32)) || \
+    ( \
+        defined(__GNUC__) && ( \
+            (defined(__ARM_ARCH) && __ARM_ARCH >= 7) || \
+            ( \
+                defined(__mips__) && \
+                (__mips <= 5 || __mips_isa_rev < 6) && \
+                (!defined(__mips16) || defined(__mips_mips16e2)) \
+            ) \
+        ) \
+    ) \
+)
+#    define XXH_FORCE_MEMORY_ACCESS 1
+#  endif
+#endif
+
+#ifndef XXH_FORCE_ALIGN_CHECK  /* can be defined externally */
+#  if defined(__i386)  || defined(__x86_64__) || defined(__aarch64__) \
+   || defined(_M_IX86) || defined(_M_X64)     || defined(_M_ARM64) /* visual */
+#    define XXH_FORCE_ALIGN_CHECK 0
+#  else
+#    define XXH_FORCE_ALIGN_CHECK 1
+#  endif
+#endif
+
+#ifndef XXH_NO_INLINE_HINTS
+#  if defined(__OPTIMIZE_SIZE__) /* -Os, -Oz */ \
+   || defined(__NO_INLINE__)     /* -O0, -fno-inline */
+#    define XXH_NO_INLINE_HINTS 1
+#  else
+#    define XXH_NO_INLINE_HINTS 0
+#  endif
+#endif
+
+#ifndef XXH32_ENDJMP
+/* generally preferable for performance */
+#  define XXH32_ENDJMP 0
+#endif
+
+/*!
+ * @defgroup impl Implementation
+ * @{
+ */
+
+
+/* *************************************
+*  Includes & Memory related functions
+***************************************/
+/*
+ * Modify the local functions below should you wish to use
+ * different memory routines for malloc() and free()
+ */
+#include <stdlib.h>
+
+/*!
+ * @internal
+ * @brief Allocation hook: forwards @p s to malloc(). Edit this function to use a different allocator.
+ */
+static void* XXH_malloc(size_t s) { return malloc(s); }
+
+/*!
+ * @internal
+ * @brief Deallocation hook: forwards @p p to free(). Edit this function to use a different routine.
+ */
+static void XXH_free(void* p) { free(p); }
+
+#include <string.h>
+
+/*!
+ * @internal
+ * @brief Copy hook: forwards to memcpy() and returns its result. Edit this function to use a different routine.
+ */
+static void* XXH_memcpy(void* dest, const void* src, size_t size)
+{
+    return memcpy(dest,src,size);
+}
+
+#include <limits.h>   /* ULLONG_MAX */
+
+
+/* *************************************
+*  Compiler Specific Options
+***************************************/
+#ifdef _MSC_VER /* Visual Studio warning fix */
+#  pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
+#endif
+
+#if XXH_NO_INLINE_HINTS  /* disable inlining hints */
+#  if defined(__GNUC__) || defined(__clang__)
+#    define XXH_FORCE_INLINE static __attribute__((unused))
+#  else
+#    define XXH_FORCE_INLINE static
+#  endif
+#  define XXH_NO_INLINE static
+/* enable inlining hints */
+#elif defined(__GNUC__) || defined(__clang__)
+#  define XXH_FORCE_INLINE static __inline__ __attribute__((always_inline, unused))
+#  define XXH_NO_INLINE static __attribute__((noinline))
+#elif defined(_MSC_VER)  /* Visual Studio */
+#  define XXH_FORCE_INLINE static __forceinline
+#  define XXH_NO_INLINE static __declspec(noinline)
+#elif defined (__cplusplus) \
+  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L))   /* C99 */
+#  define XXH_FORCE_INLINE static inline
+#  define XXH_NO_INLINE static
+#else
+#  define XXH_FORCE_INLINE static
+#  define XXH_NO_INLINE static
+#endif
+
+
+
+/* *************************************
+*  Debug
+***************************************/
+/*!
+ * @ingroup tuning
+ * @def XXH_DEBUGLEVEL
+ * @brief Sets the debugging level.
+ *
+ * XXH_DEBUGLEVEL is expected to be defined externally, typically via the
+ * compiler's command line options. The value must be a number.
+ */
+#ifndef XXH_DEBUGLEVEL
+#  ifdef DEBUGLEVEL /* backwards compat */
+#    define XXH_DEBUGLEVEL DEBUGLEVEL
+#  else
+#    define XXH_DEBUGLEVEL 0
+#  endif
+#endif
+
+#if (XXH_DEBUGLEVEL>=1)
+#  include <assert.h>   /* note: can still be disabled with NDEBUG */
+#  define XXH_ASSERT(c)   assert(c)
+#else
+#  define XXH_ASSERT(c)   ((void)0)
+#endif
+
+/* note: use after variable declarations */
+#ifndef XXH_STATIC_ASSERT
+#  if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)    /* C11 */
+#    include <assert.h>
+#    define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { static_assert((c),m); } while(0)
+#  elif defined(__cplusplus) && (__cplusplus >= 201103L)            /* C++11 */
+#    define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { static_assert((c),m); } while(0)
+#  else
+#    define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { struct xxh_sa { char x[(c) ? 1 : -1]; }; } while(0)
+#  endif
+#  define XXH_STATIC_ASSERT(c) XXH_STATIC_ASSERT_WITH_MESSAGE((c),#c)
+#endif
+
+/*!
+ * @internal
+ * @def XXH_COMPILER_GUARD(var)
+ * @brief Used to prevent unwanted optimizations for @p var.
+ *
+ * It uses an empty GCC inline assembly statement with a register constraint
+ * which forces @p var into a general purpose register (eg eax, ebx, ecx
+ * on x86) and marks it as modified.
+ *
+ * This is used in a few places to avoid unwanted autovectorization (e.g.
+ * XXH32_round()). All vectorization we want is explicit via intrinsics,
+ * and _usually_ isn't wanted elsewhere.
+ *
+ * We also use it to prevent unwanted constant folding for AArch64 in
+ * XXH3_initCustomSecret_scalar().
+ */
+#if defined(__GNUC__) || defined(__clang__)
+#  define XXH_COMPILER_GUARD(var) __asm__ __volatile__("" : "+r" (var))
+#else
+#  define XXH_COMPILER_GUARD(var) ((void)0)
+#endif
+
+/* *************************************
+*  Basic Types
+***************************************/
+#if !defined (__VMS) \
+ && (defined (__cplusplus) \
+ || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+# include <stdint.h>
+  typedef uint8_t xxh_u8;
+#else
+  typedef unsigned char xxh_u8;
+#endif
+typedef XXH32_hash_t xxh_u32;
+
+#ifdef XXH_OLD_NAMES
+#  define BYTE xxh_u8
+#  define U8   xxh_u8
+#  define U32  xxh_u32
+#endif
+
+/* ***   Memory access   *** */
+
+/*!
+ * @internal
+ * @fn xxh_u32 XXH_read32(const void* ptr)
+ * @brief Reads an unaligned 32-bit integer from @p ptr in native endianness.
+ *
+ * Affected by @ref XXH_FORCE_MEMORY_ACCESS.
+ *
+ * @param ptr The pointer to read from.
+ * @return The 32-bit native endian integer from the bytes at @p ptr.
+ */
+
+/*!
+ * @internal
+ * @fn xxh_u32 XXH_readLE32(const void* ptr)
+ * @brief Reads an unaligned 32-bit little endian integer from @p ptr.
+ *
+ * Affected by @ref XXH_FORCE_MEMORY_ACCESS.
+ *
+ * @param ptr The pointer to read from.
+ * @return The 32-bit little endian integer from the bytes at @p ptr.
+ */
+
+/*!
+ * @internal
+ * @fn xxh_u32 XXH_readBE32(const void* ptr)
+ * @brief Reads an unaligned 32-bit big endian integer from @p ptr.
+ *
+ * Affected by @ref XXH_FORCE_MEMORY_ACCESS.
+ *
+ * @param ptr The pointer to read from.
+ * @return The 32-bit big endian integer from the bytes at @p ptr.
+ */
+
+/*!
+ * @internal
+ * @fn xxh_u32 XXH_readLE32_align(const void* ptr, XXH_alignment align)
+ * @brief Like @ref XXH_readLE32(), but has an option for aligned reads.
+ *
+ * Affected by @ref XXH_FORCE_MEMORY_ACCESS.
+ * Note that when @ref XXH_FORCE_ALIGN_CHECK == 0, the @p align parameter is
+ * always @ref XXH_alignment::XXH_unaligned.
+ *
+ * @param ptr The pointer to read from.
+ * @param align Whether @p ptr is aligned.
+ * @pre
+ *   If @p align == @ref XXH_alignment::XXH_aligned, @p ptr must be 4 byte
+ *   aligned.
+ * @return The 32-bit little endian integer from the bytes at @p ptr.
+ */
+
+#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))
+/*
+ * Manual byteshift. Best for old compilers which don't inline memcpy.
+ * We actually directly use XXH_readLE32 and XXH_readBE32.
+ */
+#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
+
+/*
+ * Force direct memory access. Only works on CPU which support unaligned memory
+ * access in hardware.
+ */
+static xxh_u32 XXH_read32(const void* memPtr) { return ((const xxh_u32*)memPtr)[0]; }   /* plain load, no alignment fix-up */
+
+#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
+
+/*
+ * __pack instructions are safer but compiler specific, hence potentially
+ * problematic for some compilers.
+ *
+ * Currently only defined for GCC and ICC.
+ */
+#ifdef XXH_OLD_NAMES
+typedef union { xxh_u32 u32; } __attribute__((packed)) unalign;
+#endif
+static xxh_u32 XXH_read32(const void* ptr)
+{   /* Access the value through a packed union rather than a plain xxh_u32 pointer. */
+    typedef union { xxh_u32 u32; } __attribute__((packed)) xxh_unalign;
+    return (*(const xxh_unalign*)ptr).u32;
+}
+
+#else
+
+/*
+ * Portable and safe solution. Generally efficient.
+ * see: http://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html
+ */
+static xxh_u32 XXH_read32(const void* memPtr)
+{
+    xxh_u32 word;   /* every byte is overwritten by the copy below */
+    XXH_memcpy(&word, memPtr, sizeof(xxh_u32));
+    return word;
+}
+
+#endif   /* XXH_FORCE_MEMORY_ACCESS */
+
+
+/* ***   Endianness   *** */
+
+/*!
+ * @ingroup tuning
+ * @def XXH_CPU_LITTLE_ENDIAN
+ * @brief Whether the target is little endian.
+ *
+ * Defined to 1 if the target is little endian, or 0 if it is big endian.
+ * It can be defined externally, for example on the compiler command line.
+ *
+ * If it is not defined,
+ * a runtime check (which is usually constant folded) is used instead.
+ *
+ * @note
+ *   This is not necessarily defined to an integer constant.
+ *
+ * @see XXH_isLittleEndian() for the runtime check.
+ */
+#ifndef XXH_CPU_LITTLE_ENDIAN
+/*
+ * Try to detect endianness automatically, to avoid the nonstandard behavior
+ * in `XXH_isLittleEndian()`
+ */
+#  if defined(_WIN32) /* Windows is always little endian */ \
+     || defined(__LITTLE_ENDIAN__) \
+     || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
+#    define XXH_CPU_LITTLE_ENDIAN 1
+#  elif defined(__BIG_ENDIAN__) \
+     || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+#    define XXH_CPU_LITTLE_ENDIAN 0
+#  else
+/*!
+ * @internal
+ * @brief Runtime check for @ref XXH_CPU_LITTLE_ENDIAN.
+ *
+ * Most compilers will constant fold this.
+ */
+static int XXH_isLittleEndian(void)
+{
+    /*
+     * Store 1 in the 32-bit member, then look at the lowest-addressed byte.
+     * Deliberately not static: that is detrimental to performance.
+     */
+    const union { xxh_u32 u; xxh_u8 c[4]; } probe = { 1 };
+    return probe.c[0];
+}
+#   define XXH_CPU_LITTLE_ENDIAN   XXH_isLittleEndian()
+#  endif
+#endif
+
+
+
+
+/* ****************************************
+*  Compiler-specific Functions and Macros
+******************************************/
+#define XXH_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+
+#ifdef __has_builtin
+#  define XXH_HAS_BUILTIN(x) __has_builtin(x)
+#else
+#  define XXH_HAS_BUILTIN(x) 0
+#endif
+
+/*!
+ * @internal
+ * @def XXH_rotl32(x,r)
+ * @brief 32-bit rotate left.
+ *
+ * @param x The 32-bit integer to be rotated.
+ * @param r The number of bits to rotate.
+ * @pre
+ *   @p r > 0 && @p r < 32
+ * @note
+ *   @p x and @p r may be evaluated multiple times.
+ * @return The rotated result.
+ */
+#if !defined(NO_CLANG_BUILTIN) && XXH_HAS_BUILTIN(__builtin_rotateleft32) \
+                               && XXH_HAS_BUILTIN(__builtin_rotateleft64)
+#  define XXH_rotl32 __builtin_rotateleft32
+#  define XXH_rotl64 __builtin_rotateleft64
+/* Note: although _rotl exists for MinGW (GCC under Windows), its performance seems poor, so it is only used for MSVC */
+#elif defined(_MSC_VER)
+#  define XXH_rotl32(x,r) _rotl(x,r)
+#  define XXH_rotl64(x,r) _rotl64(x,r)
+#else   /* shift/or fallback: x and r are expanded twice, and r must satisfy 0 < r < width */
+#  define XXH_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
+#  define XXH_rotl64(x,r) (((x) << (r)) | ((x) >> (64 - (r))))
+#endif
+
+/*!
+ * @internal
+ * @fn xxh_u32 XXH_swap32(xxh_u32 x)
+ * @brief A 32-bit byteswap.
+ *
+ * @param x The 32-bit integer to byteswap.
+ * @return @p x, byteswapped.
+ */
+#if defined(_MSC_VER)     /* Visual Studio */
+#  define XXH_swap32 _byteswap_ulong
+#elif XXH_GCC_VERSION >= 403
+#  define XXH_swap32 __builtin_bswap32
+#else
+static xxh_u32 XXH_swap32 (xxh_u32 x)
+{
+    xxh_u32 const b0 = (x >> 24) & 0x000000ff;   /* highest byte moves to the lowest position */
+    xxh_u32 const b1 = (x >>  8) & 0x0000ff00;
+    xxh_u32 const b2 = (x <<  8) & 0x00ff0000;
+    return b0 | b1 | b2 | ((x << 24) & 0xff000000);   /* lowest byte moves to the highest position */
+}
+#endif
+
+
+/* ***************************
+*  Memory reads
+*****************************/
+
+/*!
+ * @internal
+ * @brief Enum to indicate whether a pointer is aligned.
+ */
+typedef enum {
+    XXH_aligned,  /*!< Aligned: readers may dereference the pointer directly */
+    XXH_unaligned /*!< Possibly unaligned: readers go through the unaligned-safe load helpers */
+} XXH_alignment;
+
+/*
+ * XXH_FORCE_MEMORY_ACCESS==3 is an endian-independent byteshift load.
+ *
+ * This is ideal for older compilers which don't inline memcpy.
+ */
+#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))
+
+XXH_FORCE_INLINE xxh_u32 XXH_readLE32(const void* memPtr)
+{
+    const xxh_u8* const src = (const xxh_u8 *)memPtr;
+    xxh_u32 word = (xxh_u32)src[0] | ((xxh_u32)src[1] << 8);   /* low 16 bits */
+    word |= (xxh_u32)src[2] << 16;
+    word |= (xxh_u32)src[3] << 24;                             /* last byte is the most significant */
+    return word;
+}
+
+XXH_FORCE_INLINE xxh_u32 XXH_readBE32(const void* memPtr)
+{
+    const xxh_u8* const src = (const xxh_u8 *)memPtr;
+    xxh_u32 word = ((xxh_u32)src[0] << 24) | ((xxh_u32)src[1] << 16);   /* first byte is the most significant */
+    word |= (xxh_u32)src[2] << 8;
+    word |= (xxh_u32)src[3];
+    return word;
+}
+
+#else
+XXH_FORCE_INLINE xxh_u32 XXH_readLE32(const void* ptr)
+{   xxh_u32 const native = XXH_read32(ptr);   /* value in host byte order */
+    return XXH_CPU_LITTLE_ENDIAN ? native : XXH_swap32(native);
+}
+
+static xxh_u32 XXH_readBE32(const void* ptr)
+{   xxh_u32 const native = XXH_read32(ptr);   /* value in host byte order */
+    return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(native) : native;
+}
+#endif
+
+XXH_FORCE_INLINE xxh_u32
+XXH_readLE32_align(const void* ptr, XXH_alignment align)
+{
+    xxh_u32 raw;
+    if (align==XXH_unaligned) return XXH_readLE32(ptr);   /* alignment-safe reader */
+    raw = *(const xxh_u32*)ptr;                           /* direct load: ptr was declared aligned */
+    if (XXH_CPU_LITTLE_ENDIAN) return raw;
+    return XXH_swap32(raw);                               /* big-endian host: reorder to little endian */
+}
+
+
+/* *************************************
+*  Misc
+***************************************/
+/*! @ingroup public */
+XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; }   /* value of XXH_VERSION_NUMBER as seen when this code was compiled */
+
+
+/* *******************************************************************
+*  32-bit hash functions
+*********************************************************************/
+/*!
+ * @}
+ * @defgroup xxh32_impl XXH32 implementation
+ * @ingroup impl
+ * @{
+ */
+ /* #define instead of static const, to be used as initializers */
+#define XXH_PRIME32_1  0x9E3779B1U  /*!< 0b10011110001101110111100110110001 */
+#define XXH_PRIME32_2  0x85EBCA77U  /*!< 0b10000101111010111100101001110111 */
+#define XXH_PRIME32_3  0xC2B2AE3DU  /*!< 0b11000010101100101010111000111101 */
+#define XXH_PRIME32_4  0x27D4EB2FU  /*!< 0b00100111110101001110101100101111 */
+#define XXH_PRIME32_5  0x165667B1U  /*!< 0b00010110010101100110011110110001 */
+
+#ifdef XXH_OLD_NAMES
+#  define PRIME32_1 XXH_PRIME32_1
+#  define PRIME32_2 XXH_PRIME32_2
+#  define PRIME32_3 XXH_PRIME32_3
+#  define PRIME32_4 XXH_PRIME32_4
+#  define PRIME32_5 XXH_PRIME32_5
+#endif
+
+/*!
+ * @internal
+ * @brief Normal stripe processing routine.
+ *
+ * This shuffles the bits so that any bit from @p input impacts several bits in
+ * @p acc.
+ *
+ * @param acc The accumulator lane.
+ * @param input The stripe of input to mix.
+ * @return The mixed accumulator lane.
+ */
+static xxh_u32 XXH32_round(xxh_u32 acc, xxh_u32 input)
+{
+    acc += input * XXH_PRIME32_2;   /* multiply-add the input word into the lane */
+    acc  = XXH_rotl32(acc, 13);     /* rotate left by 13 bits */
+    acc *= XXH_PRIME32_1;           /* final multiply of the round */
+#if (defined(__SSE4_1__) || defined(__aarch64__)) && !defined(XXH_ENABLE_AUTOVECTORIZE)
+    /*
+     * UGLY HACK:
+     * A compiler fence is the only thing that prevents GCC and Clang from
+     * autovectorizing the XXH32 loop (pragmas and attributes don't work for some
+     * reason) without globally disabling SSE4.1.
+     *
+     * The reason we want to avoid vectorization is because despite working on
+     * 4 integers at a time, there are multiple factors slowing XXH32 down on
+     * SSE4:
+     * - There's a ridiculous amount of lag from pmulld (10 cycles of latency on
+     *   newer chips!) making it slightly slower to multiply four integers at
+     *   once compared to four integers independently. Even when pmulld was
+     *   fastest, Sandy/Ivy Bridge, it is still not worth it to go into SSE
+     *   just to multiply unless doing a long operation.
+     *
+     * - Four instructions are required to rotate,
+     *      movdqa tmp,  v // not required with VEX encoding
+     *      pslld  tmp, 13 // tmp <<= 13
+     *      psrld  v,   19 // v >>= 19
+     *      por    v,  tmp // v |= tmp
+     *   compared to one for scalar:
+     *      roll   v, 13    // reliably fast across the board
+     *      shldl  v, v, 13 // Sandy Bridge and later prefer this for some reason
+     *
+     * - Instruction level parallelism is actually more beneficial here because
+     *   the SIMD actually serializes this operation: While v1 is rotating, v2
+     *   can load data, while v3 can multiply. SSE forces them to operate
+     *   together.
+     *
+     * This is also enabled on AArch64, as Clang autovectorizes it incorrectly
+     * and it is pointless writing a NEON implementation that is basically the
+     * same speed as scalar for XXH32.
+     */
+    XXH_COMPILER_GUARD(acc);
+#endif
+    return acc;
+}
+
+/*!
+ * @internal
+ * @brief Mixes all bits to finalize the hash.
+ *
+ * The final mix ensures that all input bits have a chance to impact any bit in
+ * the output digest, resulting in an unbiased distribution.
+ *
+ * @param h32 The hash to avalanche.
+ * @return The avalanched hash.
+ */
+static xxh_u32 XXH32_avalanche(xxh_u32 h32)
+{
+    /* Two xorshift-then-multiply steps ... */
+    h32 = (h32 ^ (h32 >> 15)) * XXH_PRIME32_2;
+    h32 = (h32 ^ (h32 >> 13)) * XXH_PRIME32_3;
+
+    /* ... and a closing xorshift by half the width. */
+    return h32 ^ (h32 >> 16);
+}
+
+#define XXH_get32bits(p) XXH_readLE32_align(p, align)
+
+/*!
+ * @internal
+ * @brief Processes the last 0-15 bytes of @p ptr.
+ *
+ * There may be up to 15 bytes remaining to consume from the input.
+ * This final stage will digest them to ensure that all input bytes are present
+ * in the final mix.
+ *
+ * @param h32 The hash to finalize.
+ * @param ptr The pointer to the remaining input.
+ * @param len The remaining length, modulo 16.
+ * @param align Whether @p ptr is aligned.
+ * @return The finalized hash.
+ */
+static xxh_u32
+XXH32_finalize(xxh_u32 h32, const xxh_u8* ptr, size_t len, XXH_alignment align)
+{
+#define XXH_PROCESS1 do {                           \
+    h32 += (*ptr++) * XXH_PRIME32_5;                \
+    h32 = XXH_rotl32(h32, 11) * XXH_PRIME32_1;      \
+} while (0)
+/* XXH_PROCESS1 (above) absorbs one byte; XXH_PROCESS4 (below) absorbs one 32-bit word. Both advance ptr. */
+#define XXH_PROCESS4 do {                           \
+    h32 += XXH_get32bits(ptr) * XXH_PRIME32_3;      \
+    ptr += 4;                                   \
+    h32  = XXH_rotl32(h32, 17) * XXH_PRIME32_4;     \
+} while (0)
+    /* Both macros operate on the local h32/ptr; XXH_PROCESS4 also picks up align through XXH_get32bits. */
+    if (ptr==NULL) XXH_ASSERT(len == 0);
+    /* The two branches below consume the same words and bytes in the same order; XXH32_ENDJMP picks the code shape. */
+    /* Compact rerolled version; generally faster */
+    if (!XXH32_ENDJMP) {
+        len &= 15;
+        while (len >= 4) {   /* whole 32-bit words first */
+            XXH_PROCESS4;
+            len -= 4;
+        }
+        while (len > 0) {    /* then the 0-3 leftover bytes */
+            XXH_PROCESS1;
+            --len;
+        }
+        return XXH32_avalanche(h32);
+    } else {
+         switch(len&15) /* or switch(bEnd - p) */ {
+           case 12:      XXH_PROCESS4;
+                         XXH_FALLTHROUGH;
+           case 8:       XXH_PROCESS4;
+                         XXH_FALLTHROUGH;
+           case 4:       XXH_PROCESS4;
+                         return XXH32_avalanche(h32);
+           /* 13, 9, 5: whole words followed by one trailing byte */
+           case 13:      XXH_PROCESS4;
+                         XXH_FALLTHROUGH;
+           case 9:       XXH_PROCESS4;
+                         XXH_FALLTHROUGH;
+           case 5:       XXH_PROCESS4;
+                         XXH_PROCESS1;
+                         return XXH32_avalanche(h32);
+           /* 14, 10, 6: whole words followed by two trailing bytes */
+           case 14:      XXH_PROCESS4;
+                         XXH_FALLTHROUGH;
+           case 10:      XXH_PROCESS4;
+                         XXH_FALLTHROUGH;
+           case 6:       XXH_PROCESS4;
+                         XXH_PROCESS1;
+                         XXH_PROCESS1;
+                         return XXH32_avalanche(h32);
+           /* 15, 11, 7 fall into the byte-only cases 3, 2, 1 and finally 0 */
+           case 15:      XXH_PROCESS4;
+                         XXH_FALLTHROUGH;
+           case 11:      XXH_PROCESS4;
+                         XXH_FALLTHROUGH;
+           case 7:       XXH_PROCESS4;
+                         XXH_FALLTHROUGH;
+           case 3:       XXH_PROCESS1;
+                         XXH_FALLTHROUGH;
+           case 2:       XXH_PROCESS1;
+                         XXH_FALLTHROUGH;
+           case 1:       XXH_PROCESS1;
+                         XXH_FALLTHROUGH;
+           case 0:       return XXH32_avalanche(h32);
+        }
+        XXH_ASSERT(0);
+        return h32;   /* reaching this point is deemed impossible */
+    }
+}
+
+#ifdef XXH_OLD_NAMES
+#  define PROCESS1 XXH_PROCESS1
+#  define PROCESS4 XXH_PROCESS4
+#else
+#  undef XXH_PROCESS1
+#  undef XXH_PROCESS4
+#endif
+
+/*!
+ * @internal
+ * @brief The implementation for @ref XXH32().
+ *
+ * @param input , len , seed Directly passed from @ref XXH32().
+ * @param align Whether @p input is aligned.
+ * @return The calculated hash.
+ */
+XXH_FORCE_INLINE xxh_u32
+XXH32_endian_align(const xxh_u8* input, size_t len, xxh_u32 seed, XXH_alignment align)
+{
+    xxh_u32 h32 = seed + XXH_PRIME32_5;   /* starting value when the input has no full 16-byte stripe */
+
+    if (input==NULL) XXH_ASSERT(len == 0);
+
+    if (len>=16) {
+        /* Keep looping while 16 bytes remain: input < end-15  <=>  end-input >= 16. */
+        const xxh_u8* const stripeStop = (input + len) - 15;
+        xxh_u32 laneA = seed + XXH_PRIME32_1 + XXH_PRIME32_2;
+        xxh_u32 laneB = seed + XXH_PRIME32_2;
+        xxh_u32 laneC = seed;
+        xxh_u32 laneD = seed - XXH_PRIME32_1;
+
+        do {   /* one stripe per pass: four little-endian words, one per lane */
+            laneA = XXH32_round(laneA, XXH_get32bits(input)); input += 4;
+            laneB = XXH32_round(laneB, XXH_get32bits(input)); input += 4;
+            laneC = XXH32_round(laneC, XXH_get32bits(input)); input += 4;
+            laneD = XXH32_round(laneD, XXH_get32bits(input)); input += 4;
+        } while (input < stripeStop);
+
+        h32 = XXH_rotl32(laneA, 1)  + XXH_rotl32(laneB, 7)
+            + XXH_rotl32(laneC, 12) + XXH_rotl32(laneD, 18);
+    }
+
+    /* The length is folded in after truncation to 32 bits. */
+    h32 += (xxh_u32)len;
+
+    /* input now points at the 0-15 bytes that no stripe covered. */
+    return XXH32_finalize(h32, input, len&15, align);
+}
+
+/*! @ingroup xxh32_family */
+XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t len, XXH32_hash_t seed)
+{
+#if 0
+    /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
+    XXH32_state_t state;
+    XXH32_reset(&state, seed);
+    XXH32_update(&state, (const xxh_u8*)input, len);
+    return XXH32_digest(&state);
+#else
+    {   const xxh_u8* const bytes = (const xxh_u8*)input;
+        /* Aligned fast path: only when alignment checking is enabled and input sits on a 4-byte boundary. */
+        if (XXH_FORCE_ALIGN_CHECK && (((size_t)input) & 3) == 0)
+            return XXH32_endian_align(bytes, len, seed, XXH_aligned);
+        return XXH32_endian_align(bytes, len, seed, XXH_unaligned);
+    }
+#endif
+}
+
+
+
+/*******   Hash streaming   *******/
+/*!
+ * @ingroup xxh32_family
+ */
+XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void)
+{   void* const raw = XXH_malloc(sizeof(XXH32_state_t));   /* allocation only: the state is not initialized here */
+    return (XXH32_state_t*)raw;
+}
+/*! @ingroup xxh32_family */
+XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr)
+{
+    XXH_free(statePtr);   /* hands the state to XXH_free() */
+    return XXH_OK;        /* unconditional: no failure is reported from here */
+}
+
+/*! @ingroup xxh32_family */
+XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dstState, const XXH32_state_t* srcState)
+{   /* Byte-for-byte duplicate of the complete state structure. */
+    XXH_memcpy(dstState, srcState, sizeof(XXH32_state_t));
+}
+
+/*! @ingroup xxh32_family */
+XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, XXH32_hash_t seed)
+{
+    XXH32_state_t fresh;   /* built locally, then memcpy()'d out to avoid strict-aliasing warnings */
+    size_t const liveBytes = sizeof(fresh) - sizeof(fresh.reserved);   /* size of the state minus its reserved field */
+    memset(&fresh, 0, sizeof(fresh));
+    fresh.v[0] = seed + XXH_PRIME32_1 + XXH_PRIME32_2;
+    fresh.v[1] = seed + XXH_PRIME32_2;
+    fresh.v[2] = seed;
+    fresh.v[3] = seed - XXH_PRIME32_1;
+    XXH_memcpy(statePtr, &fresh, liveBytes);   /* do not write into reserved, planned to be removed in a future version */
+    return XXH_OK;
+}
+
+
+/*! @ingroup xxh32_family */
+XXH_PUBLIC_API XXH_errorcode
+XXH32_update(XXH32_state_t* state, const void* input, size_t len)
+{
+    if (input==NULL) {   /* nothing to consume; NULL is only expected together with len == 0 */
+        XXH_ASSERT(len == 0);
+        return XXH_OK;
+    }
+    /* Steps: buffer short input, complete a pending stripe, consume full 16-byte stripes, stash the remainder. */
+    {   const xxh_u8* p = (const xxh_u8*)input;
+        const xxh_u8* const bEnd = p + len;
+
+        state->total_len_32 += (XXH32_hash_t)len;   /* running length, truncated to the width of XXH32_hash_t */
+        state->large_len |= (XXH32_hash_t)((len>=16) | (state->total_len_32>=16));
+
+        if (state->memsize + len < 16)  {   /* fill in tmp buffer */
+            XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, len);
+            state->memsize += (XXH32_hash_t)len;
+            return XXH_OK;
+        }
+
+        if (state->memsize) {   /* some data left from previous update: top it up to one full stripe */
+            XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, 16-state->memsize);
+            {   const xxh_u32* p32 = state->mem32;
+                state->v[0] = XXH32_round(state->v[0], XXH_readLE32(p32)); p32++;
+                state->v[1] = XXH32_round(state->v[1], XXH_readLE32(p32)); p32++;
+                state->v[2] = XXH32_round(state->v[2], XXH_readLE32(p32)); p32++;
+                state->v[3] = XXH32_round(state->v[3], XXH_readLE32(p32));
+            }
+            p += 16-state->memsize;
+            state->memsize = 0;
+        }
+        /* Compare byte counts, not pointers: forming bEnd-16 is undefined when fewer than 16 bytes were passed in. */
+        if ((size_t)(bEnd - p) >= 16) {
+            const xxh_u8* const limit = bEnd - 16;   /* last position where a whole stripe still fits */
+
+            do {
+                state->v[0] = XXH32_round(state->v[0], XXH_readLE32(p)); p+=4;
+                state->v[1] = XXH32_round(state->v[1], XXH_readLE32(p)); p+=4;
+                state->v[2] = XXH32_round(state->v[2], XXH_readLE32(p)); p+=4;
+                state->v[3] = XXH32_round(state->v[3], XXH_readLE32(p)); p+=4;
+            } while (p<=limit);
+
+        }
+
+        if (p < bEnd) {   /* fewer than 16 bytes left: keep them for the next update or the digest */
+            XXH_memcpy(state->mem32, p, (size_t)(bEnd-p));
+            state->memsize = (unsigned)(bEnd-p);
+        }
+    }
+
+    return XXH_OK;
+}
+
+
+/*! @ingroup xxh32_family */
+XXH_PUBLIC_API XXH32_hash_t XXH32_digest(const XXH32_state_t* state)
+{
+    /* Default for short inputs: v[2] is used as the seed here, since the lanes
+     * are only mixed once a full 16-byte stripe has been consumed. */
+    xxh_u32 h32 = state->v[2] + XXH_PRIME32_5;
+
+    if (state->large_len) {   /* flag set by XXH32_update(): converge the four lanes */
+        h32 = XXH_rotl32(state->v[0], 1)  + XXH_rotl32(state->v[1], 7)
+            + XXH_rotl32(state->v[2], 12) + XXH_rotl32(state->v[3], 18);
+    }
+
+    /* Mix in the 32-bit running length. */
+    h32 += state->total_len_32;
+
+    /* Digest whatever is still buffered (memsize bytes of mem32) and avalanche. */
+    return XXH32_finalize(h32, (const xxh_u8*)state->mem32, state->memsize, XXH_aligned);
+}
+
+
+/*******   Canonical representation   *******/
+
+/*!
+ * @ingroup xxh32_family
+ * The default return values from XXH functions are unsigned 32 and 64 bit
+ * integers.
+ *
+ * The canonical representation uses big endian convention, the same convention
+ * as human-readable numbers (large digits first).
+ *
+ * This way, hash values can be written into a file or buffer, remaining
+ * comparable across different systems.
+ *
+ * The following functions allow transformation of hash values to and from their
+ * canonical format.
+ */
+XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash)
+{
+    XXH32_hash_t const bigEndian = XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(hash) : hash;   /* canonical form is big endian */
+    XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t));
+    XXH_memcpy(dst, &bigEndian, sizeof(*dst));
+}
+/*! @ingroup xxh32_family */
+XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src)
+{   /* The canonical bytes are big endian; decode them into a native value. */
+    return XXH_readBE32(src);
+}
+
+
+#ifndef XXH_NO_LONG_LONG
+
+/* *******************************************************************
+*  64-bit hash functions
+*********************************************************************/
+/*!
+ * @}
+ * @ingroup impl
+ * @{
+ */
+/*******   Memory access   *******/
+
+typedef XXH64_hash_t xxh_u64;
+
+#ifdef XXH_OLD_NAMES
+#  define U64 xxh_u64
+#endif
+
+#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))
+/*
+ * Manual byteshift. Best for old compilers which don't inline memcpy.
+ * We actually directly use XXH_readLE64 and XXH_readBE64.
+ */
+#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
+
+/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */
+static xxh_u64 XXH_read64(const void* memPtr)
+{   /* plain load, no alignment fix-up */
+    return ((const xxh_u64*)memPtr)[0];
+}
+
+#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
+
+/*
+ * __pack instructions are safer, but compiler specific, hence potentially
+ * problematic for some compilers.
+ *
+ * Currently only defined for GCC and ICC.
+ */
+#ifdef XXH_OLD_NAMES
+typedef union { xxh_u32 u32; xxh_u64 u64; } __attribute__((packed)) unalign64;
+#endif
+static xxh_u64 XXH_read64(const void* ptr)
+{   /* Access the value through a packed union rather than a plain xxh_u64 pointer. */
+    typedef union { xxh_u32 u32; xxh_u64 u64; } __attribute__((packed)) xxh_unalign64;
+    return (*(const xxh_unalign64*)ptr).u64;
+}
+
+#else
+
+/*
+ * Portable and safe solution. Generally efficient.
+ * see: http://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html
+ */
+static xxh_u64 XXH_read64(const void* memPtr)
+{
+    xxh_u64 word;   /* every byte is overwritten by the copy below */
+    XXH_memcpy(&word, memPtr, sizeof(xxh_u64));
+    return word;
+}
+
+#endif   /* XXH_FORCE_MEMORY_ACCESS */
+
+#if defined(_MSC_VER)     /* Visual Studio */
+#  define XXH_swap64 _byteswap_uint64
+#elif XXH_GCC_VERSION >= 403
+#  define XXH_swap64 __builtin_bswap64
+#else
+static xxh_u64 XXH_swap64(xxh_u64 x)
+{
+    /* Exchange byte pairs from the outside in: (0,7), (1,6), (2,5), (3,4). */
+    xxh_u64 const outer  = ((x << 56) & 0xff00000000000000ULL) | ((x >> 56) & 0x00000000000000ffULL);
+    xxh_u64 const second = ((x << 40) & 0x00ff000000000000ULL) | ((x >> 40) & 0x000000000000ff00ULL);
+    xxh_u64 const third  = ((x << 24) & 0x0000ff0000000000ULL) | ((x >> 24) & 0x0000000000ff0000ULL);
+    xxh_u64 const inner  = ((x << 8)  & 0x000000ff00000000ULL) | ((x >> 8)  & 0x00000000ff000000ULL);
+
+    /* The four pairs occupy disjoint byte positions, so OR-ing them rebuilds the word. */
+    return outer | second | third | inner;
+}
+#endif
+
+
+/* XXH_FORCE_MEMORY_ACCESS==3 is an endian-independent byteshift load. */
+#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))
+
+XXH_FORCE_INLINE xxh_u64 XXH_readLE64(const void* memPtr)
+{
+    const xxh_u8* const src = (const xxh_u8 *)memPtr;
+    /* Assemble each 32-bit half least-significant byte first ... */
+    xxh_u64 const lo = (xxh_u64)src[0]         | ((xxh_u64)src[1] << 8)
+                     | ((xxh_u64)src[2] << 16) | ((xxh_u64)src[3] << 24);
+    xxh_u64 const hi = (xxh_u64)src[4]         | ((xxh_u64)src[5] << 8)
+                     | ((xxh_u64)src[6] << 16) | ((xxh_u64)src[7] << 24);
+
+    /* ... then place bytes 4-7 above bytes 0-3. */
+    return lo | (hi << 32);
+}
+
+XXH_FORCE_INLINE xxh_u64 XXH_readBE64(const void* memPtr)
+{
+    const xxh_u8* const src = (const xxh_u8 *)memPtr;
+    /* Assemble each 32-bit half most-significant byte first ... */
+    xxh_u64 const hi = ((xxh_u64)src[0] << 24) | ((xxh_u64)src[1] << 16)
+                     | ((xxh_u64)src[2] << 8)  |  (xxh_u64)src[3];
+    xxh_u64 const lo = ((xxh_u64)src[4] << 24) | ((xxh_u64)src[5] << 16)
+                     | ((xxh_u64)src[6] << 8)  |  (xxh_u64)src[7];
+
+    /* ... then place bytes 0-3 above bytes 4-7. */
+    return (hi << 32) | lo;
+}
+
+#else
+XXH_FORCE_INLINE xxh_u64 XXH_readLE64(const void* ptr)
+{   xxh_u64 const native = XXH_read64(ptr);   /* value in host byte order */
+    return XXH_CPU_LITTLE_ENDIAN ? native : XXH_swap64(native);
+}
+
+static xxh_u64 XXH_readBE64(const void* ptr)
+{   xxh_u64 const native = XXH_read64(ptr);   /* value in host byte order */
+    return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(native) : native;
+}
+#endif
+
+XXH_FORCE_INLINE xxh_u64
+XXH_readLE64_align(const void* ptr, XXH_alignment align)
+{
+    xxh_u64 raw;
+    if (align==XXH_unaligned) return XXH_readLE64(ptr);   /* alignment-safe reader */
+    raw = *(const xxh_u64*)ptr;                           /* direct load: ptr was declared aligned */
+    return XXH_CPU_LITTLE_ENDIAN ? raw : XXH_swap64(raw);
+}
+
+
+/*******   xxh64   *******/
+/*!
+ * @}
+ * @defgroup xxh64_impl XXH64 implementation
+ * @ingroup impl
+ * @{
+ */
+/* #define rather than static const, to be used as initializers */
+#define XXH_PRIME64_1  0x9E3779B185EBCA87ULL  /*!< 0b1001111000110111011110011011000110000101111010111100101010000111 */
+#define XXH_PRIME64_2  0xC2B2AE3D27D4EB4FULL  /*!< 0b1100001010110010101011100011110100100111110101001110101101001111 */
+#define XXH_PRIME64_3  0x165667B19E3779F9ULL  /*!< 0b0001011001010110011001111011000110011110001101110111100111111001 */
+#define XXH_PRIME64_4  0x85EBCA77C2B2AE63ULL  /*!< 0b1000010111101011110010100111011111000010101100101010111001100011 */
+#define XXH_PRIME64_5  0x27D4EB2F165667C5ULL  /*!< 0b0010011111010100111010110010111100010110010101100110011111000101 */
+
+#ifdef XXH_OLD_NAMES
+#  define PRIME64_1 XXH_PRIME64_1
+#  define PRIME64_2 XXH_PRIME64_2
+#  define PRIME64_3 XXH_PRIME64_3
+#  define PRIME64_4 XXH_PRIME64_4
+#  define PRIME64_5 XXH_PRIME64_5
+#endif
+
+static xxh_u64 XXH64_round(xxh_u64 acc, xxh_u64 input)
+{
+    /* Mix one 64-bit word into a lane: multiply-add, rotate left by 31, multiply. */
+    xxh_u64 const mixed = acc + input * XXH_PRIME64_2;
+    xxh_u64 const spun  = XXH_rotl64(mixed, 31);
+    return spun * XXH_PRIME64_1;
+}
+
+static xxh_u64 XXH64_mergeRound(xxh_u64 acc, xxh_u64 val)
+{
+    /* Scramble the lane on its own (a round starting from a zero accumulator) ... */
+    xxh_u64 const scrambled = XXH64_round(0, val);
+    /* ... then xor it into the running hash and apply a multiply-add. */
+    return (acc ^ scrambled) * XXH_PRIME64_1 + XXH_PRIME64_4;
+}
+
+static xxh_u64 XXH64_avalanche(xxh_u64 h64)
+{
+    /* Two xorshift-then-multiply steps ... */
+    h64 = (h64 ^ (h64 >> 33)) * XXH_PRIME64_2;
+    h64 = (h64 ^ (h64 >> 29)) * XXH_PRIME64_3;
+
+    /* ... and a closing xorshift by half the width. */
+    return h64 ^ (h64 >> 32);
+}
+
+
+#define XXH_get64bits(p) XXH_readLE64_align(p, align)
+
+static xxh_u64
+XXH64_finalize(xxh_u64 h64, const xxh_u8* ptr, size_t len, XXH_alignment align)
+{
+    size_t tail = len & 31;   /* only the low 5 bits of len count: at most 31 bytes are digested here */
+    if (ptr==NULL) XXH_ASSERT(len == 0);
+    /* 8-byte words: each one goes through a round from zero before being folded in. */
+    for (; tail >= 8; tail -= 8, ptr += 8) {
+        xxh_u64 const lane = XXH64_round(0, XXH_get64bits(ptr));
+        h64 = XXH_rotl64(h64 ^ lane, 27) * XXH_PRIME64_1 + XXH_PRIME64_4;
+    }
+
+    /* At most one 4-byte word can remain after that. */
+    if (tail >= 4) {
+        xxh_u64 const word = (xxh_u64)(XXH_get32bits(ptr)) * XXH_PRIME64_1;
+        h64 = XXH_rotl64(h64 ^ word, 23) * XXH_PRIME64_2 + XXH_PRIME64_3;
+        ptr  += 4;
+        tail -= 4;
+    }
+
+    /* Whatever is left (0-3 bytes) is absorbed one byte at a time. */
+    for (; tail > 0; --tail, ++ptr) {
+        h64 = XXH_rotl64(h64 ^ ((*ptr) * XXH_PRIME64_5), 11) * XXH_PRIME64_1;
+    }
+    return  XXH64_avalanche(h64);
+}
+
+#ifdef XXH_OLD_NAMES
+#  define PROCESS1_64 XXH_PROCESS1_64
+#  define PROCESS4_64 XXH_PROCESS4_64
+#  define PROCESS8_64 XXH_PROCESS8_64
+#else
+#  undef XXH_PROCESS1_64
+#  undef XXH_PROCESS4_64
+#  undef XXH_PROCESS8_64
+#endif
+
+XXH_FORCE_INLINE xxh_u64
+XXH64_endian_align(const xxh_u8* input, size_t len, xxh_u64 seed, XXH_alignment align)
+{
+    xxh_u64 h64 = seed + XXH_PRIME64_5;   /* starting value when the input has no full 32-byte stripe */
+    if (input==NULL) XXH_ASSERT(len == 0);
+
+    if (len>=32) {
+        /* Keep looping while 32 bytes remain: input < end-31  <=>  end-input >= 32. */
+        const xxh_u8* const stripeStop = (input + len) - 31;
+        xxh_u64 laneA = seed + XXH_PRIME64_1 + XXH_PRIME64_2;
+        xxh_u64 laneB = seed + XXH_PRIME64_2;
+        xxh_u64 laneC = seed;
+        xxh_u64 laneD = seed - XXH_PRIME64_1;
+
+        do {   /* one stripe per pass: four little-endian 64-bit words, one per lane */
+            laneA = XXH64_round(laneA, XXH_get64bits(input)); input+=8;
+            laneB = XXH64_round(laneB, XXH_get64bits(input)); input+=8;
+            laneC = XXH64_round(laneC, XXH_get64bits(input)); input+=8;
+            laneD = XXH64_round(laneD, XXH_get64bits(input)); input+=8;
+        } while (input<stripeStop);
+
+        /* Converge the four lanes, then merge each lane back in individually. */
+        h64 = XXH_rotl64(laneA, 1) + XXH_rotl64(laneB, 7) + XXH_rotl64(laneC, 12) + XXH_rotl64(laneD, 18);
+        h64 = XXH64_mergeRound(h64, laneA);
+        h64 = XXH64_mergeRound(h64, laneB);
+        h64 = XXH64_mergeRound(h64, laneC);
+        h64 = XXH64_mergeRound(h64, laneD);
+    }
+
+    /* The full length is added before the tail is digested. */
+    h64 += (xxh_u64) len;
+
+    /* input now points at the bytes no stripe covered; the finalizer masks len to its low 5 bits itself. */
+    return XXH64_finalize(h64, input, len, align);
+}
+
+
+/*! @ingroup xxh64_family */
+XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t len, XXH64_hash_t seed)
+{
+#if 0
+    /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
+    XXH64_state_t state;
+    XXH64_reset(&state, seed);
+    XXH64_update(&state, (const xxh_u8*)input, len);
+    return XXH64_digest(&state);
+#else
+    {   const xxh_u8* const bytes = (const xxh_u8*)input;
+
+        /* Aligned fast path: only when alignment checking is enabled and input sits on an 8-byte boundary. */
+        if (XXH_FORCE_ALIGN_CHECK && (((size_t)input) & 7) == 0)
+            return XXH64_endian_align(bytes, len, seed, XXH_aligned);
+        return XXH64_endian_align(bytes, len, seed, XXH_unaligned);
+    }
+#endif
+}
+
+/*******   Hash Streaming   *******/
+
+/*! @ingroup xxh64_family*/
+XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void)
+{   void* const raw = XXH_malloc(sizeof(XXH64_state_t));   /* allocation only: the state is not initialized here */
+    return (XXH64_state_t*)raw;
+}
+/*! @ingroup xxh64_family */
+XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
+{
+    XXH_free(statePtr);   /* hands the state to XXH_free() */
+    return XXH_OK;        /* unconditional: no failure is reported from here */
+}
+
+/*! @ingroup xxh64_family */
+XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dstState, const XXH64_state_t* srcState)
+{   /* Byte-for-byte duplicate of the complete state structure. */
+    XXH_memcpy(dstState, srcState, sizeof(XXH64_state_t));
+}
+
+/*! @ingroup xxh64_family */
+XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, XXH64_hash_t seed)
+{
+    XXH64_state_t fresh;   /* built locally, then memcpy()'d out to avoid strict-aliasing warnings */
+    size_t const liveBytes = sizeof(fresh) - sizeof(fresh.reserved64);   /* size of the state minus its reserved64 field */
+    memset(&fresh, 0, sizeof(fresh));
+    fresh.v[0] = seed + XXH_PRIME64_1 + XXH_PRIME64_2;
+    fresh.v[1] = seed + XXH_PRIME64_2;
+    fresh.v[2] = seed;
+    fresh.v[3] = seed - XXH_PRIME64_1;
+    XXH_memcpy(statePtr, &fresh, liveBytes);   /* do not write into reserved64, might be removed in a future version */
+    return XXH_OK;
+}
+
+/*! @ingroup xxh64_family */
+XXH_PUBLIC_API XXH_errorcode
+XXH64_update (XXH64_state_t* state, const void* input, size_t len)
+{
+    if (input==NULL) {   /* nothing to consume; NULL is only expected together with len == 0 */
+        XXH_ASSERT(len == 0);
+        return XXH_OK;
+    }
+    /* Steps: buffer short input, complete a pending stripe, consume full 32-byte stripes, stash the remainder. */
+    {   const xxh_u8* p = (const xxh_u8*)input;
+        const xxh_u8* const bEnd = p + len;
+
+        state->total_len += len;
+
+        if (state->memsize + len < 32) {  /* fill in tmp buffer */
+            XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, len);
+            state->memsize += (xxh_u32)len;
+            return XXH_OK;
+        }
+
+        if (state->memsize) {   /* some data left from previous update: top it up to one full stripe */
+            XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, 32-state->memsize);
+            state->v[0] = XXH64_round(state->v[0], XXH_readLE64(state->mem64+0));
+            state->v[1] = XXH64_round(state->v[1], XXH_readLE64(state->mem64+1));
+            state->v[2] = XXH64_round(state->v[2], XXH_readLE64(state->mem64+2));
+            state->v[3] = XXH64_round(state->v[3], XXH_readLE64(state->mem64+3));
+            p += 32 - state->memsize;
+            state->memsize = 0;
+        }
+        /* Compare byte counts, not pointers: forming p+32 is undefined when fewer than 32 bytes remain. */
+        if ((size_t)(bEnd - p) >= 32) {
+            const xxh_u8* const limit = bEnd - 32;   /* last position where a whole stripe still fits */
+
+            do {
+                state->v[0] = XXH64_round(state->v[0], XXH_readLE64(p)); p+=8;
+                state->v[1] = XXH64_round(state->v[1], XXH_readLE64(p)); p+=8;
+                state->v[2] = XXH64_round(state->v[2], XXH_readLE64(p)); p+=8;
+                state->v[3] = XXH64_round(state->v[3], XXH_readLE64(p)); p+=8;
+            } while (p<=limit);
+
+        }
+
+        if (p < bEnd) {   /* fewer than 32 bytes left: keep them for the next update or the digest */
+            XXH_memcpy(state->mem64, p, (size_t)(bEnd-p));
+            state->memsize = (unsigned)(bEnd-p);
+        }
+    }
+
+    return XXH_OK;
+}
+
+
+/*! @ingroup xxh64_family */
+XXH_PUBLIC_API XXH64_hash_t XXH64_digest(const XXH64_state_t* state)
+{
+    /* Default for inputs shorter than 32 bytes: v[2] is used as the seed here. */
+    xxh_u64 h64 = state->v[2] + XXH_PRIME64_5;
+
+    if (state->total_len >= 32) {   /* lanes were used: converge them, then merge each one back in */
+        h64 = XXH_rotl64(state->v[0], 1)  + XXH_rotl64(state->v[1], 7)
+            + XXH_rotl64(state->v[2], 12) + XXH_rotl64(state->v[3], 18);
+        h64 = XXH64_mergeRound(h64, state->v[0]);
+        h64 = XXH64_mergeRound(h64, state->v[1]);
+        h64 = XXH64_mergeRound(h64, state->v[2]);
+        h64 = XXH64_mergeRound(h64, state->v[3]);
+    }
+
+    /* Add the total length, then digest the buffered tail (the finalizer keeps only total_len's low 5 bits). */
+    h64 += (xxh_u64) state->total_len;
+    return XXH64_finalize(h64, (const xxh_u8*)state->mem64, (size_t)state->total_len, XXH_aligned);
+}
+
+
+/******* Canonical representation   *******/
+
+/*! @ingroup xxh64_family */
+XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash)
+{
+    XXH64_hash_t const bigEndian = XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(hash) : hash;   /* canonical form is big endian */
+    XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t));
+    XXH_memcpy(dst, &bigEndian, sizeof(*dst));
+}
+
+/*! @ingroup xxh64_family */
+XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src)
+{   /* Inverse of XXH64_canonicalFromHash(): decodes the bytes of *src as a big-endian 64-bit value. */
+    return XXH_readBE64(src);
+}
+
+#ifndef XXH_NO_XXH3
+
+/* *********************************************************************
+*  XXH3
+*  New generation hash designed for speed on small keys and vectorization
+************************************************************************ */
+/*!
+ * @}
+ * @defgroup xxh3_impl XXH3 implementation
+ * @ingroup impl
+ * @{
+ */
+
+/* ===   Compiler specifics   === */
+
+#if ((defined(sun) || defined(__sun)) && __cplusplus) /* Solaris includes __STDC_VERSION__ with C++. Tested with GCC 5.5 */
+#  define XXH_RESTRICT /* disabled: expands to nothing */
+#elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* >= C99 */
+#  define XXH_RESTRICT   restrict   /* the C99 keyword */
+#else
+/* Note: it might be useful to define __restrict or __restrict__ for some C++ compilers */
+#  define XXH_RESTRICT   /* disabled: expands to nothing */
+#endif /* XXH_RESTRICT */
+
+#if (defined(__GNUC__) && (__GNUC__ >= 3))  \
+  || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) \
+  || defined(__clang__)
+#    define XXH_likely(x) __builtin_expect(x, 1)   /* branch hint: x is expected to be 1 (true) */
+#    define XXH_unlikely(x) __builtin_expect(x, 0) /* branch hint: x is expected to be 0 (false) */
+#else
+#    define XXH_likely(x) (x)     /* no hint available: plain pass-through */
+#    define XXH_unlikely(x) (x)   /* no hint available: plain pass-through */
+#endif
+
+#if defined(__GNUC__)   /* GNU-compatible compilers: include the intrinsics header matching the enabled ISA */
+#  if defined(__AVX2__)
+#    include <immintrin.h>
+#  elif defined(__SSE2__)
+#    include <emmintrin.h>
+#  elif defined(__ARM_NEON__) || defined(__ARM_NEON)
+#    define inline __inline__  /* circumvent a clang bug */
+#    include <arm_neon.h>
+#    undef inline   /* the redefinition is only wanted while arm_neon.h is being included */
+#  endif
+#elif defined(_MSC_VER)
+#  include <intrin.h>   /* MSVC: a single header regardless of the enabled ISA */
+#endif
+
+/*
+ * One goal of XXH3 is to make it fast on both 32-bit and 64-bit, while
+ * remaining a true 64-bit/128-bit hash function.
+ *
+ * This is done by prioritizing a subset of 64-bit operations that can be
+ * emulated without too many steps on the average 32-bit machine.
+ *
+ * For example, these two lines seem similar, and run equally fast on 64-bit:
+ *
+ *   xxh_u64 x;
+ *   x ^= (x >> 47); // good
+ *   x ^= (x >> 13); // bad
+ *
+ * However, to a 32-bit machine, there is a major difference.
+ *
+ * x ^= (x >> 47) looks like this:
+ *
+ *   x.lo ^= (x.hi >> (47 - 32));
+ *
+ * while x ^= (x >> 13) looks like this:
+ *
+ *   // note: funnel shifts are not usually cheap.
+ *   x.lo ^= (x.lo >> 13) | (x.hi << (32 - 13));
+ *   x.hi ^= (x.hi >> 13);
+ *
+ * The first one is significantly faster than the second, simply because the
+ * shift is larger than 32. This means:
+ *  - All the bits we need are in the upper 32 bits, so we can ignore the lower
+ *    32 bits in the shift.
+ *  - The shift result will always fit in the lower 32 bits, and therefore,
+ *    we can ignore the upper 32 bits in the xor.
+ *
+ * Thanks to this optimization, XXH3 only requires these features to be efficient:
+ *
+ *  - Usable unaligned access
+ *  - A 32-bit or 64-bit ALU
+ *      - If 32-bit, a decent ADC instruction
+ *  - A 32 or 64-bit multiply with a 64-bit result
+ *  - For the 128-bit variant, a decent byteswap helps short inputs.
+ *
+ * The first two are already required by XXH32, and almost all 32-bit and 64-bit
+ * platforms which can run XXH32 can run XXH3 efficiently.
+ *
+ * Thumb-1, the classic 16-bit only subset of ARM's instruction set, is one
+ * notable exception.
+ *
+ * First of all, Thumb-1 lacks support for the UMULL instruction which
+ * performs the important long multiply. This means numerous __aeabi_lmul
+ * calls.
+ *
+ * Second of all, the 8 functional registers are just not enough.
+ * Setup for __aeabi_lmul, byteshift loads, pointers, and all arithmetic need
+ * Lo registers, and this shuffling results in thousands more MOVs than A32.
+ *
+ * A32 and T32 don't have this limitation. They can access all 14 registers,
+ * do a 32->64 multiply with UMULL, and the flexible operand allowing free
+ * shifts is helpful, too.
+ *
+ * Therefore, we do a quick sanity check.
+ *
+ * If compiling Thumb-1 for a target which supports ARM instructions, we will
+ * emit a warning, as it is not a "sane" platform to compile for.
+ *
+ * Usually, if this happens, it is because of an accident and you probably need
+ * to specify -march, as you likely meant to compile for a newer architecture.
+ *
+ * Credit: large sections of the vectorial and asm source code paths
+ *         have been contributed by @easyaspi314
+ */
+#if defined(__thumb__) && !defined(__thumb2__) && defined(__ARM_ARCH_ISA_ARM)
+#   warning "XXH3 is highly inefficient without ARM or Thumb-2."
+#endif /* Thumb-1 code generation on a target that also supports the ARM instruction set */
+
+/* ==========================================
+ * Vectorization detection
+ * ========================================== */
+
+#ifdef XXH_DOXYGEN
+/*!
+ * @ingroup tuning
+ * @brief Overrides the vectorization implementation chosen for XXH3.
+ *
+ * Can be defined to 0 to disable SIMD or any of the values mentioned in
+ * @ref XXH_VECTOR_TYPE.
+ *
+ * If this is not defined, it uses predefined macros to determine the best
+ * implementation.
+ */
+#  define XXH_VECTOR XXH_SCALAR
+/*!
+ * @ingroup tuning
+ * @brief Possible values for @ref XXH_VECTOR.
+ *
+ * Note that these are actually implemented as macros.
+ *
+ * If this is not defined, it is detected automatically.
+ * @ref XXH_X86DISPATCH overrides this.
+ */
+enum XXH_VECTOR_TYPE /* fake enum */ {
+    XXH_SCALAR = 0,  /*!< Portable scalar version */
+    XXH_SSE2   = 1,  /*!<
+                      * SSE2 for Pentium 4, Opteron, all x86_64.
+                      *
+                      * @note SSE2 is also guaranteed on Windows 10, macOS, and
+                      * Android x86.
+                      */
+    XXH_AVX2   = 2,  /*!< AVX2 for Haswell and Bulldozer */
+    XXH_AVX512 = 3,  /*!< AVX512 for Skylake and Icelake */
+    XXH_NEON   = 4,  /*!< NEON for most ARMv7-A and all AArch64 */
+    XXH_VSX    = 5,  /*!< VSX and ZVector for POWER8/z13 (64-bit) */
+};
+/*!
+ * @ingroup tuning
+ * @brief Selects the minimum alignment for XXH3's accumulators.
+ *
+ * When using SIMD, this should match the alignment required for said vector
+ * type, so, for example, 32 for AVX2.
+ *
+ * Default: Auto detected.
+ */
+#  define XXH_ACC_ALIGN 8
+#endif
+
+/* Actual definitions of the XXH_VECTOR values: plain macros, so they can be compared in #if */
+#ifndef XXH_DOXYGEN
+#  define XXH_SCALAR 0   /* portable scalar version */
+#  define XXH_SSE2   1
+#  define XXH_AVX2   2
+#  define XXH_AVX512 3
+#  define XXH_NEON   4
+#  define XXH_VSX    5   /* VSX and ZVector */
+#endif
+
+#ifndef XXH_VECTOR    /* can be defined on command line */
+#  if defined(__AVX512F__)   /* candidates are tested from the widest x86 vector ISA downwards */
+#    define XXH_VECTOR XXH_AVX512
+#  elif defined(__AVX2__)
+#    define XXH_VECTOR XXH_AVX2
+#  elif defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP == 2))
+#    define XXH_VECTOR XXH_SSE2
+#  elif ( \
+        defined(__ARM_NEON__) || defined(__ARM_NEON) /* gcc */ \
+     || defined(_M_ARM64) || defined(_M_ARM_ARMV7VE) /* msvc */ \
+   ) && ( \
+        defined(_WIN32) || defined(__LITTLE_ENDIAN__) /* little endian only */ \
+    || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) \
+   )
+#    define XXH_VECTOR XXH_NEON
+#  elif ( (defined(__PPC64__) && defined(__POWER8_VECTOR__)) \
+     || (defined(__s390x__) && defined(__VEC__)) ) \
+     && defined(__GNUC__) /* GNU-compatible compilers only, for both POWER8 and s390x. TODO: IBM XL */
+#    define XXH_VECTOR XXH_VSX
+#  else   /* nothing recognised: portable scalar code */
+#    define XXH_VECTOR XXH_SCALAR
+#  endif
+#endif
+
+/*
+ * Controls the alignment of the accumulator,
+ * for compatibility with aligned vector loads, which are usually faster.
+ */
+#ifndef XXH_ACC_ALIGN   /* may be predefined by the build; otherwise derived from the selected vector path */
+#  if defined(XXH_X86DISPATCH)
+#     define XXH_ACC_ALIGN 64  /* for compatibility with avx512 */
+#  elif XXH_VECTOR == XXH_SCALAR  /* scalar */
+#     define XXH_ACC_ALIGN 8
+#  elif XXH_VECTOR == XXH_SSE2  /* sse2 */
+#     define XXH_ACC_ALIGN 16
+#  elif XXH_VECTOR == XXH_AVX2  /* avx2 */
+#     define XXH_ACC_ALIGN 32
+#  elif XXH_VECTOR == XXH_NEON  /* neon */
+#     define XXH_ACC_ALIGN 16
+#  elif XXH_VECTOR == XXH_VSX   /* vsx */
+#     define XXH_ACC_ALIGN 16
+#  elif XXH_VECTOR == XXH_AVX512  /* avx512 */
+#     define XXH_ACC_ALIGN 64
+#  endif  /* no #else: an unrecognised XXH_VECTOR value leaves XXH_ACC_ALIGN undefined */
+#endif
+
+#if defined(XXH_X86DISPATCH) || XXH_VECTOR == XXH_SSE2 \
+    || XXH_VECTOR == XXH_AVX2 || XXH_VECTOR == XXH_AVX512
+#  define XXH_SEC_ALIGN XXH_ACC_ALIGN   /* x86 SIMD and dispatch builds: follows the accumulator alignment */
+#else
+#  define XXH_SEC_ALIGN 8               /* scalar, NEON and VSX builds */
+#endif
+
+/*
+ * UGLY HACK:
+ * GCC usually generates the best code with -O3 for xxHash.
+ *
+ * However, when targeting AVX2, it is overzealous in its unrolling resulting
+ * in code roughly 3/4 the speed of Clang.
+ *
+ * There are other issues, such as GCC splitting _mm256_loadu_si256 into
+ * _mm_loadu_si128 + _mm256_inserti128_si256. This is an optimization which
+ * only applies to Sandy and Ivy Bridge... which don't even support AVX2.
+ *
+ * That is why when compiling the AVX2 version, it is recommended to use either
+ *   -O2 -mavx2 -march=haswell
+ * or
+ *   -O2 -mavx2 -mno-avx256-split-unaligned-load
+ * for decent performance, or to use Clang instead.
+ *
+ * Fortunately, we can control the first one with a pragma that forces GCC into
+ * -O2, but the other one we can't control without "failed to inline always
+ * inline function due to target mismatch" warnings.
+ */
+#if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \
+  && defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
+  && defined(__OPTIMIZE__) && !defined(__OPTIMIZE_SIZE__) /* respect -O0 and -Os */
+#  pragma GCC push_options      /* NOTE(review): a matching pop_options is expected later in the file - not visible in this part */
+#  pragma GCC optimize("-O2")   /* forces -O2 for the code that follows */
+#endif
+
+
+#if XXH_VECTOR == XXH_NEON
+/*
+ * NEON's setup for vmlal_u32 is a little more complicated than it is on
+ * SSE2, AVX2, and VSX.
+ *
+ * While PMULUDQ and VMULEUW both perform a mask, VMLAL.U32 performs an upcast.
+ *
+ * To do the same operation, the 128-bit 'Q' register needs to be split into
+ * two 64-bit 'D' registers, performing this operation::
+ *
+ *   [                a                 |                 b                ]
+ *            |              '---------. .--------'                |
+ *            |                         x                          |
+ *            |              .---------' '--------.                |
+ *   [ a & 0xFFFFFFFF | b & 0xFFFFFFFF ],[    a >> 32     |     b >> 32    ]
+ *
+ * Due to significant changes in aarch64, the fastest method for aarch64 is
+ * completely different than the fastest method for ARMv7-A.
+ *
+ * ARMv7-A treats D registers as unions overlaying Q registers, so modifying
+ * D11 will modify the high half of Q5. This is similar to how modifying AH
+ * will only affect bits 8-15 of AX on x86.
+ *
+ * VZIP takes two registers, and puts even lanes in one register and odd lanes
+ * in the other.
+ *
+ * On ARMv7-A, this strangely modifies both parameters in place instead of
+ * taking the usual 3-operand form.
+ *
+ * Therefore, if we want to do this, we can simply use a D-form VZIP.32 on the
+ * lower and upper halves of the Q register to end up with the high and low
+ * halves where we want - all in one instruction.
+ *
+ *   vzip.32   d10, d11       @ d10 = { d10[0], d11[0] }; d11 = { d10[1], d11[1] }
+ *
+ * Unfortunately we need inline assembly for this: instructions that modify two
+ * registers at once cannot be expressed in GCC's or Clang's IR, so the compilers
+ * have to create a copy.
+ *
+ * aarch64 requires a different approach.
+ *
+ * In order to make it easier to write a decent compiler for aarch64, many
+ * quirks were removed, such as conditional execution.
+ *
+ * NEON was also affected by this.
+ *
+ * aarch64 cannot access the high bits of a Q-form register, and writes to a
+ * D-form register zero the high bits, similar to how writes to W-form scalar
+ * registers (or DWORD registers on x86_64) work.
+ *
+ * The formerly free vget_high intrinsics now require a vext (with a few
+ * exceptions)
+ *
+ * Additionally, VZIP was replaced by ZIP1 and ZIP2, which are the equivalent
+ * of PUNPCKL* and PUNPCKH* in SSE, respectively, in order to only modify one
+ * operand.
+ *
+ * The equivalent of the VZIP.32 on the lower and upper halves would be this
+ * mess:
+ *
+ *   ext     v2.4s, v0.4s, v0.4s, #2 // v2 = { v0[2], v0[3], v0[0], v0[1] }
+ *   zip1    v1.2s, v0.2s, v2.2s     // v1 = { v0[0], v2[0] }
+ *   zip2    v0.2s, v0.2s, v1.2s     // v0 = { v0[1], v2[1] }
+ *
+ * Instead, we use a literal downcast, vmovn_u64 (XTN), and vshrn_n_u64 (SHRN):
+ *
+ *   shrn    v1.2s, v0.2d, #32  // v1 = (uint32x2_t)(v0 >> 32);
+ *   xtn     v0.2s, v0.2d       // v0 = (uint32x2_t)(v0 & 0xFFFFFFFF);
+ *
+ * This is available on ARMv7-A, but is less efficient than a single VZIP.32.
+ */
+
+/*!
+ * Function-like macro; it behaves like this pseudo-code (arguments act as references):
+ * void XXH_SPLIT_IN_PLACE(uint64x2_t &in, uint32x2_t &outLo, uint32x2_t &outHi)
+ * {
+ *     outLo = (uint32x2_t)(in & 0xFFFFFFFF);
+ *     outHi = (uint32x2_t)(in >> 32);
+ *     in = UNDEFINED;   (the vzip variant rewrites in; do not rely on its value afterwards)
+ * }
+ */
+# if !defined(XXH_NO_VZIP_HACK) /* define to disable */ \
+   && defined(__GNUC__) \
+   && !defined(__aarch64__) && !defined(__arm64__) && !defined(_M_ARM64)
+#  define XXH_SPLIT_IN_PLACE(in, outLo, outHi)                                              \
+    do {                                                                                    \
+      /* Undocumented GCC/Clang operand modifier: %e0 = lower D half, %f0 = upper D half */ \
+      /* https://github.com/gcc-mirror/gcc/blob/38cf91e5/gcc/config/arm/arm.c#L22486 */     \
+      /* https://github.com/llvm-mirror/llvm/blob/2c4ca683/lib/Target/ARM/ARMAsmPrinter.cpp#L399 */ \
+      __asm__("vzip.32  %e0, %f0" : "+w" (in));                                             \
+      (outLo) = vget_low_u32 (vreinterpretq_u32_u64(in));                                   \
+      (outHi) = vget_high_u32(vreinterpretq_u32_u64(in));                                   \
+   } while (0)
+# else /* aarch64, non-GNU compilers, or XXH_NO_VZIP_HACK defined: plain intrinsics, in is left untouched */
+#  define XXH_SPLIT_IN_PLACE(in, outLo, outHi)                                            \
+    do {                                                                                  \
+      (outLo) = vmovn_u64    (in);                                                        \
+      (outHi) = vshrn_n_u64  ((in), 32);                                                  \
+    } while (0)
+# endif /* XXH_SPLIT_IN_PLACE */
+#endif  /* XXH_VECTOR == XXH_NEON */
+
+/*
+ * VSX and Z Vector helpers.
+ *
+ * This is very messy, and any pull requests to clean this up are welcome.
+ *
+ * There are a lot of problems with supporting VSX and s390x, due to
+ * inconsistent intrinsics, spotty coverage, and multiple endiannesses.
+ */
+#if XXH_VECTOR == XXH_VSX
+#  if defined(__s390x__)
+#    include <s390intrin.h>
+#  else
+/* gcc's altivec.h can have the unwanted side effect of unconditionally
+ * #define-ing the bool, vector, and pixel keywords,
+ * which breaks programs that already use these names for other purposes.
+ * The section defining these macros is skipped when __APPLE_ALTIVEC__ is defined.
+ * __APPLE_ALTIVEC__ is _generally_ defined automatically by the compiler,
+ * but it seems that, in some cases, it isn't.
+ * Force the macro to be defined, so that these names are left alone.
+ */
+#    if defined(__GNUC__) && !defined(__APPLE_ALTIVEC__)
+#      define __APPLE_ALTIVEC__
+#    endif
+#    include <altivec.h>
+#  endif
+
+typedef __vector unsigned long long xxh_u64x2;   /* vector of unsigned long long lanes (2 x 64-bit, per the name) */
+typedef __vector unsigned char xxh_u8x16;        /* vector of unsigned char lanes (16 x 8-bit, per the name) */
+typedef __vector unsigned xxh_u32x4;             /* vector of unsigned int lanes (4 x 32-bit, per the name) */
+
+# ifndef XXH_VSX_BE   /* may be predefined by the build; otherwise detected from the compiler's endianness macros */
+#  if defined(__BIG_ENDIAN__) \
+  || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+#    define XXH_VSX_BE 1
+#  elif defined(__VEC_ELEMENT_REG_ORDER__) && __VEC_ELEMENT_REG_ORDER__ == __ORDER_BIG_ENDIAN__
+#    warning "-maltivec=be is not recommended. Please use native endianness."
+#    define XXH_VSX_BE 1   /* big-endian vector element order requested on a target not reported as big endian above */
+#  else
+#    define XXH_VSX_BE 0
+#  endif
+# endif /* !defined(XXH_VSX_BE) */
+
+# if XXH_VSX_BE   /* the byte-reversal helper is only needed on big endian */
+#  if defined(__POWER9_VECTOR__) || (defined(__clang__) && defined(__s390x__))
+#    define XXH_vec_revb vec_revb
+#  else
+/*!
+ * A polyfill for POWER9's vec_revb(): reverses the byte order within each 64-bit half.
+ */
+XXH_FORCE_INLINE xxh_u64x2 XXH_vec_revb(xxh_u64x2 val)
+{
+    xxh_u8x16 const vByteSwap = { 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00,
+                                  0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08 };
+    return vec_perm(val, val, vByteSwap);   /* both sources are val, so indices 0x00-0x0F pick bytes of val */
+}
+#  endif
+# endif /* XXH_VSX_BE */
+
+/*!
+ * Performs an unaligned vector load from @p ptr and byte swaps it on big endian.
+ */
+XXH_FORCE_INLINE xxh_u64x2 XXH_vec_loadu(const void *ptr)
+{
+    xxh_u64x2 ret;
+    XXH_memcpy(&ret, ptr, sizeof(xxh_u64x2));   /* copies sizeof(xxh_u64x2) bytes, so ptr needs no particular alignment */
+# if XXH_VSX_BE
+    ret = XXH_vec_revb(ret);   /* reverse the bytes of each 64-bit lane */
+# endif
+    return ret;
+}
+
+/*
+ * vec_mulo and vec_mule are very problematic intrinsics on PowerPC.
+ *
+ * These intrinsics weren't added until GCC 8, despite existing for a while,
+ * and they are endian dependent. Also, their meanings swap depending on the version.
+ * */
+# if defined(__s390x__)
+ /* s390x is always big endian, no issue on this platform */
+#  define XXH_vec_mulo vec_mulo
+#  define XXH_vec_mule vec_mule
+# elif defined(__clang__) && XXH_HAS_BUILTIN(__builtin_altivec_vmuleuw)
+/* Clang has a better way to control this: we can just use the builtin, which doesn't swap. */
+#  define XXH_vec_mulo __builtin_altivec_vmulouw
+#  define XXH_vec_mule __builtin_altivec_vmuleuw
+# else
+/* gcc needs inline assembly */
+/* Adapted from https://github.com/google/highwayhash/blob/master/highwayhash/hh_vsx.h. */
+XXH_FORCE_INLINE xxh_u64x2 XXH_vec_mulo(xxh_u32x4 a, xxh_u32x4 b)
+{
+    xxh_u64x2 result;
+    __asm__("vmulouw %0, %1, %2" : "=v" (result) : "v" (a), "v" (b));   /* emit vmulouw directly instead of going through the intrinsic */
+    return result;
+}
+XXH_FORCE_INLINE xxh_u64x2 XXH_vec_mule(xxh_u32x4 a, xxh_u32x4 b)
+{
+    xxh_u64x2 result;
+    __asm__("vmuleuw %0, %1, %2" : "=v" (result) : "v" (a), "v" (b));   /* emit vmuleuw directly instead of going through the intrinsic */
+    return result;
+}
+# endif /* XXH_vec_mulo, XXH_vec_mule */
+#endif /* XXH_VECTOR == XXH_VSX */
+
+
+/* XXH_PREFETCH(ptr): prefetch hint for the memory at ptr.
+ * It can be disabled by defining the XXH_NO_PREFETCH build macro. */
+#if defined(XXH_NO_PREFETCH)
+#  define XXH_PREFETCH(ptr)  (void)(ptr)  /* disabled: only evaluates ptr */
+#else
+#  if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))  /* _mm_prefetch() not defined outside of x86/x64 */
+#    include <mmintrin.h>   /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
+#    define XXH_PREFETCH(ptr)  _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
+#  elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
+#    define XXH_PREFETCH(ptr)  __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
+#  else
+#    define XXH_PREFETCH(ptr) (void)(ptr)  /* disabled: no prefetch primitive selected for this compiler */
+#  endif
+#endif  /* XXH_NO_PREFETCH */
+
+
+/* ==========================================
+ * XXH3 default settings
+ * ========================================== */
+
+#define XXH_SECRET_DEFAULT_SIZE 192   /* size of XXH3_kSecret in bytes; must be >= XXH3_SECRET_SIZE_MIN */
+
+#if (XXH_SECRET_DEFAULT_SIZE < XXH3_SECRET_SIZE_MIN)   /* compile-time enforcement of that minimum */
+#  error "default keyset is not large enough"
+#endif
+
+/*! Default secret: XXH_SECRET_DEFAULT_SIZE pseudorandom bytes taken directly from FARSH, 64-byte aligned. */
+XXH_ALIGN(64) static const xxh_u8 XXH3_kSecret[XXH_SECRET_DEFAULT_SIZE] = {
+    0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c,
+    0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f,
+    0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21,
+    0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c,
+    0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3,
+    0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8,
+    0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d,
+    0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64,
+    0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb,
+    0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e,
+    0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce,
+    0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e,
+};
+
+
+#ifdef XXH_OLD_NAMES
+#  define kSecret XXH3_kSecret   /* unprefixed alias, provided only when XXH_OLD_NAMES is defined */
+#endif /* XXH_OLD_NAMES */
+
+#ifdef XXH_DOXYGEN
+/*!
+ * @brief Multiplies two 32-bit values to produce a full 64-bit product.
+ *
+ * Implemented as a macro; the function below exists only for the documentation build.
+ *
+ * Wraps `__emulu` on MSVC x86 because MSVC tends to call `__allmul` when it doesn't
+ * need to (but it shouldn't need to anyways, it is about 7 instructions to do
+ * a 64x64 multiply...). Since we know that `__emulu` will _always_ emit `MULL`, we
+ * use that instead of the normal method.
+ *
+ * If you are compiling for platforms like Thumb-1 and don't have a better option,
+ * you may also want to write your own long multiply routine here.
+ *
+ * @param x, y Numbers to be multiplied; only their low 32 bits are used
+ * @return 64-bit product of the low 32 bits of @p x and @p y.
+ */
+XXH_FORCE_INLINE xxh_u64
+XXH_mult32to64(xxh_u64 x, xxh_u64 y)
+{
+   return (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF);
+}
+#elif defined(_MSC_VER) && defined(_M_IX86)
+#    include <intrin.h>
+#    define XXH_mult32to64(x, y) __emulu((unsigned)(x), (unsigned)(y))
+#else
+/*
+ * Downcast + upcast is usually better than masking on older compilers like
+ * GCC 4.2 (especially 32-bit ones), all without affecting newer compilers.
+ *
+ * The other method, (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF), will AND both operands
+ * and perform a full 64x64 multiply -- entirely redundant on 32-bit.
+ */
+#    define XXH_mult32to64(x, y) ((xxh_u64)(xxh_u32)(x) * (xxh_u64)(xxh_u32)(y))
+#endif /* XXH_mult32to64 */
+
+/*!
+ * @brief Calculates a 64->128-bit long multiply.
+ *
+ * Uses `__uint128_t` and `_umul128` if available, otherwise uses a scalar
+ * version.
+ *
+ * @param lhs , rhs The 64-bit integers to be multiplied
+ * @return The 128-bit result represented in an @ref XXH128_hash_t.
+ */
+static XXH128_hash_t
+XXH_mult64to128(xxh_u64 lhs, xxh_u64 rhs)
+{
+    /*
+     * GCC/Clang __uint128_t method.
+     *
+     * On most 64-bit targets, GCC and Clang define a __uint128_t type.
+     * This is usually the best way as it usually uses a native long 64-bit
+     * multiply, such as MULQ on x86_64 or MUL + UMULH on aarch64.
+     *
+     * Usually.
+     *
+     * The exception is wasm: although it is a 32-bit platform, Clang (and
+     * emscripten) define this type without having the arithmetic for it. This
+     * results in a slow compiler builtin call which calculates a full 128-bit
+     * multiply, so the portable version is used there instead (!__wasm__ below).
+     * https://github.com/Cyan4973/xxHash/issues/211#issuecomment-515575677
+     */
+#if defined(__GNUC__) && !defined(__wasm__) \
+    && defined(__SIZEOF_INT128__) \
+    || (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128)   /* && binds tighter than ||: this is a separate alternative */
+
+    __uint128_t const product = (__uint128_t)lhs * (__uint128_t)rhs;
+    XXH128_hash_t r128;
+    r128.low64  = (xxh_u64)(product);
+    r128.high64 = (xxh_u64)(product >> 64);
+    return r128;
+
+    /*
+     * MSVC for x64's _umul128 method.
+     *
+     * xxh_u64 _umul128(xxh_u64 Multiplier, xxh_u64 Multiplicand, xxh_u64 *HighProduct);
+     *
+     * This compiles to single operand MUL on x64.
+     */
+#elif defined(_M_X64) || defined(_M_IA64)
+
+#ifndef _MSC_VER
+#   pragma intrinsic(_umul128)
+#endif /* NOTE(review): the pragma is emitted only when _MSC_VER is NOT defined - confirm this is intended */
+    xxh_u64 product_high;
+    xxh_u64 const product_low = _umul128(lhs, rhs, &product_high);
+    XXH128_hash_t r128;
+    r128.low64  = product_low;
+    r128.high64 = product_high;
+    return r128;
+
+    /*
+     * MSVC for ARM64's __umulh method.
+     *
+     * This compiles to the same MUL + UMULH as GCC/Clang's __uint128_t method.
+     */
+#elif defined(_M_ARM64)
+
+#ifndef _MSC_VER
+#   pragma intrinsic(__umulh)
+#endif /* NOTE(review): same !_MSC_VER guard as the _umul128 branch - confirm this is intended */
+    XXH128_hash_t r128;
+    r128.low64  = lhs * rhs;
+    r128.high64 = __umulh(lhs, rhs);
+    return r128;
+
+#else
+    /*
+     * Portable scalar method. Optimized for 32-bit and 64-bit ALUs.
+     *
+     * This is a fast and simple grade school multiply, which is shown below
+     * with base 10 arithmetic instead of base 0x100000000.
+     *
+     *           9 3 // D2 lhs = 93
+     *         x 7 5 // D2 rhs = 75
+     *     ----------
+     *           1 5 // D2 lo_lo = (93 % 10) * (75 % 10) = 15
+     *         4 5 | // D2 hi_lo = (93 / 10) * (75 % 10) = 45
+     *         2 1 | // D2 lo_hi = (93 % 10) * (75 / 10) = 21
+     *     + 6 3 | | // D2 hi_hi = (93 / 10) * (75 / 10) = 63
+     *     ---------
+     *         2 7 | // D2 cross = (15 / 10) + (45 % 10) + 21 = 27
+     *     + 6 7 | | // D2 upper = (27 / 10) + (45 / 10) + 63 = 67
+     *     ---------
+     *       6 9 7 5 // D4 res = (27 * 10) + (15 % 10) + (67 * 100) = 6975
+     *
+     * The reasons for adding the products like this are:
+     *  1. It avoids manual carry tracking. Just like how
+     *     (9 * 9) + 9 + 9 = 99, the same applies with this for UINT64_MAX.
+     *     This avoids a lot of complexity.
+     *
+     *  2. It hints for, and on Clang, compiles to, the powerful UMAAL
+     *     instruction available in ARM's Digital Signal Processing extension
+     *     in 32-bit ARMv6 and later, which is shown below:
+     *
+     *         void UMAAL(xxh_u32 *RdLo, xxh_u32 *RdHi, xxh_u32 Rn, xxh_u32 Rm)
+     *         {
+     *             xxh_u64 product = (xxh_u64)*RdLo * (xxh_u64)*RdHi + Rn + Rm;
+     *             *RdLo = (xxh_u32)(product & 0xFFFFFFFF);
+     *             *RdHi = (xxh_u32)(product >> 32);
+     *         }
+     *
+     *     This instruction was designed for efficient long multiplication, and
+     *     allows this to be calculated in only 4 instructions at speeds
+     *     comparable to some 64-bit ALUs.
+     *
+     *  3. It isn't terrible on other platforms. Usually this will be a couple
+     *     of 32-bit ADD/ADCs.
+     */
+
+    /* First calculate all of the cross products. */
+    xxh_u64 const lo_lo = XXH_mult32to64(lhs & 0xFFFFFFFF, rhs & 0xFFFFFFFF);
+    xxh_u64 const hi_lo = XXH_mult32to64(lhs >> 32,        rhs & 0xFFFFFFFF);
+    xxh_u64 const lo_hi = XXH_mult32to64(lhs & 0xFFFFFFFF, rhs >> 32);
+    xxh_u64 const hi_hi = XXH_mult32to64(lhs >> 32,        rhs >> 32);
+
+    /* Now add the products together. These will never overflow. */
+    xxh_u64 const cross = (lo_lo >> 32) + (hi_lo & 0xFFFFFFFF) + lo_hi;
+    xxh_u64 const upper = (hi_lo >> 32) + (cross >> 32)        + hi_hi;
+    xxh_u64 const lower = (cross << 32) | (lo_lo & 0xFFFFFFFF);
+
+    XXH128_hash_t r128;
+    r128.low64  = lower;
+    r128.high64 = upper;
+    return r128;
+#endif
+}
+
+/*!
+ * @brief Calculates a 64-bit to 128-bit multiply, then XOR folds it to 64 bits.
+ *
+ * The reason for the separate function is to prevent passing too many structs
+ * around by value. This will hopefully inline the multiply, but we don't force it.
+ *
+ * @param lhs , rhs The 64-bit integers to multiply
+ * @return The low 64 bits of the product XOR'd by the high 64 bits.
+ * @see XXH_mult64to128()
+ */
+static xxh_u64
+XXH3_mul128_fold64(xxh_u64 lhs, xxh_u64 rhs)
+{
+    XXH128_hash_t product = XXH_mult64to128(lhs, rhs);   /* full 128-bit product, returned as two 64-bit halves */
+    return product.low64 ^ product.high64;               /* the fold: XOR of the two halves */
+}
+
+/*! Returns v64 ^ (v64 >> shift). As a helper, it seems to produce slightly better code on GCC for some reason. */
+XXH_FORCE_INLINE xxh_u64 XXH_xorshift64(xxh_u64 v64, int shift)
+{
+    XXH_ASSERT(0 <= shift && shift < 64);   /* a shift count outside [0, 63] is undefined for a 64-bit operand */
+    return v64 ^ (v64 >> shift);
+}
+
+/*
+ * This is a fast avalanche stage (xorshift, multiply, xorshift),
+ * suitable when input bits are already partially mixed.
+ */
+static XXH64_hash_t XXH3_avalanche(xxh_u64 h64)
+{
+    h64 = XXH_xorshift64(h64, 37);   /* h64 ^= h64 >> 37 */
+    h64 *= 0x165667919E3779F9ULL;    /* 64-bit multiply; unsigned arithmetic wraps */
+    h64 = XXH_xorshift64(h64, 32);   /* h64 ^= h64 >> 32 */
+    return h64;
+}
+
+/*
+ * This is a stronger avalanche,
+ * inspired by Pelle Evensen's rrmxmx,
+ * preferable when input has not been previously mixed.
+ */
+static XXH64_hash_t XXH3_rrmxmx(xxh_u64 h64, xxh_u64 len)
+{
+    /* steps: two rotate-XORs, multiply, shift-XOR with len added in, multiply, final xorshift */
+    h64 ^= XXH_rotl64(h64, 49) ^ XXH_rotl64(h64, 24);
+    h64 *= 0x9FB21C651E98DF25ULL;
+    h64 ^= (h64 >> 35) + len ;   /* len is added to the shifted value before the XOR */
+    h64 *= 0x9FB21C651E98DF25ULL;   /* same multiplier as above */
+    return XXH_xorshift64(h64, 28);
+}
+
+
+/* ==========================================
+ * Short keys
+ * ==========================================
+ * One of the shortcomings of XXH32 and XXH64 was that their performance was
+ * sub-optimal on short lengths. They used an iterative algorithm which strongly
+ * favored lengths that were a multiple of 4 or 8.
+ *
+ * Instead of iterating over individual inputs, we use a set of single shot
+ * functions which piece together a range of lengths and operate in constant time.
+ *
+ * Additionally, the number of multiplies has been significantly reduced. This
+ * reduces latency, especially when emulating 64-bit multiplies on 32-bit.
+ *
+ * Depending on the platform, this may or may not be faster than XXH32, but it
+ * is almost guaranteed to be faster than XXH64.
+ */
+
+/*
+ * At very short lengths, there isn't enough input to fully hide secrets, or use
+ * the entire secret.
+ *
+ * There is also only a limited amount of mixing we can do before significantly
+ * impacting performance.
+ *
+ * Therefore, we use different sections of the secret and always mix two secret
+ * samples with an XOR. This should have no effect on performance on the
+ * seedless or withSeed variants because everything _should_ be constant folded
+ * by modern compilers.
+ *
+ * The XOR mixing hides individual parts of the secret and increases entropy.
+ *
+ * This adds an extra layer of strength for custom secrets.
+ */
+XXH_FORCE_INLINE XXH64_hash_t
+XXH3_len_1to3_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
+{   /* Hashes a 1- to 3-byte input; reads the first 8 bytes of secret. */
+    XXH_ASSERT(input != NULL);
+    XXH_ASSERT(1 <= len && len <= 3);
+    XXH_ASSERT(secret != NULL);
+    /* Bytes of combined, least significant first:
+     * len = 1: combined = { input[0], 0x01, input[0], input[0] }
+     * len = 2: combined = { input[1], 0x02, input[0], input[1] }
+     * len = 3: combined = { input[2], 0x03, input[0], input[1] }
+     */
+    {   xxh_u8  const c1 = input[0];
+        xxh_u8  const c2 = input[len >> 1];   /* input[0] when len == 1, input[1] when len is 2 or 3 */
+        xxh_u8  const c3 = input[len - 1];    /* last byte */
+        xxh_u32 const combined = ((xxh_u32)c1 << 16) | ((xxh_u32)c2  << 24)
+                               | ((xxh_u32)c3 <<  0) | ((xxh_u32)len << 8);
+        xxh_u64 const bitflip = (XXH_readLE32(secret) ^ XXH_readLE32(secret+4)) + seed;   /* two 32-bit secret words XORed, plus the seed */
+        xxh_u64 const keyed = (xxh_u64)combined ^ bitflip;
+        return XXH64_avalanche(keyed);   /* note: XXH64_avalanche, not XXH3_avalanche */
+    }
+}
+
+XXH_FORCE_INLINE XXH64_hash_t
+XXH3_len_4to8_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
+{   /* Hashes a 4- to 8-byte input; reads secret bytes 8..23. */
+    XXH_ASSERT(input != NULL);
+    XXH_ASSERT(secret != NULL);
+    XXH_ASSERT(4 <= len && len <= 8);
+    seed ^= (xxh_u64)XXH_swap32((xxh_u32)seed) << 32;   /* XOR the byte-swapped low 32 bits of seed into its high half */
+    {   xxh_u32 const input1 = XXH_readLE32(input);             /* first 4 bytes */
+        xxh_u32 const input2 = XXH_readLE32(input + len - 4);   /* last 4 bytes; overlaps input1 when len < 8 */
+        xxh_u64 const bitflip = (XXH_readLE64(secret+8) ^ XXH_readLE64(secret+16)) - seed;
+        xxh_u64 const input64 = input2 + (((xxh_u64)input1) << 32);   /* input1 in the high half, input2 in the low half */
+        xxh_u64 const keyed = input64 ^ bitflip;
+        return XXH3_rrmxmx(keyed, len);
+    }
+}
+
+XXH_FORCE_INLINE XXH64_hash_t
+XXH3_len_9to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
+{   /* Hashes a 9- to 16-byte input; reads secret bytes 24..55. */
+    XXH_ASSERT(input != NULL);
+    XXH_ASSERT(secret != NULL);
+    XXH_ASSERT(9 <= len && len <= 16);
+    {   xxh_u64 const bitflip1 = (XXH_readLE64(secret+24) ^ XXH_readLE64(secret+32)) + seed;   /* seed is added here ... */
+        xxh_u64 const bitflip2 = (XXH_readLE64(secret+40) ^ XXH_readLE64(secret+48)) - seed;   /* ... and subtracted here */
+        xxh_u64 const input_lo = XXH_readLE64(input)           ^ bitflip1;   /* first 8 bytes, keyed */
+        xxh_u64 const input_hi = XXH_readLE64(input + len - 8) ^ bitflip2;   /* last 8 bytes, keyed; overlaps the first 8 when len < 16 */
+        xxh_u64 const acc = len
+                          + XXH_swap64(input_lo) + input_hi
+                          + XXH3_mul128_fold64(input_lo, input_hi);
+        return XXH3_avalanche(acc);
+    }
+}
+
+XXH_FORCE_INLINE XXH64_hash_t
+XXH3_len_0to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
+{   /* Dispatches inputs of 0 to 16 bytes to the helper for their length range. */
+    XXH_ASSERT(len <= 16);
+    {   if (XXH_likely(len >  8)) return XXH3_len_9to16_64b(input, len, secret, seed);
+        if (XXH_likely(len >= 4)) return XXH3_len_4to8_64b(input, len, secret, seed);
+        if (len) return XXH3_len_1to3_64b(input, len, secret, seed);
+        return XXH64_avalanche(seed ^ (XXH_readLE64(secret+56) ^ XXH_readLE64(secret+64)));   /* len == 0: input is not read; result depends on seed and secret only */
+    }
+}
+
+/*
+ * DISCLAIMER: There are known *seed-dependent* multicollisions here due to
+ * multiplication by zero, affecting hashes of lengths 17 to 240.
+ *
+ * However, they are very unlikely.
+ *
+ * Keep this in mind when using the unseeded XXH3_64bits() variant: As with all
+ * unseeded non-cryptographic hashes, it does not attempt to defend itself
+ * against specially crafted inputs, only random inputs.
+ *
+ * Compared to classic UMAC where a 1 in 2^31 chance of 4 consecutive bytes
+ * cancelling out the secret is taken an arbitrary number of times (addressed
+ * in XXH3_accumulate_512), this collision is very unlikely with random inputs
+ * and/or proper seeding:
+ *
+ * This only has a 1 in 2^63 chance of 8 consecutive bytes cancelling out, in a
+ * function that is only called up to 16 times per hash with up to 240 bytes of
+ * input.
+ *
+ * This is not too bad for a non-cryptographic hash function, especially with
+ * only 64 bit outputs.
+ *
+ * The 128-bit variant (which trades some speed for strength) is NOT affected
+ * by this, although it is always a good idea to use a proper seed if you care
+ * about strength.
+ */
+/*
+ * Mixes 16 bytes of input with 16 bytes of secret.
+ *
+ * The low 8 input bytes are XORed with (secret[0..7] + seed64), the high
+ * 8 input bytes with (secret[8..15] - seed64); the two keyed words are then
+ * combined by XXH3_mul128_fold64().
+ */
+XXH_FORCE_INLINE xxh_u64 XXH3_mix16B(const xxh_u8* XXH_RESTRICT input,
+                                     const xxh_u8* XXH_RESTRICT secret, xxh_u64 seed64)
+{
+#if defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
+  && defined(__i386__) && defined(__SSE2__)  /* x86 + SSE2 */ \
+  && !defined(XXH_ENABLE_AUTOVECTORIZE)      /* Define to disable like XXH32 hack */
+    /*
+     * UGLY HACK:
+     * GCC for x86 tends to autovectorize the 128-bit multiply, which yields
+     * slower code.
+     *
+     * Forcing seed64 into a register disrupts the cost model and makes GCC
+     * scalarize. See `XXH32_round()`
+     *
+     * FIXME: Clang's output is still _much_ faster -- On an AMD Ryzen 3600,
+     * XXH3_64bits @ len=240 runs at 4.6 GB/s with Clang 9, but 3.3 GB/s on
+     * GCC 9.2, despite both emitting scalar code.
+     *
+     * GCC generates much better scalar code than Clang for the rest of XXH3,
+     * which is why finding a more optimal codepath is of interest.
+     */
+    XXH_COMPILER_GUARD(seed64);
+#endif
+    {   xxh_u64 const dataLo  = XXH_readLE64(input);
+        xxh_u64 const dataHi  = XXH_readLE64(input+8);
+        xxh_u64 const keyedLo = dataLo ^ (XXH_readLE64(secret)   + seed64);
+        xxh_u64 const keyedHi = dataHi ^ (XXH_readLE64(secret+8) - seed64);
+        return XXH3_mul128_fold64(keyedLo, keyedHi);
+    }
+}
+
+/* For mid range keys, XXH3 uses a Mum-hash variant. */
+/*
+ * Hash for inputs of 17 to 128 bytes.
+ *
+ * Starting from len * XXH_PRIME64_1, pairs of 16-byte blocks are mixed in with
+ * XXH3_mix16B(): one block counted from the start of the input and one counted
+ * from its end, each pair using its own 32 bytes of secret. Longer inputs
+ * enable more pairs (thresholds at 32, 64 and 96 bytes). The sum is avalanched.
+ *
+ * secretSize is only checked by an assertion.
+ */
+XXH_FORCE_INLINE XXH64_hash_t
+XXH3_len_17to128_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
+                     const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
+                     XXH64_hash_t seed)
+{
+    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
+    XXH_ASSERT(16 < len && len <= 128);
+
+    {   xxh_u64 sum = len * XXH_PRIME64_1;
+
+        if (len > 96) {   /* 4th pair */
+            sum += XXH3_mix16B(input+48, secret+96, seed);
+            sum += XXH3_mix16B(input+len-64, secret+112, seed);
+        }
+        if (len > 64) {   /* 3rd pair */
+            sum += XXH3_mix16B(input+32, secret+64, seed);
+            sum += XXH3_mix16B(input+len-48, secret+80, seed);
+        }
+        if (len > 32) {   /* 2nd pair */
+            sum += XXH3_mix16B(input+16, secret+32, seed);
+            sum += XXH3_mix16B(input+len-32, secret+48, seed);
+        }
+        /* 1st pair: always processed */
+        sum += XXH3_mix16B(input+0, secret+0, seed);
+        sum += XXH3_mix16B(input+len-16, secret+16, seed);
+
+        return XXH3_avalanche(sum);
+    }
+}
+
+#define XXH3_MIDSIZE_MAX 240   /* largest input length accepted by XXH3_len_129to240_64b() */
+
+XXH_NO_INLINE XXH64_hash_t
+XXH3_len_129to240_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
+                      const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
+                      XXH64_hash_t seed)
+{   /*
+     * Hash for inputs of 129 to XXH3_MIDSIZE_MAX bytes.
+     *
+     * The first 8 blocks of 16 bytes are mixed with secret bytes [0, 128) and
+     * the running sum is avalanched. Every further complete 16-byte block is
+     * mixed with the secret restarted at XXH3_MIDSIZE_STARTOFFSET, and the
+     * last 16 bytes of the input are mixed with the 16 secret bytes starting
+     * at XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET. The final sum is
+     * avalanched again.
+     *
+     * secretSize is only checked by an assertion.
+     */
+    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
+    XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);
+
+    #define XXH3_MIDSIZE_STARTOFFSET 3    /* secret byte offset used by rounds 8 and up */
+    #define XXH3_MIDSIZE_LASTOFFSET  17   /* distance from XXH3_SECRET_SIZE_MIN for the final block */
+
+    {   xxh_u64 acc = len * XXH_PRIME64_1;
+        int const nbRounds = (int)len / 16;   /* number of complete 16-byte blocks */
+        int i;
+        for (i=0; i<8; i++) {
+            acc += XXH3_mix16B(input+(16*i), secret+(16*i), seed);
+        }
+        acc = XXH3_avalanche(acc);
+        XXH_ASSERT(nbRounds >= 8);
+#if defined(__clang__)                                /* Clang */ \
+    && (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */ \
+    && !defined(XXH_ENABLE_AUTOVECTORIZE)             /* Define to disable */
+        /*
+         * UGLY HACK:
+         * Clang for ARMv7-A tries to vectorize this loop, similar to GCC x86.
+         * Everywhere else, it uses scalar code.
+         *
+         * For 64->128-bit multiplies, even if the NEON was 100% optimal, it
+         * would still be slower than UMAAL (see XXH_mult64to128).
+         *
+         * Unfortunately, Clang doesn't handle the long multiplies properly and
+         * converts them to the nonexistent "vmulq_u64" intrinsic, which is then
+         * scalarized into an ugly mess of VMOV.32 instructions.
+         *
+         * This mess is difficult to avoid without turning autovectorization
+         * off completely, but they are usually relatively minor and/or not
+         * worth it to fix.
+         *
+         * This loop is the easiest to fix, as unlike XXH32, this pragma
+         * _actually works_ because it is a loop vectorization instead of an
+         * SLP vectorization.
+         */
+        #pragma clang loop vectorize(disable)
+#endif
+        for (i=8 ; i < nbRounds; i++) {
+            acc += XXH3_mix16B(input+(16*i), secret+(16*(i-8)) + XXH3_MIDSIZE_STARTOFFSET, seed);
+        }
+        /* last bytes: the final 16 bytes of input, which may overlap the last full block */
+        acc += XXH3_mix16B(input + len - 16, secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET, seed);
+        return XXH3_avalanche(acc);
+    }
+}
+
+
+/* =======     Long Keys     ======= */
+
+#define XXH_STRIPE_LEN 64   /* nb of input bytes consumed by one accumulate_512 call */
+#define XXH_SECRET_CONSUME_RATE 8   /* nb of secret bytes consumed at each accumulation */
+#define XXH_ACC_NB (XXH_STRIPE_LEN / sizeof(xxh_u64))   /* nb of 64-bit accumulator lanes */
+
+#ifdef XXH_OLD_NAMES   /* unprefixed aliases, only defined on request */
+#  define STRIPE_LEN XXH_STRIPE_LEN
+#  define ACC_NB XXH_ACC_NB
+#endif
+
+/*
+ * Stores v64 at dst in little-endian byte order. The value is byte-swapped
+ * first when the CPU is not little-endian; the store goes through XXH_memcpy,
+ * so dst is accessed bytewise rather than through a typed pointer.
+ */
+XXH_FORCE_INLINE void XXH_writeLE64(void* dst, xxh_u64 v64)
+{
+    xxh_u64 const le64 = XXH_CPU_LITTLE_ENDIAN ? v64 : XXH_swap64(v64);
+    XXH_memcpy(dst, &le64, sizeof(le64));
+}
+
+/* Several intrinsic functions below are supposed to accept __int64 as argument,
+ * as documented in https://software.intel.com/sites/landingpage/IntrinsicsGuide/ .
+ * However, several environments do not define __int64 type,
+ * requiring a workaround.
+ */
+#if !defined (__VMS) \
+  && (defined (__cplusplus) \
+  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+    typedef int64_t xxh_i64;   /* signed 64-bit type passed to the intrinsics below */
+#else
+    /* Fallback for VMS and for compilers that are neither C++ nor C99:
+     * the following type must have a width of 64-bit */
+    typedef long long xxh_i64;
+#endif
+
+/*
+ * XXH3_accumulate_512 is the tightest loop for long inputs, and it is the most optimized.
+ *
+ * It is a hardened version of UMAC, based off of FARSH's implementation.
+ *
+ * This was chosen because it adapts quite well to 32-bit, 64-bit, and SIMD
+ * implementations, and it is ridiculously fast.
+ *
+ * We harden it by mixing the original input to the accumulators as well as the product.
+ *
+ * This means that in the (relatively likely) case of a multiply by zero, the
+ * original input is preserved.
+ *
+ * On 128-bit inputs, we swap 64-bit pairs when we add the input to improve
+ * cross-pollination, as otherwise the upper and lower halves would be
+ * essentially independent.
+ *
+ * This doesn't matter on 64-bit hashes since they all get merged together in
+ * the end, so we skip the extra step.
+ *
+ * Both XXH3_64bits and XXH3_128bits use this subroutine.
+ */
+
+#if (XXH_VECTOR == XXH_AVX512) \
+     || (defined(XXH_DISPATCH_AVX512) && XXH_DISPATCH_AVX512 != 0)
+
+#ifndef XXH_TARGET_AVX512
+# define XXH_TARGET_AVX512  /* disable attribute target */
+#endif
+
+XXH_FORCE_INLINE XXH_TARGET_AVX512 void
+XXH3_accumulate_512_avx512(void* XXH_RESTRICT acc,
+                     const void* XXH_RESTRICT input,
+                     const void* XXH_RESTRICT secret)
+{   /*
+     * AVX-512 accumulation step: folds one XXH_STRIPE_LEN-byte stripe of
+     * `input`, keyed with the same number of bytes of `secret`, into `acc`.
+     * The whole stripe fits in a single 512-bit vector (see static assert).
+     * `acc` is asserted to be 64-byte aligned; `input` and `secret` are read
+     * with unaligned loads.
+     */
+    __m512i* const xacc = (__m512i *) acc;
+    XXH_ASSERT((((size_t)acc) & 63) == 0);
+    XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i));
+
+    {
+        /* data_vec    = input[0]; */
+        __m512i const data_vec    = _mm512_loadu_si512   (input);
+        /* key_vec     = secret[0]; */
+        __m512i const key_vec     = _mm512_loadu_si512   (secret);
+        /* data_key    = data_vec ^ key_vec; */
+        __m512i const data_key    = _mm512_xor_si512     (data_vec, key_vec);
+        /* data_key_lo = data_key >> 32; (upper half of each 64-bit lane moved to its lower half) */
+        __m512i const data_key_lo = _mm512_shuffle_epi32 (data_key, (_MM_PERM_ENUM)_MM_SHUFFLE(0, 3, 0, 1));
+        /* product     = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */
+        __m512i const product     = _mm512_mul_epu32     (data_key, data_key_lo);
+        /* xacc[0] += swap(data_vec); (adjacent 64-bit lanes of the raw input exchanged) */
+        __m512i const data_swap = _mm512_shuffle_epi32(data_vec, (_MM_PERM_ENUM)_MM_SHUFFLE(1, 0, 3, 2));
+        __m512i const sum       = _mm512_add_epi64(*xacc, data_swap);
+        /* xacc[0] += product; */
+        *xacc = _mm512_add_epi64(product, sum);
+    }
+}
+
+/*
+ * XXH3_scrambleAcc: Scrambles the accumulators to improve mixing.
+ *
+ * Multiplication isn't perfect, as explained by Google in HighwayHash:
+ *
+ *  // Multiplication mixes/scrambles bytes 0-7 of the 64-bit result to
+ *  // varying degrees. In descending order of goodness, bytes
+ *  // 3 4 2 5 1 6 0 7 have quality 228 224 164 160 100 96 36 32.
+ *  // As expected, the upper and lower bytes are much worse.
+ *
+ * Source: https://github.com/google/highwayhash/blob/0aaf66b/highwayhash/hh_avx2.h#L291
+ *
+ * Since our algorithm uses a pseudorandom secret to add some variance into the
+ * mix, we don't need to (or want to) mix as often or as much as HighwayHash does.
+ *
+ * This isn't as tight as XXH3_accumulate, but still written in SIMD to avoid
+ * extraction.
+ *
+ * Both XXH3_64bits and XXH3_128bits use this subroutine.
+ */
+
+XXH_FORCE_INLINE XXH_TARGET_AVX512 void
+XXH3_scrambleAcc_avx512(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
+{   /*
+     * AVX-512 scramble step. For every 64-bit lane of `acc`:
+     *   lane ^= lane >> 47;  lane ^= secret lane;  lane *= XXH_PRIME32_1;
+     * `acc` is asserted to be 64-byte aligned; `secret` is read with an
+     * unaligned load.
+     */
+    XXH_ASSERT((((size_t)acc) & 63) == 0);
+    XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i));
+    {   __m512i* const xacc = (__m512i*) acc;
+        const __m512i prime32 = _mm512_set1_epi32((int)XXH_PRIME32_1);
+
+        /* xacc[0] ^= (xacc[0] >> 47) */
+        __m512i const acc_vec     = *xacc;
+        __m512i const shifted     = _mm512_srli_epi64    (acc_vec, 47);
+        __m512i const data_vec    = _mm512_xor_si512     (acc_vec, shifted);
+        /* xacc[0] ^= secret; */
+        __m512i const key_vec     = _mm512_loadu_si512   (secret);
+        __m512i const data_key    = _mm512_xor_si512     (data_vec, key_vec);
+
+        /* xacc[0] *= XXH_PRIME32_1;
+         * built from two 32x32->64 products: (lo32 * prime) + ((hi32 * prime) << 32) */
+        __m512i const data_key_hi = _mm512_shuffle_epi32 (data_key, (_MM_PERM_ENUM)_MM_SHUFFLE(0, 3, 0, 1));
+        __m512i const prod_lo     = _mm512_mul_epu32     (data_key, prime32);
+        __m512i const prod_hi     = _mm512_mul_epu32     (data_key_hi, prime32);
+        *xacc = _mm512_add_epi64(prod_lo, _mm512_slli_epi64(prod_hi, 32));
+    }
+}
+
+XXH_FORCE_INLINE XXH_TARGET_AVX512 void
+XXH3_initCustomSecret_avx512(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
+{   /*
+     * Fills `customSecret` (XXH_SECRET_DEFAULT_SIZE bytes, asserted 64-byte
+     * aligned) with XXH3_kSecret, adding seed64 to the even 64-bit lanes and
+     * subtracting it from the odd ones.
+     */
+    XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 63) == 0);
+    XXH_STATIC_ASSERT(XXH_SEC_ALIGN == 64);
+    XXH_ASSERT(((size_t)customSecret & 63) == 0);
+    (void)(&XXH_writeLE64);   /* NOTE(review): presumably only marks XXH_writeLE64 as used - confirm */
+    {   int const nbRounds = XXH_SECRET_DEFAULT_SIZE / sizeof(__m512i);
+        /* all lanes = seed64, then lanes selected by mask 0xAA (1,3,5,7) = 0 - seed64 */
+        __m512i const seed = _mm512_mask_set1_epi64(_mm512_set1_epi64((xxh_i64)seed64), 0xAA, (xxh_i64)(0U - seed64));
+
+        const __m512i* const src  = (const __m512i*) ((const void*) XXH3_kSecret);
+              __m512i* const dest = (      __m512i*) customSecret;
+        int i;
+        XXH_ASSERT(((size_t)src & 63) == 0); /* control alignment */
+        XXH_ASSERT(((size_t)dest & 63) == 0);
+        for (i=0; i < nbRounds; ++i) {
+            /* GCC has a bug, _mm512_stream_load_si512 accepts 'void*', not 'void const*',
+             * this will warn "discards 'const' qualifier".
+             * The union below converts the pointer without a const-dropping cast. */
+            union {
+                const __m512i* cp;
+                void* p;
+            } remote_const_void;
+            remote_const_void.cp = src + i;
+            dest[i] = _mm512_add_epi64(_mm512_stream_load_si512(remote_const_void.p), seed);
+    }   }
+}
+
+#endif
+
+#if (XXH_VECTOR == XXH_AVX2) \
+    || (defined(XXH_DISPATCH_AVX2) && XXH_DISPATCH_AVX2 != 0)
+
+#ifndef XXH_TARGET_AVX2
+# define XXH_TARGET_AVX2  /* disable attribute target */
+#endif
+
+XXH_FORCE_INLINE XXH_TARGET_AVX2 void
+XXH3_accumulate_512_avx2( void* XXH_RESTRICT acc,
+                    const void* XXH_RESTRICT input,
+                    const void* XXH_RESTRICT secret)
+{   /*
+     * AVX2 accumulation step: folds one XXH_STRIPE_LEN-byte stripe of `input`,
+     * keyed with the same number of bytes of `secret`, into `acc`, one
+     * 256-bit vector per loop iteration.
+     * `acc` is asserted to be 32-byte aligned.
+     */
+    XXH_ASSERT((((size_t)acc) & 31) == 0);
+    {   __m256i* const xacc    =       (__m256i *) acc;
+        /* Unaligned. This is mainly for pointer arithmetic, and because
+         * _mm256_loadu_si256 requires  a const __m256i * pointer for some reason. */
+        const         __m256i* const xinput  = (const __m256i *) input;
+        /* Unaligned. This is mainly for pointer arithmetic, and because
+         * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */
+        const         __m256i* const xsecret = (const __m256i *) secret;
+
+        size_t i;
+        for (i=0; i < XXH_STRIPE_LEN/sizeof(__m256i); i++) {
+            /* data_vec    = xinput[i]; */
+            __m256i const data_vec    = _mm256_loadu_si256    (xinput+i);
+            /* key_vec     = xsecret[i]; */
+            __m256i const key_vec     = _mm256_loadu_si256   (xsecret+i);
+            /* data_key    = data_vec ^ key_vec; */
+            __m256i const data_key    = _mm256_xor_si256     (data_vec, key_vec);
+            /* data_key_lo = data_key >> 32; (upper half of each 64-bit lane moved to its lower half) */
+            __m256i const data_key_lo = _mm256_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1));
+            /* product     = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */
+            __m256i const product     = _mm256_mul_epu32     (data_key, data_key_lo);
+            /* xacc[i] += swap(data_vec); (adjacent 64-bit lanes of the raw input exchanged) */
+            __m256i const data_swap = _mm256_shuffle_epi32(data_vec, _MM_SHUFFLE(1, 0, 3, 2));
+            __m256i const sum       = _mm256_add_epi64(xacc[i], data_swap);
+            /* xacc[i] += product; */
+            xacc[i] = _mm256_add_epi64(product, sum);
+    }   }
+}
+
+XXH_FORCE_INLINE XXH_TARGET_AVX2 void
+XXH3_scrambleAcc_avx2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
+{   /*
+     * AVX2 scramble step. For every 64-bit lane of `acc`:
+     *   lane ^= lane >> 47;  lane ^= secret lane;  lane *= XXH_PRIME32_1;
+     * `acc` is asserted to be 32-byte aligned.
+     */
+    XXH_ASSERT((((size_t)acc) & 31) == 0);
+    {   __m256i* const xacc = (__m256i*) acc;
+        /* Unaligned. This is mainly for pointer arithmetic, and because
+         * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */
+        const         __m256i* const xsecret = (const __m256i *) secret;
+        const __m256i prime32 = _mm256_set1_epi32((int)XXH_PRIME32_1);
+
+        size_t i;
+        for (i=0; i < XXH_STRIPE_LEN/sizeof(__m256i); i++) {
+            /* xacc[i] ^= (xacc[i] >> 47) */
+            __m256i const acc_vec     = xacc[i];
+            __m256i const shifted     = _mm256_srli_epi64    (acc_vec, 47);
+            __m256i const data_vec    = _mm256_xor_si256     (acc_vec, shifted);
+            /* xacc[i] ^= xsecret; */
+            __m256i const key_vec     = _mm256_loadu_si256   (xsecret+i);
+            __m256i const data_key    = _mm256_xor_si256     (data_vec, key_vec);
+
+            /* xacc[i] *= XXH_PRIME32_1;
+             * built from two 32x32->64 products: (lo32 * prime) + ((hi32 * prime) << 32) */
+            __m256i const data_key_hi = _mm256_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1));
+            __m256i const prod_lo     = _mm256_mul_epu32     (data_key, prime32);
+            __m256i const prod_hi     = _mm256_mul_epu32     (data_key_hi, prime32);
+            xacc[i] = _mm256_add_epi64(prod_lo, _mm256_slli_epi64(prod_hi, 32));
+        }
+    }
+}
+
+XXH_FORCE_INLINE XXH_TARGET_AVX2 void XXH3_initCustomSecret_avx2(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
+{   /*
+     * Fills `customSecret` (XXH_SECRET_DEFAULT_SIZE bytes, asserted 32-byte
+     * aligned) with XXH3_kSecret, adding seed64 to the even 64-bit lanes and
+     * subtracting it from the odd ones. The static asserts pin the secret to
+     * exactly six 256-bit vectors, matching the unrolled stores below.
+     */
+    XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 31) == 0);
+    XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE / sizeof(__m256i)) == 6);
+    XXH_STATIC_ASSERT(XXH_SEC_ALIGN <= 64);
+    (void)(&XXH_writeLE64);   /* NOTE(review): presumably only marks XXH_writeLE64 as used - confirm */
+    XXH_PREFETCH(customSecret);
+    {   /* _mm256_set_epi64x lists lanes from highest to lowest: lane 0 = seed64, lane 1 = 0 - seed64, ... */
+        __m256i const seed = _mm256_set_epi64x((xxh_i64)(0U - seed64), (xxh_i64)seed64, (xxh_i64)(0U - seed64), (xxh_i64)seed64);
+
+        const __m256i* const src  = (const __m256i*) ((const void*) XXH3_kSecret);
+              __m256i*       dest = (      __m256i*) customSecret;
+
+#       if defined(__GNUC__) || defined(__clang__)
+        /*
+         * On GCC & Clang, marking 'dest' as modified will cause the compiler:
+         *   - do not extract the secret from sse registers in the internal loop
+         *   - use less common registers, and avoid pushing these reg into stack
+         */
+        XXH_COMPILER_GUARD(dest);
+#       endif
+        XXH_ASSERT(((size_t)src & 31) == 0); /* control alignment */
+        XXH_ASSERT(((size_t)dest & 31) == 0);
+
+        /* GCC -O2 need unroll loop manually */
+        dest[0] = _mm256_add_epi64(_mm256_stream_load_si256(src+0), seed);
+        dest[1] = _mm256_add_epi64(_mm256_stream_load_si256(src+1), seed);
+        dest[2] = _mm256_add_epi64(_mm256_stream_load_si256(src+2), seed);
+        dest[3] = _mm256_add_epi64(_mm256_stream_load_si256(src+3), seed);
+        dest[4] = _mm256_add_epi64(_mm256_stream_load_si256(src+4), seed);
+        dest[5] = _mm256_add_epi64(_mm256_stream_load_si256(src+5), seed);
+    }
+}
+
+#endif
+
+/* x86dispatch always generates SSE2 */
+#if (XXH_VECTOR == XXH_SSE2) || defined(XXH_X86DISPATCH)
+
+#ifndef XXH_TARGET_SSE2
+# define XXH_TARGET_SSE2  /* disable attribute target */
+#endif
+
+XXH_FORCE_INLINE XXH_TARGET_SSE2 void
+XXH3_accumulate_512_sse2( void* XXH_RESTRICT acc,
+                    const void* XXH_RESTRICT input,
+                    const void* XXH_RESTRICT secret)
+{   /*
+     * SSE2 accumulation step: folds one XXH_STRIPE_LEN-byte stripe of `input`,
+     * keyed with the same number of bytes of `secret`, into `acc`, one
+     * 128-bit vector per loop iteration.
+     * `acc` is asserted to be 16-byte aligned.
+     */
+    /* SSE2 is just a half-scale version of the AVX2 version. */
+    XXH_ASSERT((((size_t)acc) & 15) == 0);
+    {   __m128i* const xacc    =       (__m128i *) acc;
+        /* Unaligned. This is mainly for pointer arithmetic, and because
+         * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */
+        const         __m128i* const xinput  = (const __m128i *) input;
+        /* Unaligned. This is mainly for pointer arithmetic, and because
+         * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */
+        const         __m128i* const xsecret = (const __m128i *) secret;
+
+        size_t i;
+        for (i=0; i < XXH_STRIPE_LEN/sizeof(__m128i); i++) {
+            /* data_vec    = xinput[i]; */
+            __m128i const data_vec    = _mm_loadu_si128   (xinput+i);
+            /* key_vec     = xsecret[i]; */
+            __m128i const key_vec     = _mm_loadu_si128   (xsecret+i);
+            /* data_key    = data_vec ^ key_vec; */
+            __m128i const data_key    = _mm_xor_si128     (data_vec, key_vec);
+            /* data_key_lo = data_key >> 32; (upper half of each 64-bit lane moved to its lower half) */
+            __m128i const data_key_lo = _mm_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1));
+            /* product     = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */
+            __m128i const product     = _mm_mul_epu32     (data_key, data_key_lo);
+            /* xacc[i] += swap(data_vec); (the two 64-bit lanes of the raw input exchanged) */
+            __m128i const data_swap = _mm_shuffle_epi32(data_vec, _MM_SHUFFLE(1,0,3,2));
+            __m128i const sum       = _mm_add_epi64(xacc[i], data_swap);
+            /* xacc[i] += product; */
+            xacc[i] = _mm_add_epi64(product, sum);
+    }   }
+}
+
+XXH_FORCE_INLINE XXH_TARGET_SSE2 void
+XXH3_scrambleAcc_sse2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
+{   /*
+     * SSE2 scramble step. For every 64-bit lane of `acc`:
+     *   lane ^= lane >> 47;  lane ^= secret lane;  lane *= XXH_PRIME32_1;
+     * `acc` is asserted to be 16-byte aligned.
+     */
+    XXH_ASSERT((((size_t)acc) & 15) == 0);
+    {   __m128i* const xacc = (__m128i*) acc;
+        /* Unaligned. This is mainly for pointer arithmetic, and because
+         * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */
+        const         __m128i* const xsecret = (const __m128i *) secret;
+        const __m128i prime32 = _mm_set1_epi32((int)XXH_PRIME32_1);
+
+        size_t i;
+        for (i=0; i < XXH_STRIPE_LEN/sizeof(__m128i); i++) {
+            /* xacc[i] ^= (xacc[i] >> 47) */
+            __m128i const acc_vec     = xacc[i];
+            __m128i const shifted     = _mm_srli_epi64    (acc_vec, 47);
+            __m128i const data_vec    = _mm_xor_si128     (acc_vec, shifted);
+            /* xacc[i] ^= xsecret[i]; */
+            __m128i const key_vec     = _mm_loadu_si128   (xsecret+i);
+            __m128i const data_key    = _mm_xor_si128     (data_vec, key_vec);
+
+            /* xacc[i] *= XXH_PRIME32_1;
+             * built from two 32x32->64 products: (lo32 * prime) + ((hi32 * prime) << 32) */
+            __m128i const data_key_hi = _mm_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1));
+            __m128i const prod_lo     = _mm_mul_epu32     (data_key, prime32);
+            __m128i const prod_hi     = _mm_mul_epu32     (data_key_hi, prime32);
+            xacc[i] = _mm_add_epi64(prod_lo, _mm_slli_epi64(prod_hi, 32));
+        }
+    }
+}
+
+XXH_FORCE_INLINE XXH_TARGET_SSE2 void XXH3_initCustomSecret_sse2(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
+{   /*
+     * Fills `customSecret` (XXH_SECRET_DEFAULT_SIZE bytes, asserted 16-byte
+     * aligned) with XXH3_kSecret, adding seed64 to the low 64-bit lane of each
+     * 128-bit vector and subtracting it from the high lane.
+     */
+    XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 15) == 0);
+    (void)(&XXH_writeLE64);   /* NOTE(review): presumably only marks XXH_writeLE64 as used - confirm */
+    {   int const nbRounds = XXH_SECRET_DEFAULT_SIZE / sizeof(__m128i);
+
+#       if defined(_MSC_VER) && defined(_M_IX86) && _MSC_VER < 1900
+        /* MSVC 32bit mode does not support _mm_set_epi64x before 2015 */
+        XXH_ALIGN(16) const xxh_i64 seed64x2[2] = { (xxh_i64)seed64, (xxh_i64)(0U - seed64) };
+        __m128i const seed = _mm_load_si128((__m128i const*)seed64x2);
+#       else
+        /* _mm_set_epi64x takes (high, low): low lane = seed64, high lane = 0 - seed64 */
+        __m128i const seed = _mm_set_epi64x((xxh_i64)(0U - seed64), (xxh_i64)seed64);
+#       endif
+        int i;
+
+        const void* const src16 = XXH3_kSecret;
+        __m128i* dst16 = (__m128i*) customSecret;
+#       if defined(__GNUC__) || defined(__clang__)
+        /*
+         * On GCC & Clang, marking 'dest' as modified will cause the compiler:
+         *   - do not extract the secret from sse registers in the internal loop
+         *   - use less common registers, and avoid pushing these reg into stack
+         */
+        XXH_COMPILER_GUARD(dst16);
+#       endif
+        XXH_ASSERT(((size_t)src16 & 15) == 0); /* control alignment */
+        XXH_ASSERT(((size_t)dst16 & 15) == 0);
+
+        for (i=0; i < nbRounds; ++i) {
+            dst16[i] = _mm_add_epi64(_mm_load_si128((const __m128i *)src16+i), seed);
+    }   }
+}
+
+#endif
+
+#if (XXH_VECTOR == XXH_NEON)
+
+XXH_FORCE_INLINE void
+XXH3_accumulate_512_neon( void* XXH_RESTRICT acc,
+                    const void* XXH_RESTRICT input,
+                    const void* XXH_RESTRICT secret)
+{   /*
+     * NEON accumulation step: folds one XXH_STRIPE_LEN-byte stripe of `input`,
+     * keyed with the same number of bytes of `secret`, into `acc`, 16 bytes
+     * per loop iteration.
+     * `acc` is asserted to be 16-byte aligned; `input` and `secret` are read
+     * through byte pointers.
+     */
+    XXH_ASSERT((((size_t)acc) & 15) == 0);
+    {
+        uint64x2_t* const xacc = (uint64x2_t *) acc;
+        /* We don't use a uint32x4_t pointer because it causes bus errors on ARMv7. */
+        uint8_t const* const xinput = (const uint8_t *) input;
+        uint8_t const* const xsecret  = (const uint8_t *) secret;
+
+        size_t i;
+        for (i=0; i < XXH_STRIPE_LEN / sizeof(uint64x2_t); i++) {
+            /* data_vec = xinput[i]; */
+            uint8x16_t data_vec    = vld1q_u8(xinput  + (i * 16));
+            /* key_vec  = xsecret[i];  */
+            uint8x16_t key_vec     = vld1q_u8(xsecret + (i * 16));
+            uint64x2_t data_key;
+            uint32x2_t data_key_lo, data_key_hi;
+            /* xacc[i] += swap(data_vec); (the two 64-bit lanes of the raw input exchanged) */
+            uint64x2_t const data64  = vreinterpretq_u64_u8(data_vec);
+            uint64x2_t const swapped = vextq_u64(data64, data64, 1);
+            xacc[i] = vaddq_u64 (xacc[i], swapped);
+            /* data_key = data_vec ^ key_vec; */
+            data_key = vreinterpretq_u64_u8(veorq_u8(data_vec, key_vec));
+            /* data_key_lo = (uint32x2_t) (data_key & 0xFFFFFFFF);
+             * data_key_hi = (uint32x2_t) (data_key >> 32);
+             * data_key = UNDEFINED; */
+            XXH_SPLIT_IN_PLACE(data_key, data_key_lo, data_key_hi);
+            /* xacc[i] += (uint64x2_t) data_key_lo * (uint64x2_t) data_key_hi; */
+            xacc[i] = vmlal_u32 (xacc[i], data_key_lo, data_key_hi);
+
+        }
+    }
+}
+
+XXH_FORCE_INLINE void
+XXH3_scrambleAcc_neon(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
+{   /*
+     * NEON scramble step. For every 64-bit lane of `acc`:
+     *   lane ^= lane >> 47;  lane ^= secret lane;  lane *= XXH_PRIME32_1;
+     * `acc` is asserted to be 16-byte aligned; `secret` is read through a
+     * byte pointer.
+     */
+    XXH_ASSERT((((size_t)acc) & 15) == 0);
+
+    {   uint64x2_t* xacc       = (uint64x2_t*) acc;
+        uint8_t const* xsecret = (uint8_t const*) secret;
+        uint32x2_t prime       = vdup_n_u32 (XXH_PRIME32_1);
+
+        size_t i;
+        for (i=0; i < XXH_STRIPE_LEN/sizeof(uint64x2_t); i++) {
+            /* xacc[i] ^= (xacc[i] >> 47); */
+            uint64x2_t acc_vec  = xacc[i];
+            uint64x2_t shifted  = vshrq_n_u64 (acc_vec, 47);
+            uint64x2_t data_vec = veorq_u64   (acc_vec, shifted);
+
+            /* xacc[i] ^= xsecret[i]; */
+            uint8x16_t key_vec  = vld1q_u8    (xsecret + (i * 16));
+            uint64x2_t data_key = veorq_u64   (data_vec, vreinterpretq_u64_u8(key_vec));
+
+            /* xacc[i] *= XXH_PRIME32_1 */
+            uint32x2_t data_key_lo, data_key_hi;
+            /* data_key_lo = (uint32x2_t) (xacc[i] & 0xFFFFFFFF);
+             * data_key_hi = (uint32x2_t) (xacc[i] >> 32);
+             * xacc[i] = UNDEFINED; */
+            XXH_SPLIT_IN_PLACE(data_key, data_key_lo, data_key_hi);
+            {   /*
+                 * prod_hi = (data_key >> 32) * XXH_PRIME32_1;
+                 *
+                 * Avoid vmul_u32 + vshll_n_u32 since Clang 6 and 7 will
+                 * incorrectly "optimize" this:
+                 *   tmp     = vmul_u32(vmovn_u64(a), vmovn_u64(b));
+                 *   shifted = vshll_n_u32(tmp, 32);
+                 * to this:
+                 *   tmp     = "vmulq_u64"(a, b); // no such thing!
+                 *   shifted = vshlq_n_u64(tmp, 32);
+                 *
+                 * However, unlike SSE, Clang lacks a 64-bit multiply routine
+                 * for NEON, and it scalarizes two 64-bit multiplies instead.
+                 *
+                 * vmull_u32 has the same timing as vmul_u32, and it avoids
+                 * this bug completely.
+                 * See https://bugs.llvm.org/show_bug.cgi?id=39967
+                 */
+                uint64x2_t prod_hi = vmull_u32 (data_key_hi, prime);
+                /* xacc[i] = prod_hi << 32; */
+                xacc[i] = vshlq_n_u64(prod_hi, 32);
+                /* xacc[i] += (data_key & 0xFFFFFFFF) * XXH_PRIME32_1;
+                 * i.e. the low-half product is accumulated on top of the shifted high-half product */
+                xacc[i] = vmlal_u32(xacc[i], data_key_lo, prime);
+            }
+    }   }
+}
+
+#endif
+
+#if (XXH_VECTOR == XXH_VSX)
+
+XXH_FORCE_INLINE void
+XXH3_accumulate_512_vsx(  void* XXH_RESTRICT acc,
+                    const void* XXH_RESTRICT input,
+                    const void* XXH_RESTRICT secret)
+{   /*
+     * VSX (also compiled for s390x, see below) accumulation step: folds one
+     * XXH_STRIPE_LEN-byte stripe of `input`, keyed with the same number of
+     * bytes of `secret`, into `acc`, one 128-bit vector per loop iteration.
+     */
+    /* presumed aligned */
+    unsigned long long* const xacc = (unsigned long long*) acc;
+    xxh_u64x2 const* const xinput   = (xxh_u64x2 const*) input;   /* no alignment restriction */
+    xxh_u64x2 const* const xsecret  = (xxh_u64x2 const*) secret;    /* no alignment restriction */
+    xxh_u64x2 const v32 = { 32, 32 };
+    size_t i;
+    for (i = 0; i < XXH_STRIPE_LEN / sizeof(xxh_u64x2); i++) {
+        /* data_vec = xinput[i]; */
+        xxh_u64x2 const data_vec = XXH_vec_loadu(xinput + i);
+        /* key_vec = xsecret[i]; */
+        xxh_u64x2 const key_vec  = XXH_vec_loadu(xsecret + i);
+        xxh_u64x2 const data_key = data_vec ^ key_vec;
+        /* shuffled = (data_key << 32) | (data_key >> 32); */
+        xxh_u32x4 const shuffled = (xxh_u32x4)vec_rl(data_key, v32);
+        /* product = ((xxh_u64x2)data_key & 0xFFFFFFFF) * ((xxh_u64x2)shuffled & 0xFFFFFFFF); */
+        xxh_u64x2 const product  = XXH_vec_mulo((xxh_u32x4)data_key, shuffled);
+        /* acc_vec = xacc[i]; (xacc is a scalar pointer, hence two elements per vector) */
+        xxh_u64x2 acc_vec        = vec_xl(0, xacc + 2 * i);
+        acc_vec += product;
+
+        /* swap high and low halves */
+#ifdef __s390x__
+        acc_vec += vec_permi(data_vec, data_vec, 2);
+#else
+        acc_vec += vec_xxpermdi(data_vec, data_vec, 2);
+#endif
+        /* xacc[i] = acc_vec; */
+        vec_xst(acc_vec, 0, xacc + 2 * i);
+    }
+}
+
+XXH_FORCE_INLINE void
+XXH3_scrambleAcc_vsx(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
+{   /*
+     * VSX scramble step. For every 64-bit lane of `acc`:
+     *   lane ^= lane >> 47;  lane ^= secret lane;  lane *= XXH_PRIME32_1;
+     * `acc` is asserted to be 16-byte aligned.
+     */
+    XXH_ASSERT((((size_t)acc) & 15) == 0);
+
+    {         xxh_u64x2* const xacc    =       (xxh_u64x2*) acc;
+        const xxh_u64x2* const xsecret = (const xxh_u64x2*) secret;
+        /* constants */
+        xxh_u64x2 const v32  = { 32, 32 };
+        xxh_u64x2 const v47 = { 47, 47 };
+        xxh_u32x4 const prime = { XXH_PRIME32_1, XXH_PRIME32_1, XXH_PRIME32_1, XXH_PRIME32_1 };
+        size_t i;
+        for (i = 0; i < XXH_STRIPE_LEN / sizeof(xxh_u64x2); i++) {
+            /* xacc[i] ^= (xacc[i] >> 47); */
+            xxh_u64x2 const acc_vec  = xacc[i];
+            xxh_u64x2 const data_vec = acc_vec ^ (acc_vec >> v47);
+
+            /* xacc[i] ^= xsecret[i]; */
+            xxh_u64x2 const key_vec  = XXH_vec_loadu(xsecret + i);
+            xxh_u64x2 const data_key = data_vec ^ key_vec;
+
+            /* xacc[i] *= XXH_PRIME32_1 */
+            /* prod_lo = ((xxh_u64x2)data_key & 0xFFFFFFFF) * ((xxh_u64x2)prime & 0xFFFFFFFF);  */
+            xxh_u64x2 const prod_even  = XXH_vec_mule((xxh_u32x4)data_key, prime);
+            /* prod_hi = ((xxh_u64x2)data_key >> 32) * ((xxh_u64x2)prime >> 32);  */
+            xxh_u64x2 const prod_odd  = XXH_vec_mulo((xxh_u32x4)data_key, prime);
+            /* NOTE(review): the prod_lo/prod_hi comments above do not line up
+             * one-to-one with the prod_even/prod_odd variables; the result
+             * shifts prod_even by 32 and adds prod_odd unshifted. */
+            xacc[i] = prod_odd + (prod_even << v32);
+    }   }
+}
+
+#endif
+
+/* scalar variants - universal */
+
+/*
+ * Portable accumulation step: folds one XXH_STRIPE_LEN-byte stripe of `input`,
+ * keyed with the same number of bytes of `secret`, into the XXH_ACC_NB 64-bit
+ * lanes of `acc`.
+ *
+ * For every lane, the raw input word is added to the neighbouring lane
+ * (index ^ 1), and the product of the low and high 32-bit halves of
+ * (input word ^ secret word) is added to the lane itself.
+ */
+XXH_FORCE_INLINE void
+XXH3_accumulate_512_scalar(void* XXH_RESTRICT acc,
+                     const void* XXH_RESTRICT input,
+                     const void* XXH_RESTRICT secret)
+{
+    xxh_u64* const lanes     = (xxh_u64*) acc;          /* presumed aligned */
+    const xxh_u8* const data = (const xxh_u8*) input;   /* no alignment restriction */
+    const xxh_u8* const key  = (const xxh_u8*) secret;  /* no alignment restriction */
+    size_t lane;
+    XXH_ASSERT(((size_t)acc & (XXH_ACC_ALIGN-1)) == 0);
+    for (lane = 0; lane < XXH_ACC_NB; lane++) {
+        size_t  const offset = 8 * lane;
+        xxh_u64 const word   = XXH_readLE64(data + offset);
+        xxh_u64 const keyed  = word ^ XXH_readLE64(key + offset);
+        xxh_u64 const lo32   = keyed & 0xFFFFFFFF;
+        xxh_u64 const hi32   = keyed >> 32;
+        lanes[lane ^ 1] += word;   /* swap adjacent lanes */
+        lanes[lane]     += XXH_mult32to64(lo32, hi32);
+    }
+}
+
+/*
+ * Portable scramble step. For each of the XXH_ACC_NB 64-bit lanes of `acc`:
+ *   lane = (xorshift64(lane, 47) ^ secret word) * XXH_PRIME32_1
+ * where the secret word is read little-endian from `secret` at 8 * lane index.
+ */
+XXH_FORCE_INLINE void
+XXH3_scrambleAcc_scalar(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
+{
+    xxh_u64* const lanes    = (xxh_u64*) acc;          /* presumed aligned */
+    const xxh_u8* const key = (const xxh_u8*) secret;  /* no alignment restriction */
+    size_t lane;
+    XXH_ASSERT((((size_t)acc) & (XXH_ACC_ALIGN-1)) == 0);
+    for (lane = 0; lane < XXH_ACC_NB; lane++) {
+        xxh_u64 const secretWord = XXH_readLE64(key + 8*lane);
+        xxh_u64 const folded     = XXH_xorshift64(lanes[lane], 47);
+        lanes[lane] = (folded ^ secretWord) * XXH_PRIME32_1;
+    }
+}
+
+/*
+ * Portable derivation of a seed-specific secret from XXH3_kSecret.
+ * The default secret is walked in 16-byte rounds: seed64 is added to the
+ * first 64-bit little-endian word of each round and subtracted from the
+ * second, and both results are written to the same offsets of customSecret.
+ * XXH_SECRET_DEFAULT_SIZE bytes of customSecret are written.
+ */
+XXH_FORCE_INLINE void
+XXH3_initCustomSecret_scalar(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
+{
+    /*
+     * We need a separate pointer for the hack below,
+     * which requires a non-const pointer.
+     * Any decent compiler will optimize this out otherwise.
+     */
+    const xxh_u8* kSecretPtr = XXH3_kSecret;
+    XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 15) == 0);
+
+#if defined(__clang__) && defined(__aarch64__)
+    /*
+     * UGLY HACK:
+     * Clang generates a bunch of MOV/MOVK pairs for aarch64, and they are
+     * placed sequentially, in order, at the top of the unrolled loop.
+     *
+     * While MOVK is great for generating constants (2 cycles for a 64-bit
+     * constant compared to 4 cycles for LDR), long MOVK chains stall the
+     * integer pipelines:
+     *   I   L   S
+     * MOVK
+     * MOVK
+     * MOVK
+     * MOVK
+     * ADD
+     * SUB      STR
+     *          STR
+     * By forcing loads from memory (as the asm line causes Clang to assume
+     * that kSecretPtr has been changed), the pipelines are used more
+     * efficiently:
+     *   I   L   S
+     *      LDR
+     *  ADD LDR
+     *  SUB     STR
+     *          STR
+     * XXH3_64bits_withSeed, len == 256, Snapdragon 835
+     *   without hack: 2654.4 MB/s
+     *   with hack:    3202.9 MB/s
+     */
+    XXH_COMPILER_GUARD(kSecretPtr);
+#endif
+    /*
+     * Note: in debug mode, this overrides the asm optimization
+     * and Clang will emit MOVK chains again.
+     */
+    XXH_ASSERT(kSecretPtr == XXH3_kSecret);
+
+    {   int const nbRounds = XXH_SECRET_DEFAULT_SIZE / 16;
+        int i;
+        for (i=0; i < nbRounds; i++) {
+            /*
+             * The asm hack causes Clang to assume that kSecretPtr aliases with
+             * customSecret, and on aarch64, this prevented LDP from merging two
+             * loads together for free. Putting the loads together before the stores
+             * properly generates LDP.
+             */
+            xxh_u64 lo = XXH_readLE64(kSecretPtr + 16*i)     + seed64;
+            xxh_u64 hi = XXH_readLE64(kSecretPtr + 16*i + 8) - seed64;
+            XXH_writeLE64((xxh_u8*)customSecret + 16*i,     lo);
+            XXH_writeLE64((xxh_u8*)customSecret + 16*i + 8, hi);
+    }   }
+}
+
+
+/*
+ * Function-pointer types for the three kernels, so the generic loops below
+ * can receive the selected implementation as an argument:
+ *  - accumulate_512  : (acc, input stripe, secret)
+ *  - scrambleAcc     : (acc, secret)
+ *  - initCustomSecret: (destination secret, seed)
+ */
+typedef void (*XXH3_f_accumulate_512)(void* XXH_RESTRICT, const void*, const void*);
+typedef void (*XXH3_f_scrambleAcc)(void* XXH_RESTRICT, const void*);
+typedef void (*XXH3_f_initCustomSecret)(void* XXH_RESTRICT, xxh_u64);
+
+
+/*
+ * Compile-time selection of the kernel implementations according to
+ * XXH_VECTOR. The NEON and VSX configurations use the scalar
+ * initCustomSecret; anything unrecognized falls back to the scalar kernels.
+ */
+#if (XXH_VECTOR == XXH_AVX512)
+
+#define XXH3_accumulate_512 XXH3_accumulate_512_avx512
+#define XXH3_scrambleAcc    XXH3_scrambleAcc_avx512
+#define XXH3_initCustomSecret XXH3_initCustomSecret_avx512
+
+#elif (XXH_VECTOR == XXH_AVX2)
+
+#define XXH3_accumulate_512 XXH3_accumulate_512_avx2
+#define XXH3_scrambleAcc    XXH3_scrambleAcc_avx2
+#define XXH3_initCustomSecret XXH3_initCustomSecret_avx2
+
+#elif (XXH_VECTOR == XXH_SSE2)
+
+#define XXH3_accumulate_512 XXH3_accumulate_512_sse2
+#define XXH3_scrambleAcc    XXH3_scrambleAcc_sse2
+#define XXH3_initCustomSecret XXH3_initCustomSecret_sse2
+
+#elif (XXH_VECTOR == XXH_NEON)
+
+#define XXH3_accumulate_512 XXH3_accumulate_512_neon
+#define XXH3_scrambleAcc    XXH3_scrambleAcc_neon
+#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
+
+#elif (XXH_VECTOR == XXH_VSX)
+
+#define XXH3_accumulate_512 XXH3_accumulate_512_vsx
+#define XXH3_scrambleAcc    XXH3_scrambleAcc_vsx
+#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
+
+#else /* scalar */
+
+#define XXH3_accumulate_512 XXH3_accumulate_512_scalar
+#define XXH3_scrambleAcc    XXH3_scrambleAcc_scalar
+#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
+
+#endif
+
+
+
+/*
+ * XXH_PREFETCH_DIST: byte distance ahead of the current stripe passed to
+ * XXH_PREFETCH() in XXH3_accumulate(). May be predefined by the build;
+ * otherwise defaults to 320 on clang, 512 for AVX512 and 384 elsewhere.
+ */
+#ifndef XXH_PREFETCH_DIST
+#  ifdef __clang__
+#    define XXH_PREFETCH_DIST 320
+#  else
+#    if (XXH_VECTOR == XXH_AVX512)
+#      define XXH_PREFETCH_DIST 512
+#    else
+#      define XXH_PREFETCH_DIST 384
+#    endif
+#  endif  /* __clang__ */
+#endif  /* XXH_PREFETCH_DIST */
+
+/*
+ * XXH3_accumulate()
+ * Feeds `nbStripes` consecutive stripes of `input` to f_acc512().
+ * The input advances by XXH_STRIPE_LEN per stripe and the secret by
+ * XXH_SECRET_CONSUME_RATE; a prefetch is issued XXH_PREFETCH_DIST bytes
+ * ahead of each stripe.
+ * Assumption: nbStripes will not overflow the secret size
+ */
+XXH_FORCE_INLINE void
+XXH3_accumulate(     xxh_u64* XXH_RESTRICT acc,
+                const xxh_u8* XXH_RESTRICT input,
+                const xxh_u8* XXH_RESTRICT secret,
+                      size_t nbStripes,
+                      XXH3_f_accumulate_512 f_acc512)
+{
+    size_t stripeIdx = 0;
+    while (stripeIdx < nbStripes) {
+        const xxh_u8* const stripe = input  + stripeIdx * XXH_STRIPE_LEN;
+        const xxh_u8* const key    = secret + stripeIdx * XXH_SECRET_CONSUME_RATE;
+        XXH_PREFETCH(stripe + XXH_PREFETCH_DIST);
+        f_acc512(acc, stripe, key);
+        stripeIdx++;
+    }
+}
+
+/*
+ * Core long-input loop: folds `len` bytes of `input` into `acc`.
+ *
+ * The input is processed in blocks of nbStripesPerBlock stripes, where
+ * nbStripesPerBlock is derived from secretSize. After each full block the
+ * accumulators are scrambled with the last XXH_STRIPE_LEN bytes of the
+ * secret. The remaining stripes are then accumulated, and finally the last
+ * XXH_STRIPE_LEN bytes of the input are accumulated once more with a secret
+ * segment shifted by XXH_SECRET_LASTACC_START bytes.
+ *
+ * Preconditions (asserted): secretSize >= XXH3_SECRET_SIZE_MIN and
+ * len > XXH_STRIPE_LEN.
+ */
+XXH_FORCE_INLINE void
+XXH3_hashLong_internal_loop(xxh_u64* XXH_RESTRICT acc,
+                      const xxh_u8* XXH_RESTRICT input, size_t len,
+                      const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
+                            XXH3_f_accumulate_512 f_acc512,
+                            XXH3_f_scrambleAcc f_scramble)
+{
+    size_t const nbStripesPerBlock = (secretSize - XXH_STRIPE_LEN) / XXH_SECRET_CONSUME_RATE;
+    size_t const block_len = XXH_STRIPE_LEN * nbStripesPerBlock;
+    /* (len - 1): an input that is an exact multiple of block_len keeps its
+     * final block for the "last partial block" section below */
+    size_t const nb_blocks = (len - 1) / block_len;
+
+    size_t n;
+
+    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
+
+    /* full blocks: accumulate every stripe, then scramble */
+    for (n = 0; n < nb_blocks; n++) {
+        XXH3_accumulate(acc, input + n*block_len, secret, nbStripesPerBlock, f_acc512);
+        f_scramble(acc, secret + secretSize - XXH_STRIPE_LEN);
+    }
+
+    /* last partial block */
+    XXH_ASSERT(len > XXH_STRIPE_LEN);
+    {   size_t const nbStripes = ((len - 1) - (block_len * nb_blocks)) / XXH_STRIPE_LEN;
+        XXH_ASSERT(nbStripes <= (secretSize / XXH_SECRET_CONSUME_RATE));
+        XXH3_accumulate(acc, input + nb_blocks*block_len, secret, nbStripes, f_acc512);
+
+        /* last stripe */
+        {   const xxh_u8* const p = input + len - XXH_STRIPE_LEN;
+#define XXH_SECRET_LASTACC_START 7  /* not aligned on 8, last secret is different from acc & scrambler */
+            f_acc512(acc, p, secret + secretSize - XXH_STRIPE_LEN - XXH_SECRET_LASTACC_START);
+    }   }
+}
+
+/*
+ * Combines two adjacent accumulator lanes into one 64-bit value:
+ * each lane is XORed with 8 little-endian bytes of `secret`, and the pair is
+ * reduced with XXH3_mul128_fold64().
+ */
+XXH_FORCE_INLINE xxh_u64
+XXH3_mix2Accs(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secret)
+{
+    xxh_u64 const keyedLane0 = acc[0] ^ XXH_readLE64(secret);
+    xxh_u64 const keyedLane1 = acc[1] ^ XXH_readLE64(secret+8);
+    return XXH3_mul128_fold64(keyedLane0, keyedLane1);
+}
+
+/*
+ * Converges the accumulator lanes into the final 64-bit hash.
+ * Starting from `start`, adds XXH3_mix2Accs() of each of the 4 lane pairs
+ * (acc[0..7]), pair i being keyed with the 16 secret bytes at offset 16*i,
+ * and returns XXH3_avalanche() of the sum.
+ */
+static XXH64_hash_t
+XXH3_mergeAccs(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secret, xxh_u64 start)
+{
+    xxh_u64 result64 = start;
+    size_t i = 0;
+
+    for (i = 0; i < 4; i++) {
+        result64 += XXH3_mix2Accs(acc+2*i, secret + 16*i);
+#if defined(__clang__)                                /* Clang */ \
+    && (defined(__arm__) || defined(__thumb__))       /* ARMv7 */ \
+    && (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */  \
+    && !defined(XXH_ENABLE_AUTOVECTORIZE)             /* Define to disable */
+        /*
+         * UGLY HACK:
+         * Prevent autovectorization on Clang ARMv7-a. Exact same problem as
+         * the one in XXH3_len_129to240_64b. Speeds up shorter keys > 240b.
+         * XXH3_64bits, len == 256, Snapdragon 835:
+         *   without hack: 2063.7 MB/s
+         *   with hack:    2560.7 MB/s
+         */
+        XXH_COMPILER_GUARD(result64);
+#endif
+    }
+
+    return XXH3_avalanche(result64);
+}
+
+/* Initial values of the 8 accumulator lanes. */
+#define XXH3_INIT_ACC { XXH_PRIME32_3, XXH_PRIME64_1, XXH_PRIME64_2, XXH_PRIME64_3, \
+                        XXH_PRIME64_4, XXH_PRIME32_2, XXH_PRIME64_5, XXH_PRIME32_1 }
+
+/*
+ * One-shot 64-bit hash of a long input with an explicit secret.
+ * Initializes a local accumulator with XXH3_INIT_ACC, runs
+ * XXH3_hashLong_internal_loop() over the input, then merges the lanes with
+ * the secret read from offset XXH_SECRET_MERGEACCS_START, seeding the merge
+ * with len * XXH_PRIME64_1.
+ */
+XXH_FORCE_INLINE XXH64_hash_t
+XXH3_hashLong_64b_internal(const void* XXH_RESTRICT input, size_t len,
+                           const void* XXH_RESTRICT secret, size_t secretSize,
+                           XXH3_f_accumulate_512 f_acc512,
+                           XXH3_f_scrambleAcc f_scramble)
+{
+    XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = XXH3_INIT_ACC;
+
+    XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, (const xxh_u8*)secret, secretSize, f_acc512, f_scramble);
+
+    /* converge into final hash */
+    XXH_STATIC_ASSERT(sizeof(acc) == 64);
+    /* do not align on 8, so that the secret is different from the accumulator */
+#define XXH_SECRET_MERGEACCS_START 11
+    XXH_ASSERT(secretSize >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
+    return XXH3_mergeAccs(acc, (const xxh_u8*)secret + XXH_SECRET_MERGEACCS_START, (xxh_u64)len * XXH_PRIME64_1);
+}
+
+/*
+ * Long-input hash using the caller-supplied secret.
+ * `seed64` is ignored; `secret` and `secretLen` are forwarded unchanged to
+ * XXH3_hashLong_64b_internal() with the selected kernels.
+ *
+ * It's important for performance to transmit secret's size (when it's static)
+ * so that the compiler can properly optimize the vectorized loop.
+ * This makes a big performance difference for "medium" keys (<1 KB) when using AVX instruction set.
+ */
+XXH_FORCE_INLINE XXH64_hash_t
+XXH3_hashLong_64b_withSecret(const void* XXH_RESTRICT input, size_t len,
+                             XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
+{
+    (void)seed64;
+    return XXH3_hashLong_64b_internal(input, len, secret, secretLen, XXH3_accumulate_512, XXH3_scrambleAcc);
+}
+
+/*
+ * Long-input hash using the built-in XXH3_kSecret.
+ * `seed64`, `secret` and `secretLen` are ignored; they exist so that the
+ * signature matches XXH3_hashLong64_f.
+ *
+ * It's preferable for performance that XXH3_hashLong is not inlined,
+ * as it results in a smaller function for small data, easier to the instruction cache.
+ * Note that inside this no_inline function, we do inline the internal loop,
+ * and provide a statically defined secret size to allow optimization of vector loop.
+ */
+XXH_NO_INLINE XXH64_hash_t
+XXH3_hashLong_64b_default(const void* XXH_RESTRICT input, size_t len,
+                          XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
+{
+    (void)seed64; (void)secret; (void)secretLen;
+    return XXH3_hashLong_64b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate_512, XXH3_scrambleAcc);
+}
+
+/*
+ * XXH3_hashLong_64b_withSeed():
+ * Long-input hash driven by a seed.
+ * With seed == 0 the default XXH3_kSecret is used directly. Otherwise a
+ * custom secret of XXH_SECRET_DEFAULT_SIZE bytes is derived on the stack
+ * through f_initSec() and used for hashing.
+ *
+ * Deriving the secret is decently fast but nonetheless costs a little bit of
+ * time. Try to avoid it whenever possible (typically when seed==0).
+ *
+ * It's important for performance that XXH3_hashLong is not inlined. Not sure
+ * why (uop cache maybe?), but the difference is large and easily measurable.
+ */
+XXH_FORCE_INLINE XXH64_hash_t
+XXH3_hashLong_64b_withSeed_internal(const void* input, size_t len,
+                                    XXH64_hash_t seed,
+                                    XXH3_f_accumulate_512 f_acc512,
+                                    XXH3_f_scrambleAcc f_scramble,
+                                    XXH3_f_initCustomSecret f_initSec)
+{
+    if (seed != 0) {
+        XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 seededSecret[XXH_SECRET_DEFAULT_SIZE];
+        f_initSec(seededSecret, seed);
+        return XXH3_hashLong_64b_internal(input, len,
+                                          seededSecret, sizeof(seededSecret),
+                                          f_acc512, f_scramble);
+    }
+    /* seed == 0: no derivation needed */
+    return XXH3_hashLong_64b_internal(input, len,
+                                      XXH3_kSecret, sizeof(XXH3_kSecret),
+                                      f_acc512, f_scramble);
+}
+
+/*
+ * Seeded long-input hash with the XXH3_hashLong64_f signature.
+ * `secret` and `secretLen` are ignored: the secret is either XXH3_kSecret or
+ * derived from `seed` by XXH3_hashLong_64b_withSeed_internal().
+ *
+ * It's important for performance that XXH3_hashLong is not inlined.
+ */
+XXH_NO_INLINE XXH64_hash_t
+XXH3_hashLong_64b_withSeed(const void* input, size_t len,
+                           XXH64_hash_t seed, const xxh_u8* secret, size_t secretLen)
+{
+    (void)secret; (void)secretLen;
+    return XXH3_hashLong_64b_withSeed_internal(input, len, seed,
+                XXH3_accumulate_512, XXH3_scrambleAcc, XXH3_initCustomSecret);
+}
+
+
+/* Signature shared by the long-input variants: (input, len, seed, secret, secretLen). */
+typedef XXH64_hash_t (*XXH3_hashLong64_f)(const void* XXH_RESTRICT, size_t,
+                                          XXH64_hash_t, const xxh_u8* XXH_RESTRICT, size_t);
+
+/*
+ * Length-based dispatcher for the one-shot 64-bit hash:
+ *   len <= 16               -> XXH3_len_0to16_64b
+ *   len <= 128              -> XXH3_len_17to128_64b
+ *   len <= XXH3_MIDSIZE_MAX -> XXH3_len_129to240_64b
+ *   otherwise               -> f_hashLong
+ *
+ * `secretLen >= XXH3_SECRET_SIZE_MIN` is a contract pre-condition, only
+ * asserted here: a runtime check would add a branch to every hash, and the
+ * signature offers no room to return an error. If an action is ever to be
+ * taken when the condition is not respected, this is the place for it.
+ */
+XXH_FORCE_INLINE XXH64_hash_t
+XXH3_64bits_internal(const void* XXH_RESTRICT input, size_t len,
+                     XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen,
+                     XXH3_hashLong64_f f_hashLong)
+{
+    XXH_ASSERT(secretLen >= XXH3_SECRET_SIZE_MIN);
+    if (len > XXH3_MIDSIZE_MAX) {
+        return f_hashLong(input, len, seed64, (const xxh_u8*)secret, secretLen);
+    }
+    if (len > 128) {
+        return XXH3_len_129to240_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64);
+    }
+    if (len > 16) {
+        return XXH3_len_17to128_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64);
+    }
+    return XXH3_len_0to16_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, seed64);
+}
+
+
+/* ===   Public entry point   === */
+
+/*! @ingroup xxh3_family */
+/* One-shot 64-bit hash of `len` bytes at `input`, with seed 0 and the default XXH3_kSecret. */
+XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(const void* input, size_t len)
+{
+    return XXH3_64bits_internal(input, len, 0, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_default);
+}
+
+/*! @ingroup xxh3_family */
+/*
+ * One-shot 64-bit hash using a caller-supplied secret and seed 0.
+ * `secretSize` is not validated here; XXH3_64bits_internal() only asserts
+ * that it is at least XXH3_SECRET_SIZE_MIN.
+ */
+XXH_PUBLIC_API XXH64_hash_t
+XXH3_64bits_withSecret(const void* input, size_t len, const void* secret, size_t secretSize)
+{
+    return XXH3_64bits_internal(input, len, 0, secret, secretSize, XXH3_hashLong_64b_withSecret);
+}
+
+/*! @ingroup xxh3_family */
+/*
+ * One-shot 64-bit hash with a seed. Short and medium inputs combine the seed
+ * with XXH3_kSecret; long inputs go through XXH3_hashLong_64b_withSeed().
+ */
+XXH_PUBLIC_API XXH64_hash_t
+XXH3_64bits_withSeed(const void* input, size_t len, XXH64_hash_t seed)
+{
+    return XXH3_64bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_withSeed);
+}
+
+/*
+ * One-shot 64-bit hash taking both a secret and a seed.
+ * Inputs longer than XXH3_MIDSIZE_MAX are hashed with the caller's secret
+ * (the long path ignores the seed). Shorter inputs are hashed with the seed
+ * and the default XXH3_kSecret; the long-hash callback can be NULL there
+ * because XXH3_64bits_internal() never reaches it for such lengths.
+ */
+XXH_PUBLIC_API XXH64_hash_t
+XXH3_64bits_withSecretandSeed(const void* input, size_t len, const void* secret, size_t secretSize, XXH64_hash_t seed)
+{
+    if (len > XXH3_MIDSIZE_MAX) {
+        return XXH3_hashLong_64b_withSecret(input, len, seed, (const xxh_u8*)secret, secretSize);
+    }
+    return XXH3_64bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL);
+}
+
+
+/* ===   XXH3 streaming   === */
+
+/*
+ * Allocates `s` bytes aligned on `align`. Returns NULL on allocation failure.
+ *
+ * The result must be released with `XXH_alignedFree()`.
+ *
+ * malloc typically guarantees 16 byte alignment on 64-bit systems and 8 byte
+ * alignment on 32-bit. This isn't enough for the 32 byte aligned loads in AVX2
+ * or on 32-bit, the 16 byte aligned loads in SSE2 and NEON.
+ *
+ * This underalignment previously caused a rather obvious crash which went
+ * completely unnoticed due to XXH3_createState() not actually being tested.
+ * Credit to RedSpah for noticing this bug.
+ *
+ * Alignment is performed by hand rather than through posix_memalign or
+ * _mm_malloc: portability would require a fallback like this one anyway, and
+ * the presence of library functions cannot be detected without external
+ * build tools.
+ *
+ * Method: over-allocate by `align` bytes, round the pointer up to the next
+ * multiple of `align`, and record the distance back to the original pointer
+ * in the byte just before the returned address.
+ *
+ * Align must be a power of 2 and 8 <= align <= 128.
+ */
+static void* XXH_alignedMalloc(size_t s, size_t align)
+{
+    xxh_u8* base;
+    XXH_ASSERT(align <= 128 && align >= 8); /* range check */
+    XXH_ASSERT((align & (align-1)) == 0);   /* power of 2 */
+    XXH_ASSERT(s != 0 && s < (s + align));  /* empty/overflow */
+
+    /* The extra `align` bytes leave room for realignment and the offset byte */
+    base = (xxh_u8*)XXH_malloc(s + align);
+    if (base == NULL) return NULL;
+
+    {   /*
+         * The offset is in [1, align]: even an already-aligned base is moved
+         * forward by `align`, so one byte is always available in front of
+         * the returned pointer.
+         */
+        size_t const misalignment = (size_t)base & (align - 1); /* base % align */
+        size_t const offset = align - misalignment;
+        xxh_u8* const aligned = base + offset;
+
+        XXH_ASSERT((size_t)aligned % align == 0);
+
+        /* Remember how far back the malloc'd pointer is. */
+        aligned[-1] = (xxh_u8)offset;
+        return aligned;
+    }
+}
+/*
+ * Releases a pointer obtained from XXH_alignedMalloc(). NULL is accepted.
+ * Never pass a plain malloc'd pointer: the byte preceding `p` is read as the
+ * offset back to the real allocation.
+ */
+static void XXH_alignedFree(void* p)
+{
+    xxh_u8* const aligned = (xxh_u8*)p;
+    if (aligned == NULL) return;
+    {   /* Offset byte stored by XXH_alignedMalloc() */
+        xxh_u8 const offset = aligned[-1];
+        /* Step back to the pointer originally returned by XXH_malloc() */
+        XXH_free(aligned - offset);
+    }
+}
+/*! @ingroup xxh3_family */
+/*
+ * Allocates a 64-byte aligned XXH3_state_t and applies XXH3_INITSTATE to it.
+ * Returns NULL when the allocation fails. Release with XXH3_freeState().
+ */
+XXH_PUBLIC_API XXH3_state_t* XXH3_createState(void)
+{
+    XXH3_state_t* const newState = (XXH3_state_t*)XXH_alignedMalloc(sizeof(XXH3_state_t), 64);
+    if (newState != NULL) {
+        XXH3_INITSTATE(newState);
+    }
+    return newState;
+}
+
+/*! @ingroup xxh3_family */
+/*
+ * Releases a state through XXH_alignedFree(); a NULL pointer is accepted.
+ * Always returns XXH_OK.
+ */
+XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr)
+{
+    XXH_alignedFree(statePtr);
+    return XXH_OK;
+}
+
+/*! @ingroup xxh3_family */
+/*
+ * Byte-wise copy of the whole state from src_state to dst_state.
+ * Neither pointer is checked for NULL.
+ */
+XXH_PUBLIC_API void
+XXH3_copyState(XXH3_state_t* dst_state, const XXH3_state_t* src_state)
+{
+    XXH_memcpy(dst_state, src_state, sizeof(*dst_state));
+}
+
+/*
+ * Common part of every reset variant.
+ * Zeroes the members laid out from `bufferedSize` up to (excluding)
+ * `nbStripesPerBlock`, reloads the accumulators with their initial values,
+ * records the seed and the external secret pointer, and derives secretLimit
+ * and nbStripesPerBlock from secretSize.
+ * `statePtr` must not be NULL and secretSize must be at least
+ * XXH3_SECRET_SIZE_MIN (both asserted only).
+ */
+static void
+XXH3_reset_internal(XXH3_state_t* statePtr,
+                    XXH64_hash_t seed,
+                    const void* secret, size_t secretSize)
+{
+    size_t const zeroBegin = offsetof(XXH3_state_t, bufferedSize);
+    size_t const zeroEnd   = offsetof(XXH3_state_t, nbStripesPerBlock);
+    size_t const zeroSize  = zeroEnd - zeroBegin;
+    XXH_ASSERT(zeroEnd > zeroBegin);
+    XXH_ASSERT(statePtr != NULL);
+    /* clear members from bufferedSize to nbStripesPerBlock (excluded) */
+    memset((char*)statePtr + zeroBegin, 0, zeroSize);
+
+    /* accumulators restart from their initial constants */
+    statePtr->acc[0] = XXH_PRIME32_3;
+    statePtr->acc[1] = XXH_PRIME64_1;
+    statePtr->acc[2] = XXH_PRIME64_2;
+    statePtr->acc[3] = XXH_PRIME64_3;
+    statePtr->acc[4] = XXH_PRIME64_4;
+    statePtr->acc[5] = XXH_PRIME32_2;
+    statePtr->acc[6] = XXH_PRIME64_5;
+    statePtr->acc[7] = XXH_PRIME32_1;
+
+    statePtr->seed = seed;
+    statePtr->useSeed = (seed != 0);
+    statePtr->extSecret = (const unsigned char*)secret;
+
+    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
+    statePtr->secretLimit = secretSize - XXH_STRIPE_LEN;
+    statePtr->nbStripesPerBlock = statePtr->secretLimit / XXH_SECRET_CONSUME_RATE;
+}
+
+/*! @ingroup xxh3_family */
+/*
+ * Resets the state for a new hash with seed 0 and the default XXH3_kSecret.
+ * Returns XXH_ERROR when statePtr is NULL, XXH_OK otherwise.
+ */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_64bits_reset(XXH3_state_t* statePtr)
+{
+    if (statePtr == NULL) return XXH_ERROR;
+    XXH3_reset_internal(statePtr, 0, XXH3_kSecret, XXH_SECRET_DEFAULT_SIZE);
+    return XXH_OK;
+}
+
+/*! @ingroup xxh3_family */
+/*
+ * Resets the state for a new hash using a caller-supplied secret and seed 0.
+ * The secret is referenced, not copied.
+ *
+ * Returns XXH_ERROR when statePtr or secret is NULL, or when secretSize is
+ * below XXH3_SECRET_SIZE_MIN; XXH_OK otherwise.
+ *
+ * All arguments are validated before the state is touched: running
+ * XXH3_reset_internal() on a too-small secretSize would trip its assertion
+ * in debug builds and make `secretSize - XXH_STRIPE_LEN` wrap around in
+ * release builds. On error the state is left unmodified.
+ */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_64bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize)
+{
+    if (statePtr == NULL) return XXH_ERROR;
+    if (secret == NULL) return XXH_ERROR;
+    if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
+    XXH3_reset_internal(statePtr, 0, secret, secretSize);
+    return XXH_OK;
+}
+
+/*! @ingroup xxh3_family */
+/*
+ * Resets the state for a new hash with the given seed.
+ * A seed of 0 is delegated to XXH3_64bits_reset(). Otherwise the state's
+ * customSecret is regenerated, but only when the seed differs from the one
+ * stored in the state or when the state currently references an external
+ * secret; the state is then reset with no external secret.
+ * Returns XXH_ERROR when statePtr is NULL, XXH_OK otherwise.
+ * NOTE(review): statePtr->seed and statePtr->extSecret are read before the
+ * reset, so this presumably relies on the state having been initialized
+ * beforehand - confirm against how callers obtain their states.
+ */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_64bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed)
+{
+    if (statePtr == NULL) return XXH_ERROR;
+    if (seed==0) return XXH3_64bits_reset(statePtr);
+    if ((seed != statePtr->seed) || (statePtr->extSecret != NULL))
+        XXH3_initCustomSecret(statePtr->customSecret, seed);
+    XXH3_reset_internal(statePtr, seed, NULL, XXH_SECRET_DEFAULT_SIZE);
+    return XXH_OK;
+}
+
+/*! @ingroup xxh3_family */
+/*
+ * Resets the state for a new hash using both a caller-supplied secret and a
+ * seed. Arguments are validated first: returns XXH_ERROR when statePtr or
+ * secret is NULL or when secretSize is below XXH3_SECRET_SIZE_MIN.
+ * useSeed is forced to 1 afterwards, overriding the (seed != 0) value set by
+ * XXH3_reset_internal().
+ */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_64bits_reset_withSecretandSeed(XXH3_state_t* statePtr, const void* secret, size_t secretSize, XXH64_hash_t seed64)
+{
+    if (statePtr == NULL) return XXH_ERROR;
+    if (secret == NULL) return XXH_ERROR;
+    if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
+    XXH3_reset_internal(statePtr, seed64, secret, secretSize);
+    statePtr->useSeed = 1; /* always, even if seed64==0 */
+    return XXH_OK;
+}
+
+/*
+ * Accumulates `nbStripes` stripes of `input`, continuing the current block
+ * at position *nbStripesSoFarPtr and updating that counter.
+ * When the stripes reach the end of the block, the accumulators are
+ * scrambled once (with the secret at `secretLimit`) and the remaining
+ * stripes start a new block from the beginning of the secret.
+ *
+ * Note : when XXH3_consumeStripes() is invoked,
+ * there must be a guarantee that at least one more byte must be consumed from input
+ * so that the function can blindly consume all stripes using the "normal" secret segment
+ */
+XXH_FORCE_INLINE void
+XXH3_consumeStripes(xxh_u64* XXH_RESTRICT acc,
+                    size_t* XXH_RESTRICT nbStripesSoFarPtr, size_t nbStripesPerBlock,
+                    const xxh_u8* XXH_RESTRICT input, size_t nbStripes,
+                    const xxh_u8* XXH_RESTRICT secret, size_t secretLimit,
+                    XXH3_f_accumulate_512 f_acc512,
+                    XXH3_f_scrambleAcc f_scramble)
+{
+    XXH_ASSERT(nbStripes <= nbStripesPerBlock);  /* can handle max 1 scramble per invocation */
+    XXH_ASSERT(*nbStripesSoFarPtr < nbStripesPerBlock);
+    {   size_t const stripesDone = *nbStripesSoFarPtr;
+        size_t const roomInBlock = nbStripesPerBlock - stripesDone;
+        const xxh_u8* const blockSecret = secret + stripesDone * XXH_SECRET_CONSUME_RATE;
+
+        if (nbStripes < roomInBlock) {
+            /* everything fits in the current block: no scrambling */
+            XXH3_accumulate(acc, input, blockSecret, nbStripes, f_acc512);
+            *nbStripesSoFarPtr = stripesDone + nbStripes;
+            return;
+        }
+
+        /* finish the block, scramble, then start the next block */
+        {   size_t const stripesInNextBlock = nbStripes - roomInBlock;
+            XXH3_accumulate(acc, input, blockSecret, roomInBlock, f_acc512);
+            f_scramble(acc, secret + secretLimit);
+            XXH3_accumulate(acc, input + roomInBlock * XXH_STRIPE_LEN, secret, stripesInNextBlock, f_acc512);
+            *nbStripesSoFarPtr = stripesInNextBlock;
+        }
+    }
+}
+
+/*
+ * XXH3_STREAM_USE_STACK: when >= 1, XXH3_update() works on a stack copy of
+ * the accumulators instead of operating on the state directly.
+ * Unless predefined by the build, it is enabled for every compiler but clang.
+ */
+#ifndef XXH3_STREAM_USE_STACK
+# ifndef __clang__ /* clang doesn't need additional stack space */
+#   define XXH3_STREAM_USE_STACK 1
+# endif
+#endif
+/*
+ * Both XXH3_64bits_update and XXH3_128bits_update use this routine.
+ *
+ * Appends `len` bytes at `input` to the streaming state.
+ * Data is staged in state->buffer; whenever more than
+ * XXH3_INTERNALBUFFER_SIZE bytes are pending, whole stripes are folded into
+ * the accumulators, and at least one byte is always left buffered so that
+ * the digest can process the final stripe with its dedicated secret segment.
+ *
+ * A NULL `input` is accepted (len is expected to be 0) and is a no-op.
+ * `state` must not be NULL (asserted only). Always returns XXH_OK.
+ */
+XXH_FORCE_INLINE XXH_errorcode
+XXH3_update(XXH3_state_t* XXH_RESTRICT const state,
+            const xxh_u8* XXH_RESTRICT input, size_t len,
+            XXH3_f_accumulate_512 f_acc512,
+            XXH3_f_scrambleAcc f_scramble)
+{
+    if (input==NULL) {
+        XXH_ASSERT(len == 0);
+        return XXH_OK;
+    }
+
+    XXH_ASSERT(state != NULL);
+    {   const xxh_u8* const bEnd = input + len;
+        const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
+#if defined(XXH3_STREAM_USE_STACK) && XXH3_STREAM_USE_STACK >= 1
+        /* For some reason, gcc and MSVC seem to suffer greatly
+         * when operating accumulators directly into state.
+         * Operating into stack space seems to enable proper optimization.
+         * clang, on the other hand, doesn't seem to need this trick */
+        XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[8]; XXH_memcpy(acc, state->acc, sizeof(acc));
+#else
+        xxh_u64* XXH_RESTRICT const acc = state->acc;
+#endif
+        state->totalLen += len;
+        XXH_ASSERT(state->bufferedSize <= XXH3_INTERNALBUFFER_SIZE);
+
+        /* small input : just fill in tmp buffer.
+         * Written as a subtraction (safe thanks to the assertion above) so
+         * that a huge `len` cannot wrap the comparison around. */
+        if (len <= XXH3_INTERNALBUFFER_SIZE - state->bufferedSize) {
+            XXH_memcpy(state->buffer + state->bufferedSize, input, len);
+            state->bufferedSize += (XXH32_hash_t)len;
+            return XXH_OK;
+        }
+
+        /* total input is now > XXH3_INTERNALBUFFER_SIZE */
+        #define XXH3_INTERNALBUFFER_STRIPES (XXH3_INTERNALBUFFER_SIZE / XXH_STRIPE_LEN)
+        XXH_STATIC_ASSERT(XXH3_INTERNALBUFFER_SIZE % XXH_STRIPE_LEN == 0);   /* clean multiple */
+
+        /*
+         * Internal buffer is partially filled (always, except at beginning)
+         * Complete it, then consume it.
+         */
+        if (state->bufferedSize) {
+            size_t const loadSize = XXH3_INTERNALBUFFER_SIZE - state->bufferedSize;
+            XXH_memcpy(state->buffer + state->bufferedSize, input, loadSize);
+            input += loadSize;
+            XXH3_consumeStripes(acc,
+                               &state->nbStripesSoFar, state->nbStripesPerBlock,
+                                state->buffer, XXH3_INTERNALBUFFER_STRIPES,
+                                secret, state->secretLimit,
+                                f_acc512, f_scramble);
+            state->bufferedSize = 0;
+        }
+        XXH_ASSERT(input < bEnd);
+
+        /* large input to consume : ingest per full block */
+        if ((size_t)(bEnd - input) > state->nbStripesPerBlock * XXH_STRIPE_LEN) {
+            /* (bEnd - 1): keep at least one byte out of the stripes consumed here */
+            size_t nbStripes = (size_t)(bEnd - 1 - input) / XXH_STRIPE_LEN;
+            XXH_ASSERT(state->nbStripesPerBlock >= state->nbStripesSoFar);
+            /* join to current block's end */
+            {   size_t const nbStripesToEnd = state->nbStripesPerBlock - state->nbStripesSoFar;
+                /* guards the subtraction below against wrap-around */
+                XXH_ASSERT(nbStripesToEnd <= nbStripes);
+                XXH3_accumulate(acc, input, secret + state->nbStripesSoFar * XXH_SECRET_CONSUME_RATE, nbStripesToEnd, f_acc512);
+                f_scramble(acc, secret + state->secretLimit);
+                state->nbStripesSoFar = 0;
+                input += nbStripesToEnd * XXH_STRIPE_LEN;
+                nbStripes -= nbStripesToEnd;
+            }
+            /* consume per entire blocks */
+            while(nbStripes >= state->nbStripesPerBlock) {
+                XXH3_accumulate(acc, input, secret, state->nbStripesPerBlock, f_acc512);
+                f_scramble(acc, secret + state->secretLimit);
+                input += state->nbStripesPerBlock * XXH_STRIPE_LEN;
+                nbStripes -= state->nbStripesPerBlock;
+            }
+            /* consume last partial block */
+            XXH3_accumulate(acc, input, secret, nbStripes, f_acc512);
+            input += nbStripes * XXH_STRIPE_LEN;
+            XXH_ASSERT(input < bEnd);  /* at least some bytes left */
+            state->nbStripesSoFar = nbStripes;
+            /* buffer predecessor of last partial stripe:
+             * the digest may need these bytes to rebuild a full last stripe */
+            XXH_memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN);
+            XXH_ASSERT(bEnd - input <= XXH_STRIPE_LEN);
+        } else {
+            /* content to consume <= block size */
+            /* Consume input by a multiple of internal buffer size */
+            if (bEnd - input > XXH3_INTERNALBUFFER_SIZE) {
+                const xxh_u8* const limit = bEnd - XXH3_INTERNALBUFFER_SIZE;
+                do {
+                    XXH3_consumeStripes(acc,
+                                       &state->nbStripesSoFar, state->nbStripesPerBlock,
+                                        input, XXH3_INTERNALBUFFER_STRIPES,
+                                        secret, state->secretLimit,
+                                        f_acc512, f_scramble);
+                    input += XXH3_INTERNALBUFFER_SIZE;
+                } while (input<limit);
+                /* buffer predecessor of last partial stripe */
+                XXH_memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN);
+            }
+        }
+
+        /* Some remaining input (always) : buffer it */
+        XXH_ASSERT(input < bEnd);
+        XXH_ASSERT(bEnd - input <= XXH3_INTERNALBUFFER_SIZE);
+        XXH_ASSERT(state->bufferedSize == 0);
+        XXH_memcpy(state->buffer, input, (size_t)(bEnd-input));
+        state->bufferedSize = (XXH32_hash_t)(bEnd-input);
+#if defined(XXH3_STREAM_USE_STACK) && XXH3_STREAM_USE_STACK >= 1
+        /* save stack accumulators into state */
+        XXH_memcpy(state->acc, acc, sizeof(acc));
+#endif
+    }
+
+    return XXH_OK;
+}
+
+/*! @ingroup xxh3_family */
+/*
+ * Streaming API: feeds `len` bytes of `input` into `state` using the
+ * selected accumulate/scramble kernels. Returns the result of XXH3_update().
+ */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_64bits_update(XXH3_state_t* state, const void* input, size_t len)
+{
+    return XXH3_update(state, (const xxh_u8*)input, len,
+                       XXH3_accumulate_512, XXH3_scrambleAcc);
+}
+
+
+/*
+ * Finalizes the accumulators of a streaming state into `acc` without
+ * modifying the state.
+ * If at least one full stripe is buffered, the buffered stripes (all but the
+ * last byte's stripe) are consumed, then the last XXH_STRIPE_LEN buffered
+ * bytes are accumulated with the secret segment ending
+ * XXH_SECRET_LASTACC_START bytes before secretLimit.
+ * Otherwise the last stripe is rebuilt from the tail of state->buffer
+ * followed by the buffered bytes, and accumulated the same way.
+ */
+XXH_FORCE_INLINE void
+XXH3_digest_long (XXH64_hash_t* acc,
+                  const XXH3_state_t* state,
+                  const unsigned char* secret)
+{
+    /*
+     * Digest on a local copy. This way, the state remains unaltered, and it can
+     * continue ingesting more input afterwards.
+     */
+    XXH_memcpy(acc, state->acc, sizeof(state->acc));
+    if (state->bufferedSize >= XXH_STRIPE_LEN) {
+        size_t const nbStripes = (state->bufferedSize - 1) / XXH_STRIPE_LEN;
+        /* local counter: XXH3_consumeStripes() updates it, the state must not change */
+        size_t nbStripesSoFar = state->nbStripesSoFar;
+        XXH3_consumeStripes(acc,
+                           &nbStripesSoFar, state->nbStripesPerBlock,
+                            state->buffer, nbStripes,
+                            secret, state->secretLimit,
+                            XXH3_accumulate_512, XXH3_scrambleAcc);
+        /* last stripe */
+        XXH3_accumulate_512(acc,
+                            state->buffer + state->bufferedSize - XXH_STRIPE_LEN,
+                            secret + state->secretLimit - XXH_SECRET_LASTACC_START);
+    } else {  /* bufferedSize < XXH_STRIPE_LEN */
+        xxh_u8 lastStripe[XXH_STRIPE_LEN];
+        size_t const catchupSize = XXH_STRIPE_LEN - state->bufferedSize;
+        XXH_ASSERT(state->bufferedSize > 0);  /* there is always some input buffered */
+        /* the end of state->buffer holds the bytes preceding the buffered ones */
+        XXH_memcpy(lastStripe, state->buffer + sizeof(state->buffer) - catchupSize, catchupSize);
+        XXH_memcpy(lastStripe + catchupSize, state->buffer, state->bufferedSize);
+        XXH3_accumulate_512(acc,
+                            lastStripe,
+                            secret + state->secretLimit - XXH_SECRET_LASTACC_START);
+    }
+}
+
+/*! @ingroup xxh3_family */
+/*
+ * Returns the 64-bit hash of everything fed into `state` so far.
+ * The state is only read, so more input can be appended afterwards.
+ * Up to XXH3_MIDSIZE_MAX total bytes, the whole input is still in
+ * state->buffer and is hashed with the one-shot functions (seeded variant
+ * when useSeed is set, secret variant otherwise). Longer inputs are
+ * finalized from the accumulators.
+ */
+XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest (const XXH3_state_t* state)
+{
+    const unsigned char* const secret = (state->extSecret != NULL) ? state->extSecret : state->customSecret;
+
+    if (state->totalLen <= XXH3_MIDSIZE_MAX) {
+        /* short input: everything is still buffered */
+        if (state->useSeed) {
+            return XXH3_64bits_withSeed(state->buffer, (size_t)state->totalLen, state->seed);
+        }
+        return XXH3_64bits_withSecret(state->buffer, (size_t)(state->totalLen),
+                                      secret, state->secretLimit + XXH_STRIPE_LEN);
+    }
+
+    {   XXH_ALIGN(XXH_ACC_ALIGN) XXH64_hash_t acc[XXH_ACC_NB];
+        XXH3_digest_long(acc, state, secret);
+        return XXH3_mergeAccs(acc,
+                              secret + XXH_SECRET_MERGEACCS_START,
+                              (xxh_u64)state->totalLen * XXH_PRIME64_1);
+    }
+}
+
+
+
+/* ==========================================
+ * XXH3 128 bits (a.k.a XXH128)
+ * ==========================================
+ * XXH3's 128-bit variant has better mixing and strength than the 64-bit variant,
+ * even without counting the significantly larger output size.
+ *
+ * For example, extra steps are taken to avoid the seed-dependent collisions
+ * in 17-240 byte inputs (See XXH3_mix16B and XXH128_mix32B).
+ *
+ * This strength naturally comes at the cost of some speed, especially on short
+ * lengths. Note that longer hashes are about as fast as the 64-bit version
+ * due to it using only a slight modification of the 64-bit loop.
+ *
+ * XXH128 is also more oriented towards 64-bit machines. It is still extremely
+ * fast for a _128-bit_ hash on 32-bit (it usually clears XXH64).
+ */
+
+/*
+ * 128-bit hash of a 1 to 3 byte input.
+ * The input bytes and the length are packed into a 32-bit word (combinedl);
+ * a byte-swapped and rotated copy (combinedh) feeds the high half.
+ * Each word is XORed with a seed-adjusted value built from the first 16
+ * bytes of `secret`, then passed through XXH64_avalanche().
+ */
+XXH_FORCE_INLINE XXH128_hash_t
+XXH3_len_1to3_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
+{
+    /* A doubled version of 1to3_64b with different constants. */
+    XXH_ASSERT(input != NULL);
+    XXH_ASSERT(1 <= len && len <= 3);
+    XXH_ASSERT(secret != NULL);
+    /*
+     * len = 1: combinedl = { input[0], 0x01, input[0], input[0] }
+     * len = 2: combinedl = { input[1], 0x02, input[0], input[1] }
+     * len = 3: combinedl = { input[2], 0x03, input[0], input[1] }
+     */
+    {   xxh_u8 const c1 = input[0];
+        xxh_u8 const c2 = input[len >> 1];
+        xxh_u8 const c3 = input[len - 1];
+        xxh_u32 const combinedl = ((xxh_u32)c1 <<16) | ((xxh_u32)c2 << 24)
+                                | ((xxh_u32)c3 << 0) | ((xxh_u32)len << 8);
+        xxh_u32 const combinedh = XXH_rotl32(XXH_swap32(combinedl), 13);
+        /* the seed is added for the low half and subtracted for the high half */
+        xxh_u64 const bitflipl = (XXH_readLE32(secret) ^ XXH_readLE32(secret+4)) + seed;
+        xxh_u64 const bitfliph = (XXH_readLE32(secret+8) ^ XXH_readLE32(secret+12)) - seed;
+        xxh_u64 const keyed_lo = (xxh_u64)combinedl ^ bitflipl;
+        xxh_u64 const keyed_hi = (xxh_u64)combinedh ^ bitfliph;
+        XXH128_hash_t h128;
+        h128.low64  = XXH64_avalanche(keyed_lo);
+        h128.high64 = XXH64_avalanche(keyed_hi);
+        return h128;
+    }
+}
+
+XXH_FORCE_INLINE XXH128_hash_t
+XXH3_len_4to8_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
+{
+    XXH_ASSERT(input != NULL);
+    XXH_ASSERT(secret != NULL);
+    XXH_ASSERT(4 <= len && len <= 8);
+    /* 128-bit hash of a 4..8 byte input. First XOR the swapped low seed word into the seed's high half. */
+    seed ^= (xxh_u64)XXH_swap32((xxh_u32)seed) << 32;
+    {   /* the first and last 4 bytes (overlapping when len < 8) form one 64-bit word */
+        xxh_u64 const head   = XXH_readLE32(input);
+        xxh_u64 const tail   = XXH_readLE32(input + len - 4);
+        xxh_u64 const flip   = (XXH_readLE64(secret+16) ^ XXH_readLE64(secret+24)) + seed;
+        xxh_u64 const mixed  = (head + (tail << 32)) ^ flip;
+        /* len << 2 is always even, so adding it never changes the parity of the multiplier */
+        xxh_u64 const factor = XXH_PRIME64_1 + (len << 2);
+        XXH128_hash_t acc    = XXH_mult64to128(mixed, factor);
+        acc.high64 += (acc.low64 << 1);
+        acc.low64  ^= (acc.high64 >> 3);
+
+        acc.low64   = XXH_xorshift64(acc.low64, 35);
+        acc.low64  *= 0x9FB21C651E98DF25ULL;
+        acc.low64   = XXH_xorshift64(acc.low64, 28);
+        acc.high64  = XXH3_avalanche(acc.high64);
+        return acc;
+    }
+}
+
+XXH_FORCE_INLINE XXH128_hash_t
+XXH3_len_9to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
+{
+    XXH_ASSERT(input != NULL);
+    XXH_ASSERT(secret != NULL);
+    XXH_ASSERT(9 <= len && len <= 16);
+    /*
+     * 128-bit hash of a 9..16 byte input, read as its first and last 8 bytes
+     * (which overlap when len < 16).
+     */
+    {   xxh_u64 const flip_lo = (XXH_readLE64(secret+32) ^ XXH_readLE64(secret+40)) - seed;
+        xxh_u64 const flip_hi = (XXH_readLE64(secret+48) ^ XXH_readLE64(secret+56)) + seed;
+        xxh_u64 const head    = XXH_readLE64(input);
+        xxh_u64 const tail    = XXH_readLE64(input + len - 8);
+        xxh_u64 const tail_k  = tail ^ flip_hi;   /* keyed copy of the last 8 bytes */
+        XXH128_hash_t prod    = XXH_mult64to128(head ^ tail ^ flip_lo, XXH_PRIME64_1);
+
+        /*
+         * Put len in the middle of prod to ensure that the length gets mixed to
+         * both the low and high bits in the 128x64 multiply below.
+         */
+        prod.low64 += (xxh_u64)(len - 1) << 54;
+
+        /*
+         * Goal:
+         *    prod.high64 += (tail_k & 0xFFFFFFFF00000000)
+         *                 + (xxh_u64)(xxh_u32)tail_k * XXH_PRIME32_2
+         * i.e. the high 32 bits of tail_k are added in place, and the long
+         * product of its low 32 bits and XXH_PRIME32_2 is added on top.
+         */
+        if (sizeof(void *) >= sizeof(xxh_u64)) { /* 64-bit */
+            /*
+             * 64-bit optimized (albeit more confusing) form, without the mask.
+             *
+             * Let:
+             *    hi = (tail_k & 0xFFFFFFFF00000000)
+             *    lo = (tail_k & 0x00000000FFFFFFFF)
+             *    c  = XXH_PRIME32_2
+             *
+             * The goal is
+             *    hi + (lo * c)
+             * Inverse Property: x + y - x == y
+             *    hi + (lo * (1 + c - 1))
+             * Distributive Property: x * (y + z) == (x * y) + (x * z)
+             *    hi + (lo * 1) + (lo * (c - 1))
+             * Identity Property: x * 1 == x
+             *    hi + lo + (lo * (c - 1))
+             * Since hi + lo == tail_k, this is
+             *    tail_k + (lo * (c - 1))
+             */
+            prod.high64 += tail_k + XXH_mult32to64((xxh_u32)tail_k, XXH_PRIME32_2 - 1);
+        } else { /* 32-bit */
+            /*
+             * 32-bit optimized form, which is more readable: it removes an ADC
+             * and delays a dependency between the two halves of prod.high64,
+             * at the price of a mask that would be wasted on 64-bit.
+             */
+            prod.high64 += (tail_k & 0xFFFFFFFF00000000ULL) + XXH_mult32to64((xxh_u32)tail_k, XXH_PRIME32_2);
+        }
+        /* prod ^= XXH_swap64(prod >> 64); */
+        prod.low64  ^= XXH_swap64(prod.high64);
+
+        {   /* 128x64 multiply: out = prod * XXH_PRIME64_2; */
+            XXH128_hash_t out = XXH_mult64to128(prod.low64, XXH_PRIME64_2);
+            out.high64 += prod.high64 * XXH_PRIME64_2;
+
+            out.low64   = XXH3_avalanche(out.low64);
+            out.high64  = XXH3_avalanche(out.high64);
+            return out;
+    }   }
+}
+
+/*
+ * 128-bit hash of 0..16 bytes, dispatched on len. Assumption: `secret` size is >= XXH3_SECRET_SIZE_MIN
+ */
+XXH_FORCE_INLINE XXH128_hash_t
+XXH3_len_0to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
+{
+    XXH_ASSERT(len <= 16);
+    if (len > 8)  return XXH3_len_9to16_128b(input, len, secret, seed);
+    if (len >= 4) return XXH3_len_4to8_128b(input, len, secret, seed);
+    if (len != 0) return XXH3_len_1to3_128b(input, len, secret, seed);
+    {   XXH128_hash_t empty;   /* len == 0: only the seed and secret bytes 64..95 are hashed */
+        xxh_u64 const flip_lo = XXH_readLE64(secret+64) ^ XXH_readLE64(secret+72);
+        xxh_u64 const flip_hi = XXH_readLE64(secret+80) ^ XXH_readLE64(secret+88);
+        empty.low64  = XXH64_avalanche(seed ^ flip_lo);
+        empty.high64 = XXH64_avalanche(seed ^ flip_hi);
+        return empty;
+    }
+}
+
+/*
+ * Mixes two 16-byte blocks into acc. A bit slower than XXH3_mix16B, but handles multiply by zero better.
+ */
+XXH_FORCE_INLINE XXH128_hash_t
+XXH128_mix32B(XXH128_hash_t acc, const xxh_u8* input_1, const xxh_u8* input_2,
+              const xxh_u8* secret, XXH64_hash_t seed)
+{
+    xxh_u64 const sum_1 = XXH_readLE64(input_1) + XXH_readLE64(input_1 + 8);
+    xxh_u64 const sum_2 = XXH_readLE64(input_2) + XXH_readLE64(input_2 + 8);
+    acc.low64  = (acc.low64  + XXH3_mix16B(input_1, secret,      seed)) ^ sum_2;
+    acc.high64 = (acc.high64 + XXH3_mix16B(input_2, secret + 16, seed)) ^ sum_1;
+    return acc;
+}
+
+
+XXH_FORCE_INLINE XXH128_hash_t
+XXH3_len_17to128_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
+                      const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
+                      XXH64_hash_t seed)
+{
+    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
+    XXH_ASSERT(16 < len && len <= 128);
+
+    /* 128-bit hash of a 17..128 byte input, consumed as one to four pairs of 16-byte blocks. */
+    {   XXH128_hash_t acc;
+        XXH128_hash_t out;
+        acc.low64  = len * XXH_PRIME64_1;
+        acc.high64 = 0;
+        /*
+         * Each step pairs a block counted from the front with one counted from the
+         * back, innermost pair first; the outermost pair is mixed for every length.
+         */
+        if (len > 96) acc = XXH128_mix32B(acc, input+48, input+len-64, secret+96, seed);
+        if (len > 64) acc = XXH128_mix32B(acc, input+32, input+len-48, secret+64, seed);
+        if (len > 32) acc = XXH128_mix32B(acc, input+16, input+len-32, secret+32, seed);
+        acc = XXH128_mix32B(acc, input, input+len-16, secret, seed);
+
+        /* fold both accumulator words and (len - seed) into the result, then avalanche */
+        out.low64  = acc.low64 + acc.high64;
+        out.high64 = (acc.low64    * XXH_PRIME64_1)
+                   + (acc.high64   * XXH_PRIME64_4)
+                   + ((len - seed) * XXH_PRIME64_2);
+        out.low64  = XXH3_avalanche(out.low64);
+        out.high64 = (XXH64_hash_t)0 - XXH3_avalanche(out.high64);
+        return out;
+    }
+}
+
+XXH_NO_INLINE XXH128_hash_t
+XXH3_len_129to240_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
+                       const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
+                       XXH64_hash_t seed)
+{
+    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
+    XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);
+
+    /* 128-bit hash of a 129..XXH3_MIDSIZE_MAX byte input: whole 32-byte
+     * rounds first, then one extra round over the final 32 bytes. */
+    {   int const rounds = (int)len / 32;
+        int round = 0;
+        XXH128_hash_t acc;
+        XXH128_hash_t out;
+        acc.low64  = len * XXH_PRIME64_1;
+        acc.high64 = 0;
+
+        /* rounds 0..3 read the secret from its start */
+        while (round < 4) {
+            const xxh_u8* const block = input + (32 * round);
+            acc = XXH128_mix32B(acc, block, block + 16, secret + (32 * round), seed);
+            round++;
+        }
+        acc.low64  = XXH3_avalanche(acc.low64);
+        acc.high64 = XXH3_avalanche(acc.high64);
+
+        /* the remaining whole rounds restart the secret at XXH3_MIDSIZE_STARTOFFSET */
+        XXH_ASSERT(rounds >= 4);
+        while (round < rounds) {
+            const xxh_u8* const block = input + (32 * round);
+            const xxh_u8* const key   = secret + XXH3_MIDSIZE_STARTOFFSET + (32 * (round - 4));
+            acc = XXH128_mix32B(acc, block, block + 16, key, seed);
+            round++;
+        }
+        /* last 32 bytes: the trailing half is passed first, and the seed is negated */
+        acc = XXH128_mix32B(acc, input + len - 16, input + len - 32,
+                            secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET - 16,
+                            0ULL - seed);
+
+        out.low64  = acc.low64 + acc.high64;
+        out.high64 = (acc.low64    * XXH_PRIME64_1)
+                   + (acc.high64   * XXH_PRIME64_4)
+                   + ((len - seed) * XXH_PRIME64_2);
+        out.low64  = XXH3_avalanche(out.low64);
+        out.high64 = (XXH64_hash_t)0 - XXH3_avalanche(out.high64);
+        return out;
+    }
+}
+
+XXH_FORCE_INLINE XXH128_hash_t
+XXH3_hashLong_128b_internal(const void* XXH_RESTRICT input, size_t len,
+                            const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
+                            XXH3_f_accumulate_512 f_acc512,
+                            XXH3_f_scrambleAcc f_scramble)
+{
+    /* Long-input core: run the accumulator loop, then merge the accumulators once per output word. */
+    XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = XXH3_INIT_ACC;
+
+    XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, secret, secretSize, f_acc512, f_scramble);
+    XXH_STATIC_ASSERT(sizeof(acc) == 64);
+    XXH_ASSERT(secretSize >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
+    {   /* low64 is keyed from near the start of the secret, high64 from near its end */
+        const xxh_u8* const key_lo = secret + XXH_SECRET_MERGEACCS_START;
+        const xxh_u8* const key_hi = secret + secretSize
+                                            - sizeof(acc) - XXH_SECRET_MERGEACCS_START;
+        xxh_u64 const start_lo = (xxh_u64)len * XXH_PRIME64_1;
+        xxh_u64 const start_hi = ~((xxh_u64)len * XXH_PRIME64_2);
+        XXH128_hash_t result;
+        result.low64  = XXH3_mergeAccs(acc, key_lo, start_lo);
+        result.high64 = XXH3_mergeAccs(acc, key_hi, start_hi);
+        return result;
+    }
+}
+
+/*
+ * Default-secret long hash; seed64, secret and secretLen are ignored. Must not be inlined, for performance.
+ */
+XXH_NO_INLINE XXH128_hash_t
+XXH3_hashLong_128b_default(const void* XXH_RESTRICT input, size_t len,
+                           XXH64_hash_t seed64,
+                           const void* XXH_RESTRICT secret, size_t secretLen)
+{
+    (void)secretLen; (void)secret; (void)seed64;
+    return XXH3_hashLong_128b_internal(input, len,
+                                       XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate_512, XXH3_scrambleAcc);
+}
+
+/*
+ * Long-input hash keyed by a caller-provided secret; seed64 is ignored.
+ * Passing @secretLen through (when it's static) lets the compiler optimize the vectorized loop.
+ */
+XXH_FORCE_INLINE XXH128_hash_t
+XXH3_hashLong_128b_withSecret(const void* XXH_RESTRICT input, size_t len,
+                              XXH64_hash_t seed64,
+                              const void* XXH_RESTRICT secret, size_t secretLen)
+{
+    const xxh_u8* const key = (const xxh_u8*)secret;
+    (void)seed64;
+    return XXH3_hashLong_128b_internal(input, len, key, secretLen, XXH3_accumulate_512, XXH3_scrambleAcc);
+}
+
+XXH_FORCE_INLINE XXH128_hash_t
+XXH3_hashLong_128b_withSeed_internal(const void* XXH_RESTRICT input, size_t len,
+                                XXH64_hash_t seed64,
+                                XXH3_f_accumulate_512 f_acc512,
+                                XXH3_f_scrambleAcc f_scramble,
+                                XXH3_f_initCustomSecret f_initSec)
+{
+    /* A non-zero seed gets a secret generated by f_initSec on the stack; seed 0 uses XXH3_kSecret as is. */
+    if (seed64 != 0) {
+        XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];
+        f_initSec(secret, seed64);
+        return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, sizeof(secret),
+                                           f_acc512, f_scramble);
+    }
+    return XXH3_hashLong_128b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret),
+                                       f_acc512, f_scramble);
+}
+
+/*
+ * Seeded long-input hash; secret and secretLen are ignored. Must not be inlined, for performance.
+ */
+XXH_NO_INLINE XXH128_hash_t
+XXH3_hashLong_128b_withSeed(const void* input, size_t len,
+                            XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen)
+{
+    (void)secretLen; (void)secret;
+    return XXH3_hashLong_128b_withSeed_internal(input, len, seed64, XXH3_accumulate_512,
+                                                XXH3_scrambleAcc, XXH3_initCustomSecret);
+}
+
+/* Long-input back end chosen by each entry point: (input, len, seed64, secret, secretLen). */
+typedef XXH128_hash_t (*XXH3_hashLong128_f)(const void* XXH_RESTRICT, size_t,
+                                            XXH64_hash_t, const void* XXH_RESTRICT, size_t);
+
+XXH_FORCE_INLINE XXH128_hash_t
+XXH3_128bits_internal(const void* input, size_t len,
+                      XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen,
+                      XXH3_hashLong128_f f_hl128)
+{
+    const xxh_u8* const data = (const xxh_u8*)input;
+    const xxh_u8* const key  = (const xxh_u8*)secret;
+    /*
+     * The secret size is a contract pre-condition, only asserted: this is where
+     * any handling of a bad `secret` would go, but a real check and branch here
+     * would cost performance at every hash.
+     */
+    XXH_ASSERT(secretLen >= XXH3_SECRET_SIZE_MIN);
+    /* Pick the routine by length; only inputs above XXH3_MIDSIZE_MAX reach f_hl128. */
+    if (len <= 16)               return XXH3_len_0to16_128b(data, len, key, seed64);
+    if (len <= 128)              return XXH3_len_17to128_128b(data, len, key, secretLen, seed64);
+    if (len <= XXH3_MIDSIZE_MAX) return XXH3_len_129to240_128b(data, len, key, secretLen, seed64);
+    return f_hl128(input, len, seed64, secret, secretLen);
+}
+
+
+/* ===   Public XXH128 API   === */
+
+/*! @ingroup xxh3_family */
+XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(const void* input, size_t len)
+{
+    /* Unseeded one-shot hash: seed 0 with the built-in secret XXH3_kSecret. */
+    return XXH3_128bits_internal(input, len, (XXH64_hash_t)0, XXH3_kSecret, sizeof(XXH3_kSecret),
+                                 XXH3_hashLong_128b_default);
+}
+
+/*! @ingroup xxh3_family */
+XXH_PUBLIC_API XXH128_hash_t
+XXH3_128bits_withSecret(const void* input, size_t len, const void* secret, size_t secretSize)
+{
+    /* One-shot hash keyed by the caller's secret; the seed is fixed to 0. */
+    return XXH3_128bits_internal(input, len, (XXH64_hash_t)0, secret, secretSize,
+                                 XXH3_hashLong_128b_withSecret);
+}
+
+/*! @ingroup xxh3_family */
+XXH_PUBLIC_API XXH128_hash_t
+XXH3_128bits_withSeed(const void* input, size_t len, XXH64_hash_t seed)
+{
+    /* One-shot seeded hash; XXH3_kSecret is the secret handed to the short-input routines. */
+    return XXH3_128bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret),
+                                 XXH3_hashLong_128b_withSeed);
+}
+
+/*! @ingroup xxh3_family */
+XXH_PUBLIC_API XXH128_hash_t
+XXH3_128bits_withSecretandSeed(const void* input, size_t len, const void* secret, size_t secretSize, XXH64_hash_t seed)
+{
+    /* Up to XXH3_MIDSIZE_MAX bytes: seed with XXH3_kSecret. Longer: the caller's secret, seed ignored. */
+    if (len > XXH3_MIDSIZE_MAX) return XXH3_hashLong_128b_withSecret(input, len, seed, secret, secretSize);
+    return XXH3_128bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL); /* long path unreachable */
+}
+
+/*! @ingroup xxh3_family */
+XXH_PUBLIC_API XXH128_hash_t
+XXH128(const void* input, size_t len, XXH64_hash_t seed)
+{   /* Convenience entry point: forwards unchanged to XXH3_128bits_withSeed(). */
+    return XXH3_128bits_withSeed(input, len, seed);
+}
+
+
+/* ===   XXH3 128-bit streaming   === */
+
+/*
+ * All initialization and update functions are identical to 64-bit streaming variant.
+ * The only difference is the finalization routine.
+ */
+
+/*! @ingroup xxh3_family */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_128bits_reset(XXH3_state_t* statePtr)
+{   /* Forwards to the 64-bit reset and returns its result unchanged. */
+    return XXH3_64bits_reset(statePtr);
+}
+
+/*! @ingroup xxh3_family */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_128bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize)
+{   /* Forwards to the 64-bit reset_withSecret and returns its result unchanged. */
+    return XXH3_64bits_reset_withSecret(statePtr, secret, secretSize);
+}
+
+/*! @ingroup xxh3_family */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_128bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed)
+{   /* Forwards to the 64-bit reset_withSeed and returns its result unchanged. */
+    return XXH3_64bits_reset_withSeed(statePtr, seed);
+}
+
+/*! @ingroup xxh3_family */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_128bits_reset_withSecretandSeed(XXH3_state_t* statePtr, const void* secret, size_t secretSize, XXH64_hash_t seed)
+{   /* Forwards to the 64-bit reset_withSecretandSeed and returns its result unchanged. */
+    return XXH3_64bits_reset_withSecretandSeed(statePtr, secret, secretSize, seed);
+}
+
+/*! @ingroup xxh3_family */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_128bits_update(XXH3_state_t* state, const void* input, size_t len)
+{   /* Passes len bytes of input to XXH3_update() with XXH3_accumulate_512 and XXH3_scrambleAcc. */
+    return XXH3_update(state, (const xxh_u8*)input, len,
+                       XXH3_accumulate_512, XXH3_scrambleAcc);
+}
+
+/*! @ingroup xxh3_family */
+XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (const XXH3_state_t* state)
+{
+    /* Returns the 128-bit hash of the data fed so far; `state` is const and only read here. */
+    const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
+    if (state->totalLen > XXH3_MIDSIZE_MAX) {
+        XXH_ALIGN(XXH_ACC_ALIGN) XXH64_hash_t acc[XXH_ACC_NB];
+        XXH3_digest_long(acc, state, secret);
+        XXH_ASSERT(state->secretLimit + XXH_STRIPE_LEN >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
+        {   XXH128_hash_t h128;
+            h128.low64  = XXH3_mergeAccs(acc, secret + XXH_SECRET_MERGEACCS_START,
+                                         (xxh_u64)state->totalLen * XXH_PRIME64_1);
+            h128.high64 = XXH3_mergeAccs(acc, secret + state->secretLimit + XXH_STRIPE_LEN
+                                                     - sizeof(acc) - XXH_SECRET_MERGEACCS_START,
+                                         ~((xxh_u64)state->totalLen * XXH_PRIME64_2));
+            return h128;
+        }
+    }
+    /* totalLen <= XXH3_MIDSIZE_MAX: one-shot hash of the buffered bytes. Branch on the useSeed flag, as the
+     * 64-bit digest does, so that a state flagged as seeded takes the seeded path even when its seed is 0. */
+    if (state->useSeed)
+        return XXH3_128bits_withSeed(state->buffer, (size_t)state->totalLen, state->seed);
+    return XXH3_128bits_withSecret(state->buffer, (size_t)(state->totalLen),
+                                   secret, state->secretLimit + XXH_STRIPE_LEN);
+}
+
+/* 128-bit utility functions */
+
+#include <string.h>   /* memcmp, memcpy */
+
+/* return : 1 if equal, 0 if different */
+/*! @ingroup xxh3_family */
+XXH_PUBLIC_API int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2)
+{
+    /* a raw byte comparison is valid because XXH128_hash_t is compact (no padding byte) */
+    return memcmp(&h1, &h2, sizeof(h1)) == 0;
+}
+
+/* qsort()-compatible three-way comparison of two XXH128_hash_t values.
+ * return : >0 if *h128_1  > *h128_2
+ *          <0 if *h128_1  < *h128_2
+ *          =0 if *h128_1 == *h128_2  */
+/*! @ingroup xxh3_family */
+XXH_PUBLIC_API int XXH128_cmp(const void* h128_1, const void* h128_2)
+{
+    const XXH128_hash_t* const lhs = (const XXH128_hash_t*)h128_1;
+    const XXH128_hash_t* const rhs = (const XXH128_hash_t*)h128_2;
+    /* high64 is compared first; low64 only breaks ties */
+    if (lhs->high64 != rhs->high64) return (lhs->high64 > rhs->high64) ? 1 : -1;
+    if (lhs->low64  != rhs->low64)  return (lhs->low64  > rhs->low64)  ? 1 : -1;
+    return 0;
+}
+
+
+/*======   Canonical representation   ======*/
+/*! @ingroup xxh3_family */
+XXH_PUBLIC_API void
+XXH128_canonicalFromHash(XXH128_canonical_t* dst, XXH128_hash_t hash)
+{
+    /* Canonical layout: high64 first, then low64; both are byte-swapped on little-endian CPUs. */
+    XXH64_hash_t const hi = XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(hash.high64) : hash.high64;
+    XXH64_hash_t const lo = XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(hash.low64)  : hash.low64;
+    char* const out = (char*)dst;
+    XXH_STATIC_ASSERT(sizeof(XXH128_canonical_t) == sizeof(XXH128_hash_t));
+    XXH_memcpy(out, &hi, sizeof(hi));
+    XXH_memcpy(out + sizeof(hi), &lo, sizeof(lo));
+}
+
+/*! @ingroup xxh3_family */
+XXH_PUBLIC_API XXH128_hash_t
+XXH128_hashFromCanonical(const XXH128_canonical_t* src)
+{
+    XXH128_hash_t hash;   /* rebuilt from two big-endian reads: high64 at offset 0, low64 at offset 8 */
+    hash.low64  = XXH_readBE64(src->digest + 8);
+    hash.high64 = XXH_readBE64(src);
+    return hash;
+}
+
+
+
+/* ==========================================
+ * Secret generators
+ * ==========================================
+ */
+#define XXH_MIN(x, y) (((x) > (y)) ? (y) : (x))   /* smaller of x and y; the chosen argument is evaluated twice */
+
+static void XXH3_combine16(void* dst, XXH128_hash_t h128)
+{   char* const lo = (char*)dst;   /* XOR h128 into dst[0..15]: low64 at offset 0, high64 at offset 8 */
+    XXH_writeLE64(lo,     h128.low64  ^ XXH_readLE64(lo));
+    XXH_writeLE64(lo + 8, h128.high64 ^ XXH_readLE64(lo + 8));
+}
+
+/*! @ingroup xxh3_family */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_generateSecret(void* secretBuffer, size_t secretSize, const void* customSeed, size_t customSeedSize)
+{
+    char* const out = (char*)secretBuffer;
+    size_t filled;
+    size_t seg;
+    XXH128_canonical_t scrambler;
+    /* Argument checks: each is asserted first, then reported as XXH_ERROR. */
+    XXH_ASSERT(secretBuffer != NULL);
+    if (secretBuffer == NULL) return XXH_ERROR;
+    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
+    if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
+    if (customSeedSize == 0) {   /* no seed material: fall back to the default secret */
+        customSeed = XXH3_kSecret;
+        customSeedSize = XXH_SECRET_DEFAULT_SIZE;
+    }
+    XXH_ASSERT(customSeed != NULL);
+    if (customSeed == NULL) return XXH_ERROR;
+
+    /* Step 1: tile secretBuffer with copies of customSeed, truncating the final copy. */
+    for (filled = 0; filled < secretSize; ) {
+        size_t const chunk = XXH_MIN((secretSize - filled), customSeedSize);
+        memcpy(out + filled, customSeed, chunk);
+        filled += chunk;
+    }
+
+    /* Step 2: XOR every whole 16-byte segment with XXH128(scrambler), seeded by the segment index. */
+    XXH128_canonicalFromHash(&scrambler, XXH128(customSeed, customSeedSize, 0));
+    for (seg = 0; seg < secretSize / 16; seg++) {
+        XXH3_combine16(out + seg*16, XXH128(&scrambler, sizeof(scrambler), seg));
+    }
+    /* Step 3: XOR the final 16 bytes with the scrambler hash itself. */
+    XXH3_combine16(out + secretSize - 16, XXH128_hashFromCanonical(&scrambler));
+    return XXH_OK;
+}
+
+/*! @ingroup xxh3_family */
+XXH_PUBLIC_API void
+XXH3_generateSecret_fromSeed(void* secretBuffer, XXH64_hash_t seed)
+{   /* Builds the seed-derived secret in an aligned scratch array, then copies all of it out. */
+    XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 derived[XXH_SECRET_DEFAULT_SIZE];
+    XXH3_initCustomSecret(derived, seed);
+    XXH_ASSERT(secretBuffer != NULL);
+    memcpy(secretBuffer, derived, sizeof(derived));
+}
+
+
+
+/* Pop our optimization override from above */
+#if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \
+  && defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
+  && defined(__OPTIMIZE__) && !defined(__OPTIMIZE_SIZE__) /* respect -O0 and -Os */
+#  pragma GCC pop_options
+#endif
+
+#endif  /* XXH_NO_LONG_LONG */
+
+#endif  /* XXH_NO_XXH3 */
+
+/*!
+ * @}
+ */
+#endif  /* XXH_IMPLEMENTATION */
+
+
+#if defined (__cplusplus)
+}
+#endif