From 706eaa128982133ed6d8824d96ea853bb242f633 Mon Sep 17 00:00:00 2001 From: SisMaker <1713699517@qq.com> Date: Sun, 23 Feb 2020 00:05:46 +0800 Subject: [PATCH] some nif sample --- .gitignore | 5 +- c_src/.enq/enq_nif.c | 442 +++ c_src/.enq/fifo.h | 71 + c_src/.enq/lifo.h | 63 + c_src/.enq/rebar.config | 12 + c_src/bitmap_filter/bitmap_filter.c | 80 + c_src/{cq => bitmap_filter}/rebar.config | 6 +- c_src/bsn/bsn_ext.c | 448 +++ c_src/bsn/bsn_int.c | 331 ++ c_src/bsn/c_src/bsn_ext.c | 448 +++ c_src/bsn/c_src/bsn_int.c | 331 ++ c_src/{cq2 => bsn}/rebar.config | 11 +- c_src/couchdb_hqueue/c_src/hqueue.c | 318 ++ c_src/couchdb_hqueue/c_src/hqueue.d | 5 + c_src/couchdb_hqueue/c_src/hqueue.h | 60 + c_src/couchdb_hqueue/c_src/hqueue_nif.c | 601 +++ c_src/couchdb_hqueue/c_src/hqueue_nif.d | 5 + c_src/couchdb_hqueue/c_src/valgrind_sample.c | 72 + c_src/couchdb_hqueue/hqueue.c | 318 ++ c_src/couchdb_hqueue/hqueue.h | 60 + c_src/couchdb_hqueue/hqueue_nif.c | 601 +++ c_src/couchdb_hqueue/rebar.config | 13 + c_src/couchdb_hqueue/valgrind_sample.c | 72 + c_src/cq/cq_nif.c | 564 --- c_src/cq/cq_nif.h | 71 - c_src/cq1/cq_nif.c | 564 --- c_src/cq1/cq_nif.h | 71 - c_src/cq1/rebar.config | 26 - c_src/cq2/cq_nif.c | 564 --- c_src/cq2/cq_nif.h | 71 - c_src/enlfq/Makefile | 80 + c_src/enlfq/concurrentqueue.h | 3637 ++++++++++++++++++ c_src/enlfq/enlfq.cc | 84 + c_src/enlfq/enlfq.h | 10 + c_src/enlfq/enlfq_nif.cc | 57 + c_src/enlfq/enlfq_nif.h | 19 + c_src/enlfq/nif_utils.cc | 27 + c_src/enlfq/nif_utils.h | 6 + c_src/enlfq/rebar.config | 7 + c_src/etsq/etsq.cpp | 172 + c_src/etsq/etsq.h | 130 + c_src/etsq/rebar.config | 7 + c_src/gb_lru/binary.h | 103 + c_src/gb_lru/btree.h | 2394 ++++++++++++ c_src/gb_lru/btree_container.h | 349 ++ c_src/gb_lru/btree_map.h | 130 + c_src/gb_lru/btreelru_nif.cpp | 619 +++ c_src/gb_lru/erlterm.h | 71 + c_src/gb_lru/lru.h | 266 ++ c_src/gb_lru/murmurhash2.h | 73 + c_src/gb_lru/rebar.config | 7 + c_src/native_array/native_array_nif.c | 90 + c_src/native_array/rebar.config | 7 + c_src/neural/NeuralTable.cpp | 905 +++++ c_src/neural/NeuralTable.h | 121 + c_src/neural/neural.cpp | 134 + c_src/neural/neural_utils.cpp | 46 + c_src/neural/neural_utils.h | 9 + c_src/neural/rebar.config | 14 + src/dataType/utTermSize.erl | 2 +- src/nifSrc/bitmap_filter/bitmap_filter.erl | 20 + src/nifSrc/bsn/bsn.erl | 77 + src/nifSrc/bsn/bsn_ext.erl | 56 + src/nifSrc/bsn/bsn_int.erl | 45 + src/nifSrc/bsn/bsn_measure.erl | 236 ++ src/nifSrc/couchdb_hqeue/hqueue.erl | 160 + src/nifSrc/cq/cq.erl | 0 src/nifSrc/enlfq/enlfq.erl | 51 + src/nifSrc/enlfq/testing/benchmark.erl | 71 + src/nifSrc/enlfq/testing/multi_spawn.erl | 23 + src/nifSrc/enq/enq.erl | 159 + src/nifSrc/enq/enq_nif.erl | 63 + src/nifSrc/etsq/etsq.erl | 103 + src/nifSrc/etsq/etsq_tests.erl | 65 + src/nifSrc/gb_lru/btree_lru.erl | 102 + src/nifSrc/gb_lru/btree_lru_test.erl | 59 + src/nifSrc/gb_lru/gb_lru.app.src | 6 + src/nifSrc/native_array/native_array.erl | 19 + 78 files changed, 15255 insertions(+), 1940 deletions(-) create mode 100644 c_src/.enq/enq_nif.c create mode 100644 c_src/.enq/fifo.h create mode 100644 c_src/.enq/lifo.h create mode 100644 c_src/.enq/rebar.config create mode 100644 c_src/bitmap_filter/bitmap_filter.c rename c_src/{cq => bitmap_filter}/rebar.config (88%) create mode 100644 c_src/bsn/bsn_ext.c create mode 100644 c_src/bsn/bsn_int.c create mode 100644 c_src/bsn/c_src/bsn_ext.c create mode 100644 c_src/bsn/c_src/bsn_int.c rename c_src/{cq2 => bsn}/rebar.config (84%) create mode 100644 c_src/couchdb_hqueue/c_src/hqueue.c 
create mode 100644 c_src/couchdb_hqueue/c_src/hqueue.d create mode 100644 c_src/couchdb_hqueue/c_src/hqueue.h create mode 100644 c_src/couchdb_hqueue/c_src/hqueue_nif.c create mode 100644 c_src/couchdb_hqueue/c_src/hqueue_nif.d create mode 100644 c_src/couchdb_hqueue/c_src/valgrind_sample.c create mode 100644 c_src/couchdb_hqueue/hqueue.c create mode 100644 c_src/couchdb_hqueue/hqueue.h create mode 100644 c_src/couchdb_hqueue/hqueue_nif.c create mode 100644 c_src/couchdb_hqueue/rebar.config create mode 100644 c_src/couchdb_hqueue/valgrind_sample.c delete mode 100644 c_src/cq/cq_nif.c delete mode 100644 c_src/cq/cq_nif.h delete mode 100644 c_src/cq1/cq_nif.c delete mode 100644 c_src/cq1/cq_nif.h delete mode 100644 c_src/cq1/rebar.config delete mode 100644 c_src/cq2/cq_nif.c delete mode 100644 c_src/cq2/cq_nif.h create mode 100644 c_src/enlfq/Makefile create mode 100644 c_src/enlfq/concurrentqueue.h create mode 100644 c_src/enlfq/enlfq.cc create mode 100644 c_src/enlfq/enlfq.h create mode 100644 c_src/enlfq/enlfq_nif.cc create mode 100644 c_src/enlfq/enlfq_nif.h create mode 100644 c_src/enlfq/nif_utils.cc create mode 100644 c_src/enlfq/nif_utils.h create mode 100644 c_src/enlfq/rebar.config create mode 100644 c_src/etsq/etsq.cpp create mode 100644 c_src/etsq/etsq.h create mode 100644 c_src/etsq/rebar.config create mode 100644 c_src/gb_lru/binary.h create mode 100644 c_src/gb_lru/btree.h create mode 100644 c_src/gb_lru/btree_container.h create mode 100644 c_src/gb_lru/btree_map.h create mode 100644 c_src/gb_lru/btreelru_nif.cpp create mode 100644 c_src/gb_lru/erlterm.h create mode 100644 c_src/gb_lru/lru.h create mode 100644 c_src/gb_lru/murmurhash2.h create mode 100644 c_src/gb_lru/rebar.config create mode 100644 c_src/native_array/native_array_nif.c create mode 100644 c_src/native_array/rebar.config create mode 100644 c_src/neural/NeuralTable.cpp create mode 100644 c_src/neural/NeuralTable.h create mode 100644 c_src/neural/neural.cpp create mode 100644 c_src/neural/neural_utils.cpp create mode 100644 c_src/neural/neural_utils.h create mode 100644 c_src/neural/rebar.config create mode 100644 src/nifSrc/bitmap_filter/bitmap_filter.erl create mode 100644 src/nifSrc/bsn/bsn.erl create mode 100644 src/nifSrc/bsn/bsn_ext.erl create mode 100644 src/nifSrc/bsn/bsn_int.erl create mode 100644 src/nifSrc/bsn/bsn_measure.erl create mode 100644 src/nifSrc/couchdb_hqeue/hqueue.erl delete mode 100644 src/nifSrc/cq/cq.erl create mode 100644 src/nifSrc/enlfq/enlfq.erl create mode 100644 src/nifSrc/enlfq/testing/benchmark.erl create mode 100644 src/nifSrc/enlfq/testing/multi_spawn.erl create mode 100644 src/nifSrc/enq/enq.erl create mode 100644 src/nifSrc/enq/enq_nif.erl create mode 100644 src/nifSrc/etsq/etsq.erl create mode 100644 src/nifSrc/etsq/etsq_tests.erl create mode 100644 src/nifSrc/gb_lru/btree_lru.erl create mode 100644 src/nifSrc/gb_lru/btree_lru_test.erl create mode 100644 src/nifSrc/gb_lru/gb_lru.app.src create mode 100644 src/nifSrc/native_array/native_array.erl diff --git a/.gitignore b/.gitignore index 18df924..d566b05 100644 --- a/.gitignore +++ b/.gitignore @@ -22,4 +22,7 @@ priv .idea *.iml cmake-build* -CMakeLists.txt \ No newline at end of file +CMakeLists.txt + +*.pdb +compile_commands.json \ No newline at end of file diff --git a/c_src/.enq/enq_nif.c b/c_src/.enq/enq_nif.c new file mode 100644 index 0000000..a855a36 --- /dev/null +++ b/c_src/.enq/enq_nif.c @@ -0,0 +1,442 @@ +#define _GNU_SOURCE + +#include "erl_nif.h" + +#include +#include +#include +#include +#include + +// #include 
"fifo.h" +#include "lifo.h" + +typedef struct { + ERL_NIF_TERM ok; + ERL_NIF_TERM error; + ERL_NIF_TERM fifo; + ERL_NIF_TERM lifo; + ERL_NIF_TERM ttl; + ERL_NIF_TERM max_size; +} atoms_t; + +typedef struct { + ErlNifResourceType *queue; + atoms_t atoms; +} priv_t; + +typedef struct { + union { + fifo_handle_t fifo; + lifo_handle_t lifo; + } handle; + ErlNifBinary data; + struct timespec added; +} item_t; + +typedef enum { + QTYPE_FIFO = 0, + QTYPE_LIFO +} queue_type_t; + +typedef struct queue { + union { + fifo_t fifo; + lifo_t lifo; + } queue; + uint64_t ttl; + uint64_t max_size; + void (*push) (struct queue *inst, item_t *item); + item_t* (*pop) (struct queue *inst); + void (*free) (struct queue *inst); + uint64_t (*size) (struct queue *inst); + void (*cleanup) (struct queue *inst); +} queue_t; + +// returns tuple {error, atom()} +static inline ERL_NIF_TERM +make_error(ErlNifEnv* env, const char *error) { + priv_t *priv = (priv_t *) enif_priv_data(env); + + return enif_make_tuple2(env, priv->atoms.error, enif_make_atom(env, error)); +} + +// returns time diff in milliseconds +static inline int64_t +tdiff(struct timespec *t2, struct timespec *t1) { + return (t2->tv_sec * 1000 + t2->tv_nsec / 1000000UL) - + (t1->tv_sec * 1000 + t1->tv_nsec / 1000000UL); +} + +static inline void +gettime(struct timespec *tp) { + int rc = clock_gettime(CLOCK_MONOTONIC_RAW, tp); + assert(rc == 0); +} + +/******************************************************************************/ +/* FIFO callbacks */ +/******************************************************************************/ + +static void +cleanup_fifo(queue_t *inst) { + struct timespec now; + + gettime(&now); + + for (;;) { + item_t *item = NULL; + __fifo_peak(&inst->queue.fifo, item, handle.fifo); + + if (item == NULL) + return; + + int64_t diff = tdiff(&now, &item->added); + if (diff < inst->ttl) { + return; + } else { + __fifo_pop(&inst->queue.fifo, item, handle.fifo); + enif_release_binary(&item->data); + enif_free(item); + } + } +} + +static void +push_fifo(queue_t *inst, item_t *item) { + __fifo_push(&inst->queue.fifo, item, handle.fifo); +} + +static item_t * +pop_fifo(queue_t *inst) { + item_t *item = NULL; + + if (inst->ttl > 0) { + struct timespec now; + + gettime(&now); + + for (;;) { + __fifo_pop(&inst->queue.fifo, item, handle.fifo); + + if (item == NULL) + return NULL; + + int64_t diff = tdiff(&now, &item->added); + if (diff < inst->ttl) { + return item; + } else { + enif_release_binary(&item->data); + enif_free(item); + } + } + } else { + __fifo_pop(&inst->queue.fifo, item, handle.fifo); + } + + return item; +} + +static void +free_fifo(queue_t *inst) { + item_t *item; + + for(;;) { + __fifo_pop(&inst->queue.fifo, item, handle.fifo); + + if (item == NULL) + return; + + enif_release_binary(&item->data); + enif_free(item); + } +} + +static uint64_t +size_fifo(queue_t *inst) { + return fifo_length(&inst->queue.fifo); +} + +/******************************************************************************/ +/* LIFO callbacks */ +/******************************************************************************/ + +static void +cleanup_lifo(queue_t *inst) { + struct timespec now; + + gettime(&now); + + for(;;) { + item_t *item = inst->queue.lifo.tail; + + if (item == NULL) + return; + + int64_t diff = tdiff(&now, &item->added); + if (diff < inst->ttl) { + return; + } else { + item_t *prev = item->handle.lifo.prev; + + if (prev != NULL) + prev->handle.lifo.next = NULL; + + inst->queue.lifo.tail = prev; + + enif_release_binary(&item->data); + 
enif_free(item); + } + } +} + +static void +push_lifo(queue_t *inst, item_t *item) { + __lifo_push(&inst->queue.lifo, item, handle.lifo); +} + +static item_t * +pop_lifo(queue_t *inst) { + item_t *item = NULL; + + if (inst->ttl > 0) + cleanup_lifo(inst); + + __lifo_pop(&inst->queue.lifo, item, handle.lifo); + + return item; +} + +static void +free_lifo(queue_t *inst) { + item_t *item; + + for(;;) { + __lifo_pop(&inst->queue.lifo, item, handle.lifo); + + if (item == NULL) + return; + + enif_release_binary(&item->data); + enif_free(item); + } +} + +static uint64_t +size_lifo(queue_t *inst) { + return lifo_length(&inst->queue.lifo); +} + +/****************************************************************************** +** NIFs +*******************************************************************************/ + +static ERL_NIF_TERM +new_queue(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { + if (!enif_is_list(env, argv[0])) + return enif_make_badarg(env); + + priv_t *priv = (priv_t *) enif_priv_data(env); + + queue_type_t qtype = QTYPE_FIFO; + unsigned long ttl = 0; + unsigned long max_size = 0; + + ERL_NIF_TERM settings_list = argv[0]; + ERL_NIF_TERM head; + + // parses proplist [fifo, lifo, {ttl, non_neg_integer()}, {max_size, non_neg_integer()}] + while(enif_get_list_cell(env, settings_list, &head, &settings_list)) + { + const ERL_NIF_TERM *items; + int arity; + + if (enif_is_atom(env, head)) { + if (enif_is_identical(head, priv->atoms.fifo)) { + qtype = QTYPE_FIFO; + } else if (enif_is_identical(head, priv->atoms.lifo)) { + qtype = QTYPE_LIFO; + } else { + return enif_make_badarg(env); + } + } else if (enif_get_tuple(env, head, &arity, &items) && arity == 2) { + if (enif_is_identical(items[0], priv->atoms.ttl)) { + if (!enif_get_ulong(env, items[1], &ttl)) { + return enif_make_badarg(env); + } + } else if (enif_is_identical(items[0], priv->atoms.max_size)) { + if (!enif_get_ulong(env, items[1], &max_size)) { + return enif_make_badarg(env); + } + } else { + return enif_make_badarg(env); + } + } else { + return enif_make_badarg(env); + } + } + + queue_t *inst = (queue_t *) enif_alloc_resource(priv->queue, sizeof(*inst)); + + if (inst == NULL) + return make_error(env, "enif_alloc_resource"); + + inst->ttl = ttl; + inst->max_size = max_size; + + switch (qtype) { + case QTYPE_FIFO: + fifo_init(&inst->queue.fifo); + inst->push = &push_fifo; + inst->pop = &pop_fifo; + inst->free = &free_fifo; + inst->size = &size_fifo; + inst->cleanup = &cleanup_fifo; + break; + case QTYPE_LIFO: + lifo_init(&inst->queue.lifo); + inst->push = &push_lifo; + inst->pop = &pop_lifo; + inst->free = &free_lifo; + inst->size = &size_lifo; + inst->cleanup = &cleanup_lifo; + break; + } + + ERL_NIF_TERM result = enif_make_resource(env, inst); + enif_release_resource(inst); + + return enif_make_tuple2(env, priv->atoms.ok, result); +} + +static ERL_NIF_TERM +push_item(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { + priv_t *priv = (priv_t *) enif_priv_data(env); + + queue_t *inst; + + if (!enif_get_resource(env, argv[0], priv->queue, (void**) &inst)) + return enif_make_badarg(env); + + // todo: check an owner of the queue + + ErlNifBinary bin; + if (!enif_inspect_binary(env, argv[1], &bin)) + return enif_make_badarg(env); + + if (inst->ttl > 0) { + inst->cleanup(inst); + } + + if (inst->max_size > 0 && inst->size(inst) >= inst->max_size) { + return enif_make_tuple2(env, priv->atoms.error, priv->atoms.max_size); + } + + item_t *item = (item_t *) enif_alloc(sizeof(*item)); + + if (item == NULL) + return 
make_error(env, "enif_alloc"); + + if (!enif_alloc_binary(bin.size, &item->data)) { + enif_free(item); + return make_error(env, "enif_alloc_binary"); + } + + memcpy(item->data.data, bin.data, bin.size); + + if (inst->ttl > 0) { + gettime(&item->added); + } + + inst->push(inst, item); + return priv->atoms.ok; +} + +static ERL_NIF_TERM +pop_item(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { + priv_t *priv = (priv_t *) enif_priv_data(env); + + queue_t *inst; + item_t *item; + + if (!enif_get_resource(env, argv[0], priv->queue, (void**) &inst)) + return enif_make_badarg(env); + + // todo: check an owner of the queue + + item = inst->pop(inst); + + if (item == NULL) + return enif_make_list(env, 0); + + ERL_NIF_TERM result = enif_make_binary(env, &item->data); + + enif_free(item); + + return enif_make_list1(env, result); +} + +static ERL_NIF_TERM +queue_size(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { + priv_t *priv = (priv_t *) enif_priv_data(env); + + queue_t *inst; + + if (!enif_get_resource(env, argv[0], priv->queue, (void**) &inst)) + return enif_make_badarg(env); + + return enif_make_uint64(env, inst->size(inst)); +} + +/****************************************************************************** +** NIF initialization +*******************************************************************************/ + +static void +enq_queue_free(ErlNifEnv* env, void* obj) { + queue_t *inst = obj; + inst->free(inst); +} + +static priv_t * +make_priv(ErlNifEnv *env) { + priv_t *priv = enif_alloc(sizeof(*priv)); + + if (priv == NULL) + return NULL; + + ErlNifResourceFlags flags = ERL_NIF_RT_CREATE | ERL_NIF_RT_TAKEOVER; + priv->queue = enif_open_resource_type(env, NULL, "enq_queue", enq_queue_free, flags, NULL); + + priv->atoms.ok = enif_make_atom(env, "ok"); + priv->atoms.error = enif_make_atom(env, "error"); + priv->atoms.fifo = enif_make_atom(env, "fifo"); + priv->atoms.lifo = enif_make_atom(env, "lifo"); + priv->atoms.ttl = enif_make_atom(env, "ttl"); + priv->atoms.max_size = enif_make_atom(env, "max_size"); + + return priv; +} + +static int +enq_nif_load(ErlNifEnv *env, void **priv_data, ERL_NIF_TERM load_info) { + *priv_data = make_priv(env); + + return 0; +} + +static int +enq_nif_upgrade(ErlNifEnv *env, void **priv_data, void **old_priv_data, ERL_NIF_TERM load_info) { + *priv_data = make_priv(env); + + return 0; +} + +static ErlNifFunc enq_nif_funcs[] = { + {"new", 1, new_queue}, + {"push", 2, push_item}, + {"pop", 1, pop_item}, + {"size", 1, queue_size}, +}; + +ERL_NIF_INIT(enq_nif, enq_nif_funcs, enq_nif_load, NULL, enq_nif_upgrade, NULL) \ No newline at end of file diff --git a/c_src/.enq/fifo.h b/c_src/.enq/fifo.h new file mode 100644 index 0000000..ec45eed --- /dev/null +++ b/c_src/.enq/fifo.h @@ -0,0 +1,71 @@ +#ifndef _FIFO_H +#define _FIFO_H + +/* Main FIFO structure. Allocate memory for it yourself. */ +typedef struct fifo_t { + void *head; + void *tail; + unsigned long long count; +} fifo_t; + +typedef struct fifo_handle_t { + void *next; +} fifo_handle_t; + +/* Initializes fifo structure. */ +#define fifo_init(fifo) \ +do { \ + fifo_t *__q = fifo; \ + __q->head = NULL; \ + __q->tail = NULL; \ + __q->count = 0; \ +} while (0) + +#define __fifo_push(fifo, p, h) \ +do { \ + fifo_t *__q = fifo; \ + __typeof__ (p) e = p; \ + e->h.next = NULL; \ + if (__q->tail == NULL) { \ + __q->head = e; \ + } else { \ + __typeof__ (e) t = __q->tail; \ + t->h.next = e; \ + } \ + __q->tail = e; \ + __q->count++; \ +} while (0) + +/* Puts an element to the queue. 
*/ +#define fifo_push(fifo, p) __fifo_push (fifo, p, fifo_handle) + +#define __fifo_pop(fifo, p, h) \ +do { \ + fifo_t *__q = fifo; \ + p = __q->head; \ + if (p != NULL) { \ + __q->count--; \ + __q->head = p->h.next; \ + if (__q->tail == p) \ + __q->tail = NULL; \ + } \ +} while (0) + +/* Pops the first element out of the queue. */ +#define fifo_pop(fifo, p) __fifo_pop (fifo, p, fifo_handle) + +#define __fifo_peak(fifo, p, h) \ +do { \ + p = (fifo)->head; \ +} while (0) + +/* Returns the first elemnt of the queue without removing. */ +#define fifo_peak(fifo, p) __fifo_peak (fifo, p, fifo_handle) + +/* Returns the length of the queue. */ +#define fifo_length(fifo) ((fifo)->count) + +/* Returns true if the queue is empty. */ +#define fifo_empty(fifo) ((fifo)->count == 0) + +#endif /* _FIFO_H */ diff --git a/c_src/.enq/lifo.h b/c_src/.enq/lifo.h new file mode 100644 index 0000000..8e57c06 --- /dev/null +++ b/c_src/.enq/lifo.h @@ -0,0 +1,63 @@ +#ifndef _LIFO_H +#define _LIFO_H + +typedef struct lifo_t { + void *head; + void *tail; + unsigned long long count; +} lifo_t; + +typedef struct lifo_handle_t { + void *next; + void *prev; +} lifo_handle_t; + +#define lifo_init(lifo) \ +do { \ + lifo_t *__q = lifo; \ + __q->head = NULL; \ + __q->tail = NULL; \ + __q->count = 0; \ +} while (0) + +#define __lifo_push(lifo, p, h) \ +do { \ + lifo_t *__q = lifo; \ + __typeof__ (p) e = p; \ + e->h.next = __q->head; \ + e->h.prev = NULL; \ + if (__q->head == NULL) { \ + __q->tail = e; \ + } else { \ + __typeof__ (e) t = __q->head; \ + t->h.prev = e; \ + } \ + __q->head = e; \ + __q->count++; \ +} while (0) + +#define lifo_push(lifo, p) __lifo_push (lifo, p, lifo_handle) + +#define __lifo_pop(lifo, p, h) \ +do { \ + lifo_t *__q = lifo; \ + p = __q->head; \ + if (p != NULL) { \ + __q->count--; \ + __q->head = p->h.next; \ + if (__q->head != NULL) { \ + __typeof__ (p) t = __q->head; \ + t->h.prev = NULL; \ + } else { \ + __q->tail = NULL; \ + } \ + } \ +} while (0) + +#define lifo_pop(lifo, p) __lifo_pop (lifo, p, lifo_handle) + +#define lifo_length(lifo) ((lifo)->count) + +#define lifo_empty(lifo) ((lifo)->count == 0) + +#endif /* _LIFO_H */ diff --git a/c_src/.enq/rebar.config b/c_src/.enq/rebar.config new file mode 100644 index 0000000..95b16e5 --- /dev/null +++ b/c_src/.enq/rebar.config @@ -0,0 +1,12 @@ +{port_specs, [ + {"../../priv/enq_nif.so", ["*.c"]} +]}. + +% {port_env, [ +% {"LDFLAGS", "$ERL_LDFLAGS -lrt"}, +% {"CFLAGS", "$CFLAGS --std=gnu99 -Wall -O3"} +% ]}. + + + + diff --git a/c_src/bitmap_filter/bitmap_filter.c b/c_src/bitmap_filter/bitmap_filter.c new file mode 100644 index 0000000..8ea2fe7 --- /dev/null +++ b/c_src/bitmap_filter/bitmap_filter.c @@ -0,0 +1,80 @@ +#include + +/* + This function expects a list of list of tuples of type {int, _}. + It filters the tuples, using the first int field as a key, + and removing duplicating keys with precedence given the the order + in which they were seen (first given precedence). 
+*/ +static ERL_NIF_TERM +bitmap_filter(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + size_t seen_forklift_id[3000] = { 0 }; + + if(argc != 1) + { + return enif_make_badarg(env); + } + + if(!enif_is_list(env, argv[0])) + { + return enif_make_badarg(env); + } + + ERL_NIF_TERM ret = enif_make_list(env, 0); + + ERL_NIF_TERM outer_list = argv[0]; + ERL_NIF_TERM inner_list; + + ERL_NIF_TERM inner_head; + + const ERL_NIF_TERM* tuple_elems; + int num_elems; + unsigned int key; + + while(enif_get_list_cell(env, outer_list, &inner_list, &outer_list)) + { + if(!enif_is_list(env, inner_list)) + { + return enif_make_badarg(env); + } + + while(enif_get_list_cell(env, inner_list, &inner_head, &inner_list)) + { + if(!enif_get_tuple(env, inner_head, &num_elems, &tuple_elems)) + { + return enif_make_badarg(env); + } + + if(num_elems != 2) + { + return enif_make_badarg(env); + } + + if(!enif_get_uint(env, tuple_elems[0], &key)) + { + return enif_make_badarg(env); + } + + if(key >= 3000) + { + return enif_make_badarg(env); + } + + if(!seen_forklift_id[key]) + { + seen_forklift_id[key] = 1; + ret = enif_make_list_cell(env, inner_head, ret); + } + } + } + + return ret; +} + +static ErlNifFunc nif_funcs[] = +{ + {"filter", 1, bitmap_filter, 0} +}; + +ERL_NIF_INIT(bitmap_filter, nif_funcs, NULL, NULL, NULL, NULL) diff --git a/c_src/cq/rebar.config b/c_src/bitmap_filter/rebar.config similarity index 88% rename from c_src/cq/rebar.config rename to c_src/bitmap_filter/rebar.config index c55438b..0dfa1a9 100644 --- a/c_src/cq/rebar.config +++ b/c_src/bitmap_filter/rebar.config @@ -1,9 +1,9 @@ {port_specs, [ - {"../../priv/cq.so", [ - "*.c", - "*.cc" + {"../../priv/bitmap_filter.so", [ + "*.c" ]} ]}. +%{port_specs, [{"../../priv/granderl.so", []}]}. %% {port_env, [ %% {"(linux|solaris|freebsd|netbsd|openbsd|dragonfly|darwin|gnu)", diff --git a/c_src/bsn/bsn_ext.c b/c_src/bsn/bsn_ext.c new file mode 100644 index 0000000..2ea8d9a --- /dev/null +++ b/c_src/bsn/bsn_ext.c @@ -0,0 +1,448 @@ +#include "erl_nif.h" + +ErlNifResourceType* bsn_type; +ERL_NIF_TERM ATOM_TRUE, ATOM_FALSE; + +/* +typedef struct { + unsigned size; + unsigned char* data; +} ErlNifBinary; + +*/ + +struct bsn_elem_struct { + ErlNifBinary bin; + struct bsn_elem_struct* next; +}; +typedef struct bsn_elem_struct bsn_elem; + +typedef bsn_elem* bsn_list; + +typedef struct { + unsigned int count; /* count of elements */ + unsigned int max; /* count of slots */ + ErlNifMutex *mutex; + bsn_list* list; +} bsn_res; + + +inline static ERL_NIF_TERM bool_to_term(int value) { + return value ? ATOM_TRUE : ATOM_FALSE; +} + +/* Calculate the sum of chars. */ +unsigned int +private_hash(const ErlNifBinary* b, unsigned int max) +{ + unsigned char* ptr; + unsigned int i, sum = 0; + + ptr = b->data; + i = b->size; + + for (; i; i--, ptr++) + sum += *ptr; + + return sum % max; +} + +inline void +private_clear_elem(bsn_elem* el) +{ + enif_release_binary(&(el->bin)); + enif_free(el); +} + +inline void +private_chain_clear_all(bsn_elem* ptr) +{ + bsn_elem* next; + + while (ptr != NULL) { + + next = ptr->next; + private_clear_elem(ptr); + ptr = next; + } +} + +inline int +private_compare(ErlNifBinary* b1, ErlNifBinary* b2) +{ + unsigned char* p1; + unsigned char* p2; + unsigned len; + + if (b1->size != b2->size) + return 0; + + p1 = b1->data; + p2 = b2->data; + len = b1->size; + + while (len) { + if ((*p1) != (*p2)) + return 0; + + len--; p1++; p2++; + } + return 1; +} + +/* Skip existing elements. If the element bin is not found, return last element. 
+ * If el.bin == bin, return el. */ +bsn_elem* +private_chain_shift(bsn_elem* ptr, ErlNifBinary* bin, int* num_ptr) +{ + (*num_ptr)++; + if ((ptr) == NULL) + return ptr; + + while (1) { + if (private_compare(&(ptr->bin), bin)) { + /* found an equal binary. Invert num */ + (*num_ptr) *= -1; + return ptr; + } + if ((ptr->next) == NULL) + return ptr; + ptr = ptr->next; + (*num_ptr)++; + } +} + +/* Append the element `el' to the chain `chain' */ +void +private_chain_append(bsn_elem** chain, bsn_elem* el, int* num_ptr) +{ + bsn_elem* last; + + if ((*chain) == NULL) { + /* The new element is last */ + *chain = el; + } else { + last = private_chain_shift(*chain, &(el->bin), num_ptr); + if ((*num_ptr) < 0) { + /* Element was already added. */ + private_clear_elem(el); + } else { + last->next = el; + } + } +} + +bsn_elem* +private_chain_shift_clear(bsn_elem** ptr, ErlNifBinary* bin, int* num_ptr) +{ + bsn_elem** prev = NULL; + bsn_elem* el; + + while ((*ptr) != NULL) { + if (private_compare(&((*ptr)->bin), bin)) { + (*num_ptr) *= -1; + + /* found an equal binary. Delete elem. Invert num */ + if (prev == NULL) { + el = *ptr; + (*ptr) = (*ptr)->next; + return el; + } + *prev = (*ptr)->next; + return *ptr; + } + prev = ptr; + el = *ptr; + ptr = (bsn_elem**) &(el->next); + (*num_ptr)++; + } + + return NULL; +} + +static ERL_NIF_TERM +bsn_new(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + unsigned int max; + bsn_list* ptr; + bsn_res* r; + + if (!(enif_get_uint(env, argv[0], &max) && (max>0))) + return enif_make_badarg(env); + + ptr = enif_alloc(sizeof(bsn_list) * max); + if (ptr == NULL) + return enif_make_badarg(env); + + r = (bsn_res*) enif_alloc_resource(bsn_type, sizeof(bsn_res)); + r->mutex = enif_mutex_create("Mutex for the BSN writer"); + r->count = 0; + r->max = max; + r->list = ptr; + + for (; max; max--, ptr++) + *ptr = NULL; + + return enif_make_resource(env, r); +} + +static ERL_NIF_TERM +bsn_add(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + ErlNifBinary bin; + bsn_res* r; + unsigned int pos; + int num = 0; + bsn_elem* elem_ptr; + + if (!(enif_get_resource(env, argv[0], bsn_type, (void**) &r) + && enif_inspect_binary(env, argv[1], &bin))) + return enif_make_badarg(env); + + enif_realloc_binary(&bin, bin.size); + pos = private_hash(&bin, r->max); + + elem_ptr = enif_alloc(sizeof(bsn_elem)); + if (elem_ptr == NULL) + return enif_make_badarg(env); + + elem_ptr->next = NULL; + elem_ptr->bin = bin; + + enif_mutex_lock(r->mutex); + private_chain_append(&(r->list[pos]), elem_ptr, &num); + if (num >= 0) + (r->count)++; + enif_mutex_unlock(r->mutex); + + /* Already added */ + if (num < 0) + enif_release_binary(&(bin)); + + return enif_make_int(env, num); +} + +static ERL_NIF_TERM +bsn_search(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + ErlNifBinary bin; + bsn_res* r; + unsigned int pos; + int num = 0; + + if (!(enif_get_resource(env, argv[0], bsn_type, (void**) &r) + && enif_inspect_binary(env, argv[1], &bin))) + return enif_make_badarg(env); + + pos = private_hash(&bin, r->max); + + enif_mutex_lock(r->mutex); + private_chain_shift(r->list[pos], &bin, &num); + enif_mutex_unlock(r->mutex); + + return enif_make_int(env, num); +} + +static ERL_NIF_TERM +bsn_clear(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + ErlNifBinary bin; + bsn_res* r; + unsigned int pos; + int num = 0; + bsn_elem* elem_ptr; + + if (!(enif_get_resource(env, argv[0], bsn_type, (void**) &r) + && enif_inspect_binary(env, argv[1], &bin))) + return enif_make_badarg(env); + + pos = 
private_hash(&bin, r->max); + + enif_mutex_lock(r->mutex); + elem_ptr = private_chain_shift_clear(&(r->list[pos]), &bin, &num); + if (elem_ptr != NULL) { + private_clear_elem(elem_ptr); + (r->count)--; + } + enif_mutex_unlock(r->mutex); + + return enif_make_int(env, num); +} + +static ERL_NIF_TERM +bsn_all_chain(ErlNifEnv* env, bsn_elem* e, ERL_NIF_TERM tail) +{ + ERL_NIF_TERM head; + ErlNifBinary bin; + while (e != NULL) { + bin = e->bin; + enif_realloc_binary(&bin, bin.size); + head = enif_make_binary(env, &bin); + tail = enif_make_list_cell(env, head, tail); + e = e->next; + } + return tail; +} + +static ERL_NIF_TERM +bsn_chains(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + bsn_res* r; + unsigned int max; + bsn_list* ptr; + ERL_NIF_TERM tail, head; + + if (!enif_get_resource(env, argv[0], bsn_type, (void**) &r)) + return enif_make_badarg(env); + tail = enif_make_list(env, 0); + + ptr = r->list; + + enif_mutex_lock(r->mutex); + max = r->max; + + while (max) { + head = enif_make_list(env, 0); + head = bsn_all_chain(env, *ptr, head); + tail = enif_make_list_cell(env, head, tail); + + ptr++; + max--; + } + enif_mutex_unlock(r->mutex); + + return tail; +} + +static ERL_NIF_TERM +bsn_all(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + bsn_res* r; + unsigned int max; + bsn_list* ptr; + ERL_NIF_TERM list; + + if (!enif_get_resource(env, argv[0], bsn_type, (void**) &r)) + return enif_make_badarg(env); + list = enif_make_list(env, 0); + + ptr = r->list; + + enif_mutex_lock(r->mutex); + max = r->max; + + while (max) { + list = bsn_all_chain(env, *ptr, list); + ptr++; + max--; + } + enif_mutex_unlock(r->mutex); + + return list; +} + + +static ERL_NIF_TERM +bsn_count(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + bsn_res* r; + + if (!enif_get_resource(env, argv[0], bsn_type, (void**) &r)) + return enif_make_badarg(env); + + return enif_make_int(env, r->count); +} + + +static ERL_NIF_TERM +bsn_hash(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + ErlNifBinary bin; + unsigned int max; + + if (!(enif_inspect_binary(env, argv[0], &bin) + && enif_get_uint(env, argv[1], &max) && (max>0))) + return enif_make_badarg(env); + + return enif_make_uint(env, + private_hash(&bin, max)); +} + + +static ERL_NIF_TERM +bsn_compare(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + ErlNifBinary b1, b2; + + if (!(enif_inspect_binary(env, argv[0], &b1) + && enif_inspect_binary(env, argv[1], &b2))) + return enif_make_badarg(env); + + return bool_to_term(private_compare(&b1, &b2)); +} + +void private_clear_all(bsn_res* r) +{ + unsigned int max; + bsn_list* ptr; + max = r->max; + ptr = r->list; + + while (max) { + private_chain_clear_all(*ptr); + ptr++; + max--; + } +} + +void +bsn_type_dtor(ErlNifEnv* env, void* obj) +{ + bsn_res* r = (bsn_res*) obj; + private_clear_all(r); + enif_mutex_destroy(r->mutex); + enif_free(r->list); +} + + + +int +on_load(ErlNifEnv* env, void** priv, ERL_NIF_TERM info) +{ + ATOM_TRUE = enif_make_atom(env, "true"); + ATOM_FALSE = enif_make_atom(env, "false"); + + ErlNifResourceFlags flags = (ErlNifResourceFlags)(ERL_NIF_RT_CREATE | + ERL_NIF_RT_TAKEOVER); + + bsn_type = enif_open_resource_type(env, NULL, "bsn_type", + bsn_type_dtor, flags, NULL); + + if (bsn_type == NULL) return 1; + + return 0; +} + + +int +on_upgrade(ErlNifEnv* env, void** priv, void** old_priv, ERL_NIF_TERM info) +{ + return 0; +} + + +static ErlNifFunc nif_functions[] = { + {"new", 1, bsn_new}, + {"add", 2, bsn_add}, + {"all", 1, bsn_all}, + {"chains", 1, bsn_chains}, + {"in", 
2, bsn_search}, + {"clear", 2, bsn_clear}, + {"count", 1, bsn_count}, + + {"hash", 2, bsn_hash}, + {"compare", 2, bsn_compare}, +}; + + +ERL_NIF_INIT(bsn_ext, nif_functions, &on_load, &on_load, &on_upgrade, NULL); diff --git a/c_src/bsn/bsn_int.c b/c_src/bsn/bsn_int.c new file mode 100644 index 0000000..30e2944 --- /dev/null +++ b/c_src/bsn/bsn_int.c @@ -0,0 +1,331 @@ +#include "erl_nif.h" + + +ErlNifResourceType* bsn_type; +ERL_NIF_TERM ATOM_TRUE, ATOM_FALSE, ATOM_NO_MORE; + +struct bsn_elem_struct { + ErlNifBinary bin; + unsigned int hash; +}; +typedef struct bsn_elem_struct bsn_elem; + + +typedef struct { + unsigned int count; /* count of elements */ + unsigned int max; /* count of slots */ + ErlNifMutex *mutex; + bsn_elem* list; + unsigned int (*next_pos) + (void*, unsigned int, unsigned int); +} bsn_res; + + +inline static ERL_NIF_TERM bool_to_term(int value) { + return value ? ATOM_TRUE : ATOM_FALSE; +} + +unsigned int next_pos_linear(bsn_res* r, unsigned int hash, unsigned int step) { + return (hash + step) % (r->max); +} + +unsigned int next_pos_quadric(bsn_res* r, unsigned int hash, unsigned int step) { + return (hash + (step*step)) % (r->max); +} + +/* Calculate the sum of chars. */ +unsigned int +private_hash(const ErlNifBinary* b, unsigned int max) +{ + unsigned char* ptr; + unsigned int i, sum = 0; + + ptr = b->data; + i = b->size; + + for (; i; i--, ptr++) + sum += *ptr; + + return sum % max; +} + + + +inline int +private_compare(ErlNifBinary* b1, ErlNifBinary* b2) +{ + unsigned char* p1; + unsigned char* p2; + unsigned len; + + if (b1->size != b2->size) + return 0; + + p1 = b1->data; + p2 = b2->data; + len = b1->size; + + while (len) { + if ((*p1) != (*p2)) + return 0; + + len--; p1++; p2++; + } + return 1; +} + + +static ERL_NIF_TERM +bsn_new(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + int max; /* This value will be set by a client: + if (max<0) -> use quadric algorithm */ + bsn_elem* ptr; + bsn_res* r; + + if (!enif_get_int(env, argv[0], &max) || (max == 0)) + return enif_make_badarg(env); + + + r = (bsn_res*) enif_alloc_resource(bsn_type, sizeof(bsn_res)); + r->mutex = enif_mutex_create("Mutex for the BSN writer"); + r->count = 0; + + /* Select an algorithm */ + if (max>0) { + r->next_pos = &next_pos_linear; + } else if (max<0) { + r->next_pos = &next_pos_quadric; + max *= -1; + } + /* Now max is cells' count in the array. 
*/ + r->max = (unsigned int) max; + + ptr = enif_alloc(sizeof(bsn_elem) * max); + if (ptr == NULL) + return enif_make_badarg(env); + r->list = ptr; + + for (; max; max--, ptr++) + ptr->hash = r->max; + + + return enif_make_resource(env, r); +} + +static ERL_NIF_TERM +bsn_add(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + ErlNifBinary bin; + bsn_res* r; + unsigned int pos, hash, max; + int num = 0; + bsn_elem* elem_ptr; + + if (!(enif_get_resource(env, argv[0], bsn_type, (void**) &r) + && enif_inspect_binary(env, argv[1], &bin))) + return enif_make_badarg(env); + + enif_realloc_binary(&bin, bin.size); + hash = pos = private_hash(&bin, r->max); + + + enif_mutex_lock(r->mutex); + max = r->max; + + while (num < max) { + elem_ptr = &(r->list[pos]); + /* Found free space */ + if (elem_ptr->hash == max) { + elem_ptr->bin = bin; + elem_ptr->hash = hash; + break; + } + + + /* Found elem */ + if ((elem_ptr->hash == hash) + && private_compare(&bin, &(elem_ptr->bin))) { + num *= -1; + break; + } + + pos = (r->next_pos)(r, hash, num); + num++; + } + if ((num >= 0) && (num < max)) + (r->count)++; + + enif_mutex_unlock(r->mutex); + + /* Error: already added or owerflow */ + if (!((num >= 0) && (num < max))) + enif_release_binary(&bin); + + if (num >= max) + return ATOM_NO_MORE; + + return enif_make_int(env, num); +} + +static ERL_NIF_TERM +bsn_search(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + ErlNifBinary bin; + bsn_res* r; + unsigned int pos, max, hash; + int num = 1; + bsn_elem* elem_ptr; + + if (!(enif_get_resource(env, argv[0], bsn_type, (void**) &r) + && enif_inspect_binary(env, argv[1], &bin))) + return enif_make_badarg(env); + + hash = pos = private_hash(&bin, r->max); + + enif_mutex_lock(r->mutex); + max = r->max; + + while (num < max) { + elem_ptr = &(r->list[pos]); + /* Found free space */ + if (elem_ptr->hash == max) { + break; + } + + + /* Found elem */ + if ((elem_ptr->hash == hash) + && private_compare(&bin, &(elem_ptr->bin))) { + num *= -1; + break; + } + + pos = (r->next_pos)(r, hash, num); + num++; + } + enif_mutex_unlock(r->mutex); + + return enif_make_int(env, num); +} + +static ERL_NIF_TERM +bsn_clear(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + return enif_make_badarg(env); +} + + +static ERL_NIF_TERM +bsn_all(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + bsn_res* r; + unsigned int max, pos = 0; + ERL_NIF_TERM head, tail; + ErlNifBinary bin; + bsn_elem* elem_ptr; + + if (!enif_get_resource(env, argv[0], bsn_type, (void**) &r)) + return enif_make_badarg(env); + tail = enif_make_list(env, 0); + + enif_mutex_lock(r->mutex); + max = r->max; + elem_ptr = r->list; + + do { + + if (elem_ptr->hash != max) { + bin = elem_ptr->bin; + enif_realloc_binary(&bin, bin.size); + head = enif_make_binary(env, &bin); + tail = enif_make_list_cell(env, head, tail); + } + + elem_ptr++; + pos++; + } while (pos < max); + + enif_mutex_unlock(r->mutex); + + return tail; +} + + +static ERL_NIF_TERM +bsn_count(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + bsn_res* r; + + if (!enif_get_resource(env, argv[0], bsn_type, (void**) &r)) + return enif_make_badarg(env); + + return enif_make_int(env, r->count); +} + + +void private_clear_all(bsn_res* r) +{ + unsigned int max, num; + bsn_elem* ptr; + num = max = r->max; + ptr = r->list; + + while (num) { + if (ptr->hash != max) { + enif_release_binary(&(ptr->bin)); + } + ptr++; + num--; + } +} + +void +bsn_type_dtor(ErlNifEnv* env, void* obj) +{ + bsn_res* r = (bsn_res*) obj; + private_clear_all(r); + 
enif_mutex_destroy(r->mutex); + enif_free(r->list); +} + + + +int +on_load(ErlNifEnv* env, void** priv, ERL_NIF_TERM info) +{ + ATOM_TRUE = enif_make_atom(env, "true"); + ATOM_FALSE = enif_make_atom(env, "false"); + ATOM_NO_MORE = enif_make_atom(env, "no_more"); + + + ErlNifResourceFlags flags = (ErlNifResourceFlags)(ERL_NIF_RT_CREATE | + ERL_NIF_RT_TAKEOVER); + + bsn_type = enif_open_resource_type(env, NULL, "bsn_type", + bsn_type_dtor, flags, NULL); + + if (bsn_type == NULL) return 1; + + return 0; +} + + +int +on_upgrade(ErlNifEnv* env, void** priv, void** old_priv, ERL_NIF_TERM info) +{ + return 0; +} + + +static ErlNifFunc nif_functions[] = { + {"new", 1, bsn_new}, + {"add", 2, bsn_add}, + {"all", 1, bsn_all}, + {"in", 2, bsn_search}, + {"clear", 2, bsn_clear}, + {"count", 1, bsn_count}, +}; + + +ERL_NIF_INIT(bsn_int, nif_functions, &on_load, &on_load, &on_upgrade, NULL); diff --git a/c_src/bsn/c_src/bsn_ext.c b/c_src/bsn/c_src/bsn_ext.c new file mode 100644 index 0000000..2ea8d9a --- /dev/null +++ b/c_src/bsn/c_src/bsn_ext.c @@ -0,0 +1,448 @@ +#include "erl_nif.h" + +ErlNifResourceType* bsn_type; +ERL_NIF_TERM ATOM_TRUE, ATOM_FALSE; + +/* +typedef struct { + unsigned size; + unsigned char* data; +} ErlNifBinary; + +*/ + +struct bsn_elem_struct { + ErlNifBinary bin; + struct bsn_elem_struct* next; +}; +typedef struct bsn_elem_struct bsn_elem; + +typedef bsn_elem* bsn_list; + +typedef struct { + unsigned int count; /* count of elements */ + unsigned int max; /* count of slots */ + ErlNifMutex *mutex; + bsn_list* list; +} bsn_res; + + +inline static ERL_NIF_TERM bool_to_term(int value) { + return value ? ATOM_TRUE : ATOM_FALSE; +} + +/* Calculate the sum of chars. */ +unsigned int +private_hash(const ErlNifBinary* b, unsigned int max) +{ + unsigned char* ptr; + unsigned int i, sum = 0; + + ptr = b->data; + i = b->size; + + for (; i; i--, ptr++) + sum += *ptr; + + return sum % max; +} + +inline void +private_clear_elem(bsn_elem* el) +{ + enif_release_binary(&(el->bin)); + enif_free(el); +} + +inline void +private_chain_clear_all(bsn_elem* ptr) +{ + bsn_elem* next; + + while (ptr != NULL) { + + next = ptr->next; + private_clear_elem(ptr); + ptr = next; + } +} + +inline int +private_compare(ErlNifBinary* b1, ErlNifBinary* b2) +{ + unsigned char* p1; + unsigned char* p2; + unsigned len; + + if (b1->size != b2->size) + return 0; + + p1 = b1->data; + p2 = b2->data; + len = b1->size; + + while (len) { + if ((*p1) != (*p2)) + return 0; + + len--; p1++; p2++; + } + return 1; +} + +/* Skip existing elements. If the element bin is not found, return last element. + * If el.bin == bin, return el. */ +bsn_elem* +private_chain_shift(bsn_elem* ptr, ErlNifBinary* bin, int* num_ptr) +{ + (*num_ptr)++; + if ((ptr) == NULL) + return ptr; + + while (1) { + if (private_compare(&(ptr->bin), bin)) { + /* found an equal binary. Invert num */ + (*num_ptr) *= -1; + return ptr; + } + if ((ptr->next) == NULL) + return ptr; + ptr = ptr->next; + (*num_ptr)++; + } +} + +/* Append the element `el' to the chain `chain' */ +void +private_chain_append(bsn_elem** chain, bsn_elem* el, int* num_ptr) +{ + bsn_elem* last; + + if ((*chain) == NULL) { + /* The new element is last */ + *chain = el; + } else { + last = private_chain_shift(*chain, &(el->bin), num_ptr); + if ((*num_ptr) < 0) { + /* Element was already added. 
*/ + private_clear_elem(el); + } else { + last->next = el; + } + } +} + +bsn_elem* +private_chain_shift_clear(bsn_elem** ptr, ErlNifBinary* bin, int* num_ptr) +{ + bsn_elem** prev = NULL; + bsn_elem* el; + + while ((*ptr) != NULL) { + if (private_compare(&((*ptr)->bin), bin)) { + (*num_ptr) *= -1; + + /* found an equal binary. Delete elem. Invert num */ + if (prev == NULL) { + el = *ptr; + (*ptr) = (*ptr)->next; + return el; + } + *prev = (*ptr)->next; + return *ptr; + } + prev = ptr; + el = *ptr; + ptr = (bsn_elem**) &(el->next); + (*num_ptr)++; + } + + return NULL; +} + +static ERL_NIF_TERM +bsn_new(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + unsigned int max; + bsn_list* ptr; + bsn_res* r; + + if (!(enif_get_uint(env, argv[0], &max) && (max>0))) + return enif_make_badarg(env); + + ptr = enif_alloc(sizeof(bsn_list) * max); + if (ptr == NULL) + return enif_make_badarg(env); + + r = (bsn_res*) enif_alloc_resource(bsn_type, sizeof(bsn_res)); + r->mutex = enif_mutex_create("Mutex for the BSN writer"); + r->count = 0; + r->max = max; + r->list = ptr; + + for (; max; max--, ptr++) + *ptr = NULL; + + return enif_make_resource(env, r); +} + +static ERL_NIF_TERM +bsn_add(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + ErlNifBinary bin; + bsn_res* r; + unsigned int pos; + int num = 0; + bsn_elem* elem_ptr; + + if (!(enif_get_resource(env, argv[0], bsn_type, (void**) &r) + && enif_inspect_binary(env, argv[1], &bin))) + return enif_make_badarg(env); + + enif_realloc_binary(&bin, bin.size); + pos = private_hash(&bin, r->max); + + elem_ptr = enif_alloc(sizeof(bsn_elem)); + if (elem_ptr == NULL) + return enif_make_badarg(env); + + elem_ptr->next = NULL; + elem_ptr->bin = bin; + + enif_mutex_lock(r->mutex); + private_chain_append(&(r->list[pos]), elem_ptr, &num); + if (num >= 0) + (r->count)++; + enif_mutex_unlock(r->mutex); + + /* Already added */ + if (num < 0) + enif_release_binary(&(bin)); + + return enif_make_int(env, num); +} + +static ERL_NIF_TERM +bsn_search(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + ErlNifBinary bin; + bsn_res* r; + unsigned int pos; + int num = 0; + + if (!(enif_get_resource(env, argv[0], bsn_type, (void**) &r) + && enif_inspect_binary(env, argv[1], &bin))) + return enif_make_badarg(env); + + pos = private_hash(&bin, r->max); + + enif_mutex_lock(r->mutex); + private_chain_shift(r->list[pos], &bin, &num); + enif_mutex_unlock(r->mutex); + + return enif_make_int(env, num); +} + +static ERL_NIF_TERM +bsn_clear(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + ErlNifBinary bin; + bsn_res* r; + unsigned int pos; + int num = 0; + bsn_elem* elem_ptr; + + if (!(enif_get_resource(env, argv[0], bsn_type, (void**) &r) + && enif_inspect_binary(env, argv[1], &bin))) + return enif_make_badarg(env); + + pos = private_hash(&bin, r->max); + + enif_mutex_lock(r->mutex); + elem_ptr = private_chain_shift_clear(&(r->list[pos]), &bin, &num); + if (elem_ptr != NULL) { + private_clear_elem(elem_ptr); + (r->count)--; + } + enif_mutex_unlock(r->mutex); + + return enif_make_int(env, num); +} + +static ERL_NIF_TERM +bsn_all_chain(ErlNifEnv* env, bsn_elem* e, ERL_NIF_TERM tail) +{ + ERL_NIF_TERM head; + ErlNifBinary bin; + while (e != NULL) { + bin = e->bin; + enif_realloc_binary(&bin, bin.size); + head = enif_make_binary(env, &bin); + tail = enif_make_list_cell(env, head, tail); + e = e->next; + } + return tail; +} + +static ERL_NIF_TERM +bsn_chains(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + bsn_res* r; + unsigned int max; + bsn_list* ptr; + 
ERL_NIF_TERM tail, head; + + if (!enif_get_resource(env, argv[0], bsn_type, (void**) &r)) + return enif_make_badarg(env); + tail = enif_make_list(env, 0); + + ptr = r->list; + + enif_mutex_lock(r->mutex); + max = r->max; + + while (max) { + head = enif_make_list(env, 0); + head = bsn_all_chain(env, *ptr, head); + tail = enif_make_list_cell(env, head, tail); + + ptr++; + max--; + } + enif_mutex_unlock(r->mutex); + + return tail; +} + +static ERL_NIF_TERM +bsn_all(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + bsn_res* r; + unsigned int max; + bsn_list* ptr; + ERL_NIF_TERM list; + + if (!enif_get_resource(env, argv[0], bsn_type, (void**) &r)) + return enif_make_badarg(env); + list = enif_make_list(env, 0); + + ptr = r->list; + + enif_mutex_lock(r->mutex); + max = r->max; + + while (max) { + list = bsn_all_chain(env, *ptr, list); + ptr++; + max--; + } + enif_mutex_unlock(r->mutex); + + return list; +} + + +static ERL_NIF_TERM +bsn_count(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + bsn_res* r; + + if (!enif_get_resource(env, argv[0], bsn_type, (void**) &r)) + return enif_make_badarg(env); + + return enif_make_int(env, r->count); +} + + +static ERL_NIF_TERM +bsn_hash(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + ErlNifBinary bin; + unsigned int max; + + if (!(enif_inspect_binary(env, argv[0], &bin) + && enif_get_uint(env, argv[1], &max) && (max>0))) + return enif_make_badarg(env); + + return enif_make_uint(env, + private_hash(&bin, max)); +} + + +static ERL_NIF_TERM +bsn_compare(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + ErlNifBinary b1, b2; + + if (!(enif_inspect_binary(env, argv[0], &b1) + && enif_inspect_binary(env, argv[1], &b2))) + return enif_make_badarg(env); + + return bool_to_term(private_compare(&b1, &b2)); +} + +void private_clear_all(bsn_res* r) +{ + unsigned int max; + bsn_list* ptr; + max = r->max; + ptr = r->list; + + while (max) { + private_chain_clear_all(*ptr); + ptr++; + max--; + } +} + +void +bsn_type_dtor(ErlNifEnv* env, void* obj) +{ + bsn_res* r = (bsn_res*) obj; + private_clear_all(r); + enif_mutex_destroy(r->mutex); + enif_free(r->list); +} + + + +int +on_load(ErlNifEnv* env, void** priv, ERL_NIF_TERM info) +{ + ATOM_TRUE = enif_make_atom(env, "true"); + ATOM_FALSE = enif_make_atom(env, "false"); + + ErlNifResourceFlags flags = (ErlNifResourceFlags)(ERL_NIF_RT_CREATE | + ERL_NIF_RT_TAKEOVER); + + bsn_type = enif_open_resource_type(env, NULL, "bsn_type", + bsn_type_dtor, flags, NULL); + + if (bsn_type == NULL) return 1; + + return 0; +} + + +int +on_upgrade(ErlNifEnv* env, void** priv, void** old_priv, ERL_NIF_TERM info) +{ + return 0; +} + + +static ErlNifFunc nif_functions[] = { + {"new", 1, bsn_new}, + {"add", 2, bsn_add}, + {"all", 1, bsn_all}, + {"chains", 1, bsn_chains}, + {"in", 2, bsn_search}, + {"clear", 2, bsn_clear}, + {"count", 1, bsn_count}, + + {"hash", 2, bsn_hash}, + {"compare", 2, bsn_compare}, +}; + + +ERL_NIF_INIT(bsn_ext, nif_functions, &on_load, &on_load, &on_upgrade, NULL); diff --git a/c_src/bsn/c_src/bsn_int.c b/c_src/bsn/c_src/bsn_int.c new file mode 100644 index 0000000..30e2944 --- /dev/null +++ b/c_src/bsn/c_src/bsn_int.c @@ -0,0 +1,331 @@ +#include "erl_nif.h" + + +ErlNifResourceType* bsn_type; +ERL_NIF_TERM ATOM_TRUE, ATOM_FALSE, ATOM_NO_MORE; + +struct bsn_elem_struct { + ErlNifBinary bin; + unsigned int hash; +}; +typedef struct bsn_elem_struct bsn_elem; + + +typedef struct { + unsigned int count; /* count of elements */ + unsigned int max; /* count of slots */ + ErlNifMutex *mutex; + 
bsn_elem* list; + unsigned int (*next_pos) + (void*, unsigned int, unsigned int); +} bsn_res; + + +inline static ERL_NIF_TERM bool_to_term(int value) { + return value ? ATOM_TRUE : ATOM_FALSE; +} + +unsigned int next_pos_linear(bsn_res* r, unsigned int hash, unsigned int step) { + return (hash + step) % (r->max); +} + +unsigned int next_pos_quadric(bsn_res* r, unsigned int hash, unsigned int step) { + return (hash + (step*step)) % (r->max); +} + +/* Calculate the sum of chars. */ +unsigned int +private_hash(const ErlNifBinary* b, unsigned int max) +{ + unsigned char* ptr; + unsigned int i, sum = 0; + + ptr = b->data; + i = b->size; + + for (; i; i--, ptr++) + sum += *ptr; + + return sum % max; +} + + + +inline int +private_compare(ErlNifBinary* b1, ErlNifBinary* b2) +{ + unsigned char* p1; + unsigned char* p2; + unsigned len; + + if (b1->size != b2->size) + return 0; + + p1 = b1->data; + p2 = b2->data; + len = b1->size; + + while (len) { + if ((*p1) != (*p2)) + return 0; + + len--; p1++; p2++; + } + return 1; +} + + +static ERL_NIF_TERM +bsn_new(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + int max; /* This value will be set by a client: + if (max<0) -> use quadric algorithm */ + bsn_elem* ptr; + bsn_res* r; + + if (!enif_get_int(env, argv[0], &max) || (max == 0)) + return enif_make_badarg(env); + + + r = (bsn_res*) enif_alloc_resource(bsn_type, sizeof(bsn_res)); + r->mutex = enif_mutex_create("Mutex for the BSN writer"); + r->count = 0; + + /* Select an algorithm */ + if (max>0) { + r->next_pos = &next_pos_linear; + } else if (max<0) { + r->next_pos = &next_pos_quadric; + max *= -1; + } + /* Now max is cells' count in the array. */ + r->max = (unsigned int) max; + + ptr = enif_alloc(sizeof(bsn_elem) * max); + if (ptr == NULL) + return enif_make_badarg(env); + r->list = ptr; + + for (; max; max--, ptr++) + ptr->hash = r->max; + + + return enif_make_resource(env, r); +} + +static ERL_NIF_TERM +bsn_add(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + ErlNifBinary bin; + bsn_res* r; + unsigned int pos, hash, max; + int num = 0; + bsn_elem* elem_ptr; + + if (!(enif_get_resource(env, argv[0], bsn_type, (void**) &r) + && enif_inspect_binary(env, argv[1], &bin))) + return enif_make_badarg(env); + + enif_realloc_binary(&bin, bin.size); + hash = pos = private_hash(&bin, r->max); + + + enif_mutex_lock(r->mutex); + max = r->max; + + while (num < max) { + elem_ptr = &(r->list[pos]); + /* Found free space */ + if (elem_ptr->hash == max) { + elem_ptr->bin = bin; + elem_ptr->hash = hash; + break; + } + + + /* Found elem */ + if ((elem_ptr->hash == hash) + && private_compare(&bin, &(elem_ptr->bin))) { + num *= -1; + break; + } + + pos = (r->next_pos)(r, hash, num); + num++; + } + if ((num >= 0) && (num < max)) + (r->count)++; + + enif_mutex_unlock(r->mutex); + + /* Error: already added or owerflow */ + if (!((num >= 0) && (num < max))) + enif_release_binary(&bin); + + if (num >= max) + return ATOM_NO_MORE; + + return enif_make_int(env, num); +} + +static ERL_NIF_TERM +bsn_search(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + ErlNifBinary bin; + bsn_res* r; + unsigned int pos, max, hash; + int num = 1; + bsn_elem* elem_ptr; + + if (!(enif_get_resource(env, argv[0], bsn_type, (void**) &r) + && enif_inspect_binary(env, argv[1], &bin))) + return enif_make_badarg(env); + + hash = pos = private_hash(&bin, r->max); + + enif_mutex_lock(r->mutex); + max = r->max; + + while (num < max) { + elem_ptr = &(r->list[pos]); + /* Found free space */ + if (elem_ptr->hash == max) { + break; + 
} + + + /* Found elem */ + if ((elem_ptr->hash == hash) + && private_compare(&bin, &(elem_ptr->bin))) { + num *= -1; + break; + } + + pos = (r->next_pos)(r, hash, num); + num++; + } + enif_mutex_unlock(r->mutex); + + return enif_make_int(env, num); +} + +static ERL_NIF_TERM +bsn_clear(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + return enif_make_badarg(env); +} + + +static ERL_NIF_TERM +bsn_all(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + bsn_res* r; + unsigned int max, pos = 0; + ERL_NIF_TERM head, tail; + ErlNifBinary bin; + bsn_elem* elem_ptr; + + if (!enif_get_resource(env, argv[0], bsn_type, (void**) &r)) + return enif_make_badarg(env); + tail = enif_make_list(env, 0); + + enif_mutex_lock(r->mutex); + max = r->max; + elem_ptr = r->list; + + do { + + if (elem_ptr->hash != max) { + bin = elem_ptr->bin; + enif_realloc_binary(&bin, bin.size); + head = enif_make_binary(env, &bin); + tail = enif_make_list_cell(env, head, tail); + } + + elem_ptr++; + pos++; + } while (pos < max); + + enif_mutex_unlock(r->mutex); + + return tail; +} + + +static ERL_NIF_TERM +bsn_count(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + bsn_res* r; + + if (!enif_get_resource(env, argv[0], bsn_type, (void**) &r)) + return enif_make_badarg(env); + + return enif_make_int(env, r->count); +} + + +void private_clear_all(bsn_res* r) +{ + unsigned int max, num; + bsn_elem* ptr; + num = max = r->max; + ptr = r->list; + + while (num) { + if (ptr->hash != max) { + enif_release_binary(&(ptr->bin)); + } + ptr++; + num--; + } +} + +void +bsn_type_dtor(ErlNifEnv* env, void* obj) +{ + bsn_res* r = (bsn_res*) obj; + private_clear_all(r); + enif_mutex_destroy(r->mutex); + enif_free(r->list); +} + + + +int +on_load(ErlNifEnv* env, void** priv, ERL_NIF_TERM info) +{ + ATOM_TRUE = enif_make_atom(env, "true"); + ATOM_FALSE = enif_make_atom(env, "false"); + ATOM_NO_MORE = enif_make_atom(env, "no_more"); + + + ErlNifResourceFlags flags = (ErlNifResourceFlags)(ERL_NIF_RT_CREATE | + ERL_NIF_RT_TAKEOVER); + + bsn_type = enif_open_resource_type(env, NULL, "bsn_type", + bsn_type_dtor, flags, NULL); + + if (bsn_type == NULL) return 1; + + return 0; +} + + +int +on_upgrade(ErlNifEnv* env, void** priv, void** old_priv, ERL_NIF_TERM info) +{ + return 0; +} + + +static ErlNifFunc nif_functions[] = { + {"new", 1, bsn_new}, + {"add", 2, bsn_add}, + {"all", 1, bsn_all}, + {"in", 2, bsn_search}, + {"clear", 2, bsn_clear}, + {"count", 1, bsn_count}, +}; + + +ERL_NIF_INIT(bsn_int, nif_functions, &on_load, &on_load, &on_upgrade, NULL); diff --git a/c_src/cq2/rebar.config b/c_src/bsn/rebar.config similarity index 84% rename from c_src/cq2/rebar.config rename to c_src/bsn/rebar.config index 9bbec13..77bc6d8 100644 --- a/c_src/cq2/rebar.config +++ b/c_src/bsn/rebar.config @@ -1,9 +1,8 @@ {port_specs, [ - {"../../priv/cq2.so", [ - "*.c", - "*.cc" - ]} + {"../../priv/bsn_ext.so", ["bsn_ext.c"]}, + {"../../priv/bsn_int.so", ["bsn_int.c"]} ]}. +%{port_specs, [{"../../priv/granderl.so", []}]}. %% {port_env, [ %% {"(linux|solaris|freebsd|netbsd|openbsd|dragonfly|darwin|gnu)", @@ -24,3 +23,7 @@ %% %% {"win32", "CXXFLAGS", "$CXXFLAGS /O2 /DNDEBUG"} %% ]}. + + + + diff --git a/c_src/couchdb_hqueue/c_src/hqueue.c b/c_src/couchdb_hqueue/c_src/hqueue.c new file mode 100644 index 0000000..f02f251 --- /dev/null +++ b/c_src/couchdb_hqueue/c_src/hqueue.c @@ -0,0 +1,318 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. 
You may obtain a copy of +// the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations under +// the License. + +#include +#include +#include +#include +#include + +#include "hqueue.h" + + +struct hqueue +{ + int version; + uint32_t idx; + uint32_t max_elems; + uint32_t heap_size; + hqnode_t* heap; // one based index +}; + + +struct hqnode +{ + double priority; + void* value; +}; + + +static inline void +hqueue_exchange(hqueue_t* hqueue, int i, int j) +{ + hqnode_t tmp; + + tmp = hqueue->heap[i]; + hqueue->heap[i] = hqueue->heap[j]; + hqueue->heap[j] = tmp; + return; +} + + +static inline int +hqueue_less(hqueue_t* hqueue, int i, int j) +{ + return hqueue->heap[i].priority < hqueue->heap[j].priority; +} + + +static void +hqueue_fix_up(hqueue_t* hqueue, int k) +{ + while(k > 1 && hqueue_less(hqueue, k/2, k)) { + hqueue_exchange(hqueue, k/2, k); + k = k/2; + } + return; +} + + +static void +hqueue_fix_down(hqueue_t* hqueue, int k) +{ + int j; + int n = hqueue->idx; + + while(2*k <= n) { + j = 2*k; + if(j < n && hqueue_less(hqueue, j, j+1)) { + j++; + } + if(!hqueue_less(hqueue, k, j)) { + break; + } + hqueue_exchange(hqueue, k, j); + k = j; + } + return; +} + + +hqueue_t* +hqueue_new(uint32_t max_elems, uint32_t heap_size) +{ + hqueue_t* hqueue = NULL; + size_t total_heap_size; + + if(max_elems == 0 || heap_size == 0) { + return NULL; + } + + if(max_elems < heap_size) { + heap_size = max_elems; + } + + hqueue = HQUEUE_ALLOC(sizeof(hqueue_t)); + if(hqueue == NULL) { + return NULL; + } + + memset(hqueue, '\0', sizeof(hqueue_t)); + hqueue->version = HQ_VERSION; + hqueue->max_elems = max_elems; + hqueue->heap_size = heap_size; + hqueue->idx = 0; + + total_heap_size = sizeof(hqnode_t) * (hqueue->heap_size+1); + + hqueue->heap = (hqnode_t*) HQUEUE_ALLOC(total_heap_size); + + if(hqueue->heap == NULL ) { + HQUEUE_FREE(hqueue); + return NULL; + } + + memset(hqueue->heap, '\0', total_heap_size); + + return hqueue; +} + + +void +hqueue_free(hqueue_t* hqueue) +{ + HQUEUE_FREE(hqueue->heap); + HQUEUE_FREE(hqueue); + + return; +} + + +void +hqueue_free2(hqueue_t* hqueue, void (*free_node)(void* node)) +{ + uint32_t i; + + for(i = 1; i < hqueue->heap_size + 1; i++) { + if(i <= hqueue->idx) { + free_node(hqueue->heap[i].value); + } else { + assert(hqueue->heap[i].value == NULL && "inactive elements must be NULL"); + } + } + + hqueue_free(hqueue); + + return; +} + + +// Extraction order is undefined for entries with duplicate priorities +int +hqueue_extract_max(hqueue_t* hqueue, double* priority, void** value) +{ + if(hqueue->idx <= 0) { + return 0; + } + + hqueue_exchange(hqueue, 1, hqueue->idx); + + *priority = hqueue->heap[hqueue->idx].priority; + *value = hqueue->heap[hqueue->idx].value; + + hqueue->heap[hqueue->idx].value = NULL; + + hqueue->idx--; // heap uses one based index, so we decrement after + hqueue_fix_down(hqueue, 1); + + return 1; +} + + +void +hqueue_get_elem(hqueue_t* hqueue, uint32_t idx, double *priority, void** value) +{ + *priority = hqueue->heap[idx].priority; + *value = hqueue->heap[idx].value; + + return; +} + + +static int +hqueue_maybe_resize(hqueue_t* hqueue) +{ + uint32_t min_resize; + + if(hqueue->idx + 1 > hqueue->heap_size) { + if(hqueue->idx * HQ_SCALE_FACTOR > 
hqueue->max_elems) { + min_resize = hqueue->max_elems; + } else { + min_resize = hqueue->idx * HQ_SCALE_FACTOR; + } + return hqueue_resize_heap(hqueue, min_resize); + } + + return 1; +} + + +int +hqueue_insert(hqueue_t* hqueue, double priority, void* value) +{ + if(hqueue->idx >= hqueue->max_elems) { + return 0; + } + + if(!hqueue_maybe_resize(hqueue)) { + return 0; + } + + hqueue->idx++; // heap uses one based index, so we increment first + hqueue->heap[hqueue->idx].priority = priority; + hqueue->heap[hqueue->idx].value = value; + + hqueue_fix_up(hqueue, hqueue->idx); + + return 1; +} + + +uint32_t +hqueue_size(hqueue_t* hqueue) +{ + return hqueue->idx; +} + + +uint32_t +hqueue_heap_size(hqueue_t* hqueue) +{ + return hqueue->heap_size; +} + + +uint32_t +hqueue_max_elems(hqueue_t* hqueue) +{ + return hqueue->max_elems; +} + + +void +hqueue_scale_by(hqueue_t* hqueue, double factor) +{ + uint32_t i; + + for(i = 1; i <= hqueue->idx && i <= hqueue->heap_size; i++) { + hqueue->heap[i].priority *= factor; + } + + return; +} + + +uint32_t +hqueue_resize_heap(hqueue_t* hqueue, uint32_t new_heap_size) +{ + uint32_t old_heap_size; + size_t total_heap_size; + hqnode_t* tmp_heap; + uint32_t i; + + if(hqueue->idx > new_heap_size) { + return 0; + } + + total_heap_size = sizeof(hqnode_t) * (new_heap_size+1); + old_heap_size = hqueue->heap_size; + + if((tmp_heap = (hqnode_t*) HQUEUE_ALLOC(total_heap_size)) == NULL) { + return 0; + } + + memset(tmp_heap, '\0', total_heap_size); + + for(i = 1; i <= hqueue->idx && i <= old_heap_size; i++) { + if(i <= hqueue->idx) { + tmp_heap[i] = hqueue->heap[i]; + hqueue->heap[i].value = NULL; + } else { + assert(hqueue->heap[i].value == NULL && + "unexpected NULL element during heap resize"); + } + } + + HQUEUE_FREE(hqueue->heap); + hqueue->heap = tmp_heap; + hqueue->heap_size = new_heap_size; + + return old_heap_size; +} + + +int +hqueue_set_max_elems(hqueue_t* hqueue, uint32_t new_max_elems) +{ + uint32_t old_max_elems; + + if(hqueue->heap_size > new_max_elems) { + if(!hqueue_resize_heap(hqueue, new_max_elems)) { + return 0; + } + } + + old_max_elems = hqueue->max_elems; + hqueue->max_elems = new_max_elems; + + return old_max_elems; +} diff --git a/c_src/couchdb_hqueue/c_src/hqueue.d b/c_src/couchdb_hqueue/c_src/hqueue.d new file mode 100644 index 0000000..b8094c8 --- /dev/null +++ b/c_src/couchdb_hqueue/c_src/hqueue.d @@ -0,0 +1,5 @@ +c_src/hqueue.o: c_src/hqueue.c c_src/hqueue.h \ + /usr/lib/erlang/erts-10.6.2/include/erl_nif.h \ + /usr/lib/erlang/erts-10.6.2/include/erl_drv_nif.h \ + /usr/lib/erlang/erts-10.6.2/include/erl_int_sizes_config.h \ + /usr/lib/erlang/erts-10.6.2/include/erl_nif_api_funcs.h diff --git a/c_src/couchdb_hqueue/c_src/hqueue.h b/c_src/couchdb_hqueue/c_src/hqueue.h new file mode 100644 index 0000000..4e422e4 --- /dev/null +++ b/c_src/couchdb_hqueue/c_src/hqueue.h @@ -0,0 +1,60 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. You may obtain a copy of +// the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations under +// the License. 
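+// hqueue is a fixed-capacity binary max-heap keyed by a double priority and
+// holding opaque void* values. The backing array is one-based (slot 0 is
+// unused), grows by HQ_SCALE_FACTOR on demand up to max_elems, and
+// hqueue_insert()/hqueue_extract_max() return 1 on success and 0 when the
+// queue is full or empty. Extraction order is undefined for equal
+// priorities. A minimal usage sketch, assuming the default malloc/free
+// allocator (i.e. HQ_ENIF_ALLOC not defined):
+//
+//     hqueue_t* hq = hqueue_new(1024 /* max_elems */, 16 /* heap_size */);
+//     hqueue_insert(hq, 2.0, "low");
+//     hqueue_insert(hq, 5.0, "high");
+//     double p; void* v;
+//     hqueue_extract_max(hq, &p, &v);  /* p == 5.0, v == "high" */
+//     hqueue_free(hq);                 /* stored values are not freed here */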
+ +#pragma once + + +#include + +#define HQ_VERSION 0 +#define HQ_SCALE_FACTOR 2 // heap expansion scale factor + + +// Override the default memory allocator to use the Erlang versions. +// This bubbles up memory usage for the NIF into Erlang stats. +#ifdef HQ_ENIF_ALLOC + +#include "erl_nif.h" + +#define HQUEUE_ALLOC enif_alloc +#define HQUEUE_FREE enif_free + +#else + +#define HQUEUE_ALLOC malloc +#define HQUEUE_FREE free + +#endif + + +typedef struct hqnode hqnode_t; +typedef struct hqueue hqueue_t; + + +hqueue_t* hqueue_new(uint32_t max_elems, uint32_t heap_size); + +void hqueue_free(hqueue_t* hqueue); +void hqueue_free2(hqueue_t* hqueue, void (*free_node)(void* node)); + +int hqueue_insert(hqueue_t* hqueue, double priority, void* val); +int hqueue_extract_max(hqueue_t* hqueue, double* priority, void** value); +void hqueue_get_elem(hqueue_t* hqueue, uint32_t idx, double *priority, + void** value); + +uint32_t hqueue_size(hqueue_t* hqueue); +uint32_t hqueue_heap_size(hqueue_t* hqueue); + +uint32_t hqueue_max_elems(hqueue_t* hqueue); +int hqueue_set_max_elems(hqueue_t* hqueue, uint32_t new_max_elems); + +void hqueue_scale_by(hqueue_t* hqueue, double factor); +uint32_t hqueue_resize_heap(hqueue_t* hqueue, uint32_t new_heap_size); diff --git a/c_src/couchdb_hqueue/c_src/hqueue_nif.c b/c_src/couchdb_hqueue/c_src/hqueue_nif.c new file mode 100644 index 0000000..7cbc5e2 --- /dev/null +++ b/c_src/couchdb_hqueue/c_src/hqueue_nif.c @@ -0,0 +1,601 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. You may obtain a copy of +// the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations under +// the License. 
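+// Implementation notes, summarising the code below: every inserted Erlang
+// term is copied into a private ErlNifEnv owned by an hqnode_nif_t node, and
+// that env is released when the term is extracted or when the queue resource
+// is destroyed; the resource records the pid of the creating process and
+// check_pid() rejects calls from any other process with badarg; priorities
+// must be non-negative doubles; soft failures come back as
+// {error, empty | full | too_small}. Going by the funcs table at the end of
+// this file, the Erlang-side call shape is roughly:
+//
+//     {ok, Q} = hqueue:new([{max_elems, 1024}, {heap_size, 64}]),
+//     ok = hqueue:insert(Q, 1.0, foo),
+//     {1.0, foo} = hqueue:extract_max(Q),
+//     {error, empty} = hqueue:extract_max(Q).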
+ +#include +#include +#include + +#include "hqueue.h" + + +typedef struct +{ + ERL_NIF_TERM atom_ok; + ERL_NIF_TERM atom_error; + ERL_NIF_TERM atom_value; + ERL_NIF_TERM atom_empty; + ERL_NIF_TERM atom_full; + ERL_NIF_TERM atom_max_elems; + ERL_NIF_TERM atom_heap_size; + ERL_NIF_TERM atom_too_small; + ErlNifResourceType* res_hqueue; +} hqueue_priv; + + +typedef struct +{ + ErlNifEnv* env; + ERL_NIF_TERM value; +} hqnode_nif_t; + + +typedef struct +{ + int version; + uint64_t gen; + hqueue_t* hqueue; + ErlNifPid p; +} hqueue_nif_t; + + +static const uint32_t default_max_elems = UINT32_MAX-1; +static const uint32_t default_heap_size = 1024; + + +static inline ERL_NIF_TERM +make_atom(ErlNifEnv* env, const char* name) +{ + ERL_NIF_TERM ret; + if(enif_make_existing_atom(env, name, &ret, ERL_NIF_LATIN1)) { + return ret; + } + return enif_make_atom(env, name); +} + + +static inline ERL_NIF_TERM +make_ok(ErlNifEnv* env, hqueue_priv* priv, ERL_NIF_TERM value) +{ + return enif_make_tuple2(env, priv->atom_ok, value); +} + + +static inline ERL_NIF_TERM +make_error(ErlNifEnv* env, hqueue_priv* priv, ERL_NIF_TERM reason) +{ + return enif_make_tuple2(env, priv->atom_error, reason); +} + + +static inline int +check_pid(ErlNifEnv* env, hqueue_nif_t* hqueue_nif) +{ + ErlNifPid pid; + enif_self(env, &pid); + + if(enif_compare(pid.pid, hqueue_nif->p.pid) == 0) { + return 1; + } + + return 0; +} + + +void +hqueue_nif_node_free(hqnode_nif_t* hqnode_nif) +{ + enif_free_env(hqnode_nif->env); + enif_free(hqnode_nif); + + return; +} + + +void +hqueue_nif_node_free_ext(void* node) +{ + hqueue_nif_node_free((hqnode_nif_t*) node); + + return; +} + + +hqnode_nif_t* +hqueue_nif_node_alloc() +{ + hqnode_nif_t* node = (hqnode_nif_t*) enif_alloc(sizeof(hqnode_nif_t*)); + + memset(node, 0, sizeof(hqnode_nif_t)); + + node->env = enif_alloc_env(); + + return node; +} + + +static int +get_uint_param(ErlNifEnv* env, ERL_NIF_TERM value, ERL_NIF_TERM atom, uint32_t* p) +{ + const ERL_NIF_TERM* tuple; + int arity; + + if(!enif_get_tuple(env, value, &arity, &tuple)) { + return 0; + } + + if(arity != 2) { + return 0; + } + + if(enif_compare(tuple[0], atom) != 0) { + return 0; + } + + if(!enif_get_uint(env, tuple[1], p)) { + return 0; + } + + return 1; +} + + +static inline hqueue_nif_t* +hqueue_nif_create_int(ErlNifEnv* env, hqueue_priv* priv, uint32_t max_elems, + uint32_t heap_size) +{ + hqueue_nif_t* hqueue_nif = NULL; + + assert(priv != NULL && "missing private data member"); + + hqueue_nif = (hqueue_nif_t*) enif_alloc_resource( + priv->res_hqueue, sizeof(hqueue_nif_t)); + memset(hqueue_nif, 0, sizeof(hqueue_nif_t)); + hqueue_nif->version = HQ_VERSION; + + hqueue_nif->hqueue = hqueue_new(max_elems, heap_size); + + if(hqueue_nif->hqueue == NULL ) { + enif_release_resource(hqueue_nif); + return NULL; + } + + enif_self(env, &(hqueue_nif->p)); + + return hqueue_nif; +} + + +static ERL_NIF_TERM +hqueue_nif_new(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + hqueue_priv* priv = enif_priv_data(env); + hqueue_nif_t* hqueue_nif; + ERL_NIF_TERM ret; + ERL_NIF_TERM opts; + ERL_NIF_TERM value; + uint32_t max_elems = default_max_elems; + uint32_t heap_size = default_heap_size; + + if(argc != 1) { + return enif_make_badarg(env); + } + + opts = argv[0]; + if(!enif_is_list(env, opts)) { + return enif_make_badarg(env); + } + + while(enif_get_list_cell(env, opts, &value, &opts)) { + if(get_uint_param(env, value, priv->atom_max_elems, &max_elems)) { + continue; + } else if(get_uint_param(env, value, priv->atom_heap_size, &heap_size)) { 
+ continue; + } else { + return enif_make_badarg(env); + } + } + + hqueue_nif = hqueue_nif_create_int(env, priv, max_elems, heap_size); + if(hqueue_nif == NULL) { + return enif_make_badarg(env); + } + + ret = enif_make_resource(env, hqueue_nif); + enif_release_resource(hqueue_nif); + + return make_ok(env, priv, ret); +} + + +static void +hqueue_nif_free(ErlNifEnv* env, void* obj) +{ + hqueue_nif_t* hqueue_nif = (hqueue_nif_t*) obj; + + hqueue_free2(hqueue_nif->hqueue, hqueue_nif_node_free_ext); + + return; +} + + +static ERL_NIF_TERM +hqueue_nif_extract_max(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + hqueue_priv* priv = enif_priv_data(env); + hqueue_nif_t* hqueue_nif; + hqnode_nif_t* hqnode_nif; + double tmp_priority; + ERL_NIF_TERM ret; + ERL_NIF_TERM priority; + ERL_NIF_TERM value; + + if(argc != 1) { + return enif_make_badarg(env); + } + + if(!enif_get_resource(env, argv[0], priv->res_hqueue, (void**) &hqueue_nif)) { + return enif_make_badarg(env); + } + + if(!check_pid(env, hqueue_nif)) { + return enif_make_badarg(env); + } + + if (!hqueue_extract_max(hqueue_nif->hqueue, &tmp_priority, (void**) &hqnode_nif)) { + return make_error(env, priv, priv->atom_empty); + } + + priority = enif_make_double(env, tmp_priority); + value = enif_make_copy(env, hqnode_nif->value); + ret = enif_make_tuple2(env, priority, value); + + hqueue_nif_node_free(hqnode_nif); + + return ret; +} + + +static ERL_NIF_TERM +hqueue_nif_insert(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + hqueue_priv* priv = enif_priv_data(env); + hqueue_nif_t* hqueue_nif; + hqnode_nif_t* hqnode_nif; + ERL_NIF_TERM ret; + double priority; + + if(argc != 3) { + return enif_make_badarg(env); + } + + if(!enif_get_resource(env, argv[0], priv->res_hqueue, (void**) &hqueue_nif)) { + return enif_make_badarg(env); + } + + if(!check_pid(env, hqueue_nif)) { + return enif_make_badarg(env); + } + + if(!enif_get_double(env, argv[1], &priority)) { + return enif_make_badarg(env); + } + + if(priority < 0.0) { + return enif_make_badarg(env); + } + + hqnode_nif = hqueue_nif_node_alloc(); + hqnode_nif->value = enif_make_copy(hqnode_nif->env, argv[2]); + + if (!hqueue_insert(hqueue_nif->hqueue, priority, (void*) hqnode_nif)) { + return make_error(env, priv, priv->atom_full); + } + + ret = priv->atom_ok; + + return ret; +} + + +static ERL_NIF_TERM +hqueue_nif_size(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + hqueue_priv* priv = enif_priv_data(env); + hqueue_nif_t* hqueue_nif; + ERL_NIF_TERM ret; + + if(argc != 1) { + return enif_make_badarg(env); + } + + if(!enif_get_resource(env, argv[0], priv->res_hqueue, (void**) &hqueue_nif)) { + return enif_make_badarg(env); + } + + if(!check_pid(env, hqueue_nif)) { + return enif_make_badarg(env); + } + + ret = enif_make_uint64(env, hqueue_size(hqueue_nif->hqueue)); + + return ret; +} + + +static ERL_NIF_TERM +hqueue_nif_heap_size(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + hqueue_priv* priv = enif_priv_data(env); + hqueue_nif_t* hqueue_nif; + ERL_NIF_TERM ret; + + if(argc != 1) { + return enif_make_badarg(env); + } + + if(!enif_get_resource(env, argv[0], priv->res_hqueue, (void**) &hqueue_nif)) { + return enif_make_badarg(env); + } + + if(!check_pid(env, hqueue_nif)) { + return enif_make_badarg(env); + } + + ret = enif_make_uint64(env, hqueue_heap_size(hqueue_nif->hqueue)); + + return ret; +} + + +static ERL_NIF_TERM +hqueue_nif_max_elems(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + hqueue_priv* priv = enif_priv_data(env); + hqueue_nif_t* hqueue_nif; + 
ERL_NIF_TERM ret; + + if(argc != 1) { + return enif_make_badarg(env); + } + + if(!enif_get_resource(env, argv[0], priv->res_hqueue, (void**) &hqueue_nif)) { + return enif_make_badarg(env); + } + + if(!check_pid(env, hqueue_nif)) { + return enif_make_badarg(env); + } + + ret = enif_make_uint64(env, hqueue_max_elems(hqueue_nif->hqueue)); + + return ret; +} + + +static ERL_NIF_TERM +hqueue_nif_to_list(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + hqueue_priv* priv = enif_priv_data(env); + hqueue_nif_t* hqueue_nif; + hqueue_t* hqueue; + hqnode_nif_t* hqnode_nif; + double tmp_priority; + ERL_NIF_TERM ret = enif_make_list(env, 0); + ERL_NIF_TERM priority; + ERL_NIF_TERM value; + ERL_NIF_TERM tuple; + uint32_t i; + + if(argc != 1) { + return enif_make_badarg(env); + } + + if(!enif_get_resource(env, argv[0], priv->res_hqueue, (void**) &hqueue_nif)) { + return enif_make_badarg(env); + } + + if(!check_pid(env, hqueue_nif)) { + return enif_make_badarg(env); + } + + hqueue = hqueue_nif->hqueue; + + for (i = 1; i <= hqueue_size(hqueue); i++) { + hqueue_get_elem(hqueue, i, &tmp_priority, (void **) &hqnode_nif); + priority = enif_make_double(env, tmp_priority); + value = enif_make_copy(env, hqnode_nif->value); + tuple = enif_make_tuple2(env, priority, value); + ret = enif_make_list_cell(env, tuple, ret); + } + + return ret; +} + + +static ERL_NIF_TERM +hqueue_nif_scale_by(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + hqueue_priv* priv = enif_priv_data(env); + hqueue_nif_t* hqueue_nif; + ERL_NIF_TERM ret; + double factor; + + if(argc != 2) { + return enif_make_badarg(env); + } + + if(!enif_get_resource(env, argv[0], priv->res_hqueue, (void**) &hqueue_nif)) { + return enif_make_badarg(env); + } + + if(!check_pid(env, hqueue_nif)) { + return enif_make_badarg(env); + } + + if(!enif_get_double(env, argv[1], &factor)) { + return enif_make_badarg(env); + } + + if(factor < 0.0) { + return enif_make_badarg(env); + } + + hqueue_scale_by(hqueue_nif->hqueue, factor); + + ret = priv->atom_ok; + + return ret; +} + + +static ERL_NIF_TERM +hqueue_nif_resize_heap(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + hqueue_priv* priv = enif_priv_data(env); + hqueue_nif_t* hqueue_nif; + ERL_NIF_TERM ret; + uint32_t new_heap_size; + uint32_t old_heap_size; + + if(argc != 2) { + return enif_make_badarg(env); + } + + if(!enif_get_resource(env, argv[0], priv->res_hqueue, (void**) &hqueue_nif)) { + return enif_make_badarg(env); + } + + if(!check_pid(env, hqueue_nif)) { + return enif_make_badarg(env); + } + + if(!enif_get_uint(env, argv[1], &new_heap_size)) { + return enif_make_badarg(env); + } + + if(hqueue_size(hqueue_nif->hqueue) > new_heap_size) { + return make_error(env, priv, priv->atom_too_small); + } + + if((old_heap_size = hqueue_resize_heap(hqueue_nif->hqueue, new_heap_size)) == 0) { + return enif_make_badarg(env); + } + + ret = enif_make_uint64(env, old_heap_size); + + return ret; +} + + +static ERL_NIF_TERM +hqueue_nif_set_max_elems(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + hqueue_priv* priv = enif_priv_data(env); + hqueue_nif_t* hqueue_nif; + ERL_NIF_TERM ret; + uint32_t new_max_elems; + uint32_t old_max_elems; + + if(argc != 2) { + return enif_make_badarg(env); + } + + if(!enif_get_resource(env, argv[0], priv->res_hqueue, (void**) &hqueue_nif)) { + return enif_make_badarg(env); + } + + if(!check_pid(env, hqueue_nif)) { + return enif_make_badarg(env); + } + + if(!enif_get_uint(env, argv[1], &new_max_elems)) { + return enif_make_badarg(env); + } + + 
if(hqueue_size(hqueue_nif->hqueue) > new_max_elems) { + return make_error(env, priv, priv->atom_too_small); + } + + if ((old_max_elems = hqueue_set_max_elems(hqueue_nif->hqueue, new_max_elems)) == 0) { + return enif_make_badarg(env); + } + + ret = enif_make_uint64(env, old_max_elems); + + return ret; +} + + +static int +load(ErlNifEnv* env, void** priv, ERL_NIF_TERM info) +{ + int flags = ERL_NIF_RT_CREATE | ERL_NIF_RT_TAKEOVER; + ErlNifResourceType* res; + + hqueue_priv* new_priv = (hqueue_priv*) enif_alloc(sizeof(hqueue_priv)); + if(new_priv == NULL) { + return 1; + } + + res = enif_open_resource_type( + env, NULL, "hqueue", hqueue_nif_free, flags, NULL); + if(res == NULL) { + enif_free(new_priv); + return 1; + } + new_priv->res_hqueue = res; + + new_priv->atom_ok = make_atom(env, "ok"); + new_priv->atom_error = make_atom(env, "error"); + new_priv->atom_value = make_atom(env, "value"); + new_priv->atom_empty = make_atom(env, "empty"); + new_priv->atom_full = make_atom(env, "full"); + new_priv->atom_max_elems = make_atom(env, "max_elems"); + new_priv->atom_heap_size = make_atom(env, "heap_size"); + new_priv->atom_too_small = make_atom(env, "too_small"); + + *priv = (void*) new_priv; + + return 0; +} + + +static int +upgrade(ErlNifEnv* env, void** priv, void** old_priv, ERL_NIF_TERM info) +{ + return load(env, priv, info); +} + + +static void +unload(ErlNifEnv* env, void* priv) +{ + enif_free(priv); + return; +} + + +static ErlNifFunc funcs[] = { + {"new", 1, hqueue_nif_new}, + {"extract_max", 1, hqueue_nif_extract_max}, + {"insert", 3, hqueue_nif_insert}, + {"size", 1, hqueue_nif_size}, + {"heap_size", 1, hqueue_nif_heap_size}, + {"max_elems", 1, hqueue_nif_max_elems}, + {"set_max_elems", 2, hqueue_nif_set_max_elems}, + {"to_list", 1, hqueue_nif_to_list}, + {"scale_by", 2, hqueue_nif_scale_by}, + {"resize_heap", 2, hqueue_nif_resize_heap} +}; + + +ERL_NIF_INIT(hqueue, funcs, &load, NULL, &upgrade, &unload); diff --git a/c_src/couchdb_hqueue/c_src/hqueue_nif.d b/c_src/couchdb_hqueue/c_src/hqueue_nif.d new file mode 100644 index 0000000..d8b20c2 --- /dev/null +++ b/c_src/couchdb_hqueue/c_src/hqueue_nif.d @@ -0,0 +1,5 @@ +c_src/hqueue_nif.o: c_src/hqueue_nif.c c_src/hqueue.h \ + /usr/lib/erlang/erts-10.6.2/include/erl_nif.h \ + /usr/lib/erlang/erts-10.6.2/include/erl_drv_nif.h \ + /usr/lib/erlang/erts-10.6.2/include/erl_int_sizes_config.h \ + /usr/lib/erlang/erts-10.6.2/include/erl_nif_api_funcs.h diff --git a/c_src/couchdb_hqueue/c_src/valgrind_sample.c b/c_src/couchdb_hqueue/c_src/valgrind_sample.c new file mode 100644 index 0000000..3c78da5 --- /dev/null +++ b/c_src/couchdb_hqueue/c_src/valgrind_sample.c @@ -0,0 +1,72 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. You may obtain a copy of +// the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations under +// the License. + +#include +#include +#include + +#include "hqueue.h" + + +// Simple test script to stress the public HQueue API. +// Primary use case is for running this under Valgrind. 
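+// One possible way to build and run this sample standalone (the exact
+// command lines are an assumption, not part of the build scripts in this
+// patch): compile it together with hqueue.c, leaving HQ_ENIF_ALLOC undefined
+// so plain malloc/free are used, then run the binary under Valgrind:
+//
+//     cc -g -O0 -I. -o valgrind_sample valgrind_sample.c hqueue.c
+//     valgrind --leak-check=full ./valgrind_sample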
+int main(void) +{ + int str_len = 100; + int iterations = 1000; + uint32_t max_elems = 1024; + uint32_t heap_size = 64; + hqueue_t* hq = hqueue_new(max_elems, heap_size); + double priority; + double priority_res; + char* val; + char* val_res; + int i; + + assert(max_elems == hqueue_max_elems(hq)); + assert(heap_size == hqueue_heap_size(hq)); + + for(i = 0; i < iterations; i++) { + priority = 1234.4321 * i; + val = (char*) malloc(str_len + 1); + + if(val == NULL) { + return 1; + } + + assert(hqueue_size(hq) == i); + + if(snprintf(val, str_len + 1, "Fun string #%d\n", i)) { + if(!hqueue_insert(hq, priority, val)) { + return 1; + } + } else { + return 1; + } + } + + hqueue_scale_by(hq, 3.7); + + // Added 1000 elements, so heap size should have expanded to 1024 + assert(max_elems == hqueue_max_elems(hq)); + assert(max_elems == hqueue_heap_size(hq)); + + if(!hqueue_extract_max(hq, &priority_res, (void**) &val_res)) { + return 1; + } + free(val_res); + + hqueue_free2(hq, free); + + return 0; +} + diff --git a/c_src/couchdb_hqueue/hqueue.c b/c_src/couchdb_hqueue/hqueue.c new file mode 100644 index 0000000..f02f251 --- /dev/null +++ b/c_src/couchdb_hqueue/hqueue.c @@ -0,0 +1,318 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. You may obtain a copy of +// the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations under +// the License. + +#include +#include +#include +#include +#include + +#include "hqueue.h" + + +struct hqueue +{ + int version; + uint32_t idx; + uint32_t max_elems; + uint32_t heap_size; + hqnode_t* heap; // one based index +}; + + +struct hqnode +{ + double priority; + void* value; +}; + + +static inline void +hqueue_exchange(hqueue_t* hqueue, int i, int j) +{ + hqnode_t tmp; + + tmp = hqueue->heap[i]; + hqueue->heap[i] = hqueue->heap[j]; + hqueue->heap[j] = tmp; + return; +} + + +static inline int +hqueue_less(hqueue_t* hqueue, int i, int j) +{ + return hqueue->heap[i].priority < hqueue->heap[j].priority; +} + + +static void +hqueue_fix_up(hqueue_t* hqueue, int k) +{ + while(k > 1 && hqueue_less(hqueue, k/2, k)) { + hqueue_exchange(hqueue, k/2, k); + k = k/2; + } + return; +} + + +static void +hqueue_fix_down(hqueue_t* hqueue, int k) +{ + int j; + int n = hqueue->idx; + + while(2*k <= n) { + j = 2*k; + if(j < n && hqueue_less(hqueue, j, j+1)) { + j++; + } + if(!hqueue_less(hqueue, k, j)) { + break; + } + hqueue_exchange(hqueue, k, j); + k = j; + } + return; +} + + +hqueue_t* +hqueue_new(uint32_t max_elems, uint32_t heap_size) +{ + hqueue_t* hqueue = NULL; + size_t total_heap_size; + + if(max_elems == 0 || heap_size == 0) { + return NULL; + } + + if(max_elems < heap_size) { + heap_size = max_elems; + } + + hqueue = HQUEUE_ALLOC(sizeof(hqueue_t)); + if(hqueue == NULL) { + return NULL; + } + + memset(hqueue, '\0', sizeof(hqueue_t)); + hqueue->version = HQ_VERSION; + hqueue->max_elems = max_elems; + hqueue->heap_size = heap_size; + hqueue->idx = 0; + + total_heap_size = sizeof(hqnode_t) * (hqueue->heap_size+1); + + hqueue->heap = (hqnode_t*) HQUEUE_ALLOC(total_heap_size); + + if(hqueue->heap == NULL ) { + HQUEUE_FREE(hqueue); + return NULL; + } + + memset(hqueue->heap, '\0', 
total_heap_size); + + return hqueue; +} + + +void +hqueue_free(hqueue_t* hqueue) +{ + HQUEUE_FREE(hqueue->heap); + HQUEUE_FREE(hqueue); + + return; +} + + +void +hqueue_free2(hqueue_t* hqueue, void (*free_node)(void* node)) +{ + uint32_t i; + + for(i = 1; i < hqueue->heap_size + 1; i++) { + if(i <= hqueue->idx) { + free_node(hqueue->heap[i].value); + } else { + assert(hqueue->heap[i].value == NULL && "inactive elements must be NULL"); + } + } + + hqueue_free(hqueue); + + return; +} + + +// Extraction order is undefined for entries with duplicate priorities +int +hqueue_extract_max(hqueue_t* hqueue, double* priority, void** value) +{ + if(hqueue->idx <= 0) { + return 0; + } + + hqueue_exchange(hqueue, 1, hqueue->idx); + + *priority = hqueue->heap[hqueue->idx].priority; + *value = hqueue->heap[hqueue->idx].value; + + hqueue->heap[hqueue->idx].value = NULL; + + hqueue->idx--; // heap uses one based index, so we decrement after + hqueue_fix_down(hqueue, 1); + + return 1; +} + + +void +hqueue_get_elem(hqueue_t* hqueue, uint32_t idx, double *priority, void** value) +{ + *priority = hqueue->heap[idx].priority; + *value = hqueue->heap[idx].value; + + return; +} + + +static int +hqueue_maybe_resize(hqueue_t* hqueue) +{ + uint32_t min_resize; + + if(hqueue->idx + 1 > hqueue->heap_size) { + if(hqueue->idx * HQ_SCALE_FACTOR > hqueue->max_elems) { + min_resize = hqueue->max_elems; + } else { + min_resize = hqueue->idx * HQ_SCALE_FACTOR; + } + return hqueue_resize_heap(hqueue, min_resize); + } + + return 1; +} + + +int +hqueue_insert(hqueue_t* hqueue, double priority, void* value) +{ + if(hqueue->idx >= hqueue->max_elems) { + return 0; + } + + if(!hqueue_maybe_resize(hqueue)) { + return 0; + } + + hqueue->idx++; // heap uses one based index, so we increment first + hqueue->heap[hqueue->idx].priority = priority; + hqueue->heap[hqueue->idx].value = value; + + hqueue_fix_up(hqueue, hqueue->idx); + + return 1; +} + + +uint32_t +hqueue_size(hqueue_t* hqueue) +{ + return hqueue->idx; +} + + +uint32_t +hqueue_heap_size(hqueue_t* hqueue) +{ + return hqueue->heap_size; +} + + +uint32_t +hqueue_max_elems(hqueue_t* hqueue) +{ + return hqueue->max_elems; +} + + +void +hqueue_scale_by(hqueue_t* hqueue, double factor) +{ + uint32_t i; + + for(i = 1; i <= hqueue->idx && i <= hqueue->heap_size; i++) { + hqueue->heap[i].priority *= factor; + } + + return; +} + + +uint32_t +hqueue_resize_heap(hqueue_t* hqueue, uint32_t new_heap_size) +{ + uint32_t old_heap_size; + size_t total_heap_size; + hqnode_t* tmp_heap; + uint32_t i; + + if(hqueue->idx > new_heap_size) { + return 0; + } + + total_heap_size = sizeof(hqnode_t) * (new_heap_size+1); + old_heap_size = hqueue->heap_size; + + if((tmp_heap = (hqnode_t*) HQUEUE_ALLOC(total_heap_size)) == NULL) { + return 0; + } + + memset(tmp_heap, '\0', total_heap_size); + + for(i = 1; i <= hqueue->idx && i <= old_heap_size; i++) { + if(i <= hqueue->idx) { + tmp_heap[i] = hqueue->heap[i]; + hqueue->heap[i].value = NULL; + } else { + assert(hqueue->heap[i].value == NULL && + "unexpected NULL element during heap resize"); + } + } + + HQUEUE_FREE(hqueue->heap); + hqueue->heap = tmp_heap; + hqueue->heap_size = new_heap_size; + + return old_heap_size; +} + + +int +hqueue_set_max_elems(hqueue_t* hqueue, uint32_t new_max_elems) +{ + uint32_t old_max_elems; + + if(hqueue->heap_size > new_max_elems) { + if(!hqueue_resize_heap(hqueue, new_max_elems)) { + return 0; + } + } + + old_max_elems = hqueue->max_elems; + hqueue->max_elems = new_max_elems; + + return old_max_elems; +} diff --git 
a/c_src/couchdb_hqueue/hqueue.h b/c_src/couchdb_hqueue/hqueue.h new file mode 100644 index 0000000..4e422e4 --- /dev/null +++ b/c_src/couchdb_hqueue/hqueue.h @@ -0,0 +1,60 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. You may obtain a copy of +// the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations under +// the License. + +#pragma once + + +#include + +#define HQ_VERSION 0 +#define HQ_SCALE_FACTOR 2 // heap expansion scale factor + + +// Override the default memory allocator to use the Erlang versions. +// This bubbles up memory usage for the NIF into Erlang stats. +#ifdef HQ_ENIF_ALLOC + +#include "erl_nif.h" + +#define HQUEUE_ALLOC enif_alloc +#define HQUEUE_FREE enif_free + +#else + +#define HQUEUE_ALLOC malloc +#define HQUEUE_FREE free + +#endif + + +typedef struct hqnode hqnode_t; +typedef struct hqueue hqueue_t; + + +hqueue_t* hqueue_new(uint32_t max_elems, uint32_t heap_size); + +void hqueue_free(hqueue_t* hqueue); +void hqueue_free2(hqueue_t* hqueue, void (*free_node)(void* node)); + +int hqueue_insert(hqueue_t* hqueue, double priority, void* val); +int hqueue_extract_max(hqueue_t* hqueue, double* priority, void** value); +void hqueue_get_elem(hqueue_t* hqueue, uint32_t idx, double *priority, + void** value); + +uint32_t hqueue_size(hqueue_t* hqueue); +uint32_t hqueue_heap_size(hqueue_t* hqueue); + +uint32_t hqueue_max_elems(hqueue_t* hqueue); +int hqueue_set_max_elems(hqueue_t* hqueue, uint32_t new_max_elems); + +void hqueue_scale_by(hqueue_t* hqueue, double factor); +uint32_t hqueue_resize_heap(hqueue_t* hqueue, uint32_t new_heap_size); diff --git a/c_src/couchdb_hqueue/hqueue_nif.c b/c_src/couchdb_hqueue/hqueue_nif.c new file mode 100644 index 0000000..7cbc5e2 --- /dev/null +++ b/c_src/couchdb_hqueue/hqueue_nif.c @@ -0,0 +1,601 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. You may obtain a copy of +// the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations under +// the License. 
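+// Allocation note: the accompanying rebar.config passes -DHQ_ENIF_ALLOC, so
+// HQUEUE_ALLOC/HQUEUE_FREE in hqueue.h resolve to enif_alloc/enif_free and
+// the heap storage is accounted to the Erlang VM's allocators; the per-term
+// ErlNifEnv and hqnode_nif_t bookkeeping below always use enif_alloc_env/
+// enif_alloc regardless of that flag. Defaults when new/1 gets an empty
+// option list: max_elems = UINT32_MAX - 1, heap_size = 1024.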
+ +#include +#include +#include + +#include "hqueue.h" + + +typedef struct +{ + ERL_NIF_TERM atom_ok; + ERL_NIF_TERM atom_error; + ERL_NIF_TERM atom_value; + ERL_NIF_TERM atom_empty; + ERL_NIF_TERM atom_full; + ERL_NIF_TERM atom_max_elems; + ERL_NIF_TERM atom_heap_size; + ERL_NIF_TERM atom_too_small; + ErlNifResourceType* res_hqueue; +} hqueue_priv; + + +typedef struct +{ + ErlNifEnv* env; + ERL_NIF_TERM value; +} hqnode_nif_t; + + +typedef struct +{ + int version; + uint64_t gen; + hqueue_t* hqueue; + ErlNifPid p; +} hqueue_nif_t; + + +static const uint32_t default_max_elems = UINT32_MAX-1; +static const uint32_t default_heap_size = 1024; + + +static inline ERL_NIF_TERM +make_atom(ErlNifEnv* env, const char* name) +{ + ERL_NIF_TERM ret; + if(enif_make_existing_atom(env, name, &ret, ERL_NIF_LATIN1)) { + return ret; + } + return enif_make_atom(env, name); +} + + +static inline ERL_NIF_TERM +make_ok(ErlNifEnv* env, hqueue_priv* priv, ERL_NIF_TERM value) +{ + return enif_make_tuple2(env, priv->atom_ok, value); +} + + +static inline ERL_NIF_TERM +make_error(ErlNifEnv* env, hqueue_priv* priv, ERL_NIF_TERM reason) +{ + return enif_make_tuple2(env, priv->atom_error, reason); +} + + +static inline int +check_pid(ErlNifEnv* env, hqueue_nif_t* hqueue_nif) +{ + ErlNifPid pid; + enif_self(env, &pid); + + if(enif_compare(pid.pid, hqueue_nif->p.pid) == 0) { + return 1; + } + + return 0; +} + + +void +hqueue_nif_node_free(hqnode_nif_t* hqnode_nif) +{ + enif_free_env(hqnode_nif->env); + enif_free(hqnode_nif); + + return; +} + + +void +hqueue_nif_node_free_ext(void* node) +{ + hqueue_nif_node_free((hqnode_nif_t*) node); + + return; +} + + +hqnode_nif_t* +hqueue_nif_node_alloc() +{ + hqnode_nif_t* node = (hqnode_nif_t*) enif_alloc(sizeof(hqnode_nif_t*)); + + memset(node, 0, sizeof(hqnode_nif_t)); + + node->env = enif_alloc_env(); + + return node; +} + + +static int +get_uint_param(ErlNifEnv* env, ERL_NIF_TERM value, ERL_NIF_TERM atom, uint32_t* p) +{ + const ERL_NIF_TERM* tuple; + int arity; + + if(!enif_get_tuple(env, value, &arity, &tuple)) { + return 0; + } + + if(arity != 2) { + return 0; + } + + if(enif_compare(tuple[0], atom) != 0) { + return 0; + } + + if(!enif_get_uint(env, tuple[1], p)) { + return 0; + } + + return 1; +} + + +static inline hqueue_nif_t* +hqueue_nif_create_int(ErlNifEnv* env, hqueue_priv* priv, uint32_t max_elems, + uint32_t heap_size) +{ + hqueue_nif_t* hqueue_nif = NULL; + + assert(priv != NULL && "missing private data member"); + + hqueue_nif = (hqueue_nif_t*) enif_alloc_resource( + priv->res_hqueue, sizeof(hqueue_nif_t)); + memset(hqueue_nif, 0, sizeof(hqueue_nif_t)); + hqueue_nif->version = HQ_VERSION; + + hqueue_nif->hqueue = hqueue_new(max_elems, heap_size); + + if(hqueue_nif->hqueue == NULL ) { + enif_release_resource(hqueue_nif); + return NULL; + } + + enif_self(env, &(hqueue_nif->p)); + + return hqueue_nif; +} + + +static ERL_NIF_TERM +hqueue_nif_new(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + hqueue_priv* priv = enif_priv_data(env); + hqueue_nif_t* hqueue_nif; + ERL_NIF_TERM ret; + ERL_NIF_TERM opts; + ERL_NIF_TERM value; + uint32_t max_elems = default_max_elems; + uint32_t heap_size = default_heap_size; + + if(argc != 1) { + return enif_make_badarg(env); + } + + opts = argv[0]; + if(!enif_is_list(env, opts)) { + return enif_make_badarg(env); + } + + while(enif_get_list_cell(env, opts, &value, &opts)) { + if(get_uint_param(env, value, priv->atom_max_elems, &max_elems)) { + continue; + } else if(get_uint_param(env, value, priv->atom_heap_size, &heap_size)) { 
+ continue; + } else { + return enif_make_badarg(env); + } + } + + hqueue_nif = hqueue_nif_create_int(env, priv, max_elems, heap_size); + if(hqueue_nif == NULL) { + return enif_make_badarg(env); + } + + ret = enif_make_resource(env, hqueue_nif); + enif_release_resource(hqueue_nif); + + return make_ok(env, priv, ret); +} + + +static void +hqueue_nif_free(ErlNifEnv* env, void* obj) +{ + hqueue_nif_t* hqueue_nif = (hqueue_nif_t*) obj; + + hqueue_free2(hqueue_nif->hqueue, hqueue_nif_node_free_ext); + + return; +} + + +static ERL_NIF_TERM +hqueue_nif_extract_max(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + hqueue_priv* priv = enif_priv_data(env); + hqueue_nif_t* hqueue_nif; + hqnode_nif_t* hqnode_nif; + double tmp_priority; + ERL_NIF_TERM ret; + ERL_NIF_TERM priority; + ERL_NIF_TERM value; + + if(argc != 1) { + return enif_make_badarg(env); + } + + if(!enif_get_resource(env, argv[0], priv->res_hqueue, (void**) &hqueue_nif)) { + return enif_make_badarg(env); + } + + if(!check_pid(env, hqueue_nif)) { + return enif_make_badarg(env); + } + + if (!hqueue_extract_max(hqueue_nif->hqueue, &tmp_priority, (void**) &hqnode_nif)) { + return make_error(env, priv, priv->atom_empty); + } + + priority = enif_make_double(env, tmp_priority); + value = enif_make_copy(env, hqnode_nif->value); + ret = enif_make_tuple2(env, priority, value); + + hqueue_nif_node_free(hqnode_nif); + + return ret; +} + + +static ERL_NIF_TERM +hqueue_nif_insert(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + hqueue_priv* priv = enif_priv_data(env); + hqueue_nif_t* hqueue_nif; + hqnode_nif_t* hqnode_nif; + ERL_NIF_TERM ret; + double priority; + + if(argc != 3) { + return enif_make_badarg(env); + } + + if(!enif_get_resource(env, argv[0], priv->res_hqueue, (void**) &hqueue_nif)) { + return enif_make_badarg(env); + } + + if(!check_pid(env, hqueue_nif)) { + return enif_make_badarg(env); + } + + if(!enif_get_double(env, argv[1], &priority)) { + return enif_make_badarg(env); + } + + if(priority < 0.0) { + return enif_make_badarg(env); + } + + hqnode_nif = hqueue_nif_node_alloc(); + hqnode_nif->value = enif_make_copy(hqnode_nif->env, argv[2]); + + if (!hqueue_insert(hqueue_nif->hqueue, priority, (void*) hqnode_nif)) { + return make_error(env, priv, priv->atom_full); + } + + ret = priv->atom_ok; + + return ret; +} + + +static ERL_NIF_TERM +hqueue_nif_size(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + hqueue_priv* priv = enif_priv_data(env); + hqueue_nif_t* hqueue_nif; + ERL_NIF_TERM ret; + + if(argc != 1) { + return enif_make_badarg(env); + } + + if(!enif_get_resource(env, argv[0], priv->res_hqueue, (void**) &hqueue_nif)) { + return enif_make_badarg(env); + } + + if(!check_pid(env, hqueue_nif)) { + return enif_make_badarg(env); + } + + ret = enif_make_uint64(env, hqueue_size(hqueue_nif->hqueue)); + + return ret; +} + + +static ERL_NIF_TERM +hqueue_nif_heap_size(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + hqueue_priv* priv = enif_priv_data(env); + hqueue_nif_t* hqueue_nif; + ERL_NIF_TERM ret; + + if(argc != 1) { + return enif_make_badarg(env); + } + + if(!enif_get_resource(env, argv[0], priv->res_hqueue, (void**) &hqueue_nif)) { + return enif_make_badarg(env); + } + + if(!check_pid(env, hqueue_nif)) { + return enif_make_badarg(env); + } + + ret = enif_make_uint64(env, hqueue_heap_size(hqueue_nif->hqueue)); + + return ret; +} + + +static ERL_NIF_TERM +hqueue_nif_max_elems(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + hqueue_priv* priv = enif_priv_data(env); + hqueue_nif_t* hqueue_nif; + 
ERL_NIF_TERM ret; + + if(argc != 1) { + return enif_make_badarg(env); + } + + if(!enif_get_resource(env, argv[0], priv->res_hqueue, (void**) &hqueue_nif)) { + return enif_make_badarg(env); + } + + if(!check_pid(env, hqueue_nif)) { + return enif_make_badarg(env); + } + + ret = enif_make_uint64(env, hqueue_max_elems(hqueue_nif->hqueue)); + + return ret; +} + + +static ERL_NIF_TERM +hqueue_nif_to_list(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + hqueue_priv* priv = enif_priv_data(env); + hqueue_nif_t* hqueue_nif; + hqueue_t* hqueue; + hqnode_nif_t* hqnode_nif; + double tmp_priority; + ERL_NIF_TERM ret = enif_make_list(env, 0); + ERL_NIF_TERM priority; + ERL_NIF_TERM value; + ERL_NIF_TERM tuple; + uint32_t i; + + if(argc != 1) { + return enif_make_badarg(env); + } + + if(!enif_get_resource(env, argv[0], priv->res_hqueue, (void**) &hqueue_nif)) { + return enif_make_badarg(env); + } + + if(!check_pid(env, hqueue_nif)) { + return enif_make_badarg(env); + } + + hqueue = hqueue_nif->hqueue; + + for (i = 1; i <= hqueue_size(hqueue); i++) { + hqueue_get_elem(hqueue, i, &tmp_priority, (void **) &hqnode_nif); + priority = enif_make_double(env, tmp_priority); + value = enif_make_copy(env, hqnode_nif->value); + tuple = enif_make_tuple2(env, priority, value); + ret = enif_make_list_cell(env, tuple, ret); + } + + return ret; +} + + +static ERL_NIF_TERM +hqueue_nif_scale_by(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + hqueue_priv* priv = enif_priv_data(env); + hqueue_nif_t* hqueue_nif; + ERL_NIF_TERM ret; + double factor; + + if(argc != 2) { + return enif_make_badarg(env); + } + + if(!enif_get_resource(env, argv[0], priv->res_hqueue, (void**) &hqueue_nif)) { + return enif_make_badarg(env); + } + + if(!check_pid(env, hqueue_nif)) { + return enif_make_badarg(env); + } + + if(!enif_get_double(env, argv[1], &factor)) { + return enif_make_badarg(env); + } + + if(factor < 0.0) { + return enif_make_badarg(env); + } + + hqueue_scale_by(hqueue_nif->hqueue, factor); + + ret = priv->atom_ok; + + return ret; +} + + +static ERL_NIF_TERM +hqueue_nif_resize_heap(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + hqueue_priv* priv = enif_priv_data(env); + hqueue_nif_t* hqueue_nif; + ERL_NIF_TERM ret; + uint32_t new_heap_size; + uint32_t old_heap_size; + + if(argc != 2) { + return enif_make_badarg(env); + } + + if(!enif_get_resource(env, argv[0], priv->res_hqueue, (void**) &hqueue_nif)) { + return enif_make_badarg(env); + } + + if(!check_pid(env, hqueue_nif)) { + return enif_make_badarg(env); + } + + if(!enif_get_uint(env, argv[1], &new_heap_size)) { + return enif_make_badarg(env); + } + + if(hqueue_size(hqueue_nif->hqueue) > new_heap_size) { + return make_error(env, priv, priv->atom_too_small); + } + + if((old_heap_size = hqueue_resize_heap(hqueue_nif->hqueue, new_heap_size)) == 0) { + return enif_make_badarg(env); + } + + ret = enif_make_uint64(env, old_heap_size); + + return ret; +} + + +static ERL_NIF_TERM +hqueue_nif_set_max_elems(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + hqueue_priv* priv = enif_priv_data(env); + hqueue_nif_t* hqueue_nif; + ERL_NIF_TERM ret; + uint32_t new_max_elems; + uint32_t old_max_elems; + + if(argc != 2) { + return enif_make_badarg(env); + } + + if(!enif_get_resource(env, argv[0], priv->res_hqueue, (void**) &hqueue_nif)) { + return enif_make_badarg(env); + } + + if(!check_pid(env, hqueue_nif)) { + return enif_make_badarg(env); + } + + if(!enif_get_uint(env, argv[1], &new_max_elems)) { + return enif_make_badarg(env); + } + + 
if(hqueue_size(hqueue_nif->hqueue) > new_max_elems) { + return make_error(env, priv, priv->atom_too_small); + } + + if ((old_max_elems = hqueue_set_max_elems(hqueue_nif->hqueue, new_max_elems)) == 0) { + return enif_make_badarg(env); + } + + ret = enif_make_uint64(env, old_max_elems); + + return ret; +} + + +static int +load(ErlNifEnv* env, void** priv, ERL_NIF_TERM info) +{ + int flags = ERL_NIF_RT_CREATE | ERL_NIF_RT_TAKEOVER; + ErlNifResourceType* res; + + hqueue_priv* new_priv = (hqueue_priv*) enif_alloc(sizeof(hqueue_priv)); + if(new_priv == NULL) { + return 1; + } + + res = enif_open_resource_type( + env, NULL, "hqueue", hqueue_nif_free, flags, NULL); + if(res == NULL) { + enif_free(new_priv); + return 1; + } + new_priv->res_hqueue = res; + + new_priv->atom_ok = make_atom(env, "ok"); + new_priv->atom_error = make_atom(env, "error"); + new_priv->atom_value = make_atom(env, "value"); + new_priv->atom_empty = make_atom(env, "empty"); + new_priv->atom_full = make_atom(env, "full"); + new_priv->atom_max_elems = make_atom(env, "max_elems"); + new_priv->atom_heap_size = make_atom(env, "heap_size"); + new_priv->atom_too_small = make_atom(env, "too_small"); + + *priv = (void*) new_priv; + + return 0; +} + + +static int +upgrade(ErlNifEnv* env, void** priv, void** old_priv, ERL_NIF_TERM info) +{ + return load(env, priv, info); +} + + +static void +unload(ErlNifEnv* env, void* priv) +{ + enif_free(priv); + return; +} + + +static ErlNifFunc funcs[] = { + {"new", 1, hqueue_nif_new}, + {"extract_max", 1, hqueue_nif_extract_max}, + {"insert", 3, hqueue_nif_insert}, + {"size", 1, hqueue_nif_size}, + {"heap_size", 1, hqueue_nif_heap_size}, + {"max_elems", 1, hqueue_nif_max_elems}, + {"set_max_elems", 2, hqueue_nif_set_max_elems}, + {"to_list", 1, hqueue_nif_to_list}, + {"scale_by", 2, hqueue_nif_scale_by}, + {"resize_heap", 2, hqueue_nif_resize_heap} +}; + + +ERL_NIF_INIT(hqueue, funcs, &load, NULL, &upgrade, &unload); diff --git a/c_src/couchdb_hqueue/rebar.config b/c_src/couchdb_hqueue/rebar.config new file mode 100644 index 0000000..82d6eaf --- /dev/null +++ b/c_src/couchdb_hqueue/rebar.config @@ -0,0 +1,13 @@ +{port_specs, [ + {"../../priv/hqueue.so", ["hqueue*.c"]} +]}. + + +{port_env, [ + {"(linux|solaris|darwin|freebsd)", "CFLAGS", "$CFLAGS -g -Wall -Werror -DHQ_ENIF_ALLOC -O3"}, + {"win32", "CFLAGS", "$CFLAGS /O2 /DNDEBUG /DHQ_ENIF_ALLOC /Dinline=__inline /Wall"} + %% {".*", "CFLAGS", "$CFLAGS -g -Wall -Werror -Wextra"} +]}. + + + diff --git a/c_src/couchdb_hqueue/valgrind_sample.c b/c_src/couchdb_hqueue/valgrind_sample.c new file mode 100644 index 0000000..3c78da5 --- /dev/null +++ b/c_src/couchdb_hqueue/valgrind_sample.c @@ -0,0 +1,72 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. You may obtain a copy of +// the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations under +// the License. + +#include +#include +#include + +#include "hqueue.h" + + +// Simple test script to stress the public HQueue API. +// Primary use case is for running this under Valgrind. 
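+// Sizing note for the asserts after the insert loop below: the heap starts
+// at heap_size 64 and hqueue_maybe_resize() doubles the backing array on
+// demand (64 -> 128 -> 256 -> 512 -> 1024, capped at max_elems), so after
+// the 1000 insertions the heap has grown to max_elems == 1024, which is
+// exactly what those asserts check.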
+int main(void) +{ + int str_len = 100; + int iterations = 1000; + uint32_t max_elems = 1024; + uint32_t heap_size = 64; + hqueue_t* hq = hqueue_new(max_elems, heap_size); + double priority; + double priority_res; + char* val; + char* val_res; + int i; + + assert(max_elems == hqueue_max_elems(hq)); + assert(heap_size == hqueue_heap_size(hq)); + + for(i = 0; i < iterations; i++) { + priority = 1234.4321 * i; + val = (char*) malloc(str_len + 1); + + if(val == NULL) { + return 1; + } + + assert(hqueue_size(hq) == i); + + if(snprintf(val, str_len + 1, "Fun string #%d\n", i)) { + if(!hqueue_insert(hq, priority, val)) { + return 1; + } + } else { + return 1; + } + } + + hqueue_scale_by(hq, 3.7); + + // Added 1000 elements, so heap size should have expanded to 1024 + assert(max_elems == hqueue_max_elems(hq)); + assert(max_elems == hqueue_heap_size(hq)); + + if(!hqueue_extract_max(hq, &priority_res, (void**) &val_res)) { + return 1; + } + free(val_res); + + hqueue_free2(hq, free); + + return 0; +} + diff --git a/c_src/cq/cq_nif.c b/c_src/cq/cq_nif.c deleted file mode 100644 index 2f26a20..0000000 --- a/c_src/cq/cq_nif.c +++ /dev/null @@ -1,564 +0,0 @@ -#include -#include - -#include "erl_nif.h" -#include "cq_nif.h" - - -/* #ifndef ERL_NIF_DIRTY_SCHEDULER_SUPPORT -# error Requires dirty schedulers -#endif */ - - - - - -ERL_NIF_TERM -mk_atom(ErlNifEnv* env, const char* atom) -{ - ERL_NIF_TERM ret; - - if(!enif_make_existing_atom(env, atom, &ret, ERL_NIF_LATIN1)) - return enif_make_atom(env, atom); - - return ret; -} - -ERL_NIF_TERM -mk_error(ErlNifEnv* env, const char* mesg) -{ - return enif_make_tuple2(env, mk_atom(env, "error"), mk_atom(env, mesg)); -} - - -static ERL_NIF_TERM -queue_new(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) -{ - cq_t *q = enif_alloc_resource(CQ_RESOURCE, sizeof(cq_t)); - if (q == NULL) - return mk_error(env, "priv_alloc_error"); - - ERL_NIF_TERM ret = enif_make_resource(env, q); - /* enif_release_resource(ret); */ - - uint32_t queue_id = 0; - uint32_t queue_size = 0; - uint32_t overflow_size = 0; - - if (!enif_get_uint(env, argv[0], &queue_id) || - !enif_get_uint(env, argv[1], &queue_size) || - !enif_get_uint(env, argv[2], &overflow_size)) - return mk_error(env, "badarg"); - - if (queue_id > 8) - return mk_error(env, "bad_queue_id"); - - /* TODO: Check that queue_size is power of 2 */ - - if (QUEUES[queue_id] != NULL) - return mk_error(env, "queue_id_already_exists"); - - q->id = queue_id; - q->queue_size = queue_size; - q->overflow_size = overflow_size; - q->tail = 0; - q->head = 0; - q->slots_states = calloc(q->queue_size, CACHE_LINE_SIZE); - q->slots_terms = calloc(q->queue_size, CACHE_LINE_SIZE); - q->slots_envs = calloc(q->queue_size, CACHE_LINE_SIZE); - q->overflow_terms = calloc(q->overflow_size, CACHE_LINE_SIZE); - q->overflow_envs = calloc(q->queue_size, CACHE_LINE_SIZE); - - q->push_queue = new_queue(); - q->pop_queue = new_queue(); - - /* TODO: Check calloc return */ - - - for (int i = 0; i < q->queue_size; i++) { - ErlNifEnv *slot_env = enif_alloc_env(); - - q->slots_envs[i*CACHE_LINE_SIZE] = slot_env; - //q->overflow_envs[i*CACHE_LINE_SIZE] = (ErlNifEnv *) enif_alloc_env(); - } - - QUEUES[q->id] = q; - - return enif_make_tuple2(env, mk_atom(env, "ok"), ret); -} - - -static ERL_NIF_TERM -queue_free(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) -{ - uint32_t queue_id = 0; - - if (!enif_get_uint(env, argv[0], &queue_id)) - return mk_error(env, "badarg"); - - if (queue_id > 8) - return mk_error(env, "badarg"); - - cq_t *q = QUEUES[queue_id]; - if (q 
== NULL) - return mk_error(env, "bad_queue_id"); - - - /* TODO: Free all the things! */ - QUEUES[queue_id] = NULL; - - return enif_make_atom(env, "ok"); - -} - -/* Push to the head of the queue. */ -static ERL_NIF_TERM -queue_push(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) -{ - uint32_t queue_id = 0; - - if (!enif_get_uint(env, argv[0], &queue_id)) - return mk_error(env, "badarg"); - - if (queue_id > 8) - return mk_error(env, "badarg"); - - /* Load the queue */ - cq_t *q = QUEUES[queue_id]; - if (q == NULL) - return mk_error(env, "bad_queue_id"); - - if (q->id != queue_id) - return mk_error(env, "not_identical_queue_id"); - - - for (int i = 0; i < q->queue_size; i++) { - fprintf(stderr, "queue slot %d, index %d, state %d\n", - i, i*CACHE_LINE_SIZE, q->slots_states[i*CACHE_LINE_SIZE]); - } - - /* If there's consumers waiting, the queue must be empty and we - should directly pick a consumer to notify. */ - - ErlNifPid *waiting_consumer; - int dequeue_ret = dequeue(q->pop_queue, &waiting_consumer); - if (dequeue_ret) { - ErlNifEnv *msg_env = enif_alloc_env(); - ERL_NIF_TERM copy = enif_make_copy(msg_env, argv[1]); - ERL_NIF_TERM tuple = enif_make_tuple2(msg_env, mk_atom(env, "pop"), copy); - - if (enif_send(env, waiting_consumer, msg_env, tuple)) { - enif_free_env(msg_env); - return mk_atom(env, "ok"); - } else { - return mk_error(env, "notify_failed"); - } - } - - - - /* Increment head and attempt to claim the slot by marking it as - busy. This ensures no other thread will attempt to modify this - slot. If we cannot lock it, another thread must have */ - - uint64_t head = __sync_add_and_fetch(&q->head, 1); - size_t size = q->queue_size; - - while (1) { - uint64_t index = SLOT_INDEX(head, size); - uint64_t ret = __sync_val_compare_and_swap(&q->slots_states[index], - STATE_EMPTY, - STATE_WRITE); - - switch (ret) { - - case STATE_EMPTY: - head = __sync_add_and_fetch(&q->head, 1); - - case STATE_WRITE: - /* We acquired the write lock, go ahead with the write. */ - break; - - case STATE_FULL: - /* We have caught up with the tail and the buffer is - full. Block the producer until a consumer reads the - item. */ - return mk_error(env, "full_not_implemented"); - } - } - - /* If head catches up with tail, the queue is full. Add to - overflow instead */ - - - /* Copy term to slot-specific temporary process env. */ - ERL_NIF_TERM copy = enif_make_copy(q->slots_envs[SLOT_INDEX(head, size)], argv[1]); - q->slots_terms[SLOT_INDEX(head, size)] = copy; - - __sync_synchronize(); /* Or compiler memory barrier? */ - - - /* TODO: Do we need to collect garbage? */ - - - /* Mark the slot ready to be consumed */ - if (__sync_bool_compare_and_swap(&q->slots_states[SLOT_INDEX(head, size)], - STATE_WRITE, - STATE_FULL)) { - return mk_atom(env, "ok"); - } else { - return mk_error(env, "could_not_update_slots_after_insert"); - } - -} - - - -static ERL_NIF_TERM -queue_async_pop(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) -{ - /* Load queue */ - - uint32_t queue_id = 0; - - if (!enif_get_uint(env, argv[0], &queue_id)) - return mk_error(env, "badarg"); - - if (queue_id > 8) - return mk_error(env, "badarg"); - - cq_t *q = QUEUES[queue_id]; - if (q == NULL) - return mk_error(env, "bad_queue_id"); - - if (q->id != queue_id) - return mk_error(env, "not_identical_queue_id"); - - uint64_t qsize = q->queue_size; - uint64_t tail = q->tail; - uint64_t num_busy = 0; - - /* Walk the buffer starting the tail position until we are either - able to consume a term or find an empty slot. 
*/ - while (1) { - uint64_t index = SLOT_INDEX(tail, qsize); - uint64_t ret = __sync_val_compare_and_swap(&q->slots_states[index], - STATE_FULL, - STATE_READ); - - if (ret == STATE_READ) { - /* We were able to mark the term as read in progress. We - now have an exclusive lock. */ - break; - - } else if (ret == STATE_WRITE) { - /* We found an item with a write in progress. If that - thread progresses, it will eventually mark the slot as - full. We can spin until that happens. - - This can take an arbitrary amount of time and multiple - reading threads will compete for the same slot. - - Instead we add the caller to the queue of blocking - consumers. When the next producer comes it will "help" - this thread by calling enif_send on the current - in-progress term *and* handle it's own terms. If - there's no new push to the queue, this will block - forever. */ - return mk_atom(env, "write_in_progress_not_implemented"); - - } else if (ret == STATE_EMPTY) { - /* We found an empty item. Queue must be empty. Add - calling Erlang consumer process to queue of waiting - processes. When the next producer comes along, it first - checks the waiting consumers and calls enif_send - instead of writing to the slots. */ - - ErlNifPid *pid = enif_alloc(sizeof(ErlNifPid)); - pid = enif_self(env, pid); - enqueue(q->pop_queue, pid); - - return mk_atom(env, "wait_for_msg"); - - } else { - tail = __sync_add_and_fetch(&q->tail, 1); - } - } - - - /* Copy term into calling process env. The NIF env can now be - gargbage collected. */ - ERL_NIF_TERM copy = enif_make_copy(env, q->slots_terms[SLOT_INDEX(tail, qsize)]); - - - /* Mark the slot as free. Note: We don't increment the tail - position here, as another thread also walking the buffer might - have incremented it multiple times */ - q->slots_terms[SLOT_INDEX(tail, qsize)] = 0; - if (__sync_bool_compare_and_swap(&q->slots_states[SLOT_INDEX(tail, qsize)], - STATE_READ, - STATE_EMPTY)) { - return enif_make_tuple2(env, mk_atom(env, "ok"), copy); - } else { - return mk_error(env, "could_not_update_slots_after_pop"); - } -} - - -static ERL_NIF_TERM -queue_debug(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) -{ - - uint32_t queue_id = 0; - - if (!enif_get_uint(env, argv[0], &queue_id)) - return mk_error(env, "badarg"); - - if (queue_id > 8) - return mk_error(env, "badarg"); - - cq_t *q = QUEUES[queue_id]; - if (q == NULL) - return mk_error(env, "bad_queue_id"); - - - - ERL_NIF_TERM *slots_states = enif_alloc(sizeof(ERL_NIF_TERM) * q->queue_size); - ERL_NIF_TERM *slots_terms = enif_alloc(sizeof(ERL_NIF_TERM) * q->queue_size); - for (int i = 0; i < q->queue_size; i++) { - slots_states[i] = enif_make_int(env, q->slots_states[i * CACHE_LINE_SIZE]); - - if (q->slots_terms[i * CACHE_LINE_SIZE] == 0) { - slots_terms[i] = mk_atom(env, "null"); - } else { - slots_terms[i] = enif_make_copy(env, q->slots_terms[i * CACHE_LINE_SIZE]); - } - } - return enif_make_tuple4(env, - enif_make_uint64(env, q->tail), - enif_make_uint64(env, q->head), - enif_make_list_from_array(env, slots_states, q->queue_size), - enif_make_list_from_array(env, slots_terms, q->queue_size)); -} - -static ERL_NIF_TERM -queue_debug_poppers(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) -{ - uint32_t queue_id = 0; - - if (!enif_get_uint(env, argv[0], &queue_id)) - return mk_error(env, "badarg"); - - if (queue_id > 8) - return mk_error(env, "badarg"); - - cq_t *q = QUEUES[queue_id]; - if (q == NULL) - return mk_error(env, "bad_queue_id"); - - - uint64_t pop_queue_size = 0; - cq_node_t *node = 
q->pop_queue->head; - if (node->value == NULL) { - node = node->next; - node = Q_PTR(node); - } - - while (node != NULL) { - pop_queue_size++; - node = node->next; - node = Q_PTR(node); - } - - ERL_NIF_TERM *pop_queue_pids = enif_alloc(sizeof(ERL_NIF_TERM) * pop_queue_size); - - node = q->pop_queue->head; - node = Q_PTR(node); - if (node->value == NULL) { - node = node->next; - node = Q_PTR(node); - } - - uint64_t i = 0; - while (node != NULL) { - if (node->value == 0) { - pop_queue_pids[i] = mk_atom(env, "null"); - } - else { - pop_queue_pids[i] = enif_make_pid(env, node->value); - } - - i++; - node = node->next; - node = Q_PTR(node); - } - - ERL_NIF_TERM list = enif_make_list_from_array(env, pop_queue_pids, pop_queue_size); - enif_free(pop_queue_pids); - - return list; -} - - - -static ERL_NIF_TERM -print_bits(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) -{ - - uint64_t *p1 = malloc(8); - *p1 = 0; - - - for (int bit = 63; bit >= 0; bit--) { - uint64_t power = 1 << bit; - //uint64_t byte = *p1; - uint64_t byte = p1; - fprintf(stderr, "%d", (byte & power) >> bit); - } - fprintf(stderr, "\n"); - - //enif_free(p1); - - return mk_atom(env, "ok"); -} - -void free_resource(ErlNifEnv* env, void* arg) -{ - //cq_t *cq = (cq_t *) arg; - - fprintf(stderr, "free_resource\n"); -} - - -cq_queue_t * new_queue() -{ - cq_queue_t *queue = enif_alloc(sizeof(cq_queue_t)); - cq_node_t *node = enif_alloc(sizeof(cq_node_t)); - node->next = NULL; - //node->env = NULL; - node->value = NULL; - queue->head = node; - queue->tail = node; - - return queue; -} - - - -void enqueue(cq_queue_t *queue, ErlNifPid *pid) -{ - cq_node_t *node = enif_alloc(sizeof(cq_node_t)); - //node->env = enif_alloc_env(); - //node->term = enif_make_copy(node->env, term); - node->value = pid; - node->next = NULL; - fprintf(stderr, "node %lu\n", node); - - cq_node_t *tail = NULL; - uint64_t tail_count = 0; - while (1) { - tail = queue->tail; - cq_node_t *tail_ptr = Q_PTR(tail); - tail_count = Q_COUNT(tail); - - cq_node_t *next = tail->next; - cq_node_t *next_ptr = Q_PTR(next); - uint64_t next_count = Q_COUNT(next); - - if (tail == queue->tail) { - fprintf(stderr, "tail == queue->tail\n"); - if (next_ptr == NULL) { - fprintf(stderr, "next_ptr == NULL\n"); - if (__sync_bool_compare_and_swap(&tail_ptr->next, - next, - Q_SET_COUNT(node, next_count+1))) - fprintf(stderr, "CAS(tail_ptr->next, next, (node, next_count+1)) -> true\n"); - break; - } else { - __sync_bool_compare_and_swap(&queue->tail, - tail, - Q_SET_COUNT(next_ptr, next_count+1)); - fprintf(stderr, "CAS(queue->tail, tail, (next_ptr, next_count+1))\n"); - } - } - } - - cq_node_t *node_with_count = Q_SET_COUNT(node, tail_count+1); - int ret = __sync_bool_compare_and_swap(&queue->tail, - tail, - node_with_count); - fprintf(stderr, "CAS(queue->tail, tail, %lu) -> %d\n", node_with_count, ret); -} - - -int dequeue(cq_queue_t *queue, ErlNifPid **pid) -{ - fprintf(stderr, "dequeue\n"); - cq_node_t *head, *head_ptr, *tail, *tail_ptr, *next, *next_ptr; - - while (1) { - head = queue->head; - head_ptr = Q_PTR(head); - tail = queue->tail; - tail_ptr = Q_PTR(tail); - next = head->next; - next_ptr = Q_PTR(next); - fprintf(stderr, "head %lu, tail %lu, next %lu\n", head, tail, next); - - if (head == queue->head) { - if (head_ptr == tail_ptr) { - if (next_ptr == NULL) { - return 0; /* Queue is empty */ - } - fprintf(stderr, "CAS(queue->tail, tail, (next_ptr, tail+1))\n"); - __sync_bool_compare_and_swap(&queue->tail, - tail, - Q_SET_COUNT(next_ptr, Q_COUNT(tail)+1)); - } else { - fprintf(stderr, 
"next->value %lu\n", next_ptr->value); - *pid = next_ptr->value; - fprintf(stderr, "CAS(queue->head, head, (next_ptr, head+1))\n"); - if (__sync_bool_compare_and_swap(&queue->head, - head, - Q_SET_COUNT(next_ptr, Q_COUNT(head)+1))) - break; - } - } - } - // free pid - //enif_free(Q_PTR(head)); - return 1; -} - - - - -int load(ErlNifEnv* env, void** priv_data, ERL_NIF_TERM load_info) { - /* Initialize global array mapping id to cq_t ptr */ - QUEUES = (cq_t **) calloc(8, sizeof(cq_t **)); - if (QUEUES == NULL) - return -1; - - - ErlNifResourceFlags flags = (ErlNifResourceFlags)(ERL_NIF_RT_CREATE | ERL_NIF_RT_TAKEOVER); - CQ_RESOURCE = enif_open_resource_type(env, "cq", "cq", - &free_resource, flags, NULL); - - if (CQ_RESOURCE == NULL) - return -1; - - return 0; -} - - -static ErlNifFunc nif_funcs[] = { - {"new" , 3, queue_new}, - {"free" , 1, queue_free}, - {"push" , 2, queue_push}, - {"async_pop", 1, queue_async_pop}, - {"debug" , 1, queue_debug}, - {"debug_poppers", 1, queue_debug_poppers}, - {"print_bits", 0, print_bits} -}; - -ERL_NIF_INIT(cq, nif_funcs, load, NULL, NULL, NULL); diff --git a/c_src/cq/cq_nif.h b/c_src/cq/cq_nif.h deleted file mode 100644 index 75f8891..0000000 --- a/c_src/cq/cq_nif.h +++ /dev/null @@ -1,71 +0,0 @@ -#include -#include "erl_nif.h" - - -#define CACHE_LINE_SIZE 64 - -#define SLOT_INDEX(__index, __size) __index & (__size - 1) - -#define Q_MASK 3L -#define Q_PTR(__ptr) (cq_node_t *) (((uint64_t)__ptr) & (~Q_MASK)) -#define Q_COUNT(__ptr) ((uint64_t) __ptr & Q_MASK) -#define Q_SET_COUNT(__ptr, __val) (cq_node_t *) ((uint64_t) __ptr | (__val & Q_MASK)) - - -#define STATE_EMPTY 0 -#define STATE_WRITE 1 -#define STATE_READ 2 -#define STATE_FULL 3 - - -ErlNifResourceType* CQ_RESOURCE; - -typedef struct cq_node cq_node_t; - -struct cq_node { - ErlNifEnv *env; - //ERL_NIF_TERM term; - ErlNifPid *value; - cq_node_t *next; -}; - - - -typedef struct cq_queue { - cq_node_t *head; - cq_node_t *tail; -} cq_queue_t; - - -// TODO: Add padding between the fields -typedef struct cq { - uint32_t id; - uint64_t queue_size; - uint64_t overflow_size; - uint64_t head; - uint64_t tail; - - uint8_t *slots_states; - ERL_NIF_TERM *slots_terms; - ErlNifEnv **slots_envs; - - cq_queue_t *push_queue; - cq_queue_t *pop_queue; - - uint8_t *overflow_states; - ERL_NIF_TERM *overflow_terms; - ErlNifEnv **overflow_envs; - -} cq_t; - -cq_t **QUEUES = NULL; /* Initialized on nif load */ - - -ERL_NIF_TERM mk_atom(ErlNifEnv* env, const char* atom); -ERL_NIF_TERM mk_error(ErlNifEnv* env, const char* msg); -int load(ErlNifEnv* env, void** priv_data, ERL_NIF_TERM load_info); -void free_resource(ErlNifEnv*, void*); - - -cq_queue_t* new_queue(void); -void enqueue(cq_queue_t *q, ErlNifPid *pid); diff --git a/c_src/cq1/cq_nif.c b/c_src/cq1/cq_nif.c deleted file mode 100644 index 2f26a20..0000000 --- a/c_src/cq1/cq_nif.c +++ /dev/null @@ -1,564 +0,0 @@ -#include -#include - -#include "erl_nif.h" -#include "cq_nif.h" - - -/* #ifndef ERL_NIF_DIRTY_SCHEDULER_SUPPORT -# error Requires dirty schedulers -#endif */ - - - - - -ERL_NIF_TERM -mk_atom(ErlNifEnv* env, const char* atom) -{ - ERL_NIF_TERM ret; - - if(!enif_make_existing_atom(env, atom, &ret, ERL_NIF_LATIN1)) - return enif_make_atom(env, atom); - - return ret; -} - -ERL_NIF_TERM -mk_error(ErlNifEnv* env, const char* mesg) -{ - return enif_make_tuple2(env, mk_atom(env, "error"), mk_atom(env, mesg)); -} - - -static ERL_NIF_TERM -queue_new(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) -{ - cq_t *q = enif_alloc_resource(CQ_RESOURCE, sizeof(cq_t)); - if 
(q == NULL) - return mk_error(env, "priv_alloc_error"); - - ERL_NIF_TERM ret = enif_make_resource(env, q); - /* enif_release_resource(ret); */ - - uint32_t queue_id = 0; - uint32_t queue_size = 0; - uint32_t overflow_size = 0; - - if (!enif_get_uint(env, argv[0], &queue_id) || - !enif_get_uint(env, argv[1], &queue_size) || - !enif_get_uint(env, argv[2], &overflow_size)) - return mk_error(env, "badarg"); - - if (queue_id > 8) - return mk_error(env, "bad_queue_id"); - - /* TODO: Check that queue_size is power of 2 */ - - if (QUEUES[queue_id] != NULL) - return mk_error(env, "queue_id_already_exists"); - - q->id = queue_id; - q->queue_size = queue_size; - q->overflow_size = overflow_size; - q->tail = 0; - q->head = 0; - q->slots_states = calloc(q->queue_size, CACHE_LINE_SIZE); - q->slots_terms = calloc(q->queue_size, CACHE_LINE_SIZE); - q->slots_envs = calloc(q->queue_size, CACHE_LINE_SIZE); - q->overflow_terms = calloc(q->overflow_size, CACHE_LINE_SIZE); - q->overflow_envs = calloc(q->queue_size, CACHE_LINE_SIZE); - - q->push_queue = new_queue(); - q->pop_queue = new_queue(); - - /* TODO: Check calloc return */ - - - for (int i = 0; i < q->queue_size; i++) { - ErlNifEnv *slot_env = enif_alloc_env(); - - q->slots_envs[i*CACHE_LINE_SIZE] = slot_env; - //q->overflow_envs[i*CACHE_LINE_SIZE] = (ErlNifEnv *) enif_alloc_env(); - } - - QUEUES[q->id] = q; - - return enif_make_tuple2(env, mk_atom(env, "ok"), ret); -} - - -static ERL_NIF_TERM -queue_free(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) -{ - uint32_t queue_id = 0; - - if (!enif_get_uint(env, argv[0], &queue_id)) - return mk_error(env, "badarg"); - - if (queue_id > 8) - return mk_error(env, "badarg"); - - cq_t *q = QUEUES[queue_id]; - if (q == NULL) - return mk_error(env, "bad_queue_id"); - - - /* TODO: Free all the things! */ - QUEUES[queue_id] = NULL; - - return enif_make_atom(env, "ok"); - -} - -/* Push to the head of the queue. */ -static ERL_NIF_TERM -queue_push(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) -{ - uint32_t queue_id = 0; - - if (!enif_get_uint(env, argv[0], &queue_id)) - return mk_error(env, "badarg"); - - if (queue_id > 8) - return mk_error(env, "badarg"); - - /* Load the queue */ - cq_t *q = QUEUES[queue_id]; - if (q == NULL) - return mk_error(env, "bad_queue_id"); - - if (q->id != queue_id) - return mk_error(env, "not_identical_queue_id"); - - - for (int i = 0; i < q->queue_size; i++) { - fprintf(stderr, "queue slot %d, index %d, state %d\n", - i, i*CACHE_LINE_SIZE, q->slots_states[i*CACHE_LINE_SIZE]); - } - - /* If there's consumers waiting, the queue must be empty and we - should directly pick a consumer to notify. */ - - ErlNifPid *waiting_consumer; - int dequeue_ret = dequeue(q->pop_queue, &waiting_consumer); - if (dequeue_ret) { - ErlNifEnv *msg_env = enif_alloc_env(); - ERL_NIF_TERM copy = enif_make_copy(msg_env, argv[1]); - ERL_NIF_TERM tuple = enif_make_tuple2(msg_env, mk_atom(env, "pop"), copy); - - if (enif_send(env, waiting_consumer, msg_env, tuple)) { - enif_free_env(msg_env); - return mk_atom(env, "ok"); - } else { - return mk_error(env, "notify_failed"); - } - } - - - - /* Increment head and attempt to claim the slot by marking it as - busy. This ensures no other thread will attempt to modify this - slot. 
If we cannot lock it, another thread must have */ - - uint64_t head = __sync_add_and_fetch(&q->head, 1); - size_t size = q->queue_size; - - while (1) { - uint64_t index = SLOT_INDEX(head, size); - uint64_t ret = __sync_val_compare_and_swap(&q->slots_states[index], - STATE_EMPTY, - STATE_WRITE); - - switch (ret) { - - case STATE_EMPTY: - head = __sync_add_and_fetch(&q->head, 1); - - case STATE_WRITE: - /* We acquired the write lock, go ahead with the write. */ - break; - - case STATE_FULL: - /* We have caught up with the tail and the buffer is - full. Block the producer until a consumer reads the - item. */ - return mk_error(env, "full_not_implemented"); - } - } - - /* If head catches up with tail, the queue is full. Add to - overflow instead */ - - - /* Copy term to slot-specific temporary process env. */ - ERL_NIF_TERM copy = enif_make_copy(q->slots_envs[SLOT_INDEX(head, size)], argv[1]); - q->slots_terms[SLOT_INDEX(head, size)] = copy; - - __sync_synchronize(); /* Or compiler memory barrier? */ - - - /* TODO: Do we need to collect garbage? */ - - - /* Mark the slot ready to be consumed */ - if (__sync_bool_compare_and_swap(&q->slots_states[SLOT_INDEX(head, size)], - STATE_WRITE, - STATE_FULL)) { - return mk_atom(env, "ok"); - } else { - return mk_error(env, "could_not_update_slots_after_insert"); - } - -} - - - -static ERL_NIF_TERM -queue_async_pop(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) -{ - /* Load queue */ - - uint32_t queue_id = 0; - - if (!enif_get_uint(env, argv[0], &queue_id)) - return mk_error(env, "badarg"); - - if (queue_id > 8) - return mk_error(env, "badarg"); - - cq_t *q = QUEUES[queue_id]; - if (q == NULL) - return mk_error(env, "bad_queue_id"); - - if (q->id != queue_id) - return mk_error(env, "not_identical_queue_id"); - - uint64_t qsize = q->queue_size; - uint64_t tail = q->tail; - uint64_t num_busy = 0; - - /* Walk the buffer starting the tail position until we are either - able to consume a term or find an empty slot. */ - while (1) { - uint64_t index = SLOT_INDEX(tail, qsize); - uint64_t ret = __sync_val_compare_and_swap(&q->slots_states[index], - STATE_FULL, - STATE_READ); - - if (ret == STATE_READ) { - /* We were able to mark the term as read in progress. We - now have an exclusive lock. */ - break; - - } else if (ret == STATE_WRITE) { - /* We found an item with a write in progress. If that - thread progresses, it will eventually mark the slot as - full. We can spin until that happens. - - This can take an arbitrary amount of time and multiple - reading threads will compete for the same slot. - - Instead we add the caller to the queue of blocking - consumers. When the next producer comes it will "help" - this thread by calling enif_send on the current - in-progress term *and* handle it's own terms. If - there's no new push to the queue, this will block - forever. */ - return mk_atom(env, "write_in_progress_not_implemented"); - - } else if (ret == STATE_EMPTY) { - /* We found an empty item. Queue must be empty. Add - calling Erlang consumer process to queue of waiting - processes. When the next producer comes along, it first - checks the waiting consumers and calls enif_send - instead of writing to the slots. */ - - ErlNifPid *pid = enif_alloc(sizeof(ErlNifPid)); - pid = enif_self(env, pid); - enqueue(q->pop_queue, pid); - - return mk_atom(env, "wait_for_msg"); - - } else { - tail = __sync_add_and_fetch(&q->tail, 1); - } - } - - - /* Copy term into calling process env. The NIF env can now be - gargbage collected. 
*/ - ERL_NIF_TERM copy = enif_make_copy(env, q->slots_terms[SLOT_INDEX(tail, qsize)]); - - - /* Mark the slot as free. Note: We don't increment the tail - position here, as another thread also walking the buffer might - have incremented it multiple times */ - q->slots_terms[SLOT_INDEX(tail, qsize)] = 0; - if (__sync_bool_compare_and_swap(&q->slots_states[SLOT_INDEX(tail, qsize)], - STATE_READ, - STATE_EMPTY)) { - return enif_make_tuple2(env, mk_atom(env, "ok"), copy); - } else { - return mk_error(env, "could_not_update_slots_after_pop"); - } -} - - -static ERL_NIF_TERM -queue_debug(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) -{ - - uint32_t queue_id = 0; - - if (!enif_get_uint(env, argv[0], &queue_id)) - return mk_error(env, "badarg"); - - if (queue_id > 8) - return mk_error(env, "badarg"); - - cq_t *q = QUEUES[queue_id]; - if (q == NULL) - return mk_error(env, "bad_queue_id"); - - - - ERL_NIF_TERM *slots_states = enif_alloc(sizeof(ERL_NIF_TERM) * q->queue_size); - ERL_NIF_TERM *slots_terms = enif_alloc(sizeof(ERL_NIF_TERM) * q->queue_size); - for (int i = 0; i < q->queue_size; i++) { - slots_states[i] = enif_make_int(env, q->slots_states[i * CACHE_LINE_SIZE]); - - if (q->slots_terms[i * CACHE_LINE_SIZE] == 0) { - slots_terms[i] = mk_atom(env, "null"); - } else { - slots_terms[i] = enif_make_copy(env, q->slots_terms[i * CACHE_LINE_SIZE]); - } - } - return enif_make_tuple4(env, - enif_make_uint64(env, q->tail), - enif_make_uint64(env, q->head), - enif_make_list_from_array(env, slots_states, q->queue_size), - enif_make_list_from_array(env, slots_terms, q->queue_size)); -} - -static ERL_NIF_TERM -queue_debug_poppers(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) -{ - uint32_t queue_id = 0; - - if (!enif_get_uint(env, argv[0], &queue_id)) - return mk_error(env, "badarg"); - - if (queue_id > 8) - return mk_error(env, "badarg"); - - cq_t *q = QUEUES[queue_id]; - if (q == NULL) - return mk_error(env, "bad_queue_id"); - - - uint64_t pop_queue_size = 0; - cq_node_t *node = q->pop_queue->head; - if (node->value == NULL) { - node = node->next; - node = Q_PTR(node); - } - - while (node != NULL) { - pop_queue_size++; - node = node->next; - node = Q_PTR(node); - } - - ERL_NIF_TERM *pop_queue_pids = enif_alloc(sizeof(ERL_NIF_TERM) * pop_queue_size); - - node = q->pop_queue->head; - node = Q_PTR(node); - if (node->value == NULL) { - node = node->next; - node = Q_PTR(node); - } - - uint64_t i = 0; - while (node != NULL) { - if (node->value == 0) { - pop_queue_pids[i] = mk_atom(env, "null"); - } - else { - pop_queue_pids[i] = enif_make_pid(env, node->value); - } - - i++; - node = node->next; - node = Q_PTR(node); - } - - ERL_NIF_TERM list = enif_make_list_from_array(env, pop_queue_pids, pop_queue_size); - enif_free(pop_queue_pids); - - return list; -} - - - -static ERL_NIF_TERM -print_bits(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) -{ - - uint64_t *p1 = malloc(8); - *p1 = 0; - - - for (int bit = 63; bit >= 0; bit--) { - uint64_t power = 1 << bit; - //uint64_t byte = *p1; - uint64_t byte = p1; - fprintf(stderr, "%d", (byte & power) >> bit); - } - fprintf(stderr, "\n"); - - //enif_free(p1); - - return mk_atom(env, "ok"); -} - -void free_resource(ErlNifEnv* env, void* arg) -{ - //cq_t *cq = (cq_t *) arg; - - fprintf(stderr, "free_resource\n"); -} - - -cq_queue_t * new_queue() -{ - cq_queue_t *queue = enif_alloc(sizeof(cq_queue_t)); - cq_node_t *node = enif_alloc(sizeof(cq_node_t)); - node->next = NULL; - //node->env = NULL; - node->value = NULL; - queue->head = node; - queue->tail = 
node; - - return queue; -} - - - -void enqueue(cq_queue_t *queue, ErlNifPid *pid) -{ - cq_node_t *node = enif_alloc(sizeof(cq_node_t)); - //node->env = enif_alloc_env(); - //node->term = enif_make_copy(node->env, term); - node->value = pid; - node->next = NULL; - fprintf(stderr, "node %lu\n", node); - - cq_node_t *tail = NULL; - uint64_t tail_count = 0; - while (1) { - tail = queue->tail; - cq_node_t *tail_ptr = Q_PTR(tail); - tail_count = Q_COUNT(tail); - - cq_node_t *next = tail->next; - cq_node_t *next_ptr = Q_PTR(next); - uint64_t next_count = Q_COUNT(next); - - if (tail == queue->tail) { - fprintf(stderr, "tail == queue->tail\n"); - if (next_ptr == NULL) { - fprintf(stderr, "next_ptr == NULL\n"); - if (__sync_bool_compare_and_swap(&tail_ptr->next, - next, - Q_SET_COUNT(node, next_count+1))) - fprintf(stderr, "CAS(tail_ptr->next, next, (node, next_count+1)) -> true\n"); - break; - } else { - __sync_bool_compare_and_swap(&queue->tail, - tail, - Q_SET_COUNT(next_ptr, next_count+1)); - fprintf(stderr, "CAS(queue->tail, tail, (next_ptr, next_count+1))\n"); - } - } - } - - cq_node_t *node_with_count = Q_SET_COUNT(node, tail_count+1); - int ret = __sync_bool_compare_and_swap(&queue->tail, - tail, - node_with_count); - fprintf(stderr, "CAS(queue->tail, tail, %lu) -> %d\n", node_with_count, ret); -} - - -int dequeue(cq_queue_t *queue, ErlNifPid **pid) -{ - fprintf(stderr, "dequeue\n"); - cq_node_t *head, *head_ptr, *tail, *tail_ptr, *next, *next_ptr; - - while (1) { - head = queue->head; - head_ptr = Q_PTR(head); - tail = queue->tail; - tail_ptr = Q_PTR(tail); - next = head->next; - next_ptr = Q_PTR(next); - fprintf(stderr, "head %lu, tail %lu, next %lu\n", head, tail, next); - - if (head == queue->head) { - if (head_ptr == tail_ptr) { - if (next_ptr == NULL) { - return 0; /* Queue is empty */ - } - fprintf(stderr, "CAS(queue->tail, tail, (next_ptr, tail+1))\n"); - __sync_bool_compare_and_swap(&queue->tail, - tail, - Q_SET_COUNT(next_ptr, Q_COUNT(tail)+1)); - } else { - fprintf(stderr, "next->value %lu\n", next_ptr->value); - *pid = next_ptr->value; - fprintf(stderr, "CAS(queue->head, head, (next_ptr, head+1))\n"); - if (__sync_bool_compare_and_swap(&queue->head, - head, - Q_SET_COUNT(next_ptr, Q_COUNT(head)+1))) - break; - } - } - } - // free pid - //enif_free(Q_PTR(head)); - return 1; -} - - - - -int load(ErlNifEnv* env, void** priv_data, ERL_NIF_TERM load_info) { - /* Initialize global array mapping id to cq_t ptr */ - QUEUES = (cq_t **) calloc(8, sizeof(cq_t **)); - if (QUEUES == NULL) - return -1; - - - ErlNifResourceFlags flags = (ErlNifResourceFlags)(ERL_NIF_RT_CREATE | ERL_NIF_RT_TAKEOVER); - CQ_RESOURCE = enif_open_resource_type(env, "cq", "cq", - &free_resource, flags, NULL); - - if (CQ_RESOURCE == NULL) - return -1; - - return 0; -} - - -static ErlNifFunc nif_funcs[] = { - {"new" , 3, queue_new}, - {"free" , 1, queue_free}, - {"push" , 2, queue_push}, - {"async_pop", 1, queue_async_pop}, - {"debug" , 1, queue_debug}, - {"debug_poppers", 1, queue_debug_poppers}, - {"print_bits", 0, print_bits} -}; - -ERL_NIF_INIT(cq, nif_funcs, load, NULL, NULL, NULL); diff --git a/c_src/cq1/cq_nif.h b/c_src/cq1/cq_nif.h deleted file mode 100644 index 75f8891..0000000 --- a/c_src/cq1/cq_nif.h +++ /dev/null @@ -1,71 +0,0 @@ -#include -#include "erl_nif.h" - - -#define CACHE_LINE_SIZE 64 - -#define SLOT_INDEX(__index, __size) __index & (__size - 1) - -#define Q_MASK 3L -#define Q_PTR(__ptr) (cq_node_t *) (((uint64_t)__ptr) & (~Q_MASK)) -#define Q_COUNT(__ptr) ((uint64_t) __ptr & Q_MASK) -#define 
Q_SET_COUNT(__ptr, __val) (cq_node_t *) ((uint64_t) __ptr | (__val & Q_MASK)) - - -#define STATE_EMPTY 0 -#define STATE_WRITE 1 -#define STATE_READ 2 -#define STATE_FULL 3 - - -ErlNifResourceType* CQ_RESOURCE; - -typedef struct cq_node cq_node_t; - -struct cq_node { - ErlNifEnv *env; - //ERL_NIF_TERM term; - ErlNifPid *value; - cq_node_t *next; -}; - - - -typedef struct cq_queue { - cq_node_t *head; - cq_node_t *tail; -} cq_queue_t; - - -// TODO: Add padding between the fields -typedef struct cq { - uint32_t id; - uint64_t queue_size; - uint64_t overflow_size; - uint64_t head; - uint64_t tail; - - uint8_t *slots_states; - ERL_NIF_TERM *slots_terms; - ErlNifEnv **slots_envs; - - cq_queue_t *push_queue; - cq_queue_t *pop_queue; - - uint8_t *overflow_states; - ERL_NIF_TERM *overflow_terms; - ErlNifEnv **overflow_envs; - -} cq_t; - -cq_t **QUEUES = NULL; /* Initialized on nif load */ - - -ERL_NIF_TERM mk_atom(ErlNifEnv* env, const char* atom); -ERL_NIF_TERM mk_error(ErlNifEnv* env, const char* msg); -int load(ErlNifEnv* env, void** priv_data, ERL_NIF_TERM load_info); -void free_resource(ErlNifEnv*, void*); - - -cq_queue_t* new_queue(void); -void enqueue(cq_queue_t *q, ErlNifPid *pid); diff --git a/c_src/cq1/rebar.config b/c_src/cq1/rebar.config deleted file mode 100644 index 6fd2f2c..0000000 --- a/c_src/cq1/rebar.config +++ /dev/null @@ -1,26 +0,0 @@ -{port_specs, [ - {"../../priv/cq1.so", [ - "*.c", - "*.cc" - ]} -]}. - -%% {port_env, [ -%% {"(linux|solaris|freebsd|netbsd|openbsd|dragonfly|darwin|gnu)", -%% "CFLAGS", "$CFLAGS -Ic_src/ -g -Wall -flto -Werror -O3"}, -%% {"(linux|solaris|freebsd|netbsd|openbsd|dragonfly|darwin|gnu)", -%% "CXXFLAGS", "$CXXFLAGS -Ic_src/ -g -Wall -flto -Werror -O3"}, -%% -%% {"(linux|solaris|freebsd|netbsd|openbsd|dragonfly|darwin|gnu)", -%% "LDFLAGS", "$LDFLAGS -flto -lstdc++"}, -%% -%% %% OS X Leopard flags for 64-bit -%% {"darwin9.*-64$", "CXXFLAGS", "-m64"}, -%% {"darwin9.*-64$", "LDFLAGS", "-arch x86_64"}, -%% -%% %% OS X Snow Leopard flags for 32-bit -%% {"darwin10.*-32$", "CXXFLAGS", "-m32"}, -%% {"darwin10.*-32$", "LDFLAGS", "-arch i386"}, -%% -%% {"win32", "CXXFLAGS", "$CXXFLAGS /O2 /DNDEBUG"} -%% ]}. 
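
The cq/cq1/cq2 NIFs deleted above implement a bounded multi-producer/multi-consumer ring buffer: each slot carries a state that moves EMPTY -> WRITE -> FULL on push and FULL -> READ -> EMPTY on pop, with every transition claimed by a compare-and-swap, plus a Michael-Scott style linked list of waiting consumer pids. The following is only a minimal sketch of that slot protocol, with illustrative names (Ring, ring_push, ring_pop), C++11 std::atomic in place of the GCC __sync builtins, and none of the original's retry, overflow or waiting-consumer handling:

// Sketch of the four-state slot protocol from the deleted cq_nif.c.
// Ring/ring_push/ring_pop and the fixed SIZE are illustrative names only;
// the original also retries on contention and has an overflow area and a
// queue of blocked consumers, all omitted here.
#include <atomic>
#include <cstdint>
#include <cstdio>

enum SlotState { SLOT_EMPTY, SLOT_WRITE, SLOT_READ, SLOT_FULL };

constexpr std::size_t SIZE = 8;                       // must be a power of two
inline std::size_t slot_index(std::uint64_t i) { return i & (SIZE - 1); }

struct Ring {
    std::atomic<int> state[SIZE];
    long term[SIZE];                                  // stands in for the copied ERL_NIF_TERM
    std::atomic<std::uint64_t> head;                  // next slot to produce into
    std::atomic<std::uint64_t> tail;                  // next slot to consume from
    Ring() : term{}, head(0), tail(0) {
        for (auto& s : state) s.store(SLOT_EMPTY);
    }
};

// Claim the head slot (EMPTY -> WRITE), publish the value, mark it FULL.
bool ring_push(Ring& q, long v) {
    std::uint64_t h = q.head.fetch_add(1);
    int expected = SLOT_EMPTY;
    if (!q.state[slot_index(h)].compare_exchange_strong(expected, SLOT_WRITE))
        return false;                                 // slot busy: treated as "queue full" here
    q.term[slot_index(h)] = v;
    q.state[slot_index(h)].store(SLOT_FULL);
    return true;
}

// Claim the tail slot (FULL -> READ), take the value, mark it EMPTY again.
bool ring_pop(Ring& q, long& out) {
    std::uint64_t t = q.tail.fetch_add(1);
    int expected = SLOT_FULL;
    if (!q.state[slot_index(t)].compare_exchange_strong(expected, SLOT_READ))
        return false;                                 // nothing ready at this slot
    out = q.term[slot_index(t)];
    q.state[slot_index(t)].store(SLOT_EMPTY);
    return true;
}

int main() {
    Ring q;
    for (long i = 1; i <= 4; ++i) ring_push(q, i);
    long v;
    while (ring_pop(q, v)) std::printf("popped %ld\n", v);
}

A failed compare-and-swap in this sketch simply reports failure, whereas the deleted code spins, parks the calling pid on pop_queue, or is meant to fall back to the overflow area; the sketch is only intended to make the four-state slot machine easier to read.
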
diff --git a/c_src/cq2/cq_nif.c b/c_src/cq2/cq_nif.c deleted file mode 100644 index 2f26a20..0000000 --- a/c_src/cq2/cq_nif.c +++ /dev/null @@ -1,564 +0,0 @@ -#include -#include - -#include "erl_nif.h" -#include "cq_nif.h" - - -/* #ifndef ERL_NIF_DIRTY_SCHEDULER_SUPPORT -# error Requires dirty schedulers -#endif */ - - - - - -ERL_NIF_TERM -mk_atom(ErlNifEnv* env, const char* atom) -{ - ERL_NIF_TERM ret; - - if(!enif_make_existing_atom(env, atom, &ret, ERL_NIF_LATIN1)) - return enif_make_atom(env, atom); - - return ret; -} - -ERL_NIF_TERM -mk_error(ErlNifEnv* env, const char* mesg) -{ - return enif_make_tuple2(env, mk_atom(env, "error"), mk_atom(env, mesg)); -} - - -static ERL_NIF_TERM -queue_new(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) -{ - cq_t *q = enif_alloc_resource(CQ_RESOURCE, sizeof(cq_t)); - if (q == NULL) - return mk_error(env, "priv_alloc_error"); - - ERL_NIF_TERM ret = enif_make_resource(env, q); - /* enif_release_resource(ret); */ - - uint32_t queue_id = 0; - uint32_t queue_size = 0; - uint32_t overflow_size = 0; - - if (!enif_get_uint(env, argv[0], &queue_id) || - !enif_get_uint(env, argv[1], &queue_size) || - !enif_get_uint(env, argv[2], &overflow_size)) - return mk_error(env, "badarg"); - - if (queue_id > 8) - return mk_error(env, "bad_queue_id"); - - /* TODO: Check that queue_size is power of 2 */ - - if (QUEUES[queue_id] != NULL) - return mk_error(env, "queue_id_already_exists"); - - q->id = queue_id; - q->queue_size = queue_size; - q->overflow_size = overflow_size; - q->tail = 0; - q->head = 0; - q->slots_states = calloc(q->queue_size, CACHE_LINE_SIZE); - q->slots_terms = calloc(q->queue_size, CACHE_LINE_SIZE); - q->slots_envs = calloc(q->queue_size, CACHE_LINE_SIZE); - q->overflow_terms = calloc(q->overflow_size, CACHE_LINE_SIZE); - q->overflow_envs = calloc(q->queue_size, CACHE_LINE_SIZE); - - q->push_queue = new_queue(); - q->pop_queue = new_queue(); - - /* TODO: Check calloc return */ - - - for (int i = 0; i < q->queue_size; i++) { - ErlNifEnv *slot_env = enif_alloc_env(); - - q->slots_envs[i*CACHE_LINE_SIZE] = slot_env; - //q->overflow_envs[i*CACHE_LINE_SIZE] = (ErlNifEnv *) enif_alloc_env(); - } - - QUEUES[q->id] = q; - - return enif_make_tuple2(env, mk_atom(env, "ok"), ret); -} - - -static ERL_NIF_TERM -queue_free(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) -{ - uint32_t queue_id = 0; - - if (!enif_get_uint(env, argv[0], &queue_id)) - return mk_error(env, "badarg"); - - if (queue_id > 8) - return mk_error(env, "badarg"); - - cq_t *q = QUEUES[queue_id]; - if (q == NULL) - return mk_error(env, "bad_queue_id"); - - - /* TODO: Free all the things! */ - QUEUES[queue_id] = NULL; - - return enif_make_atom(env, "ok"); - -} - -/* Push to the head of the queue. */ -static ERL_NIF_TERM -queue_push(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) -{ - uint32_t queue_id = 0; - - if (!enif_get_uint(env, argv[0], &queue_id)) - return mk_error(env, "badarg"); - - if (queue_id > 8) - return mk_error(env, "badarg"); - - /* Load the queue */ - cq_t *q = QUEUES[queue_id]; - if (q == NULL) - return mk_error(env, "bad_queue_id"); - - if (q->id != queue_id) - return mk_error(env, "not_identical_queue_id"); - - - for (int i = 0; i < q->queue_size; i++) { - fprintf(stderr, "queue slot %d, index %d, state %d\n", - i, i*CACHE_LINE_SIZE, q->slots_states[i*CACHE_LINE_SIZE]); - } - - /* If there's consumers waiting, the queue must be empty and we - should directly pick a consumer to notify. 
*/ - - ErlNifPid *waiting_consumer; - int dequeue_ret = dequeue(q->pop_queue, &waiting_consumer); - if (dequeue_ret) { - ErlNifEnv *msg_env = enif_alloc_env(); - ERL_NIF_TERM copy = enif_make_copy(msg_env, argv[1]); - ERL_NIF_TERM tuple = enif_make_tuple2(msg_env, mk_atom(env, "pop"), copy); - - if (enif_send(env, waiting_consumer, msg_env, tuple)) { - enif_free_env(msg_env); - return mk_atom(env, "ok"); - } else { - return mk_error(env, "notify_failed"); - } - } - - - - /* Increment head and attempt to claim the slot by marking it as - busy. This ensures no other thread will attempt to modify this - slot. If we cannot lock it, another thread must have */ - - uint64_t head = __sync_add_and_fetch(&q->head, 1); - size_t size = q->queue_size; - - while (1) { - uint64_t index = SLOT_INDEX(head, size); - uint64_t ret = __sync_val_compare_and_swap(&q->slots_states[index], - STATE_EMPTY, - STATE_WRITE); - - switch (ret) { - - case STATE_EMPTY: - head = __sync_add_and_fetch(&q->head, 1); - - case STATE_WRITE: - /* We acquired the write lock, go ahead with the write. */ - break; - - case STATE_FULL: - /* We have caught up with the tail and the buffer is - full. Block the producer until a consumer reads the - item. */ - return mk_error(env, "full_not_implemented"); - } - } - - /* If head catches up with tail, the queue is full. Add to - overflow instead */ - - - /* Copy term to slot-specific temporary process env. */ - ERL_NIF_TERM copy = enif_make_copy(q->slots_envs[SLOT_INDEX(head, size)], argv[1]); - q->slots_terms[SLOT_INDEX(head, size)] = copy; - - __sync_synchronize(); /* Or compiler memory barrier? */ - - - /* TODO: Do we need to collect garbage? */ - - - /* Mark the slot ready to be consumed */ - if (__sync_bool_compare_and_swap(&q->slots_states[SLOT_INDEX(head, size)], - STATE_WRITE, - STATE_FULL)) { - return mk_atom(env, "ok"); - } else { - return mk_error(env, "could_not_update_slots_after_insert"); - } - -} - - - -static ERL_NIF_TERM -queue_async_pop(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) -{ - /* Load queue */ - - uint32_t queue_id = 0; - - if (!enif_get_uint(env, argv[0], &queue_id)) - return mk_error(env, "badarg"); - - if (queue_id > 8) - return mk_error(env, "badarg"); - - cq_t *q = QUEUES[queue_id]; - if (q == NULL) - return mk_error(env, "bad_queue_id"); - - if (q->id != queue_id) - return mk_error(env, "not_identical_queue_id"); - - uint64_t qsize = q->queue_size; - uint64_t tail = q->tail; - uint64_t num_busy = 0; - - /* Walk the buffer starting the tail position until we are either - able to consume a term or find an empty slot. */ - while (1) { - uint64_t index = SLOT_INDEX(tail, qsize); - uint64_t ret = __sync_val_compare_and_swap(&q->slots_states[index], - STATE_FULL, - STATE_READ); - - if (ret == STATE_READ) { - /* We were able to mark the term as read in progress. We - now have an exclusive lock. */ - break; - - } else if (ret == STATE_WRITE) { - /* We found an item with a write in progress. If that - thread progresses, it will eventually mark the slot as - full. We can spin until that happens. - - This can take an arbitrary amount of time and multiple - reading threads will compete for the same slot. - - Instead we add the caller to the queue of blocking - consumers. When the next producer comes it will "help" - this thread by calling enif_send on the current - in-progress term *and* handle it's own terms. If - there's no new push to the queue, this will block - forever. 
*/ - return mk_atom(env, "write_in_progress_not_implemented"); - - } else if (ret == STATE_EMPTY) { - /* We found an empty item. Queue must be empty. Add - calling Erlang consumer process to queue of waiting - processes. When the next producer comes along, it first - checks the waiting consumers and calls enif_send - instead of writing to the slots. */ - - ErlNifPid *pid = enif_alloc(sizeof(ErlNifPid)); - pid = enif_self(env, pid); - enqueue(q->pop_queue, pid); - - return mk_atom(env, "wait_for_msg"); - - } else { - tail = __sync_add_and_fetch(&q->tail, 1); - } - } - - - /* Copy term into calling process env. The NIF env can now be - gargbage collected. */ - ERL_NIF_TERM copy = enif_make_copy(env, q->slots_terms[SLOT_INDEX(tail, qsize)]); - - - /* Mark the slot as free. Note: We don't increment the tail - position here, as another thread also walking the buffer might - have incremented it multiple times */ - q->slots_terms[SLOT_INDEX(tail, qsize)] = 0; - if (__sync_bool_compare_and_swap(&q->slots_states[SLOT_INDEX(tail, qsize)], - STATE_READ, - STATE_EMPTY)) { - return enif_make_tuple2(env, mk_atom(env, "ok"), copy); - } else { - return mk_error(env, "could_not_update_slots_after_pop"); - } -} - - -static ERL_NIF_TERM -queue_debug(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) -{ - - uint32_t queue_id = 0; - - if (!enif_get_uint(env, argv[0], &queue_id)) - return mk_error(env, "badarg"); - - if (queue_id > 8) - return mk_error(env, "badarg"); - - cq_t *q = QUEUES[queue_id]; - if (q == NULL) - return mk_error(env, "bad_queue_id"); - - - - ERL_NIF_TERM *slots_states = enif_alloc(sizeof(ERL_NIF_TERM) * q->queue_size); - ERL_NIF_TERM *slots_terms = enif_alloc(sizeof(ERL_NIF_TERM) * q->queue_size); - for (int i = 0; i < q->queue_size; i++) { - slots_states[i] = enif_make_int(env, q->slots_states[i * CACHE_LINE_SIZE]); - - if (q->slots_terms[i * CACHE_LINE_SIZE] == 0) { - slots_terms[i] = mk_atom(env, "null"); - } else { - slots_terms[i] = enif_make_copy(env, q->slots_terms[i * CACHE_LINE_SIZE]); - } - } - return enif_make_tuple4(env, - enif_make_uint64(env, q->tail), - enif_make_uint64(env, q->head), - enif_make_list_from_array(env, slots_states, q->queue_size), - enif_make_list_from_array(env, slots_terms, q->queue_size)); -} - -static ERL_NIF_TERM -queue_debug_poppers(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) -{ - uint32_t queue_id = 0; - - if (!enif_get_uint(env, argv[0], &queue_id)) - return mk_error(env, "badarg"); - - if (queue_id > 8) - return mk_error(env, "badarg"); - - cq_t *q = QUEUES[queue_id]; - if (q == NULL) - return mk_error(env, "bad_queue_id"); - - - uint64_t pop_queue_size = 0; - cq_node_t *node = q->pop_queue->head; - if (node->value == NULL) { - node = node->next; - node = Q_PTR(node); - } - - while (node != NULL) { - pop_queue_size++; - node = node->next; - node = Q_PTR(node); - } - - ERL_NIF_TERM *pop_queue_pids = enif_alloc(sizeof(ERL_NIF_TERM) * pop_queue_size); - - node = q->pop_queue->head; - node = Q_PTR(node); - if (node->value == NULL) { - node = node->next; - node = Q_PTR(node); - } - - uint64_t i = 0; - while (node != NULL) { - if (node->value == 0) { - pop_queue_pids[i] = mk_atom(env, "null"); - } - else { - pop_queue_pids[i] = enif_make_pid(env, node->value); - } - - i++; - node = node->next; - node = Q_PTR(node); - } - - ERL_NIF_TERM list = enif_make_list_from_array(env, pop_queue_pids, pop_queue_size); - enif_free(pop_queue_pids); - - return list; -} - - - -static ERL_NIF_TERM -print_bits(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) 
-{ - - uint64_t *p1 = malloc(8); - *p1 = 0; - - - for (int bit = 63; bit >= 0; bit--) { - uint64_t power = 1 << bit; - //uint64_t byte = *p1; - uint64_t byte = p1; - fprintf(stderr, "%d", (byte & power) >> bit); - } - fprintf(stderr, "\n"); - - //enif_free(p1); - - return mk_atom(env, "ok"); -} - -void free_resource(ErlNifEnv* env, void* arg) -{ - //cq_t *cq = (cq_t *) arg; - - fprintf(stderr, "free_resource\n"); -} - - -cq_queue_t * new_queue() -{ - cq_queue_t *queue = enif_alloc(sizeof(cq_queue_t)); - cq_node_t *node = enif_alloc(sizeof(cq_node_t)); - node->next = NULL; - //node->env = NULL; - node->value = NULL; - queue->head = node; - queue->tail = node; - - return queue; -} - - - -void enqueue(cq_queue_t *queue, ErlNifPid *pid) -{ - cq_node_t *node = enif_alloc(sizeof(cq_node_t)); - //node->env = enif_alloc_env(); - //node->term = enif_make_copy(node->env, term); - node->value = pid; - node->next = NULL; - fprintf(stderr, "node %lu\n", node); - - cq_node_t *tail = NULL; - uint64_t tail_count = 0; - while (1) { - tail = queue->tail; - cq_node_t *tail_ptr = Q_PTR(tail); - tail_count = Q_COUNT(tail); - - cq_node_t *next = tail->next; - cq_node_t *next_ptr = Q_PTR(next); - uint64_t next_count = Q_COUNT(next); - - if (tail == queue->tail) { - fprintf(stderr, "tail == queue->tail\n"); - if (next_ptr == NULL) { - fprintf(stderr, "next_ptr == NULL\n"); - if (__sync_bool_compare_and_swap(&tail_ptr->next, - next, - Q_SET_COUNT(node, next_count+1))) - fprintf(stderr, "CAS(tail_ptr->next, next, (node, next_count+1)) -> true\n"); - break; - } else { - __sync_bool_compare_and_swap(&queue->tail, - tail, - Q_SET_COUNT(next_ptr, next_count+1)); - fprintf(stderr, "CAS(queue->tail, tail, (next_ptr, next_count+1))\n"); - } - } - } - - cq_node_t *node_with_count = Q_SET_COUNT(node, tail_count+1); - int ret = __sync_bool_compare_and_swap(&queue->tail, - tail, - node_with_count); - fprintf(stderr, "CAS(queue->tail, tail, %lu) -> %d\n", node_with_count, ret); -} - - -int dequeue(cq_queue_t *queue, ErlNifPid **pid) -{ - fprintf(stderr, "dequeue\n"); - cq_node_t *head, *head_ptr, *tail, *tail_ptr, *next, *next_ptr; - - while (1) { - head = queue->head; - head_ptr = Q_PTR(head); - tail = queue->tail; - tail_ptr = Q_PTR(tail); - next = head->next; - next_ptr = Q_PTR(next); - fprintf(stderr, "head %lu, tail %lu, next %lu\n", head, tail, next); - - if (head == queue->head) { - if (head_ptr == tail_ptr) { - if (next_ptr == NULL) { - return 0; /* Queue is empty */ - } - fprintf(stderr, "CAS(queue->tail, tail, (next_ptr, tail+1))\n"); - __sync_bool_compare_and_swap(&queue->tail, - tail, - Q_SET_COUNT(next_ptr, Q_COUNT(tail)+1)); - } else { - fprintf(stderr, "next->value %lu\n", next_ptr->value); - *pid = next_ptr->value; - fprintf(stderr, "CAS(queue->head, head, (next_ptr, head+1))\n"); - if (__sync_bool_compare_and_swap(&queue->head, - head, - Q_SET_COUNT(next_ptr, Q_COUNT(head)+1))) - break; - } - } - } - // free pid - //enif_free(Q_PTR(head)); - return 1; -} - - - - -int load(ErlNifEnv* env, void** priv_data, ERL_NIF_TERM load_info) { - /* Initialize global array mapping id to cq_t ptr */ - QUEUES = (cq_t **) calloc(8, sizeof(cq_t **)); - if (QUEUES == NULL) - return -1; - - - ErlNifResourceFlags flags = (ErlNifResourceFlags)(ERL_NIF_RT_CREATE | ERL_NIF_RT_TAKEOVER); - CQ_RESOURCE = enif_open_resource_type(env, "cq", "cq", - &free_resource, flags, NULL); - - if (CQ_RESOURCE == NULL) - return -1; - - return 0; -} - - -static ErlNifFunc nif_funcs[] = { - {"new" , 3, queue_new}, - {"free" , 1, queue_free}, - {"push" 
, 2, queue_push}, - {"async_pop", 1, queue_async_pop}, - {"debug" , 1, queue_debug}, - {"debug_poppers", 1, queue_debug_poppers}, - {"print_bits", 0, print_bits} -}; - -ERL_NIF_INIT(cq, nif_funcs, load, NULL, NULL, NULL); diff --git a/c_src/cq2/cq_nif.h b/c_src/cq2/cq_nif.h deleted file mode 100644 index 75f8891..0000000 --- a/c_src/cq2/cq_nif.h +++ /dev/null @@ -1,71 +0,0 @@ -#include -#include "erl_nif.h" - - -#define CACHE_LINE_SIZE 64 - -#define SLOT_INDEX(__index, __size) __index & (__size - 1) - -#define Q_MASK 3L -#define Q_PTR(__ptr) (cq_node_t *) (((uint64_t)__ptr) & (~Q_MASK)) -#define Q_COUNT(__ptr) ((uint64_t) __ptr & Q_MASK) -#define Q_SET_COUNT(__ptr, __val) (cq_node_t *) ((uint64_t) __ptr | (__val & Q_MASK)) - - -#define STATE_EMPTY 0 -#define STATE_WRITE 1 -#define STATE_READ 2 -#define STATE_FULL 3 - - -ErlNifResourceType* CQ_RESOURCE; - -typedef struct cq_node cq_node_t; - -struct cq_node { - ErlNifEnv *env; - //ERL_NIF_TERM term; - ErlNifPid *value; - cq_node_t *next; -}; - - - -typedef struct cq_queue { - cq_node_t *head; - cq_node_t *tail; -} cq_queue_t; - - -// TODO: Add padding between the fields -typedef struct cq { - uint32_t id; - uint64_t queue_size; - uint64_t overflow_size; - uint64_t head; - uint64_t tail; - - uint8_t *slots_states; - ERL_NIF_TERM *slots_terms; - ErlNifEnv **slots_envs; - - cq_queue_t *push_queue; - cq_queue_t *pop_queue; - - uint8_t *overflow_states; - ERL_NIF_TERM *overflow_terms; - ErlNifEnv **overflow_envs; - -} cq_t; - -cq_t **QUEUES = NULL; /* Initialized on nif load */ - - -ERL_NIF_TERM mk_atom(ErlNifEnv* env, const char* atom); -ERL_NIF_TERM mk_error(ErlNifEnv* env, const char* msg); -int load(ErlNifEnv* env, void** priv_data, ERL_NIF_TERM load_info); -void free_resource(ErlNifEnv*, void*); - - -cq_queue_t* new_queue(void); -void enqueue(cq_queue_t *q, ErlNifPid *pid); diff --git a/c_src/enlfq/Makefile b/c_src/enlfq/Makefile new file mode 100644 index 0000000..d85d904 --- /dev/null +++ b/c_src/enlfq/Makefile @@ -0,0 +1,80 @@ + +PROJECT = enlfq +CXXFLAGS = -std=c++11 -O2 -Wextra -Werror -Wno-missing-field-initializers -fno-rtti -fno-exceptions +LDLIBS = -lstdc++ + + +# Based on c_src.mk from erlang.mk by Loic Hoguin + +CURDIR := $(shell pwd) +BASEDIR := $(abspath $(CURDIR)/..) + +PROJECT ?= $(notdir $(BASEDIR)) +PROJECT := $(strip $(PROJECT)) + +ERTS_INCLUDE_DIR ?= $(shell erl -noshell -s init stop -eval "io:format(\"~ts/erts-~ts/include/\", [code:root_dir(), erlang:system_info(version)]).") +ERL_INTERFACE_INCLUDE_DIR ?= $(shell erl -noshell -s init stop -eval "io:format(\"~ts\", [code:lib_dir(erl_interface, include)]).") +ERL_INTERFACE_LIB_DIR ?= $(shell erl -noshell -s init stop -eval "io:format(\"~ts\", [code:lib_dir(erl_interface, lib)]).") + +C_SRC_DIR = $(CURDIR) +C_SRC_OUTPUT ?= $(CURDIR)/../priv/$(PROJECT).so + +# System type and C compiler/flags. 
+ +UNAME_SYS := $(shell uname -s) +ifeq ($(UNAME_SYS), Darwin) + CC ?= cc + CFLAGS ?= -O3 -std=c99 -arch x86_64 -finline-functions -Wall -Wmissing-prototypes + CXXFLAGS ?= -O3 -arch x86_64 -finline-functions -Wall + LDFLAGS ?= -arch x86_64 -flat_namespace -undefined suppress +else ifeq ($(UNAME_SYS), FreeBSD) + CC ?= cc + CFLAGS ?= -O3 -std=c99 -finline-functions -Wall -Wmissing-prototypes + CXXFLAGS ?= -O3 -finline-functions -Wall +else ifeq ($(UNAME_SYS), Linux) + CC ?= gcc + CFLAGS ?= -O3 -std=c99 -finline-functions -Wall -Wmissing-prototypes + CXXFLAGS ?= -O3 -finline-functions -Wall +endif + +CFLAGS += -fPIC -I $(ERTS_INCLUDE_DIR) -I $(ERL_INTERFACE_INCLUDE_DIR) +CXXFLAGS += -fPIC -I $(ERTS_INCLUDE_DIR) -I $(ERL_INTERFACE_INCLUDE_DIR) + +LDLIBS += -L $(ERL_INTERFACE_LIB_DIR) -lerl_interface -lei +LDFLAGS += -shared + +# Verbosity. + +c_verbose_0 = @echo " C " $(?F); +c_verbose = $(c_verbose_$(V)) + +cpp_verbose_0 = @echo " CPP " $(?F); +cpp_verbose = $(cpp_verbose_$(V)) + +link_verbose_0 = @echo " LD " $(@F); +link_verbose = $(link_verbose_$(V)) + +SOURCES := $(shell find $(C_SRC_DIR) -type f \( -name "*.c" -o -name "*.C" -o -name "*.cc" -o -name "*.cpp" \)) +OBJECTS = $(addsuffix .o, $(basename $(SOURCES))) + +COMPILE_C = $(c_verbose) $(CC) $(CFLAGS) $(CPPFLAGS) -c +COMPILE_CPP = $(cpp_verbose) $(CXX) $(CXXFLAGS) $(CPPFLAGS) -c + +$(C_SRC_OUTPUT): $(OBJECTS) + @mkdir -p $(BASEDIR)/priv/ + $(link_verbose) $(CC) $(OBJECTS) $(LDFLAGS) $(LDLIBS) -o $(C_SRC_OUTPUT) + +%.o: %.c + $(COMPILE_C) $(OUTPUT_OPTION) $< + +%.o: %.cc + $(COMPILE_CPP) $(OUTPUT_OPTION) $< + +%.o: %.C + $(COMPILE_CPP) $(OUTPUT_OPTION) $< + +%.o: %.cpp + $(COMPILE_CPP) $(OUTPUT_OPTION) $< + +clean: + @rm -f $(C_SRC_OUTPUT) $(OBJECTS) diff --git a/c_src/enlfq/concurrentqueue.h b/c_src/enlfq/concurrentqueue.h new file mode 100644 index 0000000..68f66df --- /dev/null +++ b/c_src/enlfq/concurrentqueue.h @@ -0,0 +1,3637 @@ +// Provides a C++11 implementation of a multi-producer, multi-consumer lock-free queue. +// An overview, including benchmark results, is provided here: +// http://moodycamel.com/blog/2014/a-fast-general-purpose-lock-free-queue-for-c++ +// The full design is also described in excruciating detail at: +// http://moodycamel.com/blog/2014/detailed-design-of-a-lock-free-queue + +// Simplified BSD license: +// Copyright (c) 2013-2016, Cameron Desrochers. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, this list of +// conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, this list of +// conditions and the following disclaimer in the documentation and/or other materials +// provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL +// THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +// OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR +// TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +#pragma once + +#if defined(__GNUC__) +// Disable -Wconversion warnings (spuriously triggered when Traits::size_t and +// Traits::index_t are set to < 32 bits, causing integer promotion, causing warnings +// upon assigning any computed values) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wconversion" + +#ifdef MCDBGQ_USE_RELACY +#pragma GCC diagnostic ignored "-Wint-to-pointer-cast" +#endif +#endif + +#if defined(__APPLE__) +#include "TargetConditionals.h" +#endif + +#ifdef MCDBGQ_USE_RELACY +#include "relacy/relacy_std.hpp" +#include "relacy_shims.h" +// We only use malloc/free anyway, and the delete macro messes up `= delete` method declarations. +// We'll override the default trait malloc ourselves without a macro. +#undef new +#undef delete +#undef malloc +#undef free +#else +#include // Requires C++11. Sorry VS2010. +#include +#endif +#include // for max_align_t +#include +#include +#include +#include +#include +#include +#include // for CHAR_BIT +#include +#include // partly for __WINPTHREADS_VERSION if on MinGW-w64 w/ POSIX threading + +// Platform-specific definitions of a numeric thread ID type and an invalid value +namespace moodycamel { namespace details { + template struct thread_id_converter { + typedef thread_id_t thread_id_numeric_size_t; + typedef thread_id_t thread_id_hash_t; + static thread_id_hash_t prehash(thread_id_t const& x) { return x; } + }; +} } +#if defined(MCDBGQ_USE_RELACY) +namespace moodycamel { namespace details { + typedef std::uint32_t thread_id_t; + static const thread_id_t invalid_thread_id = 0xFFFFFFFFU; + static const thread_id_t invalid_thread_id2 = 0xFFFFFFFEU; + static inline thread_id_t thread_id() { return rl::thread_index(); } +} } +#elif defined(_WIN32) || defined(__WINDOWS__) || defined(__WIN32__) +// No sense pulling in windows.h in a header, we'll manually declare the function +// we use and rely on backwards-compatibility for this not to break +extern "C" __declspec(dllimport) unsigned long __stdcall GetCurrentThreadId(void); +namespace moodycamel { namespace details { + static_assert(sizeof(unsigned long) == sizeof(std::uint32_t), "Expected size of unsigned long to be 32 bits on Windows"); + typedef std::uint32_t thread_id_t; + static const thread_id_t invalid_thread_id = 0; // See http://blogs.msdn.com/b/oldnewthing/archive/2004/02/23/78395.aspx + static const thread_id_t invalid_thread_id2 = 0xFFFFFFFFU; // Not technically guaranteed to be invalid, but is never used in practice. Note that all Win32 thread IDs are presently multiples of 4. 
+ static inline thread_id_t thread_id() { return static_cast(::GetCurrentThreadId()); } +} } +#elif defined(__arm__) || defined(_M_ARM) || defined(__aarch64__) || (defined(__APPLE__) && TARGET_OS_IPHONE) +namespace moodycamel { namespace details { + static_assert(sizeof(std::thread::id) == 4 || sizeof(std::thread::id) == 8, "std::thread::id is expected to be either 4 or 8 bytes"); + + typedef std::thread::id thread_id_t; + static const thread_id_t invalid_thread_id; // Default ctor creates invalid ID + + // Note we don't define a invalid_thread_id2 since std::thread::id doesn't have one; it's + // only used if MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED is defined anyway, which it won't + // be. + static inline thread_id_t thread_id() { return std::this_thread::get_id(); } + + template struct thread_id_size { }; + template<> struct thread_id_size<4> { typedef std::uint32_t numeric_t; }; + template<> struct thread_id_size<8> { typedef std::uint64_t numeric_t; }; + + template<> struct thread_id_converter { + typedef thread_id_size::numeric_t thread_id_numeric_size_t; +#ifndef __APPLE__ + typedef std::size_t thread_id_hash_t; +#else + typedef thread_id_numeric_size_t thread_id_hash_t; +#endif + + static thread_id_hash_t prehash(thread_id_t const& x) + { +#ifndef __APPLE__ + return std::hash()(x); +#else + return *reinterpret_cast(&x); +#endif + } + }; +} } +#else +// Use a nice trick from this answer: http://stackoverflow.com/a/8438730/21475 +// In order to get a numeric thread ID in a platform-independent way, we use a thread-local +// static variable's address as a thread identifier :-) +#if defined(__GNUC__) || defined(__INTEL_COMPILER) +#define MOODYCAMEL_THREADLOCAL __thread +#elif defined(_MSC_VER) +#define MOODYCAMEL_THREADLOCAL __declspec(thread) +#else +// Assume C++11 compliant compiler +#define MOODYCAMEL_THREADLOCAL thread_local +#endif +namespace moodycamel { namespace details { + typedef std::uintptr_t thread_id_t; + static const thread_id_t invalid_thread_id = 0; // Address can't be nullptr + static const thread_id_t invalid_thread_id2 = 1; // Member accesses off a null pointer are also generally invalid. Plus it's not aligned. + static inline thread_id_t thread_id() { static MOODYCAMEL_THREADLOCAL int x; return reinterpret_cast(&x); } +} } +#endif + +// Exceptions +#ifndef MOODYCAMEL_EXCEPTIONS_ENABLED +#if (defined(_MSC_VER) && defined(_CPPUNWIND)) || (defined(__GNUC__) && defined(__EXCEPTIONS)) || (!defined(_MSC_VER) && !defined(__GNUC__)) +#define MOODYCAMEL_EXCEPTIONS_ENABLED +#endif +#endif +#ifdef MOODYCAMEL_EXCEPTIONS_ENABLED +#define MOODYCAMEL_TRY try +#define MOODYCAMEL_CATCH(...) catch(__VA_ARGS__) +#define MOODYCAMEL_RETHROW throw +#define MOODYCAMEL_THROW(expr) throw (expr) +#else +#define MOODYCAMEL_TRY if (true) +#define MOODYCAMEL_CATCH(...) else if (false) +#define MOODYCAMEL_RETHROW +#define MOODYCAMEL_THROW(expr) +#endif + +#ifndef MOODYCAMEL_NOEXCEPT +#if !defined(MOODYCAMEL_EXCEPTIONS_ENABLED) +#define MOODYCAMEL_NOEXCEPT +#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) true +#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) true +#elif defined(_MSC_VER) && defined(_NOEXCEPT) && _MSC_VER < 1800 +// VS2012's std::is_nothrow_[move_]constructible is broken and returns true when it shouldn't :-( +// We have to assume *all* non-trivial constructors may throw on VS2012! +#define MOODYCAMEL_NOEXCEPT _NOEXCEPT +#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) (std::is_rvalue_reference::value && std::is_move_constructible::value ? 
std::is_trivially_move_constructible::value : std::is_trivially_copy_constructible::value) +#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) ((std::is_rvalue_reference::value && std::is_move_assignable::value ? std::is_trivially_move_assignable::value || std::is_nothrow_move_assignable::value : std::is_trivially_copy_assignable::value || std::is_nothrow_copy_assignable::value) && MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr)) +#elif defined(_MSC_VER) && defined(_NOEXCEPT) && _MSC_VER < 1900 +#define MOODYCAMEL_NOEXCEPT _NOEXCEPT +#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) (std::is_rvalue_reference::value && std::is_move_constructible::value ? std::is_trivially_move_constructible::value || std::is_nothrow_move_constructible::value : std::is_trivially_copy_constructible::value || std::is_nothrow_copy_constructible::value) +#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) ((std::is_rvalue_reference::value && std::is_move_assignable::value ? std::is_trivially_move_assignable::value || std::is_nothrow_move_assignable::value : std::is_trivially_copy_assignable::value || std::is_nothrow_copy_assignable::value) && MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr)) +#else +#define MOODYCAMEL_NOEXCEPT noexcept +#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) noexcept(expr) +#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) noexcept(expr) +#endif +#endif + +#ifndef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED +#ifdef MCDBGQ_USE_RELACY +#define MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED +#else +// VS2013 doesn't support `thread_local`, and MinGW-w64 w/ POSIX threading has a crippling bug: http://sourceforge.net/p/mingw-w64/bugs/445 +// g++ <=4.7 doesn't support thread_local either. +// Finally, iOS/ARM doesn't have support for it either, and g++/ARM allows it to compile but it's unconfirmed to actually work +#if (!defined(_MSC_VER) || _MSC_VER >= 1900) && (!defined(__MINGW32__) && !defined(__MINGW64__) || !defined(__WINPTHREADS_VERSION)) && (!defined(__GNUC__) || __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)) && (!defined(__APPLE__) || !TARGET_OS_IPHONE) && !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) +// Assume `thread_local` is fully supported in all other C++11 compilers/platforms +//#define MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED // always disabled for now since several users report having problems with it on +#endif +#endif +#endif + +// VS2012 doesn't support deleted functions. +// In this case, we declare the function normally but don't define it. A link error will be generated if the function is called. +#ifndef MOODYCAMEL_DELETE_FUNCTION +#if defined(_MSC_VER) && _MSC_VER < 1800 +#define MOODYCAMEL_DELETE_FUNCTION +#else +#define MOODYCAMEL_DELETE_FUNCTION = delete +#endif +#endif + +// Compiler-specific likely/unlikely hints +namespace moodycamel { namespace details { +#if defined(__GNUC__) + static inline bool (likely)(bool x) { return __builtin_expect((x), true); } + static inline bool (unlikely)(bool x) { return __builtin_expect((x), false); } +#else + static inline bool (likely)(bool x) { return x; } + static inline bool (unlikely)(bool x) { return x; } +#endif +} } + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG +#include "internal/concurrentqueue_internal_debug.h" +#endif + +namespace moodycamel { +namespace details { + template + struct const_numeric_max { + static_assert(std::is_integral::value, "const_numeric_max can only be used with integers"); + static const T value = std::numeric_limits::is_signed + ? 
(static_cast(1) << (sizeof(T) * CHAR_BIT - 1)) - static_cast(1) + : static_cast(-1); + }; + +#if defined(__GLIBCXX__) + typedef ::max_align_t std_max_align_t; // libstdc++ forgot to add it to std:: for a while +#else + typedef std::max_align_t std_max_align_t; // Others (e.g. MSVC) insist it can *only* be accessed via std:: +#endif + + // Some platforms have incorrectly set max_align_t to a type with <8 bytes alignment even while supporting + // 8-byte aligned scalar values (*cough* 32-bit iOS). Work around this with our own union. See issue #64. + typedef union { + std_max_align_t x; + long long y; + void* z; + } max_align_t; +} + +// Default traits for the ConcurrentQueue. To change some of the +// traits without re-implementing all of them, inherit from this +// struct and shadow the declarations you wish to be different; +// since the traits are used as a template type parameter, the +// shadowed declarations will be used where defined, and the defaults +// otherwise. +struct ConcurrentQueueDefaultTraits +{ + // General-purpose size type. std::size_t is strongly recommended. + typedef std::size_t size_t; + + // The type used for the enqueue and dequeue indices. Must be at least as + // large as size_t. Should be significantly larger than the number of elements + // you expect to hold at once, especially if you have a high turnover rate; + // for example, on 32-bit x86, if you expect to have over a hundred million + // elements or pump several million elements through your queue in a very + // short space of time, using a 32-bit type *may* trigger a race condition. + // A 64-bit int type is recommended in that case, and in practice will + // prevent a race condition no matter the usage of the queue. Note that + // whether the queue is lock-free with a 64-int type depends on the whether + // std::atomic is lock-free, which is platform-specific. + typedef std::size_t index_t; + + // Internally, all elements are enqueued and dequeued from multi-element + // blocks; this is the smallest controllable unit. If you expect few elements + // but many producers, a smaller block size should be favoured. For few producers + // and/or many elements, a larger block size is preferred. A sane default + // is provided. Must be a power of 2. + static const size_t BLOCK_SIZE = 32; + + // For explicit producers (i.e. when using a producer token), the block is + // checked for being empty by iterating through a list of flags, one per element. + // For large block sizes, this is too inefficient, and switching to an atomic + // counter-based approach is faster. The switch is made for block sizes strictly + // larger than this threshold. + static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = 32; + + // How many full blocks can be expected for a single explicit producer? This should + // reflect that number's maximum for optimal performance. Must be a power of 2. + static const size_t EXPLICIT_INITIAL_INDEX_SIZE = 32; + + // How many full blocks can be expected for a single implicit producer? This should + // reflect that number's maximum for optimal performance. Must be a power of 2. + static const size_t IMPLICIT_INITIAL_INDEX_SIZE = 32; + + // The initial size of the hash table mapping thread IDs to implicit producers. + // Note that the hash is resized every time it becomes half full. + // Must be a power of two, and either 0 or at least 1. If 0, implicit production + // (using the enqueue methods without an explicit producer token) is disabled. 
+ static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = 32; + + // Controls the number of items that an explicit consumer (i.e. one with a token) + // must consume before it causes all consumers to rotate and move on to the next + // internal queue. + static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE = 256; + + // The maximum number of elements (inclusive) that can be enqueued to a sub-queue. + // Enqueue operations that would cause this limit to be surpassed will fail. Note + // that this limit is enforced at the block level (for performance reasons), i.e. + // it's rounded up to the nearest block size. + static const size_t MAX_SUBQUEUE_SIZE = details::const_numeric_max::value; + + +#ifndef MCDBGQ_USE_RELACY + // Memory allocation can be customized if needed. + // malloc should return nullptr on failure, and handle alignment like std::malloc. +#if defined(malloc) || defined(free) + // Gah, this is 2015, stop defining macros that break standard code already! + // Work around malloc/free being special macros: + static inline void* WORKAROUND_malloc(size_t size) { return malloc(size); } + static inline void WORKAROUND_free(void* ptr) { return free(ptr); } + static inline void* (malloc)(size_t size) { return WORKAROUND_malloc(size); } + static inline void (free)(void* ptr) { return WORKAROUND_free(ptr); } +#else + static inline void* malloc(size_t size) { return std::malloc(size); } + static inline void free(void* ptr) { return std::free(ptr); } +#endif +#else + // Debug versions when running under the Relacy race detector (ignore + // these in user code) + static inline void* malloc(size_t size) { return rl::rl_malloc(size, $); } + static inline void free(void* ptr) { return rl::rl_free(ptr, $); } +#endif +}; + + +// When producing or consuming many elements, the most efficient way is to: +// 1) Use one of the bulk-operation methods of the queue with a token +// 2) Failing that, use the bulk-operation methods without a token +// 3) Failing that, create a token and use that with the single-item methods +// 4) Failing that, use the single-parameter methods of the queue +// Having said that, don't create tokens willy-nilly -- ideally there should be +// a maximum of one token per thread (of each kind). 
+struct ProducerToken; +struct ConsumerToken; + +template class ConcurrentQueue; +template class BlockingConcurrentQueue; +class ConcurrentQueueTests; + + +namespace details +{ + struct ConcurrentQueueProducerTypelessBase + { + ConcurrentQueueProducerTypelessBase* next; + std::atomic inactive; + ProducerToken* token; + + ConcurrentQueueProducerTypelessBase() + : next(nullptr), inactive(false), token(nullptr) + { + } + }; + + template struct _hash_32_or_64 { + static inline std::uint32_t hash(std::uint32_t h) + { + // MurmurHash3 finalizer -- see https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp + // Since the thread ID is already unique, all we really want to do is propagate that + // uniqueness evenly across all the bits, so that we can use a subset of the bits while + // reducing collisions significantly + h ^= h >> 16; + h *= 0x85ebca6b; + h ^= h >> 13; + h *= 0xc2b2ae35; + return h ^ (h >> 16); + } + }; + template<> struct _hash_32_or_64<1> { + static inline std::uint64_t hash(std::uint64_t h) + { + h ^= h >> 33; + h *= 0xff51afd7ed558ccd; + h ^= h >> 33; + h *= 0xc4ceb9fe1a85ec53; + return h ^ (h >> 33); + } + }; + template struct hash_32_or_64 : public _hash_32_or_64<(size > 4)> { }; + + static inline size_t hash_thread_id(thread_id_t id) + { + static_assert(sizeof(thread_id_t) <= 8, "Expected a platform where thread IDs are at most 64-bit values"); + return static_cast(hash_32_or_64::thread_id_hash_t)>::hash( + thread_id_converter::prehash(id))); + } + + template + static inline bool circular_less_than(T a, T b) + { +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable: 4554) +#endif + static_assert(std::is_integral::value && !std::numeric_limits::is_signed, "circular_less_than is intended to be used only with unsigned integer types"); + return static_cast(a - b) > static_cast(static_cast(1) << static_cast(sizeof(T) * CHAR_BIT - 1)); +#ifdef _MSC_VER +#pragma warning(pop) +#endif + } + + template + static inline char* align_for(char* ptr) + { + const std::size_t alignment = std::alignment_of::value; + return ptr + (alignment - (reinterpret_cast(ptr) % alignment)) % alignment; + } + + template + static inline T ceil_to_pow_2(T x) + { + static_assert(std::is_integral::value && !std::numeric_limits::is_signed, "ceil_to_pow_2 is intended to be used only with unsigned integer types"); + + // Adapted from http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 + --x; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + for (std::size_t i = 1; i < sizeof(T); i <<= 1) { + x |= x >> (i << 3); + } + ++x; + return x; + } + + template + static inline void swap_relaxed(std::atomic& left, std::atomic& right) + { + T temp = std::move(left.load(std::memory_order_relaxed)); + left.store(std::move(right.load(std::memory_order_relaxed)), std::memory_order_relaxed); + right.store(std::move(temp), std::memory_order_relaxed); + } + + template + static inline T const& nomove(T const& x) + { + return x; + } + + template + struct nomove_if + { + template + static inline T const& eval(T const& x) + { + return x; + } + }; + + template<> + struct nomove_if + { + template + static inline auto eval(U&& x) + -> decltype(std::forward(x)) + { + return std::forward(x); + } + }; + + template + static inline auto deref_noexcept(It& it) MOODYCAMEL_NOEXCEPT -> decltype(*it) + { + return *it; + } + +#if defined(__clang__) || !defined(__GNUC__) || __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8) + template struct is_trivially_destructible : std::is_trivially_destructible { }; 
+#else + template struct is_trivially_destructible : std::has_trivial_destructor { }; +#endif + +#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED +#ifdef MCDBGQ_USE_RELACY + typedef RelacyThreadExitListener ThreadExitListener; + typedef RelacyThreadExitNotifier ThreadExitNotifier; +#else + struct ThreadExitListener + { + typedef void (*callback_t)(void*); + callback_t callback; + void* userData; + + ThreadExitListener* next; // reserved for use by the ThreadExitNotifier + }; + + + class ThreadExitNotifier + { + public: + static void subscribe(ThreadExitListener* listener) + { + auto& tlsInst = instance(); + listener->next = tlsInst.tail; + tlsInst.tail = listener; + } + + static void unsubscribe(ThreadExitListener* listener) + { + auto& tlsInst = instance(); + ThreadExitListener** prev = &tlsInst.tail; + for (auto ptr = tlsInst.tail; ptr != nullptr; ptr = ptr->next) { + if (ptr == listener) { + *prev = ptr->next; + break; + } + prev = &ptr->next; + } + } + + private: + ThreadExitNotifier() : tail(nullptr) { } + ThreadExitNotifier(ThreadExitNotifier const&) MOODYCAMEL_DELETE_FUNCTION; + ThreadExitNotifier& operator=(ThreadExitNotifier const&) MOODYCAMEL_DELETE_FUNCTION; + + ~ThreadExitNotifier() + { + // This thread is about to exit, let everyone know! + assert(this == &instance() && "If this assert fails, you likely have a buggy compiler! Change the preprocessor conditions such that MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED is no longer defined."); + for (auto ptr = tail; ptr != nullptr; ptr = ptr->next) { + ptr->callback(ptr->userData); + } + } + + // Thread-local + static inline ThreadExitNotifier& instance() + { + static thread_local ThreadExitNotifier notifier; + return notifier; + } + + private: + ThreadExitListener* tail; + }; +#endif +#endif + + template struct static_is_lock_free_num { enum { value = 0 }; }; + template<> struct static_is_lock_free_num { enum { value = ATOMIC_CHAR_LOCK_FREE }; }; + template<> struct static_is_lock_free_num { enum { value = ATOMIC_SHORT_LOCK_FREE }; }; + template<> struct static_is_lock_free_num { enum { value = ATOMIC_INT_LOCK_FREE }; }; + template<> struct static_is_lock_free_num { enum { value = ATOMIC_LONG_LOCK_FREE }; }; + template<> struct static_is_lock_free_num { enum { value = ATOMIC_LLONG_LOCK_FREE }; }; + template struct static_is_lock_free : static_is_lock_free_num::type> { }; + template<> struct static_is_lock_free { enum { value = ATOMIC_BOOL_LOCK_FREE }; }; + template struct static_is_lock_free { enum { value = ATOMIC_POINTER_LOCK_FREE }; }; +} + + +struct ProducerToken +{ + template + explicit ProducerToken(ConcurrentQueue& queue); + + template + explicit ProducerToken(BlockingConcurrentQueue& queue); + + ProducerToken(ProducerToken&& other) MOODYCAMEL_NOEXCEPT + : producer(other.producer) + { + other.producer = nullptr; + if (producer != nullptr) { + producer->token = this; + } + } + + inline ProducerToken& operator=(ProducerToken&& other) MOODYCAMEL_NOEXCEPT + { + swap(other); + return *this; + } + + void swap(ProducerToken& other) MOODYCAMEL_NOEXCEPT + { + std::swap(producer, other.producer); + if (producer != nullptr) { + producer->token = this; + } + if (other.producer != nullptr) { + other.producer->token = &other; + } + } + + // A token is always valid unless: + // 1) Memory allocation failed during construction + // 2) It was moved via the move constructor + // (Note: assignment does a swap, leaving both potentially valid) + // 3) The associated queue was destroyed + // Note that if valid() returns true, that only indicates + // 
that the token is valid for use with a specific queue, + // but not which one; that's up to the user to track. + inline bool valid() const { return producer != nullptr; } + + ~ProducerToken() + { + if (producer != nullptr) { + producer->token = nullptr; + producer->inactive.store(true, std::memory_order_release); + } + } + + // Disable copying and assignment + ProducerToken(ProducerToken const&) MOODYCAMEL_DELETE_FUNCTION; + ProducerToken& operator=(ProducerToken const&) MOODYCAMEL_DELETE_FUNCTION; + +private: + template friend class ConcurrentQueue; + friend class ConcurrentQueueTests; + +protected: + details::ConcurrentQueueProducerTypelessBase* producer; +}; + + +struct ConsumerToken +{ + template + explicit ConsumerToken(ConcurrentQueue& q); + + template + explicit ConsumerToken(BlockingConcurrentQueue& q); + + ConsumerToken(ConsumerToken&& other) MOODYCAMEL_NOEXCEPT + : initialOffset(other.initialOffset), lastKnownGlobalOffset(other.lastKnownGlobalOffset), itemsConsumedFromCurrent(other.itemsConsumedFromCurrent), currentProducer(other.currentProducer), desiredProducer(other.desiredProducer) + { + } + + inline ConsumerToken& operator=(ConsumerToken&& other) MOODYCAMEL_NOEXCEPT + { + swap(other); + return *this; + } + + void swap(ConsumerToken& other) MOODYCAMEL_NOEXCEPT + { + std::swap(initialOffset, other.initialOffset); + std::swap(lastKnownGlobalOffset, other.lastKnownGlobalOffset); + std::swap(itemsConsumedFromCurrent, other.itemsConsumedFromCurrent); + std::swap(currentProducer, other.currentProducer); + std::swap(desiredProducer, other.desiredProducer); + } + + // Disable copying and assignment + ConsumerToken(ConsumerToken const&) MOODYCAMEL_DELETE_FUNCTION; + ConsumerToken& operator=(ConsumerToken const&) MOODYCAMEL_DELETE_FUNCTION; + +private: + template friend class ConcurrentQueue; + friend class ConcurrentQueueTests; + +private: // but shared with ConcurrentQueue + std::uint32_t initialOffset; + std::uint32_t lastKnownGlobalOffset; + std::uint32_t itemsConsumedFromCurrent; + details::ConcurrentQueueProducerTypelessBase* currentProducer; + details::ConcurrentQueueProducerTypelessBase* desiredProducer; +}; + +// Need to forward-declare this swap because it's in a namespace. 
+// See http://stackoverflow.com/questions/4492062/why-does-a-c-friend-class-need-a-forward-declaration-only-in-other-namespaces +template +inline void swap(typename ConcurrentQueue::ImplicitProducerKVP& a, typename ConcurrentQueue::ImplicitProducerKVP& b) MOODYCAMEL_NOEXCEPT; + + +template +class ConcurrentQueue +{ +public: + typedef ::moodycamel::ProducerToken producer_token_t; + typedef ::moodycamel::ConsumerToken consumer_token_t; + + typedef typename Traits::index_t index_t; + typedef typename Traits::size_t size_t; + + static const size_t BLOCK_SIZE = static_cast(Traits::BLOCK_SIZE); + static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = static_cast(Traits::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD); + static const size_t EXPLICIT_INITIAL_INDEX_SIZE = static_cast(Traits::EXPLICIT_INITIAL_INDEX_SIZE); + static const size_t IMPLICIT_INITIAL_INDEX_SIZE = static_cast(Traits::IMPLICIT_INITIAL_INDEX_SIZE); + static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = static_cast(Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE); + static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE = static_cast(Traits::EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE); +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable: 4307) // + integral constant overflow (that's what the ternary expression is for!) +#pragma warning(disable: 4309) // static_cast: Truncation of constant value +#endif + static const size_t MAX_SUBQUEUE_SIZE = (details::const_numeric_max::value - static_cast(Traits::MAX_SUBQUEUE_SIZE) < BLOCK_SIZE) ? details::const_numeric_max::value : ((static_cast(Traits::MAX_SUBQUEUE_SIZE) + (BLOCK_SIZE - 1)) / BLOCK_SIZE * BLOCK_SIZE); +#ifdef _MSC_VER +#pragma warning(pop) +#endif + + static_assert(!std::numeric_limits::is_signed && std::is_integral::value, "Traits::size_t must be an unsigned integral type"); + static_assert(!std::numeric_limits::is_signed && std::is_integral::value, "Traits::index_t must be an unsigned integral type"); + static_assert(sizeof(index_t) >= sizeof(size_t), "Traits::index_t must be at least as wide as Traits::size_t"); + static_assert((BLOCK_SIZE > 1) && !(BLOCK_SIZE & (BLOCK_SIZE - 1)), "Traits::BLOCK_SIZE must be a power of 2 (and at least 2)"); + static_assert((EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD > 1) && !(EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD & (EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD - 1)), "Traits::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD must be a power of 2 (and greater than 1)"); + static_assert((EXPLICIT_INITIAL_INDEX_SIZE > 1) && !(EXPLICIT_INITIAL_INDEX_SIZE & (EXPLICIT_INITIAL_INDEX_SIZE - 1)), "Traits::EXPLICIT_INITIAL_INDEX_SIZE must be a power of 2 (and greater than 1)"); + static_assert((IMPLICIT_INITIAL_INDEX_SIZE > 1) && !(IMPLICIT_INITIAL_INDEX_SIZE & (IMPLICIT_INITIAL_INDEX_SIZE - 1)), "Traits::IMPLICIT_INITIAL_INDEX_SIZE must be a power of 2 (and greater than 1)"); + static_assert((INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) || !(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE & (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE - 1)), "Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE must be a power of 2"); + static_assert(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0 || INITIAL_IMPLICIT_PRODUCER_HASH_SIZE >= 1, "Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE must be at least 1 (or 0 to disable implicit enqueueing)"); + +public: + // Creates a queue with at least `capacity` element slots; note that the + // actual number of elements that can be inserted without additional memory + // allocation depends on the number of producers and the block size 
(e.g. if + // the block size is equal to `capacity`, only a single block will be allocated + // up-front, which means only a single producer will be able to enqueue elements + // without an extra allocation -- blocks aren't shared between producers). + // This method is not thread safe -- it is up to the user to ensure that the + // queue is fully constructed before it starts being used by other threads (this + // includes making the memory effects of construction visible, possibly with a + // memory barrier). + explicit ConcurrentQueue(size_t capacity = 6 * BLOCK_SIZE) + : producerListTail(nullptr), + producerCount(0), + initialBlockPoolIndex(0), + nextExplicitConsumerId(0), + globalExplicitConsumerOffset(0) + { + implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed); + populate_initial_implicit_producer_hash(); + populate_initial_block_list(capacity / BLOCK_SIZE + ((capacity & (BLOCK_SIZE - 1)) == 0 ? 0 : 1)); + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + // Track all the producers using a fully-resolved typed list for + // each kind; this makes it possible to debug them starting from + // the root queue object (otherwise wacky casts are needed that + // don't compile in the debugger's expression evaluator). + explicitProducers.store(nullptr, std::memory_order_relaxed); + implicitProducers.store(nullptr, std::memory_order_relaxed); +#endif + } + + // Computes the correct amount of pre-allocated blocks for you based + // on the minimum number of elements you want available at any given + // time, and the maximum concurrent number of each type of producer. + ConcurrentQueue(size_t minCapacity, size_t maxExplicitProducers, size_t maxImplicitProducers) + : producerListTail(nullptr), + producerCount(0), + initialBlockPoolIndex(0), + nextExplicitConsumerId(0), + globalExplicitConsumerOffset(0) + { + implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed); + populate_initial_implicit_producer_hash(); + size_t blocks = (((minCapacity + BLOCK_SIZE - 1) / BLOCK_SIZE) - 1) * (maxExplicitProducers + 1) + 2 * (maxExplicitProducers + maxImplicitProducers); + populate_initial_block_list(blocks); + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + explicitProducers.store(nullptr, std::memory_order_relaxed); + implicitProducers.store(nullptr, std::memory_order_relaxed); +#endif + } + + // Note: The queue should not be accessed concurrently while it's + // being deleted. It's up to the user to synchronize this. + // This method is not thread safe. 
+ ~ConcurrentQueue() + { + // Destroy producers + auto ptr = producerListTail.load(std::memory_order_relaxed); + while (ptr != nullptr) { + auto next = ptr->next_prod(); + if (ptr->token != nullptr) { + ptr->token->producer = nullptr; + } + destroy(ptr); + ptr = next; + } + + // Destroy implicit producer hash tables + if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE != 0) { + auto hash = implicitProducerHash.load(std::memory_order_relaxed); + while (hash != nullptr) { + auto prev = hash->prev; + if (prev != nullptr) { // The last hash is part of this object and was not allocated dynamically + for (size_t i = 0; i != hash->capacity; ++i) { + hash->entries[i].~ImplicitProducerKVP(); + } + hash->~ImplicitProducerHash(); + (Traits::free)(hash); + } + hash = prev; + } + } + + // Destroy global free list + auto block = freeList.head_unsafe(); + while (block != nullptr) { + auto next = block->freeListNext.load(std::memory_order_relaxed); + if (block->dynamicallyAllocated) { + destroy(block); + } + block = next; + } + + // Destroy initial free list + destroy_array(initialBlockPool, initialBlockPoolSize); + } + + // Disable copying and copy assignment + ConcurrentQueue(ConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION; + ConcurrentQueue& operator=(ConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION; + + // Moving is supported, but note that it is *not* a thread-safe operation. + // Nobody can use the queue while it's being moved, and the memory effects + // of that move must be propagated to other threads before they can use it. + // Note: When a queue is moved, its tokens are still valid but can only be + // used with the destination queue (i.e. semantically they are moved along + // with the queue itself). + ConcurrentQueue(ConcurrentQueue&& other) MOODYCAMEL_NOEXCEPT + : producerListTail(other.producerListTail.load(std::memory_order_relaxed)), + producerCount(other.producerCount.load(std::memory_order_relaxed)), + initialBlockPoolIndex(other.initialBlockPoolIndex.load(std::memory_order_relaxed)), + initialBlockPool(other.initialBlockPool), + initialBlockPoolSize(other.initialBlockPoolSize), + freeList(std::move(other.freeList)), + nextExplicitConsumerId(other.nextExplicitConsumerId.load(std::memory_order_relaxed)), + globalExplicitConsumerOffset(other.globalExplicitConsumerOffset.load(std::memory_order_relaxed)) + { + // Move the other one into this, and leave the other one as an empty queue + implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed); + populate_initial_implicit_producer_hash(); + swap_implicit_producer_hashes(other); + + other.producerListTail.store(nullptr, std::memory_order_relaxed); + other.producerCount.store(0, std::memory_order_relaxed); + other.nextExplicitConsumerId.store(0, std::memory_order_relaxed); + other.globalExplicitConsumerOffset.store(0, std::memory_order_relaxed); + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + explicitProducers.store(other.explicitProducers.load(std::memory_order_relaxed), std::memory_order_relaxed); + other.explicitProducers.store(nullptr, std::memory_order_relaxed); + implicitProducers.store(other.implicitProducers.load(std::memory_order_relaxed), std::memory_order_relaxed); + other.implicitProducers.store(nullptr, std::memory_order_relaxed); +#endif + + other.initialBlockPoolIndex.store(0, std::memory_order_relaxed); + other.initialBlockPoolSize = 0; + other.initialBlockPool = nullptr; + + reown_producers(); + } + + inline ConcurrentQueue& operator=(ConcurrentQueue&& other) MOODYCAMEL_NOEXCEPT + { + return swap_internal(other); + } + + // 
Swaps this queue's state with the other's. Not thread-safe. + // Swapping two queues does not invalidate their tokens, however + // the tokens that were created for one queue must be used with + // only the swapped queue (i.e. the tokens are tied to the + // queue's movable state, not the object itself). + inline void swap(ConcurrentQueue& other) MOODYCAMEL_NOEXCEPT + { + swap_internal(other); + } + +private: + ConcurrentQueue& swap_internal(ConcurrentQueue& other) + { + if (this == &other) { + return *this; + } + + details::swap_relaxed(producerListTail, other.producerListTail); + details::swap_relaxed(producerCount, other.producerCount); + details::swap_relaxed(initialBlockPoolIndex, other.initialBlockPoolIndex); + std::swap(initialBlockPool, other.initialBlockPool); + std::swap(initialBlockPoolSize, other.initialBlockPoolSize); + freeList.swap(other.freeList); + details::swap_relaxed(nextExplicitConsumerId, other.nextExplicitConsumerId); + details::swap_relaxed(globalExplicitConsumerOffset, other.globalExplicitConsumerOffset); + + swap_implicit_producer_hashes(other); + + reown_producers(); + other.reown_producers(); + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + details::swap_relaxed(explicitProducers, other.explicitProducers); + details::swap_relaxed(implicitProducers, other.implicitProducers); +#endif + + return *this; + } + +public: + // Enqueues a single item (by copying it). + // Allocates memory if required. Only fails if memory allocation fails (or implicit + // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0, + // or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Thread-safe. + inline bool enqueue(T const& item) + { + if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; + return inner_enqueue(item); + } + + // Enqueues a single item (by moving it, if possible). + // Allocates memory if required. Only fails if memory allocation fails (or implicit + // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0, + // or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Thread-safe. + inline bool enqueue(T&& item) + { + if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; + return inner_enqueue(std::move(item)); + } + + // Enqueues a single item (by copying it) using an explicit producer token. + // Allocates memory if required. Only fails if memory allocation fails (or + // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Thread-safe. + inline bool enqueue(producer_token_t const& token, T const& item) + { + return inner_enqueue(token, item); + } + + // Enqueues a single item (by moving it, if possible) using an explicit producer token. + // Allocates memory if required. Only fails if memory allocation fails (or + // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Thread-safe. + inline bool enqueue(producer_token_t const& token, T&& item) + { + return inner_enqueue(token, std::move(item)); + } + + // Enqueues several items. + // Allocates memory if required. Only fails if memory allocation fails (or + // implicit production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE + // is 0, or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Note: Use std::make_move_iterator if the elements should be moved instead of copied. + // Thread-safe. 
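+	// [Editor's note -- illustrative only.] For example, assuming a std::vector<T> named
+	// `batch` (hypothetical) whose elements may be moved from:
+	//     q.enqueue_bulk(std::make_move_iterator(batch.begin()), batch.size());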
+ template + bool enqueue_bulk(It itemFirst, size_t count) + { + if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; + return inner_enqueue_bulk(itemFirst, count); + } + + // Enqueues several items using an explicit producer token. + // Allocates memory if required. Only fails if memory allocation fails + // (or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Note: Use std::make_move_iterator if the elements should be moved + // instead of copied. + // Thread-safe. + template + bool enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count) + { + return inner_enqueue_bulk(token, itemFirst, count); + } + + // Enqueues a single item (by copying it). + // Does not allocate memory. Fails if not enough room to enqueue (or implicit + // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE + // is 0). + // Thread-safe. + inline bool try_enqueue(T const& item) + { + if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; + return inner_enqueue(item); + } + + // Enqueues a single item (by moving it, if possible). + // Does not allocate memory (except for one-time implicit producer). + // Fails if not enough room to enqueue (or implicit production is + // disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0). + // Thread-safe. + inline bool try_enqueue(T&& item) + { + if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; + return inner_enqueue(std::move(item)); + } + + // Enqueues a single item (by copying it) using an explicit producer token. + // Does not allocate memory. Fails if not enough room to enqueue. + // Thread-safe. + inline bool try_enqueue(producer_token_t const& token, T const& item) + { + return inner_enqueue(token, item); + } + + // Enqueues a single item (by moving it, if possible) using an explicit producer token. + // Does not allocate memory. Fails if not enough room to enqueue. + // Thread-safe. + inline bool try_enqueue(producer_token_t const& token, T&& item) + { + return inner_enqueue(token, std::move(item)); + } + + // Enqueues several items. + // Does not allocate memory (except for one-time implicit producer). + // Fails if not enough room to enqueue (or implicit production is + // disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0). + // Note: Use std::make_move_iterator if the elements should be moved + // instead of copied. + // Thread-safe. + template + bool try_enqueue_bulk(It itemFirst, size_t count) + { + if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; + return inner_enqueue_bulk(itemFirst, count); + } + + // Enqueues several items using an explicit producer token. + // Does not allocate memory. Fails if not enough room to enqueue. + // Note: Use std::make_move_iterator if the elements should be moved + // instead of copied. + // Thread-safe. + template + bool try_enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count) + { + return inner_enqueue_bulk(token, itemFirst, count); + } + + + + // Attempts to dequeue from the queue. + // Returns false if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. + template + bool try_dequeue(U& item) + { + // Instead of simply trying each producer in turn (which could cause needless contention on the first + // producer), we score them heuristically. 
+ size_t nonEmptyCount = 0; + ProducerBase* best = nullptr; + size_t bestSize = 0; + for (auto ptr = producerListTail.load(std::memory_order_acquire); nonEmptyCount < 3 && ptr != nullptr; ptr = ptr->next_prod()) { + auto size = ptr->size_approx(); + if (size > 0) { + if (size > bestSize) { + bestSize = size; + best = ptr; + } + ++nonEmptyCount; + } + } + + // If there was at least one non-empty queue but it appears empty at the time + // we try to dequeue from it, we need to make sure every queue's been tried + if (nonEmptyCount > 0) { + if ((details::likely)(best->dequeue(item))) { + return true; + } + for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) { + if (ptr != best && ptr->dequeue(item)) { + return true; + } + } + } + return false; + } + + // Attempts to dequeue from the queue. + // Returns false if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // This differs from the try_dequeue(item) method in that this one does + // not attempt to reduce contention by interleaving the order that producer + // streams are dequeued from. So, using this method can reduce overall throughput + // under contention, but will give more predictable results in single-threaded + // consumer scenarios. This is mostly only useful for internal unit tests. + // Never allocates. Thread-safe. + template + bool try_dequeue_non_interleaved(U& item) + { + for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) { + if (ptr->dequeue(item)) { + return true; + } + } + return false; + } + + // Attempts to dequeue from the queue using an explicit consumer token. + // Returns false if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. 
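+	// [Editor's note -- illustrative only.] For example, a consumer thread that owns a
+	// ConsumerToken `ctok` for a queue `q` (hypothetical names) can drain it with:
+	//     T item;  // T = the queue's element type
+	//     while (q.try_dequeue(ctok, item)) { /* process item */ }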
+ template + bool try_dequeue(consumer_token_t& token, U& item) + { + // The idea is roughly as follows: + // Every 256 items from one producer, make everyone rotate (increase the global offset) -> this means the highest efficiency consumer dictates the rotation speed of everyone else, more or less + // If you see that the global offset has changed, you must reset your consumption counter and move to your designated place + // If there's no items where you're supposed to be, keep moving until you find a producer with some items + // If the global offset has not changed but you've run out of items to consume, move over from your current position until you find an producer with something in it + + if (token.desiredProducer == nullptr || token.lastKnownGlobalOffset != globalExplicitConsumerOffset.load(std::memory_order_relaxed)) { + if (!update_current_producer_after_rotation(token)) { + return false; + } + } + + // If there was at least one non-empty queue but it appears empty at the time + // we try to dequeue from it, we need to make sure every queue's been tried + if (static_cast(token.currentProducer)->dequeue(item)) { + if (++token.itemsConsumedFromCurrent == EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE) { + globalExplicitConsumerOffset.fetch_add(1, std::memory_order_relaxed); + } + return true; + } + + auto tail = producerListTail.load(std::memory_order_acquire); + auto ptr = static_cast(token.currentProducer)->next_prod(); + if (ptr == nullptr) { + ptr = tail; + } + while (ptr != static_cast(token.currentProducer)) { + if (ptr->dequeue(item)) { + token.currentProducer = ptr; + token.itemsConsumedFromCurrent = 1; + return true; + } + ptr = ptr->next_prod(); + if (ptr == nullptr) { + ptr = tail; + } + } + return false; + } + + // Attempts to dequeue several elements from the queue. + // Returns the number of items actually dequeued. + // Returns 0 if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. + template + size_t try_dequeue_bulk(It itemFirst, size_t max) + { + size_t count = 0; + for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) { + count += ptr->dequeue_bulk(itemFirst, max - count); + if (count == max) { + break; + } + } + return count; + } + + // Attempts to dequeue several elements from the queue using an explicit consumer token. + // Returns the number of items actually dequeued. + // Returns 0 if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. 
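+	// [Editor's note -- illustrative only.] For example, consuming up to 64 items at a time
+	// into a local buffer (names hypothetical):
+	//     T buf[64];
+	//     size_t n = q.try_dequeue_bulk(ctok, buf, 64);
+	//     for (size_t i = 0; i != n; ++i) { /* process buf[i] */ }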
+ template + size_t try_dequeue_bulk(consumer_token_t& token, It itemFirst, size_t max) + { + if (token.desiredProducer == nullptr || token.lastKnownGlobalOffset != globalExplicitConsumerOffset.load(std::memory_order_relaxed)) { + if (!update_current_producer_after_rotation(token)) { + return 0; + } + } + + size_t count = static_cast(token.currentProducer)->dequeue_bulk(itemFirst, max); + if (count == max) { + if ((token.itemsConsumedFromCurrent += static_cast(max)) >= EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE) { + globalExplicitConsumerOffset.fetch_add(1, std::memory_order_relaxed); + } + return max; + } + token.itemsConsumedFromCurrent += static_cast(count); + max -= count; + + auto tail = producerListTail.load(std::memory_order_acquire); + auto ptr = static_cast(token.currentProducer)->next_prod(); + if (ptr == nullptr) { + ptr = tail; + } + while (ptr != static_cast(token.currentProducer)) { + auto dequeued = ptr->dequeue_bulk(itemFirst, max); + count += dequeued; + if (dequeued != 0) { + token.currentProducer = ptr; + token.itemsConsumedFromCurrent = static_cast(dequeued); + } + if (dequeued == max) { + break; + } + max -= dequeued; + ptr = ptr->next_prod(); + if (ptr == nullptr) { + ptr = tail; + } + } + return count; + } + + + + // Attempts to dequeue from a specific producer's inner queue. + // If you happen to know which producer you want to dequeue from, this + // is significantly faster than using the general-case try_dequeue methods. + // Returns false if the producer's queue appeared empty at the time it + // was checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. + template + inline bool try_dequeue_from_producer(producer_token_t const& producer, U& item) + { + return static_cast(producer.producer)->dequeue(item); + } + + // Attempts to dequeue several elements from a specific producer's inner queue. + // Returns the number of items actually dequeued. + // If you happen to know which producer you want to dequeue from, this + // is significantly faster than using the general-case try_dequeue methods. + // Returns 0 if the producer's queue appeared empty at the time it + // was checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. + template + inline size_t try_dequeue_bulk_from_producer(producer_token_t const& producer, It itemFirst, size_t max) + { + return static_cast(producer.producer)->dequeue_bulk(itemFirst, max); + } + + + // Returns an estimate of the total number of elements currently in the queue. This + // estimate is only accurate if the queue has completely stabilized before it is called + // (i.e. all enqueue and dequeue operations have completed and their memory effects are + // visible on the calling thread, and no further operations start while this method is + // being called). + // Thread-safe. + size_t size_approx() const + { + size_t size = 0; + for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) { + size += ptr->size_approx(); + } + return size; + } + + + // Returns true if the underlying atomic variables used by + // the queue are lock-free (they should be on most platforms). + // Thread-safe. 
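+	// [Editor's note -- illustrative only.] is_lock_free() is a static member, so it can be
+	// checked without constructing a queue, e.g. to choose a fallback at startup:
+	//     if (!moodycamel::ConcurrentQueue<int>::is_lock_free()) {
+	//         /* e.g. log a warning or fall back to a mutex-based queue */
+	//     }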
+ static bool is_lock_free() + { + return + details::static_is_lock_free::value == 2 && + details::static_is_lock_free::value == 2 && + details::static_is_lock_free::value == 2 && + details::static_is_lock_free::value == 2 && + details::static_is_lock_free::value == 2 && + details::static_is_lock_free::thread_id_numeric_size_t>::value == 2; + } + + +private: + friend struct ProducerToken; + friend struct ConsumerToken; + struct ExplicitProducer; + friend struct ExplicitProducer; + struct ImplicitProducer; + friend struct ImplicitProducer; + friend class ConcurrentQueueTests; + + enum AllocationMode { CanAlloc, CannotAlloc }; + + + /////////////////////////////// + // Queue methods + /////////////////////////////// + + template + inline bool inner_enqueue(producer_token_t const& token, U&& element) + { + return static_cast(token.producer)->ConcurrentQueue::ExplicitProducer::template enqueue(std::forward(element)); + } + + template + inline bool inner_enqueue(U&& element) + { + auto producer = get_or_add_implicit_producer(); + return producer == nullptr ? false : producer->ConcurrentQueue::ImplicitProducer::template enqueue(std::forward(element)); + } + + template + inline bool inner_enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count) + { + return static_cast(token.producer)->ConcurrentQueue::ExplicitProducer::template enqueue_bulk(itemFirst, count); + } + + template + inline bool inner_enqueue_bulk(It itemFirst, size_t count) + { + auto producer = get_or_add_implicit_producer(); + return producer == nullptr ? false : producer->ConcurrentQueue::ImplicitProducer::template enqueue_bulk(itemFirst, count); + } + + inline bool update_current_producer_after_rotation(consumer_token_t& token) + { + // Ah, there's been a rotation, figure out where we should be! + auto tail = producerListTail.load(std::memory_order_acquire); + if (token.desiredProducer == nullptr && tail == nullptr) { + return false; + } + auto prodCount = producerCount.load(std::memory_order_relaxed); + auto globalOffset = globalExplicitConsumerOffset.load(std::memory_order_relaxed); + if ((details::unlikely)(token.desiredProducer == nullptr)) { + // Aha, first time we're dequeueing anything. + // Figure out our local position + // Note: offset is from start, not end, but we're traversing from end -- subtract from count first + std::uint32_t offset = prodCount - 1 - (token.initialOffset % prodCount); + token.desiredProducer = tail; + for (std::uint32_t i = 0; i != offset; ++i) { + token.desiredProducer = static_cast(token.desiredProducer)->next_prod(); + if (token.desiredProducer == nullptr) { + token.desiredProducer = tail; + } + } + } + + std::uint32_t delta = globalOffset - token.lastKnownGlobalOffset; + if (delta >= prodCount) { + delta = delta % prodCount; + } + for (std::uint32_t i = 0; i != delta; ++i) { + token.desiredProducer = static_cast(token.desiredProducer)->next_prod(); + if (token.desiredProducer == nullptr) { + token.desiredProducer = tail; + } + } + + token.lastKnownGlobalOffset = globalOffset; + token.currentProducer = token.desiredProducer; + token.itemsConsumedFromCurrent = 0; + return true; + } + + + /////////////////////////// + // Free list + /////////////////////////// + + template + struct FreeListNode + { + FreeListNode() : freeListRefs(0), freeListNext(nullptr) { } + + std::atomic freeListRefs; + std::atomic freeListNext; + }; + + // A simple CAS-based lock-free free list. 
Not the fastest thing in the world under heavy contention, but + // simple and correct (assuming nodes are never freed until after the free list is destroyed), and fairly + // speedy under low contention. + template // N must inherit FreeListNode or have the same fields (and initialization of them) + struct FreeList + { + FreeList() : freeListHead(nullptr) { } + FreeList(FreeList&& other) : freeListHead(other.freeListHead.load(std::memory_order_relaxed)) { other.freeListHead.store(nullptr, std::memory_order_relaxed); } + void swap(FreeList& other) { details::swap_relaxed(freeListHead, other.freeListHead); } + + FreeList(FreeList const&) MOODYCAMEL_DELETE_FUNCTION; + FreeList& operator=(FreeList const&) MOODYCAMEL_DELETE_FUNCTION; + + inline void add(N* node) + { +#if MCDBGQ_NOLOCKFREE_FREELIST + debug::DebugLock lock(mutex); +#endif + // We know that the should-be-on-freelist bit is 0 at this point, so it's safe to + // set it using a fetch_add + if (node->freeListRefs.fetch_add(SHOULD_BE_ON_FREELIST, std::memory_order_acq_rel) == 0) { + // Oh look! We were the last ones referencing this node, and we know + // we want to add it to the free list, so let's do it! + add_knowing_refcount_is_zero(node); + } + } + + inline N* try_get() + { +#if MCDBGQ_NOLOCKFREE_FREELIST + debug::DebugLock lock(mutex); +#endif + auto head = freeListHead.load(std::memory_order_acquire); + while (head != nullptr) { + auto prevHead = head; + auto refs = head->freeListRefs.load(std::memory_order_relaxed); + if ((refs & REFS_MASK) == 0 || !head->freeListRefs.compare_exchange_strong(refs, refs + 1, std::memory_order_acquire, std::memory_order_relaxed)) { + head = freeListHead.load(std::memory_order_acquire); + continue; + } + + // Good, reference count has been incremented (it wasn't at zero), which means we can read the + // next and not worry about it changing between now and the time we do the CAS + auto next = head->freeListNext.load(std::memory_order_relaxed); + if (freeListHead.compare_exchange_strong(head, next, std::memory_order_acquire, std::memory_order_relaxed)) { + // Yay, got the node. This means it was on the list, which means shouldBeOnFreeList must be false no + // matter the refcount (because nobody else knows it's been taken off yet, it can't have been put back on). + assert((head->freeListRefs.load(std::memory_order_relaxed) & SHOULD_BE_ON_FREELIST) == 0); + + // Decrease refcount twice, once for our ref, and once for the list's ref + head->freeListRefs.fetch_sub(2, std::memory_order_release); + return head; + } + + // OK, the head must have changed on us, but we still need to decrease the refcount we increased. + // Note that we don't need to release any memory effects, but we do need to ensure that the reference + // count decrement happens-after the CAS on the head. + refs = prevHead->freeListRefs.fetch_sub(1, std::memory_order_acq_rel); + if (refs == SHOULD_BE_ON_FREELIST + 1) { + add_knowing_refcount_is_zero(prevHead); + } + } + + return nullptr; + } + + // Useful for traversing the list when there's no contention (e.g. to destroy remaining nodes) + N* head_unsafe() const { return freeListHead.load(std::memory_order_relaxed); } + + private: + inline void add_knowing_refcount_is_zero(N* node) + { + // Since the refcount is zero, and nobody can increase it once it's zero (except us, and we run + // only one copy of this method per node at a time, i.e. 
the single thread case), then we know + // we can safely change the next pointer of the node; however, once the refcount is back above + // zero, then other threads could increase it (happens under heavy contention, when the refcount + // goes to zero in between a load and a refcount increment of a node in try_get, then back up to + // something non-zero, then the refcount increment is done by the other thread) -- so, if the CAS + // to add the node to the actual list fails, decrease the refcount and leave the add operation to + // the next thread who puts the refcount back at zero (which could be us, hence the loop). + auto head = freeListHead.load(std::memory_order_relaxed); + while (true) { + node->freeListNext.store(head, std::memory_order_relaxed); + node->freeListRefs.store(1, std::memory_order_release); + if (!freeListHead.compare_exchange_strong(head, node, std::memory_order_release, std::memory_order_relaxed)) { + // Hmm, the add failed, but we can only try again when the refcount goes back to zero + if (node->freeListRefs.fetch_add(SHOULD_BE_ON_FREELIST - 1, std::memory_order_release) == 1) { + continue; + } + } + return; + } + } + + private: + // Implemented like a stack, but where node order doesn't matter (nodes are inserted out of order under contention) + std::atomic freeListHead; + + static const std::uint32_t REFS_MASK = 0x7FFFFFFF; + static const std::uint32_t SHOULD_BE_ON_FREELIST = 0x80000000; + +#if MCDBGQ_NOLOCKFREE_FREELIST + debug::DebugMutex mutex; +#endif + }; + + + /////////////////////////// + // Block + /////////////////////////// + + enum InnerQueueContext { implicit_context = 0, explicit_context = 1 }; + + struct Block + { + Block() + : next(nullptr), elementsCompletelyDequeued(0), freeListRefs(0), freeListNext(nullptr), shouldBeOnFreeList(false), dynamicallyAllocated(true) + { +#if MCDBGQ_TRACKMEM + owner = nullptr; +#endif + } + + template + inline bool is_empty() const + { + if (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { + // Check flags + for (size_t i = 0; i < BLOCK_SIZE; ++i) { + if (!emptyFlags[i].load(std::memory_order_relaxed)) { + return false; + } + } + + // Aha, empty; make sure we have all other memory effects that happened before the empty flags were set + std::atomic_thread_fence(std::memory_order_acquire); + return true; + } + else { + // Check counter + if (elementsCompletelyDequeued.load(std::memory_order_relaxed) == BLOCK_SIZE) { + std::atomic_thread_fence(std::memory_order_acquire); + return true; + } + assert(elementsCompletelyDequeued.load(std::memory_order_relaxed) <= BLOCK_SIZE); + return false; + } + } + + // Returns true if the block is now empty (does not apply in explicit context) + template + inline bool set_empty(index_t i) + { + if (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { + // Set flag + assert(!emptyFlags[BLOCK_SIZE - 1 - static_cast(i & static_cast(BLOCK_SIZE - 1))].load(std::memory_order_relaxed)); + emptyFlags[BLOCK_SIZE - 1 - static_cast(i & static_cast(BLOCK_SIZE - 1))].store(true, std::memory_order_release); + return false; + } + else { + // Increment counter + auto prevVal = elementsCompletelyDequeued.fetch_add(1, std::memory_order_release); + assert(prevVal < BLOCK_SIZE); + return prevVal == BLOCK_SIZE - 1; + } + } + + // Sets multiple contiguous item statuses to 'empty' (assumes no wrapping and count > 0). + // Returns true if the block is now empty (does not apply in explicit context). 
+ template + inline bool set_many_empty(index_t i, size_t count) + { + if (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { + // Set flags + std::atomic_thread_fence(std::memory_order_release); + i = BLOCK_SIZE - 1 - static_cast(i & static_cast(BLOCK_SIZE - 1)) - count + 1; + for (size_t j = 0; j != count; ++j) { + assert(!emptyFlags[i + j].load(std::memory_order_relaxed)); + emptyFlags[i + j].store(true, std::memory_order_relaxed); + } + return false; + } + else { + // Increment counter + auto prevVal = elementsCompletelyDequeued.fetch_add(count, std::memory_order_release); + assert(prevVal + count <= BLOCK_SIZE); + return prevVal + count == BLOCK_SIZE; + } + } + + template + inline void set_all_empty() + { + if (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { + // Set all flags + for (size_t i = 0; i != BLOCK_SIZE; ++i) { + emptyFlags[i].store(true, std::memory_order_relaxed); + } + } + else { + // Reset counter + elementsCompletelyDequeued.store(BLOCK_SIZE, std::memory_order_relaxed); + } + } + + template + inline void reset_empty() + { + if (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { + // Reset flags + for (size_t i = 0; i != BLOCK_SIZE; ++i) { + emptyFlags[i].store(false, std::memory_order_relaxed); + } + } + else { + // Reset counter + elementsCompletelyDequeued.store(0, std::memory_order_relaxed); + } + } + + inline T* operator[](index_t idx) MOODYCAMEL_NOEXCEPT { return static_cast(static_cast(elements)) + static_cast(idx & static_cast(BLOCK_SIZE - 1)); } + inline T const* operator[](index_t idx) const MOODYCAMEL_NOEXCEPT { return static_cast(static_cast(elements)) + static_cast(idx & static_cast(BLOCK_SIZE - 1)); } + + private: + // IMPORTANT: This must be the first member in Block, so that if T depends on the alignment of + // addresses returned by malloc, that alignment will be preserved. Apparently clang actually + // generates code that uses this assumption for AVX instructions in some cases. Ideally, we + // should also align Block to the alignment of T in case it's higher than malloc's 16-byte + // alignment, but this is hard to do in a cross-platform way. Assert for this case: + static_assert(std::alignment_of::value <= std::alignment_of::value, "The queue does not support super-aligned types at this time"); + // Additionally, we need the alignment of Block itself to be a multiple of max_align_t since + // otherwise the appropriate padding will not be added at the end of Block in order to make + // arrays of Blocks all be properly aligned (not just the first one). We use a union to force + // this. + union { + char elements[sizeof(T) * BLOCK_SIZE]; + details::max_align_t dummy; + }; + public: + Block* next; + std::atomic elementsCompletelyDequeued; + std::atomic emptyFlags[BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD ? 
BLOCK_SIZE : 1]; + public: + std::atomic freeListRefs; + std::atomic freeListNext; + std::atomic shouldBeOnFreeList; + bool dynamicallyAllocated; // Perhaps a better name for this would be 'isNotPartOfInitialBlockPool' + +#if MCDBGQ_TRACKMEM + void* owner; +#endif + }; + static_assert(std::alignment_of::value >= std::alignment_of::value, "Internal error: Blocks must be at least as aligned as the type they are wrapping"); + + +#if MCDBGQ_TRACKMEM +public: + struct MemStats; +private: +#endif + + /////////////////////////// + // Producer base + /////////////////////////// + + struct ProducerBase : public details::ConcurrentQueueProducerTypelessBase + { + ProducerBase(ConcurrentQueue* parent_, bool isExplicit_) : + tailIndex(0), + headIndex(0), + dequeueOptimisticCount(0), + dequeueOvercommit(0), + tailBlock(nullptr), + isExplicit(isExplicit_), + parent(parent_) + { + } + + virtual ~ProducerBase() { }; + + template + inline bool dequeue(U& element) + { + if (isExplicit) { + return static_cast(this)->dequeue(element); + } + else { + return static_cast(this)->dequeue(element); + } + } + + template + inline size_t dequeue_bulk(It& itemFirst, size_t max) + { + if (isExplicit) { + return static_cast(this)->dequeue_bulk(itemFirst, max); + } + else { + return static_cast(this)->dequeue_bulk(itemFirst, max); + } + } + + inline ProducerBase* next_prod() const { return static_cast(next); } + + inline size_t size_approx() const + { + auto tail = tailIndex.load(std::memory_order_relaxed); + auto head = headIndex.load(std::memory_order_relaxed); + return details::circular_less_than(head, tail) ? static_cast(tail - head) : 0; + } + + inline index_t getTail() const { return tailIndex.load(std::memory_order_relaxed); } + protected: + std::atomic tailIndex; // Where to enqueue to next + std::atomic headIndex; // Where to dequeue from next + + std::atomic dequeueOptimisticCount; + std::atomic dequeueOvercommit; + + Block* tailBlock; + + public: + bool isExplicit; + ConcurrentQueue* parent; + + protected: +#if MCDBGQ_TRACKMEM + friend struct MemStats; +#endif + }; + + + /////////////////////////// + // Explicit queue + /////////////////////////// + + struct ExplicitProducer : public ProducerBase + { + explicit ExplicitProducer(ConcurrentQueue* parent) : + ProducerBase(parent, true), + blockIndex(nullptr), + pr_blockIndexSlotsUsed(0), + pr_blockIndexSize(EXPLICIT_INITIAL_INDEX_SIZE >> 1), + pr_blockIndexFront(0), + pr_blockIndexEntries(nullptr), + pr_blockIndexRaw(nullptr) + { + size_t poolBasedIndexSize = details::ceil_to_pow_2(parent->initialBlockPoolSize) >> 1; + if (poolBasedIndexSize > pr_blockIndexSize) { + pr_blockIndexSize = poolBasedIndexSize; + } + + new_block_index(0); // This creates an index with double the number of current entries, i.e. EXPLICIT_INITIAL_INDEX_SIZE + } + + ~ExplicitProducer() + { + // Destruct any elements not yet dequeued. + // Since we're in the destructor, we can assume all elements + // are either completely dequeued or completely not (no halfways). 
+ if (this->tailBlock != nullptr) { // Note this means there must be a block index too + // First find the block that's partially dequeued, if any + Block* halfDequeuedBlock = nullptr; + if ((this->headIndex.load(std::memory_order_relaxed) & static_cast(BLOCK_SIZE - 1)) != 0) { + // The head's not on a block boundary, meaning a block somewhere is partially dequeued + // (or the head block is the tail block and was fully dequeued, but the head/tail are still not on a boundary) + size_t i = (pr_blockIndexFront - pr_blockIndexSlotsUsed) & (pr_blockIndexSize - 1); + while (details::circular_less_than(pr_blockIndexEntries[i].base + BLOCK_SIZE, this->headIndex.load(std::memory_order_relaxed))) { + i = (i + 1) & (pr_blockIndexSize - 1); + } + assert(details::circular_less_than(pr_blockIndexEntries[i].base, this->headIndex.load(std::memory_order_relaxed))); + halfDequeuedBlock = pr_blockIndexEntries[i].block; + } + + // Start at the head block (note the first line in the loop gives us the head from the tail on the first iteration) + auto block = this->tailBlock; + do { + block = block->next; + if (block->ConcurrentQueue::Block::template is_empty()) { + continue; + } + + size_t i = 0; // Offset into block + if (block == halfDequeuedBlock) { + i = static_cast(this->headIndex.load(std::memory_order_relaxed) & static_cast(BLOCK_SIZE - 1)); + } + + // Walk through all the items in the block; if this is the tail block, we need to stop when we reach the tail index + auto lastValidIndex = (this->tailIndex.load(std::memory_order_relaxed) & static_cast(BLOCK_SIZE - 1)) == 0 ? BLOCK_SIZE : static_cast(this->tailIndex.load(std::memory_order_relaxed) & static_cast(BLOCK_SIZE - 1)); + while (i != BLOCK_SIZE && (block != this->tailBlock || i != lastValidIndex)) { + (*block)[i++]->~T(); + } + } while (block != this->tailBlock); + } + + // Destroy all blocks that we own + if (this->tailBlock != nullptr) { + auto block = this->tailBlock; + do { + auto nextBlock = block->next; + if (block->dynamicallyAllocated) { + destroy(block); + } + else { + this->parent->add_block_to_free_list(block); + } + block = nextBlock; + } while (block != this->tailBlock); + } + + // Destroy the block indices + auto header = static_cast(pr_blockIndexRaw); + while (header != nullptr) { + auto prev = static_cast(header->prev); + header->~BlockIndexHeader(); + (Traits::free)(header); + header = prev; + } + } + + template + inline bool enqueue(U&& element) + { + index_t currentTailIndex = this->tailIndex.load(std::memory_order_relaxed); + index_t newTailIndex = 1 + currentTailIndex; + if ((currentTailIndex & static_cast(BLOCK_SIZE - 1)) == 0) { + // We reached the end of a block, start a new one + auto startBlock = this->tailBlock; + auto originalBlockIndexSlotsUsed = pr_blockIndexSlotsUsed; + if (this->tailBlock != nullptr && this->tailBlock->next->ConcurrentQueue::Block::template is_empty()) { + // We can re-use the block ahead of us, it's empty! + this->tailBlock = this->tailBlock->next; + this->tailBlock->ConcurrentQueue::Block::template reset_empty(); + + // We'll put the block on the block index (guaranteed to be room since we're conceptually removing the + // last block from it first -- except instead of removing then adding, we can just overwrite). + // Note that there must be a valid block index here, since even if allocation failed in the ctor, + // it would have been re-attempted when adding the first block to the queue; since there is such + // a block, a block index must have been successfully allocated. 
+ } + else { + // Whatever head value we see here is >= the last value we saw here (relatively), + // and <= its current value. Since we have the most recent tail, the head must be + // <= to it. + auto head = this->headIndex.load(std::memory_order_relaxed); + assert(!details::circular_less_than(currentTailIndex, head)); + if (!details::circular_less_than(head, currentTailIndex + BLOCK_SIZE) + || (MAX_SUBQUEUE_SIZE != details::const_numeric_max::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head))) { + // We can't enqueue in another block because there's not enough leeway -- the + // tail could surpass the head by the time the block fills up! (Or we'll exceed + // the size limit, if the second part of the condition was true.) + return false; + } + // We're going to need a new block; check that the block index has room + if (pr_blockIndexRaw == nullptr || pr_blockIndexSlotsUsed == pr_blockIndexSize) { + // Hmm, the circular block index is already full -- we'll need + // to allocate a new index. Note pr_blockIndexRaw can only be nullptr if + // the initial allocation failed in the constructor. + + if (allocMode == CannotAlloc || !new_block_index(pr_blockIndexSlotsUsed)) { + return false; + } + } + + // Insert a new block in the circular linked list + auto newBlock = this->parent->ConcurrentQueue::template requisition_block(); + if (newBlock == nullptr) { + return false; + } +#if MCDBGQ_TRACKMEM + newBlock->owner = this; +#endif + newBlock->ConcurrentQueue::Block::template reset_empty(); + if (this->tailBlock == nullptr) { + newBlock->next = newBlock; + } + else { + newBlock->next = this->tailBlock->next; + this->tailBlock->next = newBlock; + } + this->tailBlock = newBlock; + ++pr_blockIndexSlotsUsed; + } + + if (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (nullptr) T(std::forward(element)))) { + // The constructor may throw. We want the element not to appear in the queue in + // that case (without corrupting the queue): + MOODYCAMEL_TRY { + new ((*this->tailBlock)[currentTailIndex]) T(std::forward(element)); + } + MOODYCAMEL_CATCH (...) { + // Revert change to the current block, but leave the new block available + // for next time + pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; + this->tailBlock = startBlock == nullptr ? 
this->tailBlock : startBlock; + MOODYCAMEL_RETHROW; + } + } + else { + (void)startBlock; + (void)originalBlockIndexSlotsUsed; + } + + // Add block to block index + auto& entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront]; + entry.base = currentTailIndex; + entry.block = this->tailBlock; + blockIndex.load(std::memory_order_relaxed)->front.store(pr_blockIndexFront, std::memory_order_release); + pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1); + + if (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (nullptr) T(std::forward(element)))) { + this->tailIndex.store(newTailIndex, std::memory_order_release); + return true; + } + } + + // Enqueue + new ((*this->tailBlock)[currentTailIndex]) T(std::forward(element)); + + this->tailIndex.store(newTailIndex, std::memory_order_release); + return true; + } + + template + bool dequeue(U& element) + { + auto tail = this->tailIndex.load(std::memory_order_relaxed); + auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed); + if (details::circular_less_than(this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit, tail)) { + // Might be something to dequeue, let's give it a try + + // Note that this if is purely for performance purposes in the common case when the queue is + // empty and the values are eventually consistent -- we may enter here spuriously. + + // Note that whatever the values of overcommit and tail are, they are not going to change (unless we + // change them) and must be the same value at this point (inside the if) as when the if condition was + // evaluated. + + // We insert an acquire fence here to synchronize-with the release upon incrementing dequeueOvercommit below. + // This ensures that whatever the value we got loaded into overcommit, the load of dequeueOptisticCount in + // the fetch_add below will result in a value at least as recent as that (and therefore at least as large). + // Note that I believe a compiler (signal) fence here would be sufficient due to the nature of fetch_add (all + // read-modify-write operations are guaranteed to work on the latest value in the modification order), but + // unfortunately that can't be shown to be correct using only the C++11 standard. + // See http://stackoverflow.com/questions/18223161/what-are-the-c11-memory-ordering-guarantees-in-this-corner-case + std::atomic_thread_fence(std::memory_order_acquire); + + // Increment optimistic counter, then check if it went over the boundary + auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(1, std::memory_order_relaxed); + + // Note that since dequeueOvercommit must be <= dequeueOptimisticCount (because dequeueOvercommit is only ever + // incremented after dequeueOptimisticCount -- this is enforced in the `else` block below), and since we now + // have a version of dequeueOptimisticCount that is at least as recent as overcommit (due to the release upon + // incrementing dequeueOvercommit and the acquire above that synchronizes with it), overcommit <= myDequeueCount. + assert(overcommit <= myDequeueCount); + + // Note that we reload tail here in case it changed; it will be the same value as before or greater, since + // this load is sequenced after (happens after) the earlier load above. 
This is supported by read-read + // coherency (as defined in the standard), explained here: http://en.cppreference.com/w/cpp/atomic/memory_order + tail = this->tailIndex.load(std::memory_order_acquire); + if ((details::likely)(details::circular_less_than(myDequeueCount - overcommit, tail))) { + // Guaranteed to be at least one element to dequeue! + + // Get the index. Note that since there's guaranteed to be at least one element, this + // will never exceed tail. We need to do an acquire-release fence here since it's possible + // that whatever condition got us to this point was for an earlier enqueued element (that + // we already see the memory effects for), but that by the time we increment somebody else + // has incremented it, and we need to see the memory effects for *that* element, which is + // in such a case is necessarily visible on the thread that incremented it in the first + // place with the more current condition (they must have acquired a tail that is at least + // as recent). + auto index = this->headIndex.fetch_add(1, std::memory_order_acq_rel); + + + // Determine which block the element is in + + auto localBlockIndex = blockIndex.load(std::memory_order_acquire); + auto localBlockIndexHead = localBlockIndex->front.load(std::memory_order_acquire); + + // We need to be careful here about subtracting and dividing because of index wrap-around. + // When an index wraps, we need to preserve the sign of the offset when dividing it by the + // block size (in order to get a correct signed block count offset in all cases): + auto headBase = localBlockIndex->entries[localBlockIndexHead].base; + auto blockBaseIndex = index & ~static_cast(BLOCK_SIZE - 1); + auto offset = static_cast(static_cast::type>(blockBaseIndex - headBase) / BLOCK_SIZE); + auto block = localBlockIndex->entries[(localBlockIndexHead + offset) & (localBlockIndex->size - 1)].block; + + // Dequeue + auto& el = *((*block)[index]); + if (!MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, element = std::move(el))) { + // Make sure the element is still fully dequeued and destroyed even if the assignment + // throws + struct Guard { + Block* block; + index_t index; + + ~Guard() + { + (*block)[index]->~T(); + block->ConcurrentQueue::Block::template set_empty(index); + } + } guard = { block, index }; + + element = std::move(el); + } + else { + element = std::move(el); + el.~T(); + block->ConcurrentQueue::Block::template set_empty(index); + } + + return true; + } + else { + // Wasn't anything to dequeue after all; make the effective dequeue count eventually consistent + this->dequeueOvercommit.fetch_add(1, std::memory_order_release); // Release so that the fetch_add on dequeueOptimisticCount is guaranteed to happen before this write + } + } + + return false; + } + + template + bool enqueue_bulk(It itemFirst, size_t count) + { + // First, we need to make sure we have enough room to enqueue all of the elements; + // this means pre-allocating blocks and putting them in the block index (but only if + // all the allocations succeeded). 
+ index_t startTailIndex = this->tailIndex.load(std::memory_order_relaxed); + auto startBlock = this->tailBlock; + auto originalBlockIndexFront = pr_blockIndexFront; + auto originalBlockIndexSlotsUsed = pr_blockIndexSlotsUsed; + + Block* firstAllocatedBlock = nullptr; + + // Figure out how many blocks we'll need to allocate, and do so + size_t blockBaseDiff = ((startTailIndex + count - 1) & ~static_cast(BLOCK_SIZE - 1)) - ((startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1)); + index_t currentTailIndex = (startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1); + if (blockBaseDiff > 0) { + // Allocate as many blocks as possible from ahead + while (blockBaseDiff > 0 && this->tailBlock != nullptr && this->tailBlock->next != firstAllocatedBlock && this->tailBlock->next->ConcurrentQueue::Block::template is_empty()) { + blockBaseDiff -= static_cast(BLOCK_SIZE); + currentTailIndex += static_cast(BLOCK_SIZE); + + this->tailBlock = this->tailBlock->next; + firstAllocatedBlock = firstAllocatedBlock == nullptr ? this->tailBlock : firstAllocatedBlock; + + auto& entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront]; + entry.base = currentTailIndex; + entry.block = this->tailBlock; + pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1); + } + + // Now allocate as many blocks as necessary from the block pool + while (blockBaseDiff > 0) { + blockBaseDiff -= static_cast(BLOCK_SIZE); + currentTailIndex += static_cast(BLOCK_SIZE); + + auto head = this->headIndex.load(std::memory_order_relaxed); + assert(!details::circular_less_than(currentTailIndex, head)); + bool full = !details::circular_less_than(head, currentTailIndex + BLOCK_SIZE) || (MAX_SUBQUEUE_SIZE != details::const_numeric_max::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head)); + if (pr_blockIndexRaw == nullptr || pr_blockIndexSlotsUsed == pr_blockIndexSize || full) { + if (allocMode == CannotAlloc || full || !new_block_index(originalBlockIndexSlotsUsed)) { + // Failed to allocate, undo changes (but keep injected blocks) + pr_blockIndexFront = originalBlockIndexFront; + pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; + this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock; + return false; + } + + // pr_blockIndexFront is updated inside new_block_index, so we need to + // update our fallback value too (since we keep the new index even if we + // later fail) + originalBlockIndexFront = originalBlockIndexSlotsUsed; + } + + // Insert a new block in the circular linked list + auto newBlock = this->parent->ConcurrentQueue::template requisition_block(); + if (newBlock == nullptr) { + pr_blockIndexFront = originalBlockIndexFront; + pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; + this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock; + return false; + } + +#if MCDBGQ_TRACKMEM + newBlock->owner = this; +#endif + newBlock->ConcurrentQueue::Block::template set_all_empty(); + if (this->tailBlock == nullptr) { + newBlock->next = newBlock; + } + else { + newBlock->next = this->tailBlock->next; + this->tailBlock->next = newBlock; + } + this->tailBlock = newBlock; + firstAllocatedBlock = firstAllocatedBlock == nullptr ? 
this->tailBlock : firstAllocatedBlock; + + ++pr_blockIndexSlotsUsed; + + auto& entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront]; + entry.base = currentTailIndex; + entry.block = this->tailBlock; + pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1); + } + + // Excellent, all allocations succeeded. Reset each block's emptiness before we fill them up, and + // publish the new block index front + auto block = firstAllocatedBlock; + while (true) { + block->ConcurrentQueue::Block::template reset_empty(); + if (block == this->tailBlock) { + break; + } + block = block->next; + } + + if (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (nullptr) T(details::deref_noexcept(itemFirst)))) { + blockIndex.load(std::memory_order_relaxed)->front.store((pr_blockIndexFront - 1) & (pr_blockIndexSize - 1), std::memory_order_release); + } + } + + // Enqueue, one block at a time + index_t newTailIndex = startTailIndex + static_cast(count); + currentTailIndex = startTailIndex; + auto endBlock = this->tailBlock; + this->tailBlock = startBlock; + assert((startTailIndex & static_cast(BLOCK_SIZE - 1)) != 0 || firstAllocatedBlock != nullptr || count == 0); + if ((startTailIndex & static_cast(BLOCK_SIZE - 1)) == 0 && firstAllocatedBlock != nullptr) { + this->tailBlock = firstAllocatedBlock; + } + while (true) { + auto stopIndex = (currentTailIndex & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + if (details::circular_less_than(newTailIndex, stopIndex)) { + stopIndex = newTailIndex; + } + if (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (nullptr) T(details::deref_noexcept(itemFirst)))) { + while (currentTailIndex != stopIndex) { + new ((*this->tailBlock)[currentTailIndex++]) T(*itemFirst++); + } + } + else { + MOODYCAMEL_TRY { + while (currentTailIndex != stopIndex) { + // Must use copy constructor even if move constructor is available + // because we may have to revert if there's an exception. + // Sorry about the horrible templated next line, but it was the only way + // to disable moving *at compile time*, which is important because a type + // may only define a (noexcept) move constructor, and so calls to the + // cctor will not compile, even if they are in an if branch that will never + // be executed + new ((*this->tailBlock)[currentTailIndex]) T(details::nomove_if<(bool)!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (nullptr) T(details::deref_noexcept(itemFirst)))>::eval(*itemFirst)); + ++currentTailIndex; + ++itemFirst; + } + } + MOODYCAMEL_CATCH (...) { + // Oh dear, an exception's been thrown -- destroy the elements that + // were enqueued so far and revert the entire bulk operation (we'll keep + // any allocated blocks in our linked list for later, though). + auto constructedStopIndex = currentTailIndex; + auto lastBlockEnqueued = this->tailBlock; + + pr_blockIndexFront = originalBlockIndexFront; + pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; + this->tailBlock = startBlock == nullptr ? 
firstAllocatedBlock : startBlock; + + if (!details::is_trivially_destructible::value) { + auto block = startBlock; + if ((startTailIndex & static_cast(BLOCK_SIZE - 1)) == 0) { + block = firstAllocatedBlock; + } + currentTailIndex = startTailIndex; + while (true) { + stopIndex = (currentTailIndex & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + if (details::circular_less_than(constructedStopIndex, stopIndex)) { + stopIndex = constructedStopIndex; + } + while (currentTailIndex != stopIndex) { + (*block)[currentTailIndex++]->~T(); + } + if (block == lastBlockEnqueued) { + break; + } + block = block->next; + } + } + MOODYCAMEL_RETHROW; + } + } + + if (this->tailBlock == endBlock) { + assert(currentTailIndex == newTailIndex); + break; + } + this->tailBlock = this->tailBlock->next; + } + + if (!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (nullptr) T(details::deref_noexcept(itemFirst))) && firstAllocatedBlock != nullptr) { + blockIndex.load(std::memory_order_relaxed)->front.store((pr_blockIndexFront - 1) & (pr_blockIndexSize - 1), std::memory_order_release); + } + + this->tailIndex.store(newTailIndex, std::memory_order_release); + return true; + } + + template + size_t dequeue_bulk(It& itemFirst, size_t max) + { + auto tail = this->tailIndex.load(std::memory_order_relaxed); + auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed); + auto desiredCount = static_cast(tail - (this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit)); + if (details::circular_less_than(0, desiredCount)) { + desiredCount = desiredCount < max ? desiredCount : max; + std::atomic_thread_fence(std::memory_order_acquire); + + auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(desiredCount, std::memory_order_relaxed); + assert(overcommit <= myDequeueCount); + + tail = this->tailIndex.load(std::memory_order_acquire); + auto actualCount = static_cast(tail - (myDequeueCount - overcommit)); + if (details::circular_less_than(0, actualCount)) { + actualCount = desiredCount < actualCount ? desiredCount : actualCount; + if (actualCount < desiredCount) { + this->dequeueOvercommit.fetch_add(desiredCount - actualCount, std::memory_order_release); + } + + // Get the first index. Note that since there's guaranteed to be at least actualCount elements, this + // will never exceed tail. + auto firstIndex = this->headIndex.fetch_add(actualCount, std::memory_order_acq_rel); + + // Determine which block the first element is in + auto localBlockIndex = blockIndex.load(std::memory_order_acquire); + auto localBlockIndexHead = localBlockIndex->front.load(std::memory_order_acquire); + + auto headBase = localBlockIndex->entries[localBlockIndexHead].base; + auto firstBlockBaseIndex = firstIndex & ~static_cast(BLOCK_SIZE - 1); + auto offset = static_cast(static_cast::type>(firstBlockBaseIndex - headBase) / BLOCK_SIZE); + auto indexIndex = (localBlockIndexHead + offset) & (localBlockIndex->size - 1); + + // Iterate the blocks and dequeue + auto index = firstIndex; + do { + auto firstIndexInBlock = index; + auto endIndex = (index & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + endIndex = details::circular_less_than(firstIndex + static_cast(actualCount), endIndex) ? 
firstIndex + static_cast(actualCount) : endIndex; + auto block = localBlockIndex->entries[indexIndex].block; + if (MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, details::deref_noexcept(itemFirst) = std::move((*(*block)[index])))) { + while (index != endIndex) { + auto& el = *((*block)[index]); + *itemFirst++ = std::move(el); + el.~T(); + ++index; + } + } + else { + MOODYCAMEL_TRY { + while (index != endIndex) { + auto& el = *((*block)[index]); + *itemFirst = std::move(el); + ++itemFirst; + el.~T(); + ++index; + } + } + MOODYCAMEL_CATCH (...) { + // It's too late to revert the dequeue, but we can make sure that all + // the dequeued objects are properly destroyed and the block index + // (and empty count) are properly updated before we propagate the exception + do { + block = localBlockIndex->entries[indexIndex].block; + while (index != endIndex) { + (*block)[index++]->~T(); + } + block->ConcurrentQueue::Block::template set_many_empty(firstIndexInBlock, static_cast(endIndex - firstIndexInBlock)); + indexIndex = (indexIndex + 1) & (localBlockIndex->size - 1); + + firstIndexInBlock = index; + endIndex = (index & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + endIndex = details::circular_less_than(firstIndex + static_cast(actualCount), endIndex) ? firstIndex + static_cast(actualCount) : endIndex; + } while (index != firstIndex + actualCount); + + MOODYCAMEL_RETHROW; + } + } + block->ConcurrentQueue::Block::template set_many_empty(firstIndexInBlock, static_cast(endIndex - firstIndexInBlock)); + indexIndex = (indexIndex + 1) & (localBlockIndex->size - 1); + } while (index != firstIndex + actualCount); + + return actualCount; + } + else { + // Wasn't anything to dequeue after all; make the effective dequeue count eventually consistent + this->dequeueOvercommit.fetch_add(desiredCount, std::memory_order_release); + } + } + + return 0; + } + + private: + struct BlockIndexEntry + { + index_t base; + Block* block; + }; + + struct BlockIndexHeader + { + size_t size; + std::atomic front; // Current slot (not next, like pr_blockIndexFront) + BlockIndexEntry* entries; + void* prev; + }; + + + bool new_block_index(size_t numberOfFilledSlotsToExpose) + { + auto prevBlockSizeMask = pr_blockIndexSize - 1; + + // Create the new block + pr_blockIndexSize <<= 1; + auto newRawPtr = static_cast((Traits::malloc)(sizeof(BlockIndexHeader) + std::alignment_of::value - 1 + sizeof(BlockIndexEntry) * pr_blockIndexSize)); + if (newRawPtr == nullptr) { + pr_blockIndexSize >>= 1; // Reset to allow graceful retry + return false; + } + + auto newBlockIndexEntries = reinterpret_cast(details::align_for(newRawPtr + sizeof(BlockIndexHeader))); + + // Copy in all the old indices, if any + size_t j = 0; + if (pr_blockIndexSlotsUsed != 0) { + auto i = (pr_blockIndexFront - pr_blockIndexSlotsUsed) & prevBlockSizeMask; + do { + newBlockIndexEntries[j++] = pr_blockIndexEntries[i]; + i = (i + 1) & prevBlockSizeMask; + } while (i != pr_blockIndexFront); + } + + // Update everything + auto header = new (newRawPtr) BlockIndexHeader; + header->size = pr_blockIndexSize; + header->front.store(numberOfFilledSlotsToExpose - 1, std::memory_order_relaxed); + header->entries = newBlockIndexEntries; + header->prev = pr_blockIndexRaw; // we link the new block to the old one so we can free it later + + pr_blockIndexFront = j; + pr_blockIndexEntries = newBlockIndexEntries; + pr_blockIndexRaw = newRawPtr; + blockIndex.store(header, std::memory_order_release); + + return true; + } + + private: + std::atomic blockIndex; + + // To be used by producer 
only -- consumer must use the ones in referenced by blockIndex + size_t pr_blockIndexSlotsUsed; + size_t pr_blockIndexSize; + size_t pr_blockIndexFront; // Next slot (not current) + BlockIndexEntry* pr_blockIndexEntries; + void* pr_blockIndexRaw; + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + public: + ExplicitProducer* nextExplicitProducer; + private: +#endif + +#if MCDBGQ_TRACKMEM + friend struct MemStats; +#endif + }; + + + ////////////////////////////////// + // Implicit queue + ////////////////////////////////// + + struct ImplicitProducer : public ProducerBase + { + ImplicitProducer(ConcurrentQueue* parent) : + ProducerBase(parent, false), + nextBlockIndexCapacity(IMPLICIT_INITIAL_INDEX_SIZE), + blockIndex(nullptr) + { + new_block_index(); + } + + ~ImplicitProducer() + { + // Note that since we're in the destructor we can assume that all enqueue/dequeue operations + // completed already; this means that all undequeued elements are placed contiguously across + // contiguous blocks, and that only the first and last remaining blocks can be only partially + // empty (all other remaining blocks must be completely full). + +#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED + // Unregister ourselves for thread termination notification + if (!this->inactive.load(std::memory_order_relaxed)) { + details::ThreadExitNotifier::unsubscribe(&threadExitListener); + } +#endif + + // Destroy all remaining elements! + auto tail = this->tailIndex.load(std::memory_order_relaxed); + auto index = this->headIndex.load(std::memory_order_relaxed); + Block* block = nullptr; + assert(index == tail || details::circular_less_than(index, tail)); + bool forceFreeLastBlock = index != tail; // If we enter the loop, then the last (tail) block will not be freed + while (index != tail) { + if ((index & static_cast(BLOCK_SIZE - 1)) == 0 || block == nullptr) { + if (block != nullptr) { + // Free the old block + this->parent->add_block_to_free_list(block); + } + + block = get_block_index_entry_for_index(index)->value.load(std::memory_order_relaxed); + } + + ((*block)[index])->~T(); + ++index; + } + // Even if the queue is empty, there's still one block that's not on the free list + // (unless the head index reached the end of it, in which case the tail will be poised + // to create a new block). 
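+ // Concretely, the tail block is handed back to the free list either because the loop
+ // above ran at all (forceFreeLastBlock), or because tail does not sit exactly on a block
+ // boundary (the low bits of the tail index are non-zero), meaning the block is still
+ // partially in use by this producer rather than poised to be replaced.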
+ if (this->tailBlock != nullptr && (forceFreeLastBlock || (tail & static_cast(BLOCK_SIZE - 1)) != 0)) { + this->parent->add_block_to_free_list(this->tailBlock); + } + + // Destroy block index + auto localBlockIndex = blockIndex.load(std::memory_order_relaxed); + if (localBlockIndex != nullptr) { + for (size_t i = 0; i != localBlockIndex->capacity; ++i) { + localBlockIndex->index[i]->~BlockIndexEntry(); + } + do { + auto prev = localBlockIndex->prev; + localBlockIndex->~BlockIndexHeader(); + (Traits::free)(localBlockIndex); + localBlockIndex = prev; + } while (localBlockIndex != nullptr); + } + } + + template + inline bool enqueue(U&& element) + { + index_t currentTailIndex = this->tailIndex.load(std::memory_order_relaxed); + index_t newTailIndex = 1 + currentTailIndex; + if ((currentTailIndex & static_cast(BLOCK_SIZE - 1)) == 0) { + // We reached the end of a block, start a new one + auto head = this->headIndex.load(std::memory_order_relaxed); + assert(!details::circular_less_than(currentTailIndex, head)); + if (!details::circular_less_than(head, currentTailIndex + BLOCK_SIZE) || (MAX_SUBQUEUE_SIZE != details::const_numeric_max::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head))) { + return false; + } +#if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + debug::DebugLock lock(mutex); +#endif + // Find out where we'll be inserting this block in the block index + BlockIndexEntry* idxEntry; + if (!insert_block_index_entry(idxEntry, currentTailIndex)) { + return false; + } + + // Get ahold of a new block + auto newBlock = this->parent->ConcurrentQueue::template requisition_block(); + if (newBlock == nullptr) { + rewind_block_index_tail(); + idxEntry->value.store(nullptr, std::memory_order_relaxed); + return false; + } +#if MCDBGQ_TRACKMEM + newBlock->owner = this; +#endif + newBlock->ConcurrentQueue::Block::template reset_empty(); + + if (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (nullptr) T(std::forward(element)))) { + // May throw, try to insert now before we publish the fact that we have this new block + MOODYCAMEL_TRY { + new ((*newBlock)[currentTailIndex]) T(std::forward(element)); + } + MOODYCAMEL_CATCH (...) 
{ + rewind_block_index_tail(); + idxEntry->value.store(nullptr, std::memory_order_relaxed); + this->parent->add_block_to_free_list(newBlock); + MOODYCAMEL_RETHROW; + } + } + + // Insert the new block into the index + idxEntry->value.store(newBlock, std::memory_order_relaxed); + + this->tailBlock = newBlock; + + if (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (nullptr) T(std::forward(element)))) { + this->tailIndex.store(newTailIndex, std::memory_order_release); + return true; + } + } + + // Enqueue + new ((*this->tailBlock)[currentTailIndex]) T(std::forward(element)); + + this->tailIndex.store(newTailIndex, std::memory_order_release); + return true; + } + + template + bool dequeue(U& element) + { + // See ExplicitProducer::dequeue for rationale and explanation + index_t tail = this->tailIndex.load(std::memory_order_relaxed); + index_t overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed); + if (details::circular_less_than(this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit, tail)) { + std::atomic_thread_fence(std::memory_order_acquire); + + index_t myDequeueCount = this->dequeueOptimisticCount.fetch_add(1, std::memory_order_relaxed); + assert(overcommit <= myDequeueCount); + tail = this->tailIndex.load(std::memory_order_acquire); + if ((details::likely)(details::circular_less_than(myDequeueCount - overcommit, tail))) { + index_t index = this->headIndex.fetch_add(1, std::memory_order_acq_rel); + + // Determine which block the element is in + auto entry = get_block_index_entry_for_index(index); + + // Dequeue + auto block = entry->value.load(std::memory_order_relaxed); + auto& el = *((*block)[index]); + + if (!MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, element = std::move(el))) { +#if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + // Note: Acquiring the mutex with every dequeue instead of only when a block + // is released is very sub-optimal, but it is, after all, purely debug code. + debug::DebugLock lock(producer->mutex); +#endif + struct Guard { + Block* block; + index_t index; + BlockIndexEntry* entry; + ConcurrentQueue* parent; + + ~Guard() + { + (*block)[index]->~T(); + if (block->ConcurrentQueue::Block::template set_empty(index)) { + entry->value.store(nullptr, std::memory_order_relaxed); + parent->add_block_to_free_list(block); + } + } + } guard = { block, index, entry, this->parent }; + + element = std::move(el); + } + else { + element = std::move(el); + el.~T(); + + if (block->ConcurrentQueue::Block::template set_empty(index)) { + { +#if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + debug::DebugLock lock(mutex); +#endif + // Add the block back into the global free pool (and remove from block index) + entry->value.store(nullptr, std::memory_order_relaxed); + } + this->parent->add_block_to_free_list(block); // releases the above store + } + } + + return true; + } + else { + this->dequeueOvercommit.fetch_add(1, std::memory_order_release); + } + } + + return false; + } + + template + bool enqueue_bulk(It itemFirst, size_t count) + { + // First, we need to make sure we have enough room to enqueue all of the elements; + // this means pre-allocating blocks and putting them in the block index (but only if + // all the allocations succeeded). + + // Note that the tailBlock we start off with may not be owned by us any more; + // this happens if it was filled up exactly to the top (setting tailIndex to + // the first index of the next block which is not yet allocated), then dequeued + // completely (putting it on the free list) before we enqueue again. 
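+ // For example, with a block size of, say, 32 elements: enqueue 32 items (tailIndex ends
+ // up on the first slot of a block that has not been allocated yet), dequeue all 32 (the
+ // old block is returned to the free pool), then enqueue again -- the cached tailBlock
+ // pointer now names a block this producer no longer owns, so the code below is careful
+ // not to link new blocks onto it when starting on a fresh block boundary.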
+ + index_t startTailIndex = this->tailIndex.load(std::memory_order_relaxed); + auto startBlock = this->tailBlock; + Block* firstAllocatedBlock = nullptr; + auto endBlock = this->tailBlock; + + // Figure out how many blocks we'll need to allocate, and do so + size_t blockBaseDiff = ((startTailIndex + count - 1) & ~static_cast(BLOCK_SIZE - 1)) - ((startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1)); + index_t currentTailIndex = (startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1); + if (blockBaseDiff > 0) { +#if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + debug::DebugLock lock(mutex); +#endif + do { + blockBaseDiff -= static_cast(BLOCK_SIZE); + currentTailIndex += static_cast(BLOCK_SIZE); + + // Find out where we'll be inserting this block in the block index + BlockIndexEntry* idxEntry = nullptr; // initialization here unnecessary but compiler can't always tell + Block* newBlock; + bool indexInserted = false; + auto head = this->headIndex.load(std::memory_order_relaxed); + assert(!details::circular_less_than(currentTailIndex, head)); + bool full = !details::circular_less_than(head, currentTailIndex + BLOCK_SIZE) || (MAX_SUBQUEUE_SIZE != details::const_numeric_max::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head)); + if (full || !(indexInserted = insert_block_index_entry(idxEntry, currentTailIndex)) || (newBlock = this->parent->ConcurrentQueue::template requisition_block()) == nullptr) { + // Index allocation or block allocation failed; revert any other allocations + // and index insertions done so far for this operation + if (indexInserted) { + rewind_block_index_tail(); + idxEntry->value.store(nullptr, std::memory_order_relaxed); + } + currentTailIndex = (startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1); + for (auto block = firstAllocatedBlock; block != nullptr; block = block->next) { + currentTailIndex += static_cast(BLOCK_SIZE); + idxEntry = get_block_index_entry_for_index(currentTailIndex); + idxEntry->value.store(nullptr, std::memory_order_relaxed); + rewind_block_index_tail(); + } + this->parent->add_blocks_to_free_list(firstAllocatedBlock); + this->tailBlock = startBlock; + + return false; + } + +#if MCDBGQ_TRACKMEM + newBlock->owner = this; +#endif + newBlock->ConcurrentQueue::Block::template reset_empty(); + newBlock->next = nullptr; + + // Insert the new block into the index + idxEntry->value.store(newBlock, std::memory_order_relaxed); + + // Store the chain of blocks so that we can undo if later allocations fail, + // and so that we can find the blocks when we do the actual enqueueing + if ((startTailIndex & static_cast(BLOCK_SIZE - 1)) != 0 || firstAllocatedBlock != nullptr) { + assert(this->tailBlock != nullptr); + this->tailBlock->next = newBlock; + } + this->tailBlock = newBlock; + endBlock = newBlock; + firstAllocatedBlock = firstAllocatedBlock == nullptr ? 
newBlock : firstAllocatedBlock; + } while (blockBaseDiff > 0); + } + + // Enqueue, one block at a time + index_t newTailIndex = startTailIndex + static_cast(count); + currentTailIndex = startTailIndex; + this->tailBlock = startBlock; + assert((startTailIndex & static_cast(BLOCK_SIZE - 1)) != 0 || firstAllocatedBlock != nullptr || count == 0); + if ((startTailIndex & static_cast(BLOCK_SIZE - 1)) == 0 && firstAllocatedBlock != nullptr) { + this->tailBlock = firstAllocatedBlock; + } + while (true) { + auto stopIndex = (currentTailIndex & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + if (details::circular_less_than(newTailIndex, stopIndex)) { + stopIndex = newTailIndex; + } + if (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (nullptr) T(details::deref_noexcept(itemFirst)))) { + while (currentTailIndex != stopIndex) { + new ((*this->tailBlock)[currentTailIndex++]) T(*itemFirst++); + } + } + else { + MOODYCAMEL_TRY { + while (currentTailIndex != stopIndex) { + new ((*this->tailBlock)[currentTailIndex]) T(details::nomove_if<(bool)!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (nullptr) T(details::deref_noexcept(itemFirst)))>::eval(*itemFirst)); + ++currentTailIndex; + ++itemFirst; + } + } + MOODYCAMEL_CATCH (...) { + auto constructedStopIndex = currentTailIndex; + auto lastBlockEnqueued = this->tailBlock; + + if (!details::is_trivially_destructible::value) { + auto block = startBlock; + if ((startTailIndex & static_cast(BLOCK_SIZE - 1)) == 0) { + block = firstAllocatedBlock; + } + currentTailIndex = startTailIndex; + while (true) { + stopIndex = (currentTailIndex & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + if (details::circular_less_than(constructedStopIndex, stopIndex)) { + stopIndex = constructedStopIndex; + } + while (currentTailIndex != stopIndex) { + (*block)[currentTailIndex++]->~T(); + } + if (block == lastBlockEnqueued) { + break; + } + block = block->next; + } + } + + currentTailIndex = (startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1); + for (auto block = firstAllocatedBlock; block != nullptr; block = block->next) { + currentTailIndex += static_cast(BLOCK_SIZE); + auto idxEntry = get_block_index_entry_for_index(currentTailIndex); + idxEntry->value.store(nullptr, std::memory_order_relaxed); + rewind_block_index_tail(); + } + this->parent->add_blocks_to_free_list(firstAllocatedBlock); + this->tailBlock = startBlock; + MOODYCAMEL_RETHROW; + } + } + + if (this->tailBlock == endBlock) { + assert(currentTailIndex == newTailIndex); + break; + } + this->tailBlock = this->tailBlock->next; + } + this->tailIndex.store(newTailIndex, std::memory_order_release); + return true; + } + + template + size_t dequeue_bulk(It& itemFirst, size_t max) + { + auto tail = this->tailIndex.load(std::memory_order_relaxed); + auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed); + auto desiredCount = static_cast(tail - (this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit)); + if (details::circular_less_than(0, desiredCount)) { + desiredCount = desiredCount < max ? desiredCount : max; + std::atomic_thread_fence(std::memory_order_acquire); + + auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(desiredCount, std::memory_order_relaxed); + assert(overcommit <= myDequeueCount); + + tail = this->tailIndex.load(std::memory_order_acquire); + auto actualCount = static_cast(tail - (myDequeueCount - overcommit)); + if (details::circular_less_than(0, actualCount)) { + actualCount = desiredCount < actualCount ? 
desiredCount : actualCount; + if (actualCount < desiredCount) { + this->dequeueOvercommit.fetch_add(desiredCount - actualCount, std::memory_order_release); + } + + // Get the first index. Note that since there's guaranteed to be at least actualCount elements, this + // will never exceed tail. + auto firstIndex = this->headIndex.fetch_add(actualCount, std::memory_order_acq_rel); + + // Iterate the blocks and dequeue + auto index = firstIndex; + BlockIndexHeader* localBlockIndex; + auto indexIndex = get_block_index_index_for_index(index, localBlockIndex); + do { + auto blockStartIndex = index; + auto endIndex = (index & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + endIndex = details::circular_less_than(firstIndex + static_cast(actualCount), endIndex) ? firstIndex + static_cast(actualCount) : endIndex; + + auto entry = localBlockIndex->index[indexIndex]; + auto block = entry->value.load(std::memory_order_relaxed); + if (MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, details::deref_noexcept(itemFirst) = std::move((*(*block)[index])))) { + while (index != endIndex) { + auto& el = *((*block)[index]); + *itemFirst++ = std::move(el); + el.~T(); + ++index; + } + } + else { + MOODYCAMEL_TRY { + while (index != endIndex) { + auto& el = *((*block)[index]); + *itemFirst = std::move(el); + ++itemFirst; + el.~T(); + ++index; + } + } + MOODYCAMEL_CATCH (...) { + do { + entry = localBlockIndex->index[indexIndex]; + block = entry->value.load(std::memory_order_relaxed); + while (index != endIndex) { + (*block)[index++]->~T(); + } + + if (block->ConcurrentQueue::Block::template set_many_empty(blockStartIndex, static_cast(endIndex - blockStartIndex))) { +#if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + debug::DebugLock lock(mutex); +#endif + entry->value.store(nullptr, std::memory_order_relaxed); + this->parent->add_block_to_free_list(block); + } + indexIndex = (indexIndex + 1) & (localBlockIndex->capacity - 1); + + blockStartIndex = index; + endIndex = (index & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + endIndex = details::circular_less_than(firstIndex + static_cast(actualCount), endIndex) ? firstIndex + static_cast(actualCount) : endIndex; + } while (index != firstIndex + actualCount); + + MOODYCAMEL_RETHROW; + } + } + if (block->ConcurrentQueue::Block::template set_many_empty(blockStartIndex, static_cast(endIndex - blockStartIndex))) { + { +#if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + debug::DebugLock lock(mutex); +#endif + // Note that the set_many_empty above did a release, meaning that anybody who acquires the block + // we're about to free can use it safely since our writes (and reads!) will have happened-before then. 
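+ // The nullptr store below can therefore be relaxed: it only needs to become visible
+ // together with the block itself, and add_block_to_free_list (see its comment below)
+ // performs the release that publishes both.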
+ entry->value.store(nullptr, std::memory_order_relaxed); + } + this->parent->add_block_to_free_list(block); // releases the above store + } + indexIndex = (indexIndex + 1) & (localBlockIndex->capacity - 1); + } while (index != firstIndex + actualCount); + + return actualCount; + } + else { + this->dequeueOvercommit.fetch_add(desiredCount, std::memory_order_release); + } + } + + return 0; + } + + private: + // The block size must be > 1, so any number with the low bit set is an invalid block base index + static const index_t INVALID_BLOCK_BASE = 1; + + struct BlockIndexEntry + { + std::atomic key; + std::atomic value; + }; + + struct BlockIndexHeader + { + size_t capacity; + std::atomic tail; + BlockIndexEntry* entries; + BlockIndexEntry** index; + BlockIndexHeader* prev; + }; + + template + inline bool insert_block_index_entry(BlockIndexEntry*& idxEntry, index_t blockStartIndex) + { + auto localBlockIndex = blockIndex.load(std::memory_order_relaxed); // We're the only writer thread, relaxed is OK + if (localBlockIndex == nullptr) { + return false; // this can happen if new_block_index failed in the constructor + } + auto newTail = (localBlockIndex->tail.load(std::memory_order_relaxed) + 1) & (localBlockIndex->capacity - 1); + idxEntry = localBlockIndex->index[newTail]; + if (idxEntry->key.load(std::memory_order_relaxed) == INVALID_BLOCK_BASE || + idxEntry->value.load(std::memory_order_relaxed) == nullptr) { + + idxEntry->key.store(blockStartIndex, std::memory_order_relaxed); + localBlockIndex->tail.store(newTail, std::memory_order_release); + return true; + } + + // No room in the old block index, try to allocate another one! + if (allocMode == CannotAlloc || !new_block_index()) { + return false; + } + localBlockIndex = blockIndex.load(std::memory_order_relaxed); + newTail = (localBlockIndex->tail.load(std::memory_order_relaxed) + 1) & (localBlockIndex->capacity - 1); + idxEntry = localBlockIndex->index[newTail]; + assert(idxEntry->key.load(std::memory_order_relaxed) == INVALID_BLOCK_BASE); + idxEntry->key.store(blockStartIndex, std::memory_order_relaxed); + localBlockIndex->tail.store(newTail, std::memory_order_release); + return true; + } + + inline void rewind_block_index_tail() + { + auto localBlockIndex = blockIndex.load(std::memory_order_relaxed); + localBlockIndex->tail.store((localBlockIndex->tail.load(std::memory_order_relaxed) - 1) & (localBlockIndex->capacity - 1), std::memory_order_relaxed); + } + + inline BlockIndexEntry* get_block_index_entry_for_index(index_t index) const + { + BlockIndexHeader* localBlockIndex; + auto idx = get_block_index_index_for_index(index, localBlockIndex); + return localBlockIndex->index[idx]; + } + + inline size_t get_block_index_index_for_index(index_t index, BlockIndexHeader*& localBlockIndex) const + { +#if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + debug::DebugLock lock(mutex); +#endif + index &= ~static_cast(BLOCK_SIZE - 1); + localBlockIndex = blockIndex.load(std::memory_order_acquire); + auto tail = localBlockIndex->tail.load(std::memory_order_acquire); + auto tailBase = localBlockIndex->index[tail]->key.load(std::memory_order_relaxed); + assert(tailBase != INVALID_BLOCK_BASE); + // Note: Must use division instead of shift because the index may wrap around, causing a negative + // offset, whose negativity we want to preserve + auto offset = static_cast(static_cast::type>(index - tailBase) / BLOCK_SIZE); + size_t idx = (tail + offset) & (localBlockIndex->capacity - 1); + assert(localBlockIndex->index[idx]->key.load(std::memory_order_relaxed) 
== index && localBlockIndex->index[idx]->value.load(std::memory_order_relaxed) != nullptr); + return idx; + } + + bool new_block_index() + { + auto prev = blockIndex.load(std::memory_order_relaxed); + size_t prevCapacity = prev == nullptr ? 0 : prev->capacity; + auto entryCount = prev == nullptr ? nextBlockIndexCapacity : prevCapacity; + auto raw = static_cast((Traits::malloc)( + sizeof(BlockIndexHeader) + + std::alignment_of::value - 1 + sizeof(BlockIndexEntry) * entryCount + + std::alignment_of::value - 1 + sizeof(BlockIndexEntry*) * nextBlockIndexCapacity)); + if (raw == nullptr) { + return false; + } + + auto header = new (raw) BlockIndexHeader; + auto entries = reinterpret_cast(details::align_for(raw + sizeof(BlockIndexHeader))); + auto index = reinterpret_cast(details::align_for(reinterpret_cast(entries) + sizeof(BlockIndexEntry) * entryCount)); + if (prev != nullptr) { + auto prevTail = prev->tail.load(std::memory_order_relaxed); + auto prevPos = prevTail; + size_t i = 0; + do { + prevPos = (prevPos + 1) & (prev->capacity - 1); + index[i++] = prev->index[prevPos]; + } while (prevPos != prevTail); + assert(i == prevCapacity); + } + for (size_t i = 0; i != entryCount; ++i) { + new (entries + i) BlockIndexEntry; + entries[i].key.store(INVALID_BLOCK_BASE, std::memory_order_relaxed); + index[prevCapacity + i] = entries + i; + } + header->prev = prev; + header->entries = entries; + header->index = index; + header->capacity = nextBlockIndexCapacity; + header->tail.store((prevCapacity - 1) & (nextBlockIndexCapacity - 1), std::memory_order_relaxed); + + blockIndex.store(header, std::memory_order_release); + + nextBlockIndexCapacity <<= 1; + + return true; + } + + private: + size_t nextBlockIndexCapacity; + std::atomic blockIndex; + +#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED + public: + details::ThreadExitListener threadExitListener; + private: +#endif + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + public: + ImplicitProducer* nextImplicitProducer; + private: +#endif + +#if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + mutable debug::DebugMutex mutex; +#endif +#if MCDBGQ_TRACKMEM + friend struct MemStats; +#endif + }; + + + ////////////////////////////////// + // Block pool manipulation + ////////////////////////////////// + + void populate_initial_block_list(size_t blockCount) + { + initialBlockPoolSize = blockCount; + if (initialBlockPoolSize == 0) { + initialBlockPool = nullptr; + return; + } + + initialBlockPool = create_array(blockCount); + if (initialBlockPool == nullptr) { + initialBlockPoolSize = 0; + } + for (size_t i = 0; i < initialBlockPoolSize; ++i) { + initialBlockPool[i].dynamicallyAllocated = false; + } + } + + inline Block* try_get_block_from_initial_pool() + { + if (initialBlockPoolIndex.load(std::memory_order_relaxed) >= initialBlockPoolSize) { + return nullptr; + } + + auto index = initialBlockPoolIndex.fetch_add(1, std::memory_order_relaxed); + + return index < initialBlockPoolSize ? 
(initialBlockPool + index) : nullptr; + } + + inline void add_block_to_free_list(Block* block) + { +#if MCDBGQ_TRACKMEM + block->owner = nullptr; +#endif + freeList.add(block); + } + + inline void add_blocks_to_free_list(Block* block) + { + while (block != nullptr) { + auto next = block->next; + add_block_to_free_list(block); + block = next; + } + } + + inline Block* try_get_block_from_free_list() + { + return freeList.try_get(); + } + + // Gets a free block from one of the memory pools, or allocates a new one (if applicable) + template + Block* requisition_block() + { + auto block = try_get_block_from_initial_pool(); + if (block != nullptr) { + return block; + } + + block = try_get_block_from_free_list(); + if (block != nullptr) { + return block; + } + + if (canAlloc == CanAlloc) { + return create(); + } + + return nullptr; + } + + +#if MCDBGQ_TRACKMEM + public: + struct MemStats { + size_t allocatedBlocks; + size_t usedBlocks; + size_t freeBlocks; + size_t ownedBlocksExplicit; + size_t ownedBlocksImplicit; + size_t implicitProducers; + size_t explicitProducers; + size_t elementsEnqueued; + size_t blockClassBytes; + size_t queueClassBytes; + size_t implicitBlockIndexBytes; + size_t explicitBlockIndexBytes; + + friend class ConcurrentQueue; + + private: + static MemStats getFor(ConcurrentQueue* q) + { + MemStats stats = { 0 }; + + stats.elementsEnqueued = q->size_approx(); + + auto block = q->freeList.head_unsafe(); + while (block != nullptr) { + ++stats.allocatedBlocks; + ++stats.freeBlocks; + block = block->freeListNext.load(std::memory_order_relaxed); + } + + for (auto ptr = q->producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) { + bool implicit = dynamic_cast(ptr) != nullptr; + stats.implicitProducers += implicit ? 1 : 0; + stats.explicitProducers += implicit ? 
0 : 1; + + if (implicit) { + auto prod = static_cast(ptr); + stats.queueClassBytes += sizeof(ImplicitProducer); + auto head = prod->headIndex.load(std::memory_order_relaxed); + auto tail = prod->tailIndex.load(std::memory_order_relaxed); + auto hash = prod->blockIndex.load(std::memory_order_relaxed); + if (hash != nullptr) { + for (size_t i = 0; i != hash->capacity; ++i) { + if (hash->index[i]->key.load(std::memory_order_relaxed) != ImplicitProducer::INVALID_BLOCK_BASE && hash->index[i]->value.load(std::memory_order_relaxed) != nullptr) { + ++stats.allocatedBlocks; + ++stats.ownedBlocksImplicit; + } + } + stats.implicitBlockIndexBytes += hash->capacity * sizeof(typename ImplicitProducer::BlockIndexEntry); + for (; hash != nullptr; hash = hash->prev) { + stats.implicitBlockIndexBytes += sizeof(typename ImplicitProducer::BlockIndexHeader) + hash->capacity * sizeof(typename ImplicitProducer::BlockIndexEntry*); + } + } + for (; details::circular_less_than(head, tail); head += BLOCK_SIZE) { + //auto block = prod->get_block_index_entry_for_index(head); + ++stats.usedBlocks; + } + } + else { + auto prod = static_cast(ptr); + stats.queueClassBytes += sizeof(ExplicitProducer); + auto tailBlock = prod->tailBlock; + bool wasNonEmpty = false; + if (tailBlock != nullptr) { + auto block = tailBlock; + do { + ++stats.allocatedBlocks; + if (!block->ConcurrentQueue::Block::template is_empty() || wasNonEmpty) { + ++stats.usedBlocks; + wasNonEmpty = wasNonEmpty || block != tailBlock; + } + ++stats.ownedBlocksExplicit; + block = block->next; + } while (block != tailBlock); + } + auto index = prod->blockIndex.load(std::memory_order_relaxed); + while (index != nullptr) { + stats.explicitBlockIndexBytes += sizeof(typename ExplicitProducer::BlockIndexHeader) + index->size * sizeof(typename ExplicitProducer::BlockIndexEntry); + index = static_cast(index->prev); + } + } + } + + auto freeOnInitialPool = q->initialBlockPoolIndex.load(std::memory_order_relaxed) >= q->initialBlockPoolSize ? 0 : q->initialBlockPoolSize - q->initialBlockPoolIndex.load(std::memory_order_relaxed); + stats.allocatedBlocks += freeOnInitialPool; + stats.freeBlocks += freeOnInitialPool; + + stats.blockClassBytes = sizeof(Block) * stats.allocatedBlocks; + stats.queueClassBytes += sizeof(ConcurrentQueue); + + return stats; + } + }; + + // For debugging only. Not thread-safe. + MemStats getMemStats() + { + return MemStats::getFor(this); + } + private: + friend struct MemStats; +#endif + + + ////////////////////////////////// + // Producer list manipulation + ////////////////////////////////// + + ProducerBase* recycle_or_create_producer(bool isExplicit) + { + bool recycled; + return recycle_or_create_producer(isExplicit, recycled); + } + + ProducerBase* recycle_or_create_producer(bool isExplicit, bool& recycled) + { +#if MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH + debug::DebugLock lock(implicitProdMutex); +#endif + // Try to re-use one first + for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) { + if (ptr->inactive.load(std::memory_order_relaxed) && ptr->isExplicit == isExplicit) { + bool expected = true; + if (ptr->inactive.compare_exchange_strong(expected, /* desired */ false, std::memory_order_acquire, std::memory_order_relaxed)) { + // We caught one! It's been marked as activated, the caller can have it + recycled = true; + return ptr; + } + } + } + + recycled = false; + return add_producer(isExplicit ? 
static_cast(create(this)) : create(this)); + } + + ProducerBase* add_producer(ProducerBase* producer) + { + // Handle failed memory allocation + if (producer == nullptr) { + return nullptr; + } + + producerCount.fetch_add(1, std::memory_order_relaxed); + + // Add it to the lock-free list + auto prevTail = producerListTail.load(std::memory_order_relaxed); + do { + producer->next = prevTail; + } while (!producerListTail.compare_exchange_weak(prevTail, producer, std::memory_order_release, std::memory_order_relaxed)); + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + if (producer->isExplicit) { + auto prevTailExplicit = explicitProducers.load(std::memory_order_relaxed); + do { + static_cast(producer)->nextExplicitProducer = prevTailExplicit; + } while (!explicitProducers.compare_exchange_weak(prevTailExplicit, static_cast(producer), std::memory_order_release, std::memory_order_relaxed)); + } + else { + auto prevTailImplicit = implicitProducers.load(std::memory_order_relaxed); + do { + static_cast(producer)->nextImplicitProducer = prevTailImplicit; + } while (!implicitProducers.compare_exchange_weak(prevTailImplicit, static_cast(producer), std::memory_order_release, std::memory_order_relaxed)); + } +#endif + + return producer; + } + + void reown_producers() + { + // After another instance is moved-into/swapped-with this one, all the + // producers we stole still think their parents are the other queue. + // So fix them up! + for (auto ptr = producerListTail.load(std::memory_order_relaxed); ptr != nullptr; ptr = ptr->next_prod()) { + ptr->parent = this; + } + } + + + ////////////////////////////////// + // Implicit producer hash + ////////////////////////////////// + + struct ImplicitProducerKVP + { + std::atomic key; + ImplicitProducer* value; // No need for atomicity since it's only read by the thread that sets it in the first place + + ImplicitProducerKVP() : value(nullptr) { } + + ImplicitProducerKVP(ImplicitProducerKVP&& other) MOODYCAMEL_NOEXCEPT + { + key.store(other.key.load(std::memory_order_relaxed), std::memory_order_relaxed); + value = other.value; + } + + inline ImplicitProducerKVP& operator=(ImplicitProducerKVP&& other) MOODYCAMEL_NOEXCEPT + { + swap(other); + return *this; + } + + inline void swap(ImplicitProducerKVP& other) MOODYCAMEL_NOEXCEPT + { + if (this != &other) { + details::swap_relaxed(key, other.key); + std::swap(value, other.value); + } + } + }; + + template + friend void moodycamel::swap(typename ConcurrentQueue::ImplicitProducerKVP&, typename ConcurrentQueue::ImplicitProducerKVP&) MOODYCAMEL_NOEXCEPT; + + struct ImplicitProducerHash + { + size_t capacity; + ImplicitProducerKVP* entries; + ImplicitProducerHash* prev; + }; + + inline void populate_initial_implicit_producer_hash() + { + if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return; + + implicitProducerHashCount.store(0, std::memory_order_relaxed); + auto hash = &initialImplicitProducerHash; + hash->capacity = INITIAL_IMPLICIT_PRODUCER_HASH_SIZE; + hash->entries = &initialImplicitProducerHashEntries[0]; + for (size_t i = 0; i != INITIAL_IMPLICIT_PRODUCER_HASH_SIZE; ++i) { + initialImplicitProducerHashEntries[i].key.store(details::invalid_thread_id, std::memory_order_relaxed); + } + hash->prev = nullptr; + implicitProducerHash.store(hash, std::memory_order_relaxed); + } + + void swap_implicit_producer_hashes(ConcurrentQueue& other) + { + if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return; + + // Swap (assumes our implicit producer hash is initialized) + 
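+ // Each queue embeds its initial hash table (and its entry array) by value, so after
+ // swapping the entry arrays and the atomic head pointers we have to re-point any
+ // entries/prev links that still refer to the other instance's embedded table back to
+ // our own embedded table, and vice versa -- that is what the fix-up branches below do.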
initialImplicitProducerHashEntries.swap(other.initialImplicitProducerHashEntries); + initialImplicitProducerHash.entries = &initialImplicitProducerHashEntries[0]; + other.initialImplicitProducerHash.entries = &other.initialImplicitProducerHashEntries[0]; + + details::swap_relaxed(implicitProducerHashCount, other.implicitProducerHashCount); + + details::swap_relaxed(implicitProducerHash, other.implicitProducerHash); + if (implicitProducerHash.load(std::memory_order_relaxed) == &other.initialImplicitProducerHash) { + implicitProducerHash.store(&initialImplicitProducerHash, std::memory_order_relaxed); + } + else { + ImplicitProducerHash* hash; + for (hash = implicitProducerHash.load(std::memory_order_relaxed); hash->prev != &other.initialImplicitProducerHash; hash = hash->prev) { + continue; + } + hash->prev = &initialImplicitProducerHash; + } + if (other.implicitProducerHash.load(std::memory_order_relaxed) == &initialImplicitProducerHash) { + other.implicitProducerHash.store(&other.initialImplicitProducerHash, std::memory_order_relaxed); + } + else { + ImplicitProducerHash* hash; + for (hash = other.implicitProducerHash.load(std::memory_order_relaxed); hash->prev != &initialImplicitProducerHash; hash = hash->prev) { + continue; + } + hash->prev = &other.initialImplicitProducerHash; + } + } + + // Only fails (returns nullptr) if memory allocation fails + ImplicitProducer* get_or_add_implicit_producer() + { + // Note that since the data is essentially thread-local (key is thread ID), + // there's a reduced need for fences (memory ordering is already consistent + // for any individual thread), except for the current table itself. + + // Start by looking for the thread ID in the current and all previous hash tables. + // If it's not found, it must not be in there yet, since this same thread would + // have added it previously to one of the tables that we traversed. + + // Code and algorithm adapted from http://preshing.com/20130605/the-worlds-simplest-lock-free-hash-table + +#if MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH + debug::DebugLock lock(implicitProdMutex); +#endif + + auto id = details::thread_id(); + auto hashedId = details::hash_thread_id(id); + + auto mainHash = implicitProducerHash.load(std::memory_order_acquire); + for (auto hash = mainHash; hash != nullptr; hash = hash->prev) { + // Look for the id in this hash + auto index = hashedId; + while (true) { // Not an infinite loop because at least one slot is free in the hash table + index &= hash->capacity - 1; + + auto probedKey = hash->entries[index].key.load(std::memory_order_relaxed); + if (probedKey == id) { + // Found it! If we had to search several hashes deep, though, we should lazily add it + // to the current main hash table to avoid the extended search next time. + // Note there's guaranteed to be room in the current hash table since every subsequent + // table implicitly reserves space for all previous tables (there's only one + // implicitProducerHashCount). 
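+ // The lazy re-insertion below uses the same linear probing as the lookup: starting from
+ // the hashed thread id, claim the first slot whose key is still marked empty (or
+ // reusable, when thread-local storage support lets slots from exited threads be recycled).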
+ auto value = hash->entries[index].value; + if (hash != mainHash) { + index = hashedId; + while (true) { + index &= mainHash->capacity - 1; + probedKey = mainHash->entries[index].key.load(std::memory_order_relaxed); + auto empty = details::invalid_thread_id; +#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED + auto reusable = details::invalid_thread_id2; + if ((probedKey == empty && mainHash->entries[index].key.compare_exchange_strong(empty, id, std::memory_order_relaxed, std::memory_order_relaxed)) || + (probedKey == reusable && mainHash->entries[index].key.compare_exchange_strong(reusable, id, std::memory_order_acquire, std::memory_order_acquire))) { +#else + if ((probedKey == empty && mainHash->entries[index].key.compare_exchange_strong(empty, id, std::memory_order_relaxed, std::memory_order_relaxed))) { +#endif + mainHash->entries[index].value = value; + break; + } + ++index; + } + } + + return value; + } + if (probedKey == details::invalid_thread_id) { + break; // Not in this hash table + } + ++index; + } + } + + // Insert! + auto newCount = 1 + implicitProducerHashCount.fetch_add(1, std::memory_order_relaxed); + while (true) { + if (newCount >= (mainHash->capacity >> 1) && !implicitProducerHashResizeInProgress.test_and_set(std::memory_order_acquire)) { + // We've acquired the resize lock, try to allocate a bigger hash table. + // Note the acquire fence synchronizes with the release fence at the end of this block, and hence when + // we reload implicitProducerHash it must be the most recent version (it only gets changed within this + // locked block). + mainHash = implicitProducerHash.load(std::memory_order_acquire); + if (newCount >= (mainHash->capacity >> 1)) { + auto newCapacity = mainHash->capacity << 1; + while (newCount >= (newCapacity >> 1)) { + newCapacity <<= 1; + } + auto raw = static_cast((Traits::malloc)(sizeof(ImplicitProducerHash) + std::alignment_of::value - 1 + sizeof(ImplicitProducerKVP) * newCapacity)); + if (raw == nullptr) { + // Allocation failed + implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed); + implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed); + return nullptr; + } + + auto newHash = new (raw) ImplicitProducerHash; + newHash->capacity = newCapacity; + newHash->entries = reinterpret_cast(details::align_for(raw + sizeof(ImplicitProducerHash))); + for (size_t i = 0; i != newCapacity; ++i) { + new (newHash->entries + i) ImplicitProducerKVP; + newHash->entries[i].key.store(details::invalid_thread_id, std::memory_order_relaxed); + } + newHash->prev = mainHash; + implicitProducerHash.store(newHash, std::memory_order_release); + implicitProducerHashResizeInProgress.clear(std::memory_order_release); + mainHash = newHash; + } + else { + implicitProducerHashResizeInProgress.clear(std::memory_order_release); + } + } + + // If it's < three-quarters full, add to the old one anyway so that we don't have to wait for the next table + // to finish being allocated by another thread (and if we just finished allocating above, the condition will + // always be true) + if (newCount < (mainHash->capacity >> 1) + (mainHash->capacity >> 2)) { + bool recycled; + auto producer = static_cast(recycle_or_create_producer(false, recycled)); + if (producer == nullptr) { + implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed); + return nullptr; + } + if (recycled) { + implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed); + } + +#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED + producer->threadExitListener.callback = 
&ConcurrentQueue::implicit_producer_thread_exited_callback; + producer->threadExitListener.userData = producer; + details::ThreadExitNotifier::subscribe(&producer->threadExitListener); +#endif + + auto index = hashedId; + while (true) { + index &= mainHash->capacity - 1; + auto probedKey = mainHash->entries[index].key.load(std::memory_order_relaxed); + + auto empty = details::invalid_thread_id; +#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED + auto reusable = details::invalid_thread_id2; + if ((probedKey == empty && mainHash->entries[index].key.compare_exchange_strong(empty, id, std::memory_order_relaxed, std::memory_order_relaxed)) || + (probedKey == reusable && mainHash->entries[index].key.compare_exchange_strong(reusable, id, std::memory_order_acquire, std::memory_order_acquire))) { +#else + if ((probedKey == empty && mainHash->entries[index].key.compare_exchange_strong(empty, id, std::memory_order_relaxed, std::memory_order_relaxed))) { +#endif + mainHash->entries[index].value = producer; + break; + } + ++index; + } + return producer; + } + + // Hmm, the old hash is quite full and somebody else is busy allocating a new one. + // We need to wait for the allocating thread to finish (if it succeeds, we add, if not, + // we try to allocate ourselves). + mainHash = implicitProducerHash.load(std::memory_order_acquire); + } + } + +#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED + void implicit_producer_thread_exited(ImplicitProducer* producer) + { + // Remove from thread exit listeners + details::ThreadExitNotifier::unsubscribe(&producer->threadExitListener); + + // Remove from hash +#if MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH + debug::DebugLock lock(implicitProdMutex); +#endif + auto hash = implicitProducerHash.load(std::memory_order_acquire); + assert(hash != nullptr); // The thread exit listener is only registered if we were added to a hash in the first place + auto id = details::thread_id(); + auto hashedId = details::hash_thread_id(id); + details::thread_id_t probedKey; + + // We need to traverse all the hashes just in case other threads aren't on the current one yet and are + // trying to add an entry thinking there's a free slot (because they reused a producer) + for (; hash != nullptr; hash = hash->prev) { + auto index = hashedId; + do { + index &= hash->capacity - 1; + probedKey = hash->entries[index].key.load(std::memory_order_relaxed); + if (probedKey == id) { + hash->entries[index].key.store(details::invalid_thread_id2, std::memory_order_release); + break; + } + ++index; + } while (probedKey != details::invalid_thread_id); // Can happen if the hash has changed but we weren't put back in it yet, or if we weren't added to this hash in the first place + } + + // Mark the queue as being recyclable + producer->inactive.store(true, std::memory_order_release); + } + + static void implicit_producer_thread_exited_callback(void* userData) + { + auto producer = static_cast(userData); + auto queue = producer->parent; + queue->implicit_producer_thread_exited(producer); + } +#endif + + ////////////////////////////////// + // Utility functions + ////////////////////////////////// + + template + static inline U* create_array(size_t count) + { + assert(count > 0); + auto p = static_cast((Traits::malloc)(sizeof(U) * count)); + if (p == nullptr) { + return nullptr; + } + + for (size_t i = 0; i != count; ++i) { + new (p + i) U(); + } + return p; + } + + template + static inline void destroy_array(U* p, size_t count) + { + if (p != nullptr) { + assert(count > 0); + for (size_t i = count; i != 0; ) { + (p + 
--i)->~U(); + } + (Traits::free)(p); + } + } + + template + static inline U* create() + { + auto p = (Traits::malloc)(sizeof(U)); + return p != nullptr ? new (p) U : nullptr; + } + + template + static inline U* create(A1&& a1) + { + auto p = (Traits::malloc)(sizeof(U)); + return p != nullptr ? new (p) U(std::forward(a1)) : nullptr; + } + + template + static inline void destroy(U* p) + { + if (p != nullptr) { + p->~U(); + } + (Traits::free)(p); + } + +private: + std::atomic producerListTail; + std::atomic producerCount; + + std::atomic initialBlockPoolIndex; + Block* initialBlockPool; + size_t initialBlockPoolSize; + +#if !MCDBGQ_USEDEBUGFREELIST + FreeList freeList; +#else + debug::DebugFreeList freeList; +#endif + + std::atomic implicitProducerHash; + std::atomic implicitProducerHashCount; // Number of slots logically used + ImplicitProducerHash initialImplicitProducerHash; + std::array initialImplicitProducerHashEntries; + std::atomic_flag implicitProducerHashResizeInProgress; + + std::atomic nextExplicitConsumerId; + std::atomic globalExplicitConsumerOffset; + +#if MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH + debug::DebugMutex implicitProdMutex; +#endif + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + std::atomic explicitProducers; + std::atomic implicitProducers; +#endif +}; + + +template +ProducerToken::ProducerToken(ConcurrentQueue& queue) + : producer(queue.recycle_or_create_producer(true)) +{ + if (producer != nullptr) { + producer->token = this; + } +} + +template +ProducerToken::ProducerToken(BlockingConcurrentQueue& queue) + : producer(reinterpret_cast*>(&queue)->recycle_or_create_producer(true)) +{ + if (producer != nullptr) { + producer->token = this; + } +} + +template +ConsumerToken::ConsumerToken(ConcurrentQueue& queue) + : itemsConsumedFromCurrent(0), currentProducer(nullptr), desiredProducer(nullptr) +{ + initialOffset = queue.nextExplicitConsumerId.fetch_add(1, std::memory_order_release); + lastKnownGlobalOffset = -1; +} + +template +ConsumerToken::ConsumerToken(BlockingConcurrentQueue& queue) + : itemsConsumedFromCurrent(0), currentProducer(nullptr), desiredProducer(nullptr) +{ + initialOffset = reinterpret_cast*>(&queue)->nextExplicitConsumerId.fetch_add(1, std::memory_order_release); + lastKnownGlobalOffset = -1; +} + +template +inline void swap(ConcurrentQueue& a, ConcurrentQueue& b) MOODYCAMEL_NOEXCEPT +{ + a.swap(b); +} + +inline void swap(ProducerToken& a, ProducerToken& b) MOODYCAMEL_NOEXCEPT +{ + a.swap(b); +} + +inline void swap(ConsumerToken& a, ConsumerToken& b) MOODYCAMEL_NOEXCEPT +{ + a.swap(b); +} + +template +inline void swap(typename ConcurrentQueue::ImplicitProducerKVP& a, typename ConcurrentQueue::ImplicitProducerKVP& b) MOODYCAMEL_NOEXCEPT +{ + a.swap(b); +} + +} + +#if defined(__GNUC__) +#pragma GCC diagnostic pop +#endif diff --git a/c_src/enlfq/enlfq.cc b/c_src/enlfq/enlfq.cc new file mode 100644 index 0000000..4ccc7a9 --- /dev/null +++ b/c_src/enlfq/enlfq.cc @@ -0,0 +1,84 @@ +#include "enlfq.h" +#include "enlfq_nif.h" + +#include "nif_utils.h" + +#include "concurrentqueue.h" + + +struct q_item { + ErlNifEnv *env; + ERL_NIF_TERM term; +}; + +struct squeue { + moodycamel::ConcurrentQueue *queue; +}; + + +void nif_enlfq_free(ErlNifEnv *, void *obj) { + squeue *inst = static_cast(obj); + + if (inst != nullptr) { + q_item item; + while (inst->queue->try_dequeue(item)) { + enif_free_env(item.env); + } + delete inst->queue; + } +} + +ERL_NIF_TERM nif_enlfq_new(ErlNifEnv *env, int, const ERL_NIF_TERM *) { + shared_data *data = static_cast(enif_priv_data(env)); + + + 
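+ // Allocate a NIF resource cell that wraps the lock-free queue; enif_release_resource
+ // below transfers ownership to the returned Erlang term, and nif_enlfq_free drains any
+ // remaining items (freeing their private envs) and deletes the queue once that term is
+ // garbage collected.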
squeue *qinst = static_cast(enif_alloc_resource(data->resQueueInstance, sizeof(squeue))); + qinst->queue = new moodycamel::ConcurrentQueue; + + if (qinst == NULL) + return make_error(env, "enif_alloc_resource failed"); + + ERL_NIF_TERM term = enif_make_resource(env, qinst); + enif_release_resource(qinst); + return enif_make_tuple2(env, ATOMS.atomOk, term); +} + +ERL_NIF_TERM nif_enlfq_push(ErlNifEnv *env, int, const ERL_NIF_TERM argv[]) { + shared_data *data = static_cast(enif_priv_data(env)); + + squeue *inst; + + if (!enif_get_resource(env, argv[0], data->resQueueInstance, (void **) &inst)) { + return enif_make_badarg(env); + } + + q_item item; + + item.env = enif_alloc_env(); + item.term = enif_make_copy(item.env, argv[1]); + + inst->queue->enqueue(item); + + return ATOMS.atomTrue; +} + +ERL_NIF_TERM nif_enlfq_pop(ErlNifEnv *env, int, const ERL_NIF_TERM argv[]) { + shared_data *data = static_cast(enif_priv_data(env)); + squeue *inst = NULL; + + if (!enif_get_resource(env, argv[0], data->resQueueInstance, (void **) &inst)) { + return enif_make_badarg(env); + } + + ERL_NIF_TERM term; + q_item item; + + if (inst->queue->try_dequeue(item)) { + term = enif_make_copy(env, item.term); + enif_free_env(item.env); + return enif_make_tuple2(env, ATOMS.atomOk, term); + } else { + return ATOMS.atomEmpty; + } + +} diff --git a/c_src/enlfq/enlfq.h b/c_src/enlfq/enlfq.h new file mode 100644 index 0000000..08b2ca4 --- /dev/null +++ b/c_src/enlfq/enlfq.h @@ -0,0 +1,10 @@ +#pragma once + +#include "erl_nif.h" + +extern "C" { +void nif_enlfq_free(ErlNifEnv *env, void *obj); +ERL_NIF_TERM nif_enlfq_new(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); +ERL_NIF_TERM nif_enlfq_push(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); +ERL_NIF_TERM nif_enlfq_pop(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); +} \ No newline at end of file diff --git a/c_src/enlfq/enlfq_nif.cc b/c_src/enlfq/enlfq_nif.cc new file mode 100644 index 0000000..cf5c4f2 --- /dev/null +++ b/c_src/enlfq/enlfq_nif.cc @@ -0,0 +1,57 @@ +#include +#include "enlfq_nif.h" +#include "enlfq.h" +#include "nif_utils.h" + +const char kAtomOk[] = "ok"; +const char kAtomError[] = "error"; +const char kAtomTrue[] = "true"; +//const char kAtomFalse[] = "false"; +//const char kAtomUndefined[] = "undefined"; +const char kAtomEmpty[] = "empty"; + +atoms ATOMS; + +void open_resources(ErlNifEnv *env, shared_data *data) { + ErlNifResourceFlags flags = static_cast(ERL_NIF_RT_CREATE | ERL_NIF_RT_TAKEOVER); + data->resQueueInstance = enif_open_resource_type(env, NULL, "enlfq_instance", nif_enlfq_free, flags, NULL); +} + +int on_nif_load(ErlNifEnv *env, void **priv_data, ERL_NIF_TERM) { + + ATOMS.atomOk = make_atom(env, kAtomOk); + ATOMS.atomError = make_atom(env, kAtomError); + ATOMS.atomTrue = make_atom(env, kAtomTrue); +// ATOMS.atomFalse = make_atom(env, kAtomFalse); +// ATOMS.atomUndefined = make_atom(env, kAtomUndefined); + ATOMS.atomEmpty = make_atom(env, kAtomEmpty); + + shared_data *data = static_cast(enif_alloc(sizeof(shared_data))); + open_resources(env, data); + + *priv_data = data; + return 0; +} + +void on_nif_unload(ErlNifEnv *, void *priv_data) { + shared_data *data = static_cast(priv_data); + enif_free(data); +} + +int on_nif_upgrade(ErlNifEnv *env, void **priv, void **, ERL_NIF_TERM) { + shared_data *data = static_cast(enif_alloc(sizeof(shared_data))); + open_resources(env, data); + + *priv = data; + return 0; +} + +static ErlNifFunc nif_funcs[] = + { + {"new", 0, nif_enlfq_new}, + {"push", 2, nif_enlfq_push}, + {"pop", 1, 
nif_enlfq_pop} + }; + +ERL_NIF_INIT(enlfq, nif_funcs, on_nif_load, NULL, on_nif_upgrade, on_nif_unload) + diff --git a/c_src/enlfq/enlfq_nif.h b/c_src/enlfq/enlfq_nif.h new file mode 100644 index 0000000..88f7da5 --- /dev/null +++ b/c_src/enlfq/enlfq_nif.h @@ -0,0 +1,19 @@ +#pragma once +#include "erl_nif.h" + +struct atoms +{ + ERL_NIF_TERM atomOk; + ERL_NIF_TERM atomError; + ERL_NIF_TERM atomTrue; +// ERL_NIF_TERM atomFalse; +// ERL_NIF_TERM atomUndefined; + ERL_NIF_TERM atomEmpty; +}; + +struct shared_data +{ + ErlNifResourceType* resQueueInstance; +}; + +extern atoms ATOMS; diff --git a/c_src/enlfq/nif_utils.cc b/c_src/enlfq/nif_utils.cc new file mode 100644 index 0000000..a32e17d --- /dev/null +++ b/c_src/enlfq/nif_utils.cc @@ -0,0 +1,27 @@ +#include "nif_utils.h" +#include "enlfq_nif.h" + +#include + +ERL_NIF_TERM make_atom(ErlNifEnv* env, const char* name) +{ + ERL_NIF_TERM ret; + + if(enif_make_existing_atom(env, name, &ret, ERL_NIF_LATIN1)) + return ret; + + return enif_make_atom(env, name); +} + +ERL_NIF_TERM make_binary(ErlNifEnv* env, const char* buff, size_t length) +{ + ERL_NIF_TERM term; + unsigned char *destination_buffer = enif_make_new_binary(env, length, &term); + memcpy(destination_buffer, buff, length); + return term; +} + +ERL_NIF_TERM make_error(ErlNifEnv* env, const char* error) +{ + return enif_make_tuple2(env, ATOMS.atomError, make_binary(env, error, strlen(error))); +} diff --git a/c_src/enlfq/nif_utils.h b/c_src/enlfq/nif_utils.h new file mode 100644 index 0000000..3b0a929 --- /dev/null +++ b/c_src/enlfq/nif_utils.h @@ -0,0 +1,6 @@ +#pragma once +#include "erl_nif.h" + +ERL_NIF_TERM make_atom(ErlNifEnv* env, const char* name); +ERL_NIF_TERM make_error(ErlNifEnv* env, const char* error); +ERL_NIF_TERM make_binary(ErlNifEnv* env, const char* buff, size_t length); \ No newline at end of file diff --git a/c_src/enlfq/rebar.config b/c_src/enlfq/rebar.config new file mode 100644 index 0000000..da73819 --- /dev/null +++ b/c_src/enlfq/rebar.config @@ -0,0 +1,7 @@ +{port_specs, [ + {"../../priv/enlfq.so", ["*.cc"]} +]}. + + + + diff --git a/c_src/etsq/etsq.cpp b/c_src/etsq/etsq.cpp new file mode 100644 index 0000000..15e3c67 --- /dev/null +++ b/c_src/etsq/etsq.cpp @@ -0,0 +1,172 @@ +#include "etsq.h" + +ErlNifRWLock *qinfo_map_rwlock; +QInfoMap qinfo_map; + +// Function finds the queue from map and returns QueueInfo +// Not thread safe. +QueueInfo* get_q_info(char* name) +{ + //std::cout<<"Info: "<< name<second; + } + return NULL; +} + +void new_q(char* name) +{ + //std::cout<<"Create: " << name<pmutex); + pqueue_info->queue.push(erl_term); + return true; + } + return false; +} + +// Returns new ErlTerm. 
Caller should delete it +ErlTerm* pop(char* name, bool read_only) +{ + QueueInfo *pqueue_info = NULL; + ReadLock read_lock(qinfo_map_rwlock); + if (NULL != (pqueue_info = get_q_info(name))) + { + Mutex mutex(pqueue_info->pmutex); + if (!pqueue_info->queue.empty()) + { + ErlTerm *erl_term = pqueue_info->queue.front(); + if(read_only) + { + return new ErlTerm(erl_term); + } + pqueue_info->queue.pop(); + return erl_term; + } + return new ErlTerm("empty"); + } + return NULL; +} + +static ERL_NIF_TERM new_queue(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + int size = 100; + char *name = new char(size); + enif_get_atom(env, argv[0], name, size, ERL_NIF_LATIN1); + { + QueueInfo *pqueue_info = NULL; + ReadLock read_lock(qinfo_map_rwlock); + if (NULL != (pqueue_info = get_q_info(name))) + { + return enif_make_error(env, "already_exists"); + } + } + new_q(name); + return enif_make_atom(env, "ok"); +} + +static ERL_NIF_TERM info(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + int size = 100; + char name[100]; + enif_get_atom(env, argv[0], name, size, ERL_NIF_LATIN1); + int queue_size = 0; + { + QueueInfo *pqueue_info = NULL; + ReadLock read_lock(qinfo_map_rwlock); + if (NULL == (pqueue_info = get_q_info(name))) + return enif_make_badarg(env); + queue_size = pqueue_info->queue.size(); + } + return enif_make_list2(env, + enif_make_tuple2(env, enif_make_atom(env, "name"), enif_make_atom(env, name)), + enif_make_tuple2(env, enif_make_atom(env, "size"), enif_make_int(env, queue_size))); +} + +static ERL_NIF_TERM push_back(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + int size = 100; + char name[100]; + enif_get_atom(env, argv[0], name, size, ERL_NIF_LATIN1); + ErlTerm *erl_term = new ErlTerm(argv[1]); + if (push(name, erl_term)) + return enif_make_atom(env, "ok"); + delete erl_term; + return enif_make_badarg(env); +} + +static ERL_NIF_TERM pop_front(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + int size = 100; + char name[100]; + enif_get_atom(env, argv[0], name, size, ERL_NIF_LATIN1); + ErlTerm *erl_term = NULL; + if (NULL == (erl_term = pop(name, false))) + return enif_make_badarg(env); + ERL_NIF_TERM return_term = enif_make_copy(env, erl_term->term); + delete erl_term; + return return_term; +} + +static ERL_NIF_TERM get_front(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + int size = 100; + char name[100]; + enif_get_atom(env, argv[0], name, size, ERL_NIF_LATIN1); + ErlTerm *erl_term = NULL; + if (NULL == (erl_term = pop(name, true))) + return enif_make_badarg(env); + ERL_NIF_TERM return_term = enif_make_copy(env, erl_term->term); + delete erl_term; + return return_term; +} + +static int is_ok_load_info(ErlNifEnv* env, ERL_NIF_TERM load_info) +{ + int i; + return enif_get_int(env, load_info, &i) && i == 1; +} + +static int load(ErlNifEnv* env, void** priv_data, ERL_NIF_TERM load_info) +{ + if (!is_ok_load_info(env, load_info)) + return -1; + qinfo_map_rwlock = enif_rwlock_create((char*)"qinfo"); + return 0; +} + +static int upgrade(ErlNifEnv* env, void** priv_data, void** old_priv_data, ERL_NIF_TERM load_info) +{ + if (!is_ok_load_info(env, load_info)) + return -1; + return 0; +} + +static void unload(ErlNifEnv* env, void* priv_data) +{ + enif_rwlock_destroy(qinfo_map_rwlock); +} + +static ErlNifFunc nif_funcs[] = { + {"new", 1, new_queue}, + {"info", 1, info}, + {"push_back", 2, push_back}, + {"pop_front", 1, pop_front}, + {"get_front", 1, get_front} +}; + +ERL_NIF_INIT(etsq, nif_funcs, load, NULL, upgrade, unload) diff --git a/c_src/etsq/etsq.h 
b/c_src/etsq/etsq.h new file mode 100644 index 0000000..ef0d346 --- /dev/null +++ b/c_src/etsq/etsq.h @@ -0,0 +1,130 @@ +/* + * etsq.h + * + * Created on: Mar 21, 2016 + * Author: Vinod + */ + +#ifndef ETSQ_H_ +#define ETSQ_H_ + +#include // std::cin, std::cout +#include // std::map +#include // std::queue +#include +#include "erl_nif.h" + +#define enif_make_error(env, error) enif_make_tuple2(env, \ + enif_make_atom(env, "error"), enif_make_atom(env, error)) + +struct cmp_str +{ + bool operator()(char *a, char *b) const + { + return strcmp(a, b) < 0; + } +}; + +class ErlTerm +{ +public: + ErlNifEnv *term_env; + ERL_NIF_TERM term; +public: + ErlTerm(ERL_NIF_TERM erl_nif_term) + { + term_env = enif_alloc_env(); + this->term = enif_make_copy(term_env, erl_nif_term); + } + ErlTerm(ErlTerm *erl_term) + { + term_env = enif_alloc_env(); + this->term = enif_make_copy(term_env, erl_term->term); + } + ErlTerm(int value) + { + term_env = enif_alloc_env(); + this->term = enif_make_int(term_env, value); + } + ErlTerm(const char *error) + { + term_env = enif_alloc_env(); + this->term = enif_make_error(term_env, error); + } + ~ErlTerm() + { + enif_free_env(term_env); + term_env = NULL; + } +}; + +typedef std::queue ErlQueue; + +class QueueInfo +{ +public: + ErlNifMutex* pmutex; + ErlQueue queue; +public: + QueueInfo(char* name) + { + pmutex = enif_mutex_create(name); + } + ~QueueInfo() + { + enif_mutex_destroy(pmutex); + } +}; + +typedef std::map QInfoMap; +typedef std::pair QInfoMapPair; + +// Class to handle Read lock +class ReadLock +{ + ErlNifRWLock *pread_lock; +public: + ReadLock(ErlNifRWLock *pread_lock) + { + this->pread_lock = pread_lock; + enif_rwlock_rlock(this->pread_lock); + }; + ~ReadLock() + { + enif_rwlock_runlock(pread_lock); + }; +}; + +// Class to handle Write lock +class WriteLock +{ + ErlNifRWLock *pwrite_lock; +public: + WriteLock(ErlNifRWLock *pwrite_lock) + { + this->pwrite_lock = pwrite_lock; + enif_rwlock_rwlock(this->pwrite_lock); + }; + ~WriteLock() + { + enif_rwlock_rwunlock(pwrite_lock); + }; +}; + +// Class to handle Mutex lock and unlock +class Mutex +{ + ErlNifMutex *pmtx; +public: + Mutex(ErlNifMutex *pmtx) + { + this->pmtx = pmtx; + enif_mutex_lock(this->pmtx); + }; + ~Mutex() + { + enif_mutex_unlock(pmtx); + }; +}; + +#endif /* ETSQ_H_ */ diff --git a/c_src/etsq/rebar.config b/c_src/etsq/rebar.config new file mode 100644 index 0000000..f6093dd --- /dev/null +++ b/c_src/etsq/rebar.config @@ -0,0 +1,7 @@ +{port_specs, [ + {"../../priv/etsq.so", ["*.cpp"]} +]}. 
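The two queue NIFs above answer the same push/pop contract in different ways: enlfq wraps a single lock-free moodycamel::ConcurrentQueue per resource, while etsq keeps a map of named queues, each protected by its own mutex behind a global rwlock. The following is a minimal standalone sketch (plain C++, outside the NIF environment, assuming only the concurrentqueue.h header added by this patch) of the two moodycamel calls that enlfq.cc relies on; the `item` struct here is a stand-in that holds a string where the real q_item holds an ErlNifEnv* plus the term copied into it. It illustrates why the NIF can return the atom 'true' for every push and either {ok, Term} or 'empty' for pop.

#include <iostream>
#include <string>
#include "concurrentqueue.h"   // header added under c_src/enlfq/ by this patch

// Stand-in for enlfq's q_item: the real struct carries an ErlNifEnv* and the
// term copied into that env; a plain string plays that role here.
struct item {
    std::string payload;
};

int main() {
    moodycamel::ConcurrentQueue<item> queue;

    // Mirrors nif_enlfq_push: enqueue() has no "full" condition (it can only
    // fail on allocation failure), so the NIF answers 'true' unconditionally.
    queue.enqueue(item{"hello"});

    // Mirrors nif_enlfq_pop: try_dequeue() either fills 'out' (-> {ok, Term})
    // or returns false (-> the atom 'empty').
    item out;
    if (queue.try_dequeue(out)) {
        std::cout << "ok: " << out.payload << "\n";
    } else {
        std::cout << "empty\n";
    }
    return 0;
}

etsq trades that lock-freedom for addressable queues: push_back and pop_front take the global rwlock's read side only to find the named queue, then hold that queue's mutex for the actual operation, so traffic on different named queues largely avoids contention.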
+ + + + diff --git a/c_src/gb_lru/binary.h b/c_src/gb_lru/binary.h new file mode 100644 index 0000000..dd21ae6 --- /dev/null +++ b/c_src/gb_lru/binary.h @@ -0,0 +1,103 @@ +#include +#include +#include + +class Binary { + public: + unsigned char *bin; + size_t size; + bool allocated; + + Binary() : bin(NULL), size(0), allocated(false) { } + Binary(const char *data) { + bin = (unsigned char *) data; + size = strlen(data); + allocated = false; + } + + Binary(const Binary &b) { + bin = b.bin; + size = b.size; + allocated = false; + } + + ~Binary() { + if (allocated) { + delete bin; + } + } + + operator std::string() { + return (const char *) bin; + } + + friend std::ostream & operator<<(std::ostream & str, Binary const &b) { + return str << b.bin; + } + + bool operator<(const Binary &b) { + if(size < b.size) { + return true; + } else if (size > b.size) { + return false; + } else { + return memcmp(bin,b.bin,size) < 0; + } + } + + bool operator<(Binary &b) { + if(size < b.size) { + return true; + } else if (size > b.size) { + return false; + } else { + return memcmp(bin,b.bin,size) < 0; + } + } + + bool operator>(const Binary &b) { + if(size > b.size) { + return true; + } else if (size < b.size) { + return false; + } else { + return memcmp(bin,b.bin,size) > 0; + } + } + + bool operator== (const Binary &b) { + if (size == b.size ) { + return memcmp(bin,b.bin, std::min(size, b.size)) == 0; + } else { + return false; + } + } + operator std::string() const { + return (const char*) bin; + } + + Binary& set_data(const char *data) { + bin = (unsigned char *) data; + size = strlen(data); + return *this; + } + + void copy(char *inbin, size_t insize) { + bin = (unsigned char *) operator new(insize); + allocated = true; + size = insize; + memcpy(bin, inbin, size); + } +}; + +inline bool operator < (const Binary &a, const Binary &b) { + + if(a.size < b.size) { + return true; + } else if (a.size > b.size) { + return false; + } else { + return memcmp(a.bin,b.bin, std::min(a.size, b.size)) < 0; + } +} + diff --git a/c_src/gb_lru/btree.h b/c_src/gb_lru/btree.h new file mode 100644 index 0000000..5035835 --- /dev/null +++ b/c_src/gb_lru/btree.h @@ -0,0 +1,2394 @@ +// Copyright 2013 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// A btree implementation of the STL set and map interfaces. A btree is both +// smaller and faster than STL set/map. The red-black tree implementation of +// STL set/map has an overhead of 3 pointers (left, right and parent) plus the +// node color information for each stored value. So a set consumes 20 +// bytes for each value stored. This btree implementation stores multiple +// values on fixed size nodes (usually 256 bytes) and doesn't store child +// pointers for leaf nodes. The result is that a btree_set may use much +// less memory per stored value. For the random insertion benchmark in +// btree_test.cc, a btree_set with node-size of 256 uses 4.9 bytes per +// stored value. 
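// Rough arithmetic behind the 20-byte figure above, presumably assuming a
// 32-bit build and an int32 element type (the template arguments in the
// surrounding comment appear to have been stripped during extraction):
// three node pointers at 4 bytes each, a 4-byte color word after padding,
// and the 4-byte value itself give 12 + 4 + 4 = 20 bytes per stored value.
// The 4.9 bytes/value figure quoted for the btree is a measured result from
// the btree_test.cc random-insertion benchmark rather than a derived one.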
+// +// The packing of multiple values on to each node of a btree has another effect +// besides better space utilization: better cache locality due to fewer cache +// lines being accessed. Better cache locality translates into faster +// operations. +// +// CAVEATS +// +// Insertions and deletions on a btree can cause splitting, merging or +// rebalancing of btree nodes. And even without these operations, insertions +// and deletions on a btree will move values around within a node. In both +// cases, the result is that insertions and deletions can invalidate iterators +// pointing to values other than the one being inserted/deleted. This is +// notably different from STL set/map which takes care to not invalidate +// iterators on insert/erase except, of course, for iterators pointing to the +// value being erased. A partial workaround when erasing is available: +// erase() returns an iterator pointing to the item just after the one that was +// erased (or end() if none exists). See also safe_btree. + +// PERFORMANCE +// +// btree_bench --benchmarks=. 2>&1 | ./benchmarks.awk +// +// Run on pmattis-warp.nyc (4 X 2200 MHz CPUs); 2010/03/04-15:23:06 +// Benchmark STL(ns) B-Tree(ns) @ +// -------------------------------------------------------- +// BM_set_int32_insert 1516 608 +59.89% <256> [40.0, 5.2] +// BM_set_int32_lookup 1160 414 +64.31% <256> [40.0, 5.2] +// BM_set_int32_fulllookup 960 410 +57.29% <256> [40.0, 4.4] +// BM_set_int32_delete 1741 528 +69.67% <256> [40.0, 5.2] +// BM_set_int32_queueaddrem 3078 1046 +66.02% <256> [40.0, 5.5] +// BM_set_int32_mixedaddrem 3600 1384 +61.56% <256> [40.0, 5.3] +// BM_set_int32_fifo 227 113 +50.22% <256> [40.0, 4.4] +// BM_set_int32_fwditer 158 26 +83.54% <256> [40.0, 5.2] +// BM_map_int32_insert 1551 636 +58.99% <256> [48.0, 10.5] +// BM_map_int32_lookup 1200 508 +57.67% <256> [48.0, 10.5] +// BM_map_int32_fulllookup 989 487 +50.76% <256> [48.0, 8.8] +// BM_map_int32_delete 1794 628 +64.99% <256> [48.0, 10.5] +// BM_map_int32_queueaddrem 3189 1266 +60.30% <256> [48.0, 11.6] +// BM_map_int32_mixedaddrem 3822 1623 +57.54% <256> [48.0, 10.9] +// BM_map_int32_fifo 151 134 +11.26% <256> [48.0, 8.8] +// BM_map_int32_fwditer 161 32 +80.12% <256> [48.0, 10.5] +// BM_set_int64_insert 1546 636 +58.86% <256> [40.0, 10.5] +// BM_set_int64_lookup 1200 512 +57.33% <256> [40.0, 10.5] +// BM_set_int64_fulllookup 971 487 +49.85% <256> [40.0, 8.8] +// BM_set_int64_delete 1745 616 +64.70% <256> [40.0, 10.5] +// BM_set_int64_queueaddrem 3163 1195 +62.22% <256> [40.0, 11.6] +// BM_set_int64_mixedaddrem 3760 1564 +58.40% <256> [40.0, 10.9] +// BM_set_int64_fifo 146 103 +29.45% <256> [40.0, 8.8] +// BM_set_int64_fwditer 162 31 +80.86% <256> [40.0, 10.5] +// BM_map_int64_insert 1551 720 +53.58% <256> [48.0, 20.7] +// BM_map_int64_lookup 1214 612 +49.59% <256> [48.0, 20.7] +// BM_map_int64_fulllookup 994 592 +40.44% <256> [48.0, 17.2] +// BM_map_int64_delete 1778 764 +57.03% <256> [48.0, 20.7] +// BM_map_int64_queueaddrem 3189 1547 +51.49% <256> [48.0, 20.9] +// BM_map_int64_mixedaddrem 3779 1887 +50.07% <256> [48.0, 21.6] +// BM_map_int64_fifo 147 145 +1.36% <256> [48.0, 17.2] +// BM_map_int64_fwditer 162 41 +74.69% <256> [48.0, 20.7] +// BM_set_string_insert 1989 1966 +1.16% <256> [64.0, 44.5] +// BM_set_string_lookup 1709 1600 +6.38% <256> [64.0, 44.5] +// BM_set_string_fulllookup 1573 1529 +2.80% <256> [64.0, 35.4] +// BM_set_string_delete 2520 1920 +23.81% <256> [64.0, 44.5] +// BM_set_string_queueaddrem 4706 4309 +8.44% <256> [64.0, 48.3] +// 
BM_set_string_mixedaddrem 5080 4654 +8.39% <256> [64.0, 46.7] +// BM_set_string_fifo 318 512 -61.01% <256> [64.0, 35.4] +// BM_set_string_fwditer 182 93 +48.90% <256> [64.0, 44.5] +// BM_map_string_insert 2600 2227 +14.35% <256> [72.0, 55.8] +// BM_map_string_lookup 2068 1730 +16.34% <256> [72.0, 55.8] +// BM_map_string_fulllookup 1859 1618 +12.96% <256> [72.0, 44.0] +// BM_map_string_delete 3168 2080 +34.34% <256> [72.0, 55.8] +// BM_map_string_queueaddrem 5840 4701 +19.50% <256> [72.0, 59.4] +// BM_map_string_mixedaddrem 6400 5200 +18.75% <256> [72.0, 57.8] +// BM_map_string_fifo 398 596 -49.75% <256> [72.0, 44.0] +// BM_map_string_fwditer 243 113 +53.50% <256> [72.0, 55.8] + +#ifndef UTIL_BTREE_BTREE_H__ +#define UTIL_BTREE_BTREE_H__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef NDEBUG +#define NDEBUG 1 +#endif + +namespace btree { + +// Inside a btree method, if we just call swap(), it will choose the +// btree::swap method, which we don't want. And we can't say ::swap +// because then MSVC won't pickup any std::swap() implementations. We +// can't just use std::swap() directly because then we don't get the +// specialization for types outside the std namespace. So the solution +// is to have a special swap helper function whose name doesn't +// collide with other swap functions defined by the btree classes. +template +inline void btree_swap_helper(T &a, T &b) { + using std::swap; + swap(a, b); +} + +// A template helper used to select A or B based on a condition. +template +struct if_{ + typedef A type; +}; + +template +struct if_ { + typedef B type; +}; + +// Types small_ and big_ are promise that sizeof(small_) < sizeof(big_) +typedef char small_; + +struct big_ { + char dummy[2]; +}; + +// A compile-time assertion. +template +struct CompileAssert { +}; + +#define COMPILE_ASSERT(expr, msg) \ + typedef CompileAssert<(bool(expr))> msg[bool(expr) ? 1 : 0] + +// A helper type used to indicate that a key-compare-to functor has been +// provided. A user can specify a key-compare-to functor by doing: +// +// struct MyStringComparer +// : public util::btree::btree_key_compare_to_tag { +// int operator()(const string &a, const string &b) const { +// return a.compare(b); +// } +// }; +// +// Note that the return type is an int and not a bool. There is a +// COMPILE_ASSERT which enforces this return type. +struct btree_key_compare_to_tag { +}; + +// A helper class that indicates if the Compare parameter is derived from +// btree_key_compare_to_tag. +template +struct btree_is_key_compare_to + : public std::is_convertible { +}; + +// A helper class to convert a boolean comparison into a three-way +// "compare-to" comparison that returns a negative value to indicate +// less-than, zero to indicate equality and a positive value to +// indicate greater-than. This helper class is specialized for +// less and greater. The btree_key_compare_to_adapter +// class is provided so that btree users automatically get the more +// efficient compare-to code when using common google string types +// with common comparison functors. 
+template +struct btree_key_compare_to_adapter : Compare { + btree_key_compare_to_adapter() { } + btree_key_compare_to_adapter(const Compare &c) : Compare(c) { } + btree_key_compare_to_adapter(const btree_key_compare_to_adapter &c) + : Compare(c) { + } +}; + +template <> +struct btree_key_compare_to_adapter > + : public btree_key_compare_to_tag { + btree_key_compare_to_adapter() {} + btree_key_compare_to_adapter(const std::less&) {} + btree_key_compare_to_adapter( + const btree_key_compare_to_adapter >&) {} + int operator()(const std::string &a, const std::string &b) const { + return a.compare(b); + } +}; + +template <> +struct btree_key_compare_to_adapter > + : public btree_key_compare_to_tag { + btree_key_compare_to_adapter() {} + btree_key_compare_to_adapter(const std::greater&) {} + btree_key_compare_to_adapter( + const btree_key_compare_to_adapter >&) {} + int operator()(const std::string &a, const std::string &b) const { + return b.compare(a); + } +}; + +// A helper class that allows a compare-to functor to behave like a plain +// compare functor. This specialization is used when we do not have a +// compare-to functor. +template +struct btree_key_comparer { + btree_key_comparer() {} + btree_key_comparer(Compare c) : comp(c) {} + static bool bool_compare(const Compare &comp, const Key &x, const Key &y) { + return comp(x, y); + } + bool operator()(const Key &x, const Key &y) const { + return bool_compare(comp, x, y); + } + Compare comp; +}; + +// A specialization of btree_key_comparer when a compare-to functor is +// present. We need a plain (boolean) comparison in some parts of the btree +// code, such as insert-with-hint. +template +struct btree_key_comparer { + btree_key_comparer() {} + btree_key_comparer(Compare c) : comp(c) {} + static bool bool_compare(const Compare &comp, const Key &x, const Key &y) { + return comp(x, y) < 0; + } + bool operator()(const Key &x, const Key &y) const { + return bool_compare(comp, x, y); + } + Compare comp; +}; + +// A helper function to compare to keys using the specified compare +// functor. This dispatches to the appropriate btree_key_comparer comparison, +// depending on whether we have a compare-to functor or not (which depends on +// whether Compare is derived from btree_key_compare_to_tag). +template +static bool btree_compare_keys( + const Compare &comp, const Key &x, const Key &y) { + typedef btree_key_comparer::value> key_comparer; + return key_comparer::bool_compare(comp, x, y); +} + +template +struct btree_common_params { + // If Compare is derived from btree_key_compare_to_tag then use it as the + // key_compare type. Otherwise, use btree_key_compare_to_adapter<> which will + // fall-back to Compare if we don't have an appropriate specialization. + typedef typename if_< + btree_is_key_compare_to::value, + Compare, btree_key_compare_to_adapter >::type key_compare; + // A type which indicates if we have a key-compare-to functor or a plain old + // key-compare functor. + typedef btree_is_key_compare_to is_key_compare_to; + + typedef Alloc allocator_type; + typedef Key key_type; + typedef size_t size_type; + typedef ptrdiff_t difference_type; + + enum { + kTargetNodeSize = TargetNodeSize, + + // Available space for values. This is largest for leaf nodes, + // which has overhead no fewer than two pointers. + kNodeValueSpace = TargetNodeSize - 2 * sizeof(void*), + }; + + // This is an integral type large enough to hold as many + // ValueSize-values as will fit a node of TargetNodeSize bytes. 
+ typedef typename if_< + (kNodeValueSpace / ValueSize) >= 256, + uint16_t, + uint8_t>::type node_count_type; +}; + +// A parameters structure for holding the type parameters for a btree_map. +template +struct btree_map_params + : public btree_common_params { + typedef Data data_type; + typedef Data mapped_type; + typedef std::pair value_type; + typedef std::pair mutable_value_type; + typedef value_type* pointer; + typedef const value_type* const_pointer; + typedef value_type& reference; + typedef const value_type& const_reference; + + enum { + kValueSize = sizeof(Key) + sizeof(data_type), + }; + + static const Key& key(const value_type &x) { return x.first; } + static const Key& key(const mutable_value_type &x) { return x.first; } + static void swap(mutable_value_type *a, mutable_value_type *b) { + btree_swap_helper(a->first, b->first); + btree_swap_helper(a->second, b->second); + } +}; + +// A parameters structure for holding the type parameters for a btree_set. +template +struct btree_set_params + : public btree_common_params { + typedef std::false_type data_type; + typedef std::false_type mapped_type; + typedef Key value_type; + typedef value_type mutable_value_type; + typedef value_type* pointer; + typedef const value_type* const_pointer; + typedef value_type& reference; + typedef const value_type& const_reference; + + enum { + kValueSize = sizeof(Key), + }; + + static const Key& key(const value_type &x) { return x; } + static void swap(mutable_value_type *a, mutable_value_type *b) { + btree_swap_helper(*a, *b); + } +}; + +// An adapter class that converts a lower-bound compare into an upper-bound +// compare. +template +struct btree_upper_bound_adapter : public Compare { + btree_upper_bound_adapter(Compare c) : Compare(c) {} + bool operator()(const Key &a, const Key &b) const { + return !static_cast(*this)(b, a); + } +}; + +template +struct btree_upper_bound_compare_to_adapter : public CompareTo { + btree_upper_bound_compare_to_adapter(CompareTo c) : CompareTo(c) {} + int operator()(const Key &a, const Key &b) const { + return static_cast(*this)(b, a); + } +}; + +// Dispatch helper class for using linear search with plain compare. +template +struct btree_linear_search_plain_compare { + static int lower_bound(const K &k, const N &n, Compare comp) { + return n.linear_search_plain_compare(k, 0, n.count(), comp); + } + static int upper_bound(const K &k, const N &n, Compare comp) { + typedef btree_upper_bound_adapter upper_compare; + return n.linear_search_plain_compare(k, 0, n.count(), upper_compare(comp)); + } +}; + +// Dispatch helper class for using linear search with compare-to +template +struct btree_linear_search_compare_to { + static int lower_bound(const K &k, const N &n, CompareTo comp) { + return n.linear_search_compare_to(k, 0, n.count(), comp); + } + static int upper_bound(const K &k, const N &n, CompareTo comp) { + typedef btree_upper_bound_adapter > upper_compare; + return n.linear_search_plain_compare(k, 0, n.count(), upper_compare(comp)); + } +}; + +// Dispatch helper class for using binary search with plain compare. +template +struct btree_binary_search_plain_compare { + static int lower_bound(const K &k, const N &n, Compare comp) { + return n.binary_search_plain_compare(k, 0, n.count(), comp); + } + static int upper_bound(const K &k, const N &n, Compare comp) { + typedef btree_upper_bound_adapter upper_compare; + return n.binary_search_plain_compare(k, 0, n.count(), upper_compare(comp)); + } +}; + +// Dispatch helper class for using binary search with compare-to. 
+template +struct btree_binary_search_compare_to { + static int lower_bound(const K &k, const N &n, CompareTo comp) { + return n.binary_search_compare_to(k, 0, n.count(), CompareTo()); + } + static int upper_bound(const K &k, const N &n, CompareTo comp) { + typedef btree_upper_bound_adapter > upper_compare; + return n.linear_search_plain_compare(k, 0, n.count(), upper_compare(comp)); + } +}; + +// A node in the btree holding. The same node type is used for both internal +// and leaf nodes in the btree, though the nodes are allocated in such a way +// that the children array is only valid in internal nodes. +template +class btree_node { + public: + typedef Params params_type; + typedef btree_node self_type; + typedef typename Params::key_type key_type; + typedef typename Params::data_type data_type; + typedef typename Params::value_type value_type; + typedef typename Params::mutable_value_type mutable_value_type; + typedef typename Params::pointer pointer; + typedef typename Params::const_pointer const_pointer; + typedef typename Params::reference reference; + typedef typename Params::const_reference const_reference; + typedef typename Params::key_compare key_compare; + typedef typename Params::size_type size_type; + typedef typename Params::difference_type difference_type; + // Typedefs for the various types of node searches. + typedef btree_linear_search_plain_compare< + key_type, self_type, key_compare> linear_search_plain_compare_type; + typedef btree_linear_search_compare_to< + key_type, self_type, key_compare> linear_search_compare_to_type; + typedef btree_binary_search_plain_compare< + key_type, self_type, key_compare> binary_search_plain_compare_type; + typedef btree_binary_search_compare_to< + key_type, self_type, key_compare> binary_search_compare_to_type; + // If we have a valid key-compare-to type, use linear_search_compare_to, + // otherwise use linear_search_plain_compare. + typedef typename if_< + Params::is_key_compare_to::value, + linear_search_compare_to_type, + linear_search_plain_compare_type>::type linear_search_type; + // If we have a valid key-compare-to type, use binary_search_compare_to, + // otherwise use binary_search_plain_compare. + typedef typename if_< + Params::is_key_compare_to::value, + binary_search_compare_to_type, + binary_search_plain_compare_type>::type binary_search_type; + // If the key is an integral or floating point type, use linear search which + // is faster than binary search for such types. Might be wise to also + // configure linear search based on node-size. + typedef typename if_< + std::is_integral::value || + std::is_floating_point::value, + linear_search_type, binary_search_type>::type search_type; + + struct base_fields { + typedef typename Params::node_count_type field_type; + + // A boolean indicating whether the node is a leaf or not. + bool leaf; + // The position of the node in the node's parent. + field_type position; + // The maximum number of values the node can hold. + field_type max_count; + // The count of the number of values in the node. + field_type count; + // A pointer to the node's parent. + btree_node *parent; + }; + + enum { + kValueSize = params_type::kValueSize, + kTargetNodeSize = params_type::kTargetNodeSize, + + // Compute how many values we can fit onto a leaf node. 
+ kNodeTargetValues = (kTargetNodeSize - sizeof(base_fields)) / kValueSize, + // We need a minimum of 3 values per internal node in order to perform + // splitting (1 value for the two nodes involved in the split and 1 value + // propagated to the parent as the delimiter for the split). + kNodeValues = kNodeTargetValues >= 3 ? kNodeTargetValues : 3, + + kExactMatch = 1 << 30, + kMatchMask = kExactMatch - 1, + }; + + struct leaf_fields : public base_fields { + // The array of values. Only the first count of these values have been + // constructed and are valid. + mutable_value_type values[kNodeValues]; + }; + + struct internal_fields : public leaf_fields { + // The array of child pointers. The keys in children_[i] are all less than + // key(i). The keys in children_[i + 1] are all greater than key(i). There + // are always count + 1 children. + btree_node *children[kNodeValues + 1]; + }; + + struct root_fields : public internal_fields { + btree_node *rightmost; + size_type size; + }; + + public: + // Getter/setter for whether this is a leaf node or not. This value doesn't + // change after the node is created. + bool leaf() const { return fields_.leaf; } + + // Getter for the position of this node in its parent. + int position() const { return fields_.position; } + void set_position(int v) { fields_.position = v; } + + // Getter/setter for the number of values stored in this node. + int count() const { return fields_.count; } + void set_count(int v) { fields_.count = v; } + int max_count() const { return fields_.max_count; } + + // Getter for the parent of this node. + btree_node* parent() const { return fields_.parent; } + // Getter for whether the node is the root of the tree. The parent of the + // root of the tree is the leftmost node in the tree which is guaranteed to + // be a leaf. + bool is_root() const { return parent()->leaf(); } + void make_root() { + assert(parent()->is_root()); + fields_.parent = fields_.parent->parent(); + } + + // Getter for the rightmost root node field. Only valid on the root node. + btree_node* rightmost() const { return fields_.rightmost; } + btree_node** mutable_rightmost() { return &fields_.rightmost; } + + // Getter for the size root node field. Only valid on the root node. + size_type size() const { return fields_.size; } + size_type* mutable_size() { return &fields_.size; } + + // Getters for the key/value at position i in the node. + const key_type& key(int i) const { + return params_type::key(fields_.values[i]); + } + reference value(int i) { + return reinterpret_cast(fields_.values[i]); + } + const_reference value(int i) const { + return reinterpret_cast(fields_.values[i]); + } + mutable_value_type* mutable_value(int i) { + return &fields_.values[i]; + } + + // Swap value i in this node with value j in node x. + void value_swap(int i, btree_node *x, int j) { + params_type::swap(mutable_value(i), x->mutable_value(j)); + } + + // Getters/setter for the child at position i in the node. + btree_node* child(int i) const { return fields_.children[i]; } + btree_node** mutable_child(int i) { return &fields_.children[i]; } + void set_child(int i, btree_node *c) { + *mutable_child(i) = c; + c->fields_.parent = this; + c->fields_.position = i; + } + + // Returns the position of the first value whose key is not less than k. + template + int lower_bound(const key_type &k, const Compare &comp) const { + return search_type::lower_bound(k, *this, comp); + } + // Returns the position of the first value whose key is greater than k. 
+ template + int upper_bound(const key_type &k, const Compare &comp) const { + return search_type::upper_bound(k, *this, comp); + } + + // Returns the position of the first value whose key is not less than k using + // linear search performed using plain compare. + template + int linear_search_plain_compare( + const key_type &k, int s, int e, const Compare &comp) const { + while (s < e) { + if (!btree_compare_keys(comp, key(s), k)) { + break; + } + ++s; + } + return s; + } + + // Returns the position of the first value whose key is not less than k using + // linear search performed using compare-to. + template + int linear_search_compare_to( + const key_type &k, int s, int e, const Compare &comp) const { + while (s < e) { + int c = comp(key(s), k); + if (c == 0) { + return s | kExactMatch; + } else if (c > 0) { + break; + } + ++s; + } + return s; + } + + // Returns the position of the first value whose key is not less than k using + // binary search performed using plain compare. + template + int binary_search_plain_compare( + const key_type &k, int s, int e, const Compare &comp) const { + while (s != e) { + int mid = (s + e) / 2; + if (btree_compare_keys(comp, key(mid), k)) { + s = mid + 1; + } else { + e = mid; + } + } + return s; + } + + // Returns the position of the first value whose key is not less than k using + // binary search performed using compare-to. + template + int binary_search_compare_to( + const key_type &k, int s, int e, const CompareTo &comp) const { + while (s != e) { + int mid = (s + e) / 2; + int c = comp(key(mid), k); + if (c < 0) { + s = mid + 1; + } else if (c > 0) { + e = mid; + } else { + // Need to return the first value whose key is not less than k, which + // requires continuing the binary search. Note that we are guaranteed + // that the result is an exact match because if "key(mid-1) < k" the + // call to binary_search_compare_to() will return "mid". + s = binary_search_compare_to(k, s, mid, comp); + return s | kExactMatch; + } + } + return s; + } + + // Inserts the value x at position i, shifting all existing values and + // children at positions >= i to the right by 1. + void insert_value(int i, const value_type &x); + + // Removes the value at position i, shifting all existing values and children + // at positions > i to the left by 1. + void remove_value(int i); + + // Rebalances a node with its right sibling. + void rebalance_right_to_left(btree_node *sibling, int to_move); + void rebalance_left_to_right(btree_node *sibling, int to_move); + + // Splits a node, moving a portion of the node's values to its right sibling. + void split(btree_node *sibling, int insert_position); + + // Merges a node with its right sibling, moving all of the values and the + // delimiting key in the parent node onto itself. + void merge(btree_node *sibling); + + // Swap the contents of "this" and "src". + void swap(btree_node *src); + + // Node allocation/deletion routines. 
+ static btree_node* init_leaf( + leaf_fields *f, btree_node *parent, int max_count) { + btree_node *n = reinterpret_cast(f); + f->leaf = 1; + f->position = 0; + f->max_count = max_count; + f->count = 0; + f->parent = parent; + if (!NDEBUG) { + memset(&f->values, 0, max_count * sizeof(value_type)); + } + return n; + } + static btree_node* init_internal(internal_fields *f, btree_node *parent) { + btree_node *n = init_leaf(f, parent, kNodeValues); + f->leaf = 0; + if (!NDEBUG) { + memset(f->children, 0, sizeof(f->children)); + } + return n; + } + static btree_node* init_root(root_fields *f, btree_node *parent) { + btree_node *n = init_internal(f, parent); + f->rightmost = parent; + f->size = parent->count(); + return n; + } + void destroy() { + for (int i = 0; i < count(); ++i) { + value_destroy(i); + } + } + + private: + void value_init(int i) { + new (&fields_.values[i]) mutable_value_type; + } + void value_init(int i, const value_type &x) { + new (&fields_.values[i]) mutable_value_type(x); + } + void value_destroy(int i) { + fields_.values[i].~mutable_value_type(); + } + + private: + root_fields fields_; + + private: + btree_node(const btree_node&); + void operator=(const btree_node&); +}; + +template +struct btree_iterator { + typedef typename Node::key_type key_type; + typedef typename Node::size_type size_type; + typedef typename Node::difference_type difference_type; + typedef typename Node::params_type params_type; + + typedef Node node_type; + typedef typename std::remove_const::type normal_node; + typedef const Node const_node; + typedef typename params_type::value_type value_type; + typedef typename params_type::pointer normal_pointer; + typedef typename params_type::reference normal_reference; + typedef typename params_type::const_pointer const_pointer; + typedef typename params_type::const_reference const_reference; + + typedef Pointer pointer; + typedef Reference reference; + typedef std::bidirectional_iterator_tag iterator_category; + + typedef btree_iterator< + normal_node, normal_reference, normal_pointer> iterator; + typedef btree_iterator< + const_node, const_reference, const_pointer> const_iterator; + typedef btree_iterator self_type; + + btree_iterator() + : node(NULL), + position(-1) { + } + btree_iterator(Node *n, int p) + : node(n), + position(p) { + } + btree_iterator(const iterator &x) + : node(x.node), + position(x.position) { + } + + // Increment/decrement the iterator. + void increment() { + if (node->leaf() && ++position < node->count()) { + return; + } + increment_slow(); + } + void increment_by(int count); + void increment_slow(); + + void decrement() { + if (node->leaf() && --position >= 0) { + return; + } + decrement_slow(); + } + void decrement_slow(); + + bool operator==(const const_iterator &x) const { + return node == x.node && position == x.position; + } + bool operator!=(const const_iterator &x) const { + return node != x.node || position != x.position; + } + + // Accessors for the key/value the iterator is pointing at. 
+ const key_type& key() const { + return node->key(position); + } + reference operator*() const { + return node->value(position); + } + pointer operator->() const { + return &node->value(position); + } + + self_type& operator++() { + increment(); + return *this; + } + self_type& operator--() { + decrement(); + return *this; + } + self_type operator++(int) { + self_type tmp = *this; + ++*this; + return tmp; + } + self_type operator--(int) { + self_type tmp = *this; + --*this; + return tmp; + } + + // The node in the tree the iterator is pointing at. + Node *node; + // The position within the node of the tree the iterator is pointing at. + int position; +}; + +// Dispatch helper class for using btree::internal_locate with plain compare. +struct btree_internal_locate_plain_compare { + template + static std::pair dispatch(const K &k, const T &t, Iter iter) { + return t.internal_locate_plain_compare(k, iter); + } +}; + +// Dispatch helper class for using btree::internal_locate with compare-to. +struct btree_internal_locate_compare_to { + template + static std::pair dispatch(const K &k, const T &t, Iter iter) { + return t.internal_locate_compare_to(k, iter); + } +}; + +template +class btree : public Params::key_compare { + typedef btree self_type; + typedef btree_node node_type; + typedef typename node_type::base_fields base_fields; + typedef typename node_type::leaf_fields leaf_fields; + typedef typename node_type::internal_fields internal_fields; + typedef typename node_type::root_fields root_fields; + typedef typename Params::is_key_compare_to is_key_compare_to; + + friend struct btree_internal_locate_plain_compare; + friend struct btree_internal_locate_compare_to; + typedef typename if_< + is_key_compare_to::value, + btree_internal_locate_compare_to, + btree_internal_locate_plain_compare>::type internal_locate_type; + + enum { + kNodeValues = node_type::kNodeValues, + kMinNodeValues = kNodeValues / 2, + kValueSize = node_type::kValueSize, + kExactMatch = node_type::kExactMatch, + kMatchMask = node_type::kMatchMask, + }; + + // A helper class to get the empty base class optimization for 0-size + // allocators. Base is internal_allocator_type. + // (e.g. empty_base_handle). If Base is + // 0-size, the compiler doesn't have to reserve any space for it and + // sizeof(empty_base_handle) will simply be sizeof(Data). Google [empty base + // class optimization] for more details. 
+ template + struct empty_base_handle : public Base { + empty_base_handle(const Base &b, const Data &d) + : Base(b), + data(d) { + } + Data data; + }; + + struct node_stats { + node_stats(size_t l, size_t i) + : leaf_nodes(l), + internal_nodes(i) { + } + + node_stats& operator+=(const node_stats &x) { + leaf_nodes += x.leaf_nodes; + internal_nodes += x.internal_nodes; + return *this; + } + + size_t leaf_nodes; + size_t internal_nodes; + }; + + public: + typedef Params params_type; + typedef typename Params::key_type key_type; + typedef typename Params::data_type data_type; + typedef typename Params::mapped_type mapped_type; + typedef typename Params::value_type value_type; + typedef typename Params::key_compare key_compare; + typedef typename Params::pointer pointer; + typedef typename Params::const_pointer const_pointer; + typedef typename Params::reference reference; + typedef typename Params::const_reference const_reference; + typedef typename Params::size_type size_type; + typedef typename Params::difference_type difference_type; + typedef btree_iterator iterator; + typedef typename iterator::const_iterator const_iterator; + typedef std::reverse_iterator const_reverse_iterator; + typedef std::reverse_iterator reverse_iterator; + + typedef typename Params::allocator_type allocator_type; + typedef typename allocator_type::template rebind::other + internal_allocator_type; + + public: + // Default constructor. + btree(const key_compare &comp, const allocator_type &alloc); + + // Copy constructor. + btree(const self_type &x); + + // Destructor. + ~btree() { + clear(); + } + + // Iterator routines. + iterator begin() { + return iterator(leftmost(), 0); + } + const_iterator begin() const { + return const_iterator(leftmost(), 0); + } + iterator end() { + return iterator(rightmost(), rightmost() ? rightmost()->count() : 0); + } + const_iterator end() const { + return const_iterator(rightmost(), rightmost() ? rightmost()->count() : 0); + } + reverse_iterator rbegin() { + return reverse_iterator(end()); + } + const_reverse_iterator rbegin() const { + return const_reverse_iterator(end()); + } + reverse_iterator rend() { + return reverse_iterator(begin()); + } + const_reverse_iterator rend() const { + return const_reverse_iterator(begin()); + } + + // Finds the first element whose key is not less than key. + iterator lower_bound(const key_type &key) { + return internal_end( + internal_lower_bound(key, iterator(root(), 0))); + } + const_iterator lower_bound(const key_type &key) const { + return internal_end( + internal_lower_bound(key, const_iterator(root(), 0))); + } + + // Finds the first element whose key is greater than key. + iterator upper_bound(const key_type &key) { + return internal_end( + internal_upper_bound(key, iterator(root(), 0))); + } + const_iterator upper_bound(const key_type &key) const { + return internal_end( + internal_upper_bound(key, const_iterator(root(), 0))); + } + + // Finds the range of values which compare equal to key. The first member of + // the returned pair is equal to lower_bound(key). The second member pair of + // the pair is equal to upper_bound(key). + std::pair equal_range(const key_type &key) { + return std::make_pair(lower_bound(key), upper_bound(key)); + } + std::pair equal_range(const key_type &key) const { + return std::make_pair(lower_bound(key), upper_bound(key)); + } + + // Inserts a value into the btree only if it does not already exist. The + // boolean return value indicates whether insertion succeeded or failed. 
The + // ValuePointer type is used to avoid instatiating the value unless the key + // is being inserted. Value is not dereferenced if the key already exists in + // the btree. See btree_map::operator[]. + template + std::pair insert_unique(const key_type &key, ValuePointer value); + + // Inserts a value into the btree only if it does not already exist. The + // boolean return value indicates whether insertion succeeded or failed. + std::pair insert_unique(const value_type &v) { + return insert_unique(params_type::key(v), &v); + } + + // Insert with hint. Check to see if the value should be placed immediately + // before position in the tree. If it does, then the insertion will take + // amortized constant time. If not, the insertion will take amortized + // logarithmic time as if a call to insert_unique(v) were made. + iterator insert_unique(iterator position, const value_type &v); + + // Insert a range of values into the btree. + template + void insert_unique(InputIterator b, InputIterator e); + + // Inserts a value into the btree. The ValuePointer type is used to avoid + // instatiating the value unless the key is being inserted. Value is not + // dereferenced if the key already exists in the btree. See + // btree_map::operator[]. + template + iterator insert_multi(const key_type &key, ValuePointer value); + + // Inserts a value into the btree. + iterator insert_multi(const value_type &v) { + return insert_multi(params_type::key(v), &v); + } + + // Insert with hint. Check to see if the value should be placed immediately + // before position in the tree. If it does, then the insertion will take + // amortized constant time. If not, the insertion will take amortized + // logarithmic time as if a call to insert_multi(v) were made. + iterator insert_multi(iterator position, const value_type &v); + + // Insert a range of values into the btree. + template + void insert_multi(InputIterator b, InputIterator e); + + void assign(const self_type &x); + + // Erase the specified iterator from the btree. The iterator must be valid + // (i.e. not equal to end()). Return an iterator pointing to the node after + // the one that was erased (or end() if none exists). + iterator erase(iterator iter); + + // Erases range. Returns the number of keys erased. + int erase(iterator begin, iterator end); + + // Erases the specified key from the btree. Returns 1 if an element was + // erased and 0 otherwise. + int erase_unique(const key_type &key); + + // Erases all of the entries matching the specified key from the + // btree. Returns the number of elements erased. + int erase_multi(const key_type &key); + + // Finds the iterator corresponding to a key or returns end() if the key is + // not present. + iterator find_unique(const key_type &key) { + return internal_end( + internal_find_unique(key, iterator(root(), 0))); + } + const_iterator find_unique(const key_type &key) const { + return internal_end( + internal_find_unique(key, const_iterator(root(), 0))); + } + iterator find_multi(const key_type &key) { + return internal_end( + internal_find_multi(key, iterator(root(), 0))); + } + const_iterator find_multi(const key_type &key) const { + return internal_end( + internal_find_multi(key, const_iterator(root(), 0))); + } + + // Returns a count of the number of times the key appears in the btree. + size_type count_unique(const key_type &key) const { + const_iterator begin = internal_find_unique( + key, const_iterator(root(), 0)); + if (!begin.node) { + // The key doesn't exist in the tree. 
+ return 0; + } + return 1; + } + // Returns a count of the number of times the key appears in the btree. + size_type count_multi(const key_type &key) const { + return distance(lower_bound(key), upper_bound(key)); + } + + // Clear the btree, deleting all of the values it contains. + void clear(); + + // Swap the contents of *this and x. + void swap(self_type &x); + + // Assign the contents of x to *this. + self_type& operator=(const self_type &x) { + if (&x == this) { + // Don't copy onto ourselves. + return *this; + } + assign(x); + return *this; + } + + key_compare* mutable_key_comp() { + return this; + } + const key_compare& key_comp() const { + return *this; + } + bool compare_keys(const key_type &x, const key_type &y) const { + return btree_compare_keys(key_comp(), x, y); + } + + // Dump the btree to the specified ostream. Requires that operator<< is + // defined for Key and Value. + void dump(std::ostream &os) const { + if (root() != NULL) { + internal_dump(os, root(), 0); + } + } + + // Verifies the structure of the btree. + void verify() const; + + // Size routines. Note that empty() is slightly faster than doing size()==0. + size_type size() const { + if (empty()) return 0; + if (root()->leaf()) return root()->count(); + return root()->size(); + } + size_type max_size() const { return std::numeric_limits::max(); } + bool empty() const { return root() == NULL; } + + // The height of the btree. An empty tree will have height 0. + size_type height() const { + size_type h = 0; + if (root()) { + // Count the length of the chain from the leftmost node up to the + // root. We actually count from the root back around to the level below + // the root, but the calculation is the same because of the circularity + // of that traversal. + const node_type *n = root(); + do { + ++h; + n = n->parent(); + } while (n != root()); + } + return h; + } + + // The number of internal, leaf and total nodes used by the btree. + size_type leaf_nodes() const { + return internal_stats(root()).leaf_nodes; + } + size_type internal_nodes() const { + return internal_stats(root()).internal_nodes; + } + size_type nodes() const { + node_stats stats = internal_stats(root()); + return stats.leaf_nodes + stats.internal_nodes; + } + + // The total number of bytes used by the btree. + size_type bytes_used() const { + node_stats stats = internal_stats(root()); + if (stats.leaf_nodes == 1 && stats.internal_nodes == 0) { + return sizeof(*this) + + sizeof(base_fields) + root()->max_count() * sizeof(value_type); + } else { + return sizeof(*this) + + sizeof(root_fields) - sizeof(internal_fields) + + stats.leaf_nodes * sizeof(leaf_fields) + + stats.internal_nodes * sizeof(internal_fields); + } + } + + // The average number of bytes used per value stored in the btree. + static double average_bytes_per_value() { + // Returns the number of bytes per value on a leaf node that is 75% + // full. Experimentally, this matches up nicely with the computed number of + // bytes per value in trees that had their values inserted in random order. + return sizeof(leaf_fields) / (kNodeValues * 0.75); + } + + // The fullness of the btree. Computed as the number of elements in the btree + // divided by the maximum number of elements a tree with the current number + // of nodes could hold. A value of 1 indicates perfect space + // utilization. Smaller values indicate space wastage. + double fullness() const { + return double(size()) / (nodes() * kNodeValues); + } + // The overhead of the btree structure in bytes per node. 
Computed as the + // total number of bytes used by the btree minus the number of bytes used for + // storing elements divided by the number of elements. + double overhead() const { + if (empty()) { + return 0.0; + } + return (bytes_used() - size() * kValueSize) / double(size()); + } + + private: + // Internal accessor routines. + node_type* root() { return root_.data; } + const node_type* root() const { return root_.data; } + node_type** mutable_root() { return &root_.data; } + + // The rightmost node is stored in the root node. + node_type* rightmost() { + return (!root() || root()->leaf()) ? root() : root()->rightmost(); + } + const node_type* rightmost() const { + return (!root() || root()->leaf()) ? root() : root()->rightmost(); + } + node_type** mutable_rightmost() { return root()->mutable_rightmost(); } + + // The leftmost node is stored as the parent of the root node. + node_type* leftmost() { return root() ? root()->parent() : NULL; } + const node_type* leftmost() const { return root() ? root()->parent() : NULL; } + + // The size of the tree is stored in the root node. + size_type* mutable_size() { return root()->mutable_size(); } + + // Allocator routines. + internal_allocator_type* mutable_internal_allocator() { + return static_cast(&root_); + } + const internal_allocator_type& internal_allocator() const { + return *static_cast(&root_); + } + + // Node creation/deletion routines. + node_type* new_internal_node(node_type *parent) { + internal_fields *p = reinterpret_cast( + mutable_internal_allocator()->allocate(sizeof(internal_fields))); + return node_type::init_internal(p, parent); + } + node_type* new_internal_root_node() { + root_fields *p = reinterpret_cast( + mutable_internal_allocator()->allocate(sizeof(root_fields))); + return node_type::init_root(p, root()->parent()); + } + node_type* new_leaf_node(node_type *parent) { + leaf_fields *p = reinterpret_cast( + mutable_internal_allocator()->allocate(sizeof(leaf_fields))); + return node_type::init_leaf(p, parent, kNodeValues); + } + node_type* new_leaf_root_node(int max_count) { + leaf_fields *p = reinterpret_cast( + mutable_internal_allocator()->allocate( + sizeof(base_fields) + max_count * sizeof(value_type))); + return node_type::init_leaf(p, reinterpret_cast(p), max_count); + } + void delete_internal_node(node_type *node) { + node->destroy(); + assert(node != root()); + mutable_internal_allocator()->deallocate( + reinterpret_cast(node), sizeof(internal_fields)); + } + void delete_internal_root_node() { + root()->destroy(); + mutable_internal_allocator()->deallocate( + reinterpret_cast(root()), sizeof(root_fields)); + } + void delete_leaf_node(node_type *node) { + node->destroy(); + mutable_internal_allocator()->deallocate( + reinterpret_cast(node), + sizeof(base_fields) + node->max_count() * sizeof(value_type)); + } + + // Rebalances or splits the node iter points to. + void rebalance_or_split(iterator *iter); + + // Merges the values of left, right and the delimiting key on their parent + // onto left, removing the delimiting key and deleting right. + void merge_nodes(node_type *left, node_type *right); + + // Tries to merge node with its left or right sibling, and failing that, + // rebalance with its left or right sibling. Returns true if a merge + // occurred, at which point it is no longer valid to access node. Returns + // false if no merging took place. + bool try_merge_or_rebalance(iterator *iter); + + // Tries to shrink the height of the tree by 1. 
+ void try_shrink(); + + iterator internal_end(iterator iter) { + return iter.node ? iter : end(); + } + const_iterator internal_end(const_iterator iter) const { + return iter.node ? iter : end(); + } + + // Inserts a value into the btree immediately before iter. Requires that + // key(v) <= iter.key() and (--iter).key() <= key(v). + iterator internal_insert(iterator iter, const value_type &v); + + // Returns an iterator pointing to the first value >= the value "iter" is + // pointing at. Note that "iter" might be pointing to an invalid location as + // iter.position == iter.node->count(). This routine simply moves iter up in + // the tree to a valid location. + template + static IterType internal_last(IterType iter); + + // Returns an iterator pointing to the leaf position at which key would + // reside in the tree. We provide 2 versions of internal_locate. The first + // version (internal_locate_plain_compare) always returns 0 for the second + // field of the pair. The second version (internal_locate_compare_to) is for + // the key-compare-to specialization and returns either kExactMatch (if the + // key was found in the tree) or -kExactMatch (if it wasn't) in the second + // field of the pair. The compare_to specialization allows the caller to + // avoid a subsequent comparison to determine if an exact match was made, + // speeding up string keys. + template + std::pair internal_locate( + const key_type &key, IterType iter) const; + template + std::pair internal_locate_plain_compare( + const key_type &key, IterType iter) const; + template + std::pair internal_locate_compare_to( + const key_type &key, IterType iter) const; + + // Internal routine which implements lower_bound(). + template + IterType internal_lower_bound( + const key_type &key, IterType iter) const; + + // Internal routine which implements upper_bound(). + template + IterType internal_upper_bound( + const key_type &key, IterType iter) const; + + // Internal routine which implements find_unique(). + template + IterType internal_find_unique( + const key_type &key, IterType iter) const; + + // Internal routine which implements find_multi(). + template + IterType internal_find_multi( + const key_type &key, IterType iter) const; + + // Deletes a node and all of its children. + void internal_clear(node_type *node); + + // Dumps a node and all of its children to the specified ostream. + void internal_dump(std::ostream &os, const node_type *node, int level) const; + + // Verifies the tree structure of node. + int internal_verify(const node_type *node, + const key_type *lo, const key_type *hi) const; + + node_stats internal_stats(const node_type *node) const { + if (!node) { + return node_stats(0, 0); + } + if (node->leaf()) { + return node_stats(1, 0); + } + node_stats res(0, 1); + for (int i = 0; i <= node->count(); ++i) { + res += internal_stats(node->child(i)); + } + return res; + } + + private: + empty_base_handle root_; + + private: + // A never instantiated helper function that returns big_ if we have a + // key-compare-to functor or if R is bool and small_ otherwise. + template + static typename if_< + if_, + std::is_same >::type::value, + big_, small_>::type key_compare_checker(R); + + // A never instantiated helper function that returns the key comparison + // functor. + static key_compare key_compare_helper(); + + // Verify that key_compare returns a bool. This is similar to the way + // is_convertible in base/type_traits.h works. Note that key_compare_checker + // is never actually invoked. 
The compiler will select which + // key_compare_checker() to instantiate and then figure out the size of the + // return type of key_compare_checker() at compile time which we then check + // against the sizeof of big_. + COMPILE_ASSERT( + sizeof(key_compare_checker(key_compare_helper()(key_type(), key_type()))) == + sizeof(big_), + key_comparison_function_must_return_bool); + + // Note: We insist on kTargetValues, which is computed from + // Params::kTargetNodeSize, must fit the base_fields::field_type. + COMPILE_ASSERT(kNodeValues < + (1 << (8 * sizeof(typename base_fields::field_type))), + target_node_size_too_large); + + // Test the assumption made in setting kNodeValueSpace. + COMPILE_ASSERT(sizeof(base_fields) >= 2 * sizeof(void*), + node_space_assumption_incorrect); +}; + +//// +// btree_node methods +template +inline void btree_node
<P>
::insert_value(int i, const value_type &x) { + assert(i <= count()); + value_init(count(), x); + for (int j = count(); j > i; --j) { + value_swap(j, this, j - 1); + } + set_count(count() + 1); + + if (!leaf()) { + ++i; + for (int j = count(); j > i; --j) { + *mutable_child(j) = child(j - 1); + child(j)->set_position(j); + } + *mutable_child(i) = NULL; + } +} + +template +inline void btree_node
<P>
::remove_value(int i) { + if (!leaf()) { + assert(child(i + 1)->count() == 0); + for (int j = i + 1; j < count(); ++j) { + *mutable_child(j) = child(j + 1); + child(j)->set_position(j); + } + *mutable_child(count()) = NULL; + } + + set_count(count() - 1); + for (; i < count(); ++i) { + value_swap(i, this, i + 1); + } + value_destroy(i); +} + +template +void btree_node
<P>
::rebalance_right_to_left(btree_node *src, int to_move) { + assert(parent() == src->parent()); + assert(position() + 1 == src->position()); + assert(src->count() >= count()); + assert(to_move >= 1); + assert(to_move <= src->count()); + + // Make room in the left node for the new values. + for (int i = 0; i < to_move; ++i) { + value_init(i + count()); + } + + // Move the delimiting value to the left node and the new delimiting value + // from the right node. + value_swap(count(), parent(), position()); + parent()->value_swap(position(), src, to_move - 1); + + // Move the values from the right to the left node. + for (int i = 1; i < to_move; ++i) { + value_swap(count() + i, src, i - 1); + } + // Shift the values in the right node to their correct position. + for (int i = to_move; i < src->count(); ++i) { + src->value_swap(i - to_move, src, i); + } + for (int i = 1; i <= to_move; ++i) { + src->value_destroy(src->count() - i); + } + + if (!leaf()) { + // Move the child pointers from the right to the left node. + for (int i = 0; i < to_move; ++i) { + set_child(1 + count() + i, src->child(i)); + } + for (int i = 0; i <= src->count() - to_move; ++i) { + assert(i + to_move <= src->max_count()); + src->set_child(i, src->child(i + to_move)); + *src->mutable_child(i + to_move) = NULL; + } + } + + // Fixup the counts on the src and dest nodes. + set_count(count() + to_move); + src->set_count(src->count() - to_move); +} + +template +void btree_node
<P>
::rebalance_left_to_right(btree_node *dest, int to_move) { + assert(parent() == dest->parent()); + assert(position() + 1 == dest->position()); + assert(count() >= dest->count()); + assert(to_move >= 1); + assert(to_move <= count()); + + // Make room in the right node for the new values. + for (int i = 0; i < to_move; ++i) { + dest->value_init(i + dest->count()); + } + for (int i = dest->count() - 1; i >= 0; --i) { + dest->value_swap(i, dest, i + to_move); + } + + // Move the delimiting value to the right node and the new delimiting value + // from the left node. + dest->value_swap(to_move - 1, parent(), position()); + parent()->value_swap(position(), this, count() - to_move); + value_destroy(count() - to_move); + + // Move the values from the left to the right node. + for (int i = 1; i < to_move; ++i) { + value_swap(count() - to_move + i, dest, i - 1); + value_destroy(count() - to_move + i); + } + + if (!leaf()) { + // Move the child pointers from the left to the right node. + for (int i = dest->count(); i >= 0; --i) { + dest->set_child(i + to_move, dest->child(i)); + *dest->mutable_child(i) = NULL; + } + for (int i = 1; i <= to_move; ++i) { + dest->set_child(i - 1, child(count() - to_move + i)); + *mutable_child(count() - to_move + i) = NULL; + } + } + + // Fixup the counts on the src and dest nodes. + set_count(count() - to_move); + dest->set_count(dest->count() + to_move); +} + +template +void btree_node
<P>
::split(btree_node *dest, int insert_position) { + assert(dest->count() == 0); + + // We bias the split based on the position being inserted. If we're + // inserting at the beginning of the left node then bias the split to put + // more values on the right node. If we're inserting at the end of the + // right node then bias the split to put more values on the left node. + if (insert_position == 0) { + dest->set_count(count() - 1); + } else if (insert_position == max_count()) { + dest->set_count(0); + } else { + dest->set_count(count() / 2); + } + set_count(count() - dest->count()); + assert(count() >= 1); + + // Move values from the left sibling to the right sibling. + for (int i = 0; i < dest->count(); ++i) { + dest->value_init(i); + value_swap(count() + i, dest, i); + value_destroy(count() + i); + } + + // The split key is the largest value in the left sibling. + set_count(count() - 1); + parent()->insert_value(position(), value_type()); + value_swap(count(), parent(), position()); + value_destroy(count()); + parent()->set_child(position() + 1, dest); + + if (!leaf()) { + for (int i = 0; i <= dest->count(); ++i) { + assert(child(count() + i + 1) != NULL); + dest->set_child(i, child(count() + i + 1)); + *mutable_child(count() + i + 1) = NULL; + } + } +} + +template +void btree_node
<P>
::merge(btree_node *src) { + assert(parent() == src->parent()); + assert(position() + 1 == src->position()); + + // Move the delimiting value to the left node. + value_init(count()); + value_swap(count(), parent(), position()); + + // Move the values from the right to the left node. + for (int i = 0; i < src->count(); ++i) { + value_init(1 + count() + i); + value_swap(1 + count() + i, src, i); + src->value_destroy(i); + } + + if (!leaf()) { + // Move the child pointers from the right to the left node. + for (int i = 0; i <= src->count(); ++i) { + set_child(1 + count() + i, src->child(i)); + *src->mutable_child(i) = NULL; + } + } + + // Fixup the counts on the src and dest nodes. + set_count(1 + count() + src->count()); + src->set_count(0); + + // Remove the value on the parent node. + parent()->remove_value(position()); +} + +template +void btree_node
<P>
::swap(btree_node *x) { + assert(leaf() == x->leaf()); + + // Swap the values. + for (int i = count(); i < x->count(); ++i) { + value_init(i); + } + for (int i = x->count(); i < count(); ++i) { + x->value_init(i); + } + int n = std::max(count(), x->count()); + for (int i = 0; i < n; ++i) { + value_swap(i, x, i); + } + for (int i = count(); i < x->count(); ++i) { + x->value_destroy(i); + } + for (int i = x->count(); i < count(); ++i) { + value_destroy(i); + } + + if (!leaf()) { + // Swap the child pointers. + for (int i = 0; i <= n; ++i) { + btree_swap_helper(*mutable_child(i), *x->mutable_child(i)); + } + for (int i = 0; i <= count(); ++i) { + x->child(i)->fields_.parent = x; + } + for (int i = 0; i <= x->count(); ++i) { + child(i)->fields_.parent = this; + } + } + + // Swap the counts. + btree_swap_helper(fields_.count, x->fields_.count); +} + +//// +// btree_iterator methods +template +void btree_iterator::increment_slow() { + if (node->leaf()) { + assert(position >= node->count()); + self_type save(*this); + while (position == node->count() && !node->is_root()) { + assert(node->parent()->child(node->position()) == node); + position = node->position(); + node = node->parent(); + } + if (position == node->count()) { + *this = save; + } + } else { + assert(position < node->count()); + node = node->child(position + 1); + while (!node->leaf()) { + node = node->child(0); + } + position = 0; + } +} + +template +void btree_iterator::increment_by(int count) { + while (count > 0) { + if (node->leaf()) { + int rest = node->count() - position; + position += std::min(rest, count); + count = count - rest; + if (position < node->count()) { + return; + } + } else { + --count; + } + increment_slow(); + } +} + +template +void btree_iterator::decrement_slow() { + if (node->leaf()) { + assert(position <= -1); + self_type save(*this); + while (position < 0 && !node->is_root()) { + assert(node->parent()->child(node->position()) == node); + position = node->position() - 1; + node = node->parent(); + } + if (position < 0) { + *this = save; + } + } else { + assert(position >= 0); + node = node->child(position); + while (!node->leaf()) { + node = node->child(node->count()); + } + position = node->count() - 1; + } +} + +//// +// btree methods +template +btree
<P>
::btree(const key_compare &comp, const allocator_type &alloc) + : key_compare(comp), + root_(alloc, NULL) { +} + +template +btree
<P>
::btree(const self_type &x) + : key_compare(x.key_comp()), + root_(x.internal_allocator(), NULL) { + assign(x); +} + +template template +std::pair::iterator, bool> +btree
<P>
::insert_unique(const key_type &key, ValuePointer value) { + if (empty()) { + *mutable_root() = new_leaf_root_node(1); + } + + std::pair res = internal_locate(key, iterator(root(), 0)); + iterator &iter = res.first; + if (res.second == kExactMatch) { + // The key already exists in the tree, do nothing. + return std::make_pair(internal_last(iter), false); + } else if (!res.second) { + iterator last = internal_last(iter); + if (last.node && !compare_keys(key, last.key())) { + // The key already exists in the tree, do nothing. + return std::make_pair(last, false); + } + } + + return std::make_pair(internal_insert(iter, *value), true); +} + +template +inline typename btree
<P>
::iterator +btree
<P>
::insert_unique(iterator position, const value_type &v) { + if (!empty()) { + const key_type &key = params_type::key(v); + if (position == end() || compare_keys(key, position.key())) { + iterator prev = position; + if (position == begin() || compare_keys((--prev).key(), key)) { + // prev.key() < key < position.key() + return internal_insert(position, v); + } + } else if (compare_keys(position.key(), key)) { + iterator next = position; + ++next; + if (next == end() || compare_keys(key, next.key())) { + // position.key() < key < next.key() + return internal_insert(next, v); + } + } else { + // position.key() == key + return position; + } + } + return insert_unique(v).first; +} + +template template +void btree
<P>
::insert_unique(InputIterator b, InputIterator e) { + for (; b != e; ++b) { + insert_unique(end(), *b); + } +} + +template template +typename btree
<P>
::iterator +btree
<P>
::insert_multi(const key_type &key, ValuePointer value) { + if (empty()) { + *mutable_root() = new_leaf_root_node(1); + } + + iterator iter = internal_upper_bound(key, iterator(root(), 0)); + if (!iter.node) { + iter = end(); + } + return internal_insert(iter, *value); +} + +template +typename btree
<P>
::iterator +btree
<P>
::insert_multi(iterator position, const value_type &v) { + if (!empty()) { + const key_type &key = params_type::key(v); + if (position == end() || !compare_keys(position.key(), key)) { + iterator prev = position; + if (position == begin() || !compare_keys(key, (--prev).key())) { + // prev.key() <= key <= position.key() + return internal_insert(position, v); + } + } else { + iterator next = position; + ++next; + if (next == end() || !compare_keys(next.key(), key)) { + // position.key() < key <= next.key() + return internal_insert(next, v); + } + } + } + return insert_multi(v); +} + +template template +void btree
<P>
::insert_multi(InputIterator b, InputIterator e) { + for (; b != e; ++b) { + insert_multi(end(), *b); + } +} + +template +void btree
<P>
::assign(const self_type &x) { + clear(); + + *mutable_key_comp() = x.key_comp(); + *mutable_internal_allocator() = x.internal_allocator(); + + // Assignment can avoid key comparisons because we know the order of the + // values is the same order we'll store them in. + for (const_iterator iter = x.begin(); iter != x.end(); ++iter) { + if (empty()) { + insert_multi(*iter); + } else { + // If the btree is not empty, we can just insert the new value at the end + // of the tree! + internal_insert(end(), *iter); + } + } +} + +template +typename btree
<P>
::iterator btree
<P>
::erase(iterator iter) { + bool internal_delete = false; + if (!iter.node->leaf()) { + // Deletion of a value on an internal node. Swap the key with the largest + // value of our left child. This is easy, we just decrement iter. + iterator tmp_iter(iter--); + assert(iter.node->leaf()); + assert(!compare_keys(tmp_iter.key(), iter.key())); + iter.node->value_swap(iter.position, tmp_iter.node, tmp_iter.position); + internal_delete = true; + --*mutable_size(); + } else if (!root()->leaf()) { + --*mutable_size(); + } + + // Delete the key from the leaf. + iter.node->remove_value(iter.position); + + // We want to return the next value after the one we just erased. If we + // erased from an internal node (internal_delete == true), then the next + // value is ++(++iter). If we erased from a leaf node (internal_delete == + // false) then the next value is ++iter. Note that ++iter may point to an + // internal node and the value in the internal node may move to a leaf node + // (iter.node) when rebalancing is performed at the leaf level. + + // Merge/rebalance as we walk back up the tree. + iterator res(iter); + for (;;) { + if (iter.node == root()) { + try_shrink(); + if (empty()) { + return end(); + } + break; + } + if (iter.node->count() >= kMinNodeValues) { + break; + } + bool merged = try_merge_or_rebalance(&iter); + if (iter.node->leaf()) { + res = iter; + } + if (!merged) { + break; + } + iter.node = iter.node->parent(); + } + + // Adjust our return value. If we're pointing at the end of a node, advance + // the iterator. + if (res.position == res.node->count()) { + res.position = res.node->count() - 1; + ++res; + } + // If we erased from an internal node, advance the iterator. + if (internal_delete) { + ++res; + } + return res; +} + +template +int btree
<P>
::erase(iterator begin, iterator end) { + int count = distance(begin, end); + for (int i = 0; i < count; i++) { + begin = erase(begin); + } + return count; +} + +template +int btree
<P>
::erase_unique(const key_type &key) { + iterator iter = internal_find_unique(key, iterator(root(), 0)); + if (!iter.node) { + // The key doesn't exist in the tree, return nothing done. + return 0; + } + erase(iter); + return 1; +} + +template +int btree
<P>
::erase_multi(const key_type &key) { + iterator begin = internal_lower_bound(key, iterator(root(), 0)); + if (!begin.node) { + // The key doesn't exist in the tree, return nothing done. + return 0; + } + // Delete all of the keys between begin and upper_bound(key). + iterator end = internal_end( + internal_upper_bound(key, iterator(root(), 0))); + return erase(begin, end); +} + +template +void btree
<P>
::clear() { + if (root() != NULL) { + internal_clear(root()); + } + *mutable_root() = NULL; +} + +template +void btree
<P>
::swap(self_type &x) { + std::swap(static_cast(*this), static_cast(x)); + std::swap(root_, x.root_); +} + +template +void btree
<P>
::verify() const { + if (root() != NULL) { + assert(size() == internal_verify(root(), NULL, NULL)); + assert(leftmost() == (++const_iterator(root(), -1)).node); + assert(rightmost() == (--const_iterator(root(), root()->count())).node); + assert(leftmost()->leaf()); + assert(rightmost()->leaf()); + } else { + assert(size() == 0); + assert(leftmost() == NULL); + assert(rightmost() == NULL); + } +} + +template +void btree
<P>
::rebalance_or_split(iterator *iter) { + node_type *&node = iter->node; + int &insert_position = iter->position; + assert(node->count() == node->max_count()); + + // First try to make room on the node by rebalancing. + node_type *parent = node->parent(); + if (node != root()) { + if (node->position() > 0) { + // Try rebalancing with our left sibling. + node_type *left = parent->child(node->position() - 1); + if (left->count() < left->max_count()) { + // We bias rebalancing based on the position being inserted. If we're + // inserting at the end of the right node then we bias rebalancing to + // fill up the left node. + int to_move = (left->max_count() - left->count()) / + (1 + (insert_position < left->max_count())); + to_move = std::max(1, to_move); + + if (((insert_position - to_move) >= 0) || + ((left->count() + to_move) < left->max_count())) { + left->rebalance_right_to_left(node, to_move); + + assert(node->max_count() - node->count() == to_move); + insert_position = insert_position - to_move; + if (insert_position < 0) { + insert_position = insert_position + left->count() + 1; + node = left; + } + + assert(node->count() < node->max_count()); + return; + } + } + } + + if (node->position() < parent->count()) { + // Try rebalancing with our right sibling. + node_type *right = parent->child(node->position() + 1); + if (right->count() < right->max_count()) { + // We bias rebalancing based on the position being inserted. If we're + // inserting at the beginning of the left node then we bias rebalancing + // to fill up the right node. + int to_move = (right->max_count() - right->count()) / + (1 + (insert_position > 0)); + to_move = std::max(1, to_move); + + if ((insert_position <= (node->count() - to_move)) || + ((right->count() + to_move) < right->max_count())) { + node->rebalance_left_to_right(right, to_move); + + if (insert_position > node->count()) { + insert_position = insert_position - node->count() - 1; + node = right; + } + + assert(node->count() < node->max_count()); + return; + } + } + } + + // Rebalancing failed, make sure there is room on the parent node for a new + // value. + if (parent->count() == parent->max_count()) { + iterator parent_iter(node->parent(), node->position()); + rebalance_or_split(&parent_iter); + } + } else { + // Rebalancing not possible because this is the root node. + if (root()->leaf()) { + // The root node is currently a leaf node: create a new root node and set + // the current root node as the child of the new root. + parent = new_internal_root_node(); + parent->set_child(0, root()); + *mutable_root() = parent; + assert(*mutable_rightmost() == parent->child(0)); + } else { + // The root node is an internal node. We do not want to create a new root + // node because the root node is special and holds the size of the tree + // and a pointer to the rightmost node. So we create a new internal node + // and move all of the items on the current root into the new node. + parent = new_internal_node(parent); + parent->set_child(0, parent); + parent->swap(root()); + node = parent; + } + } + + // Split the node. + node_type *split_node; + if (node->leaf()) { + split_node = new_leaf_node(parent); + node->split(split_node, insert_position); + if (rightmost() == node) { + *mutable_rightmost() = split_node; + } + } else { + split_node = new_internal_node(parent); + node->split(split_node, insert_position); + } + + if (insert_position > node->count()) { + insert_position = insert_position - node->count() - 1; + node = split_node; + } +} + +template +void btree
<P>
::merge_nodes(node_type *left, node_type *right) { + left->merge(right); + if (right->leaf()) { + if (rightmost() == right) { + *mutable_rightmost() = left; + } + delete_leaf_node(right); + } else { + delete_internal_node(right); + } +} + +template +bool btree
<P>
::try_merge_or_rebalance(iterator *iter) { + node_type *parent = iter->node->parent(); + if (iter->node->position() > 0) { + // Try merging with our left sibling. + node_type *left = parent->child(iter->node->position() - 1); + if ((1 + left->count() + iter->node->count()) <= left->max_count()) { + iter->position += 1 + left->count(); + merge_nodes(left, iter->node); + iter->node = left; + return true; + } + } + if (iter->node->position() < parent->count()) { + // Try merging with our right sibling. + node_type *right = parent->child(iter->node->position() + 1); + if ((1 + iter->node->count() + right->count()) <= right->max_count()) { + merge_nodes(iter->node, right); + return true; + } + // Try rebalancing with our right sibling. We don't perform rebalancing if + // we deleted the first element from iter->node and the node is not + // empty. This is a small optimization for the common pattern of deleting + // from the front of the tree. + if ((right->count() > kMinNodeValues) && + ((iter->node->count() == 0) || + (iter->position > 0))) { + int to_move = (right->count() - iter->node->count()) / 2; + to_move = std::min(to_move, right->count() - 1); + iter->node->rebalance_right_to_left(right, to_move); + return false; + } + } + if (iter->node->position() > 0) { + // Try rebalancing with our left sibling. We don't perform rebalancing if + // we deleted the last element from iter->node and the node is not + // empty. This is a small optimization for the common pattern of deleting + // from the back of the tree. + node_type *left = parent->child(iter->node->position() - 1); + if ((left->count() > kMinNodeValues) && + ((iter->node->count() == 0) || + (iter->position < iter->node->count()))) { + int to_move = (left->count() - iter->node->count()) / 2; + to_move = std::min(to_move, left->count() - 1); + left->rebalance_left_to_right(iter->node, to_move); + iter->position += to_move; + return false; + } + } + return false; +} + +template +void btree
<P>
::try_shrink() { + if (root()->count() > 0) { + return; + } + // Deleted the last item on the root node, shrink the height of the tree. + if (root()->leaf()) { + assert(size() == 0); + delete_leaf_node(root()); + *mutable_root() = NULL; + } else { + node_type *child = root()->child(0); + if (child->leaf()) { + // The child is a leaf node so simply make it the root node in the tree. + child->make_root(); + delete_internal_root_node(); + *mutable_root() = child; + } else { + // The child is an internal node. We want to keep the existing root node + // so we move all of the values from the child node into the existing + // (empty) root node. + child->swap(root()); + delete_internal_node(child); + } + } +} + +template template +inline IterType btree
<P>
::internal_last(IterType iter) { + while (iter.node && iter.position == iter.node->count()) { + iter.position = iter.node->position(); + iter.node = iter.node->parent(); + if (iter.node->leaf()) { + iter.node = NULL; + } + } + return iter; +} + +template +inline typename btree
<P>
::iterator +btree
<P>
::internal_insert(iterator iter, const value_type &v) { + if (!iter.node->leaf()) { + // We can't insert on an internal node. Instead, we'll insert after the + // previous value which is guaranteed to be on a leaf node. + --iter; + ++iter.position; + } + if (iter.node->count() == iter.node->max_count()) { + // Make room in the leaf for the new item. + if (iter.node->max_count() < kNodeValues) { + // Insertion into the root where the root is smaller that the full node + // size. Simply grow the size of the root node. + assert(iter.node == root()); + iter.node = new_leaf_root_node( + std::min(kNodeValues, 2 * iter.node->max_count())); + iter.node->swap(root()); + delete_leaf_node(root()); + *mutable_root() = iter.node; + } else { + rebalance_or_split(&iter); + ++*mutable_size(); + } + } else if (!root()->leaf()) { + ++*mutable_size(); + } + iter.node->insert_value(iter.position, v); + return iter; +} + +template template +inline std::pair btree
<P>
::internal_locate( + const key_type &key, IterType iter) const { + return internal_locate_type::dispatch(key, *this, iter); +} + +template template +inline std::pair btree
<P>
::internal_locate_plain_compare( + const key_type &key, IterType iter) const { + for (;;) { + iter.position = iter.node->lower_bound(key, key_comp()); + if (iter.node->leaf()) { + break; + } + iter.node = iter.node->child(iter.position); + } + return std::make_pair(iter, 0); +} + +template template +inline std::pair btree
<P>
::internal_locate_compare_to( + const key_type &key, IterType iter) const { + for (;;) { + int res = iter.node->lower_bound(key, key_comp()); + iter.position = res & kMatchMask; + if (res & kExactMatch) { + return std::make_pair(iter, static_cast(kExactMatch)); + } + if (iter.node->leaf()) { + break; + } + iter.node = iter.node->child(iter.position); + } + return std::make_pair(iter, -kExactMatch); +} + +template template +IterType btree
<P>
::internal_lower_bound( + const key_type &key, IterType iter) const { + if (iter.node) { + for (;;) { + iter.position = + iter.node->lower_bound(key, key_comp()) & kMatchMask; + if (iter.node->leaf()) { + break; + } + iter.node = iter.node->child(iter.position); + } + iter = internal_last(iter); + } + return iter; +} + +template template +IterType btree
<P>
::internal_upper_bound( + const key_type &key, IterType iter) const { + if (iter.node) { + for (;;) { + iter.position = iter.node->upper_bound(key, key_comp()); + if (iter.node->leaf()) { + break; + } + iter.node = iter.node->child(iter.position); + } + iter = internal_last(iter); + } + return iter; +} + +template template +IterType btree
<P>
::internal_find_unique( + const key_type &key, IterType iter) const { + if (iter.node) { + std::pair res = internal_locate(key, iter); + if (res.second == kExactMatch) { + return res.first; + } + if (!res.second) { + iter = internal_last(res.first); + if (iter.node && !compare_keys(key, iter.key())) { + return iter; + } + } + } + return IterType(NULL, 0); +} + +template template +IterType btree
<P>
::internal_find_multi( + const key_type &key, IterType iter) const { + if (iter.node) { + iter = internal_lower_bound(key, iter); + if (iter.node) { + iter = internal_last(iter); + if (iter.node && !compare_keys(key, iter.key())) { + return iter; + } + } + } + return IterType(NULL, 0); +} + +template +void btree
<P>
::internal_clear(node_type *node) { + if (!node->leaf()) { + for (int i = 0; i <= node->count(); ++i) { + internal_clear(node->child(i)); + } + if (node == root()) { + delete_internal_root_node(); + } else { + delete_internal_node(node); + } + } else { + delete_leaf_node(node); + } +} + +template +void btree
<P>
::internal_dump( + std::ostream &os, const node_type *node, int level) const { + for (int i = 0; i < node->count(); ++i) { + if (!node->leaf()) { + internal_dump(os, node->child(i), level + 1); + } + for (int j = 0; j < level; ++j) { + os << " "; + } + os << node->key(i) << " [" << level << "]\n"; + } + if (!node->leaf()) { + internal_dump(os, node->child(node->count()), level + 1); + } +} + +template +int btree
<P>
::internal_verify( + const node_type *node, const key_type *lo, const key_type *hi) const { + assert(node->count() > 0); + assert(node->count() <= node->max_count()); + if (lo) { + assert(!compare_keys(node->key(0), *lo)); + } + if (hi) { + assert(!compare_keys(*hi, node->key(node->count() - 1))); + } + for (int i = 1; i < node->count(); ++i) { + assert(!compare_keys(node->key(i), node->key(i - 1))); + } + int count = node->count(); + if (!node->leaf()) { + for (int i = 0; i <= node->count(); ++i) { + assert(node->child(i) != NULL); + assert(node->child(i)->parent() == node); + assert(node->child(i)->position() == i); + count += internal_verify( + node->child(i), + (i == 0) ? lo : &node->key(i - 1), + (i == node->count()) ? hi : &node->key(i)); + } + } + return count; +} + +} // namespace btree + +#endif // UTIL_BTREE_BTREE_H__ diff --git a/c_src/gb_lru/btree_container.h b/c_src/gb_lru/btree_container.h new file mode 100644 index 0000000..fb617ab --- /dev/null +++ b/c_src/gb_lru/btree_container.h @@ -0,0 +1,349 @@ +// Copyright 2013 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef UTIL_BTREE_BTREE_CONTAINER_H__ +#define UTIL_BTREE_BTREE_CONTAINER_H__ + +#include +#include + +#include "btree.h" + +namespace btree { + +// A common base class for btree_set, btree_map, btree_multiset and +// btree_multimap. +template +class btree_container { + typedef btree_container self_type; + + public: + typedef typename Tree::params_type params_type; + typedef typename Tree::key_type key_type; + typedef typename Tree::value_type value_type; + typedef typename Tree::key_compare key_compare; + typedef typename Tree::allocator_type allocator_type; + typedef typename Tree::pointer pointer; + typedef typename Tree::const_pointer const_pointer; + typedef typename Tree::reference reference; + typedef typename Tree::const_reference const_reference; + typedef typename Tree::size_type size_type; + typedef typename Tree::difference_type difference_type; + typedef typename Tree::iterator iterator; + typedef typename Tree::const_iterator const_iterator; + typedef typename Tree::reverse_iterator reverse_iterator; + typedef typename Tree::const_reverse_iterator const_reverse_iterator; + + public: + // Default constructor. + btree_container(const key_compare &comp, const allocator_type &alloc) + : tree_(comp, alloc) { + } + + // Copy constructor. + btree_container(const self_type &x) + : tree_(x.tree_) { + } + + // Iterator routines. + iterator begin() { return tree_.begin(); } + const_iterator begin() const { return tree_.begin(); } + iterator end() { return tree_.end(); } + const_iterator end() const { return tree_.end(); } + reverse_iterator rbegin() { return tree_.rbegin(); } + const_reverse_iterator rbegin() const { return tree_.rbegin(); } + reverse_iterator rend() { return tree_.rend(); } + const_reverse_iterator rend() const { return tree_.rend(); } + + // Lookup routines. 
+ iterator lower_bound(const key_type &key) { + return tree_.lower_bound(key); + } + const_iterator lower_bound(const key_type &key) const { + return tree_.lower_bound(key); + } + iterator upper_bound(const key_type &key) { + return tree_.upper_bound(key); + } + const_iterator upper_bound(const key_type &key) const { + return tree_.upper_bound(key); + } + std::pair equal_range(const key_type &key) { + return tree_.equal_range(key); + } + std::pair equal_range(const key_type &key) const { + return tree_.equal_range(key); + } + + // Utility routines. + void clear() { + tree_.clear(); + } + void swap(self_type &x) { + tree_.swap(x.tree_); + } + void dump(std::ostream &os) const { + tree_.dump(os); + } + void verify() const { + tree_.verify(); + } + + // Size routines. + size_type size() const { return tree_.size(); } + size_type max_size() const { return tree_.max_size(); } + bool empty() const { return tree_.empty(); } + size_type height() const { return tree_.height(); } + size_type internal_nodes() const { return tree_.internal_nodes(); } + size_type leaf_nodes() const { return tree_.leaf_nodes(); } + size_type nodes() const { return tree_.nodes(); } + size_type bytes_used() const { return tree_.bytes_used(); } + static double average_bytes_per_value() { + return Tree::average_bytes_per_value(); + } + double fullness() const { return tree_.fullness(); } + double overhead() const { return tree_.overhead(); } + + bool operator==(const self_type& x) const { + if (size() != x.size()) { + return false; + } + for (const_iterator i = begin(), xi = x.begin(); i != end(); ++i, ++xi) { + if (*i != *xi) { + return false; + } + } + return true; + } + + bool operator!=(const self_type& other) const { + return !operator==(other); + } + + + protected: + Tree tree_; +}; + +template +inline std::ostream& operator<<(std::ostream &os, const btree_container &b) { + b.dump(os); + return os; +} + +// A common base class for btree_set and safe_btree_set. +template +class btree_unique_container : public btree_container { + typedef btree_unique_container self_type; + typedef btree_container super_type; + + public: + typedef typename Tree::key_type key_type; + typedef typename Tree::value_type value_type; + typedef typename Tree::size_type size_type; + typedef typename Tree::key_compare key_compare; + typedef typename Tree::allocator_type allocator_type; + typedef typename Tree::iterator iterator; + typedef typename Tree::const_iterator const_iterator; + + public: + // Default constructor. + btree_unique_container(const key_compare &comp = key_compare(), + const allocator_type &alloc = allocator_type()) + : super_type(comp, alloc) { + } + + // Copy constructor. + btree_unique_container(const self_type &x) + : super_type(x) { + } + + // Range constructor. + template + btree_unique_container(InputIterator b, InputIterator e, + const key_compare &comp = key_compare(), + const allocator_type &alloc = allocator_type()) + : super_type(comp, alloc) { + insert(b, e); + } + + // Lookup routines. + iterator find(const key_type &key) { + return this->tree_.find_unique(key); + } + const_iterator find(const key_type &key) const { + return this->tree_.find_unique(key); + } + size_type count(const key_type &key) const { + return this->tree_.count_unique(key); + } + + // Insertion routines. 
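+  // Illustrative sketch (hypothetical instantiation, using the btree_map
+  // defined later in this patch): insert() follows std::map semantics, e.g.
+  //   btree::btree_map<int, std::string> m;
+  //   std::pair<btree::btree_map<int, std::string>::iterator, bool> r =
+  //       m.insert(std::make_pair(1, std::string("one")));
+  //   // r.second is true on first insertion, false if the key already exists.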
+ std::pair insert(const value_type &x) { + return this->tree_.insert_unique(x); + } + iterator insert(iterator position, const value_type &x) { + return this->tree_.insert_unique(position, x); + } + template + void insert(InputIterator b, InputIterator e) { + this->tree_.insert_unique(b, e); + } + + // Deletion routines. + int erase(const key_type &key) { + return this->tree_.erase_unique(key); + } + // Erase the specified iterator from the btree. The iterator must be valid + // (i.e. not equal to end()). Return an iterator pointing to the node after + // the one that was erased (or end() if none exists). + iterator erase(const iterator &iter) { + return this->tree_.erase(iter); + } + void erase(const iterator &first, const iterator &last) { + this->tree_.erase(first, last); + } +}; + +// A common base class for btree_map and safe_btree_map. +template +class btree_map_container : public btree_unique_container { + typedef btree_map_container self_type; + typedef btree_unique_container super_type; + + public: + typedef typename Tree::key_type key_type; + typedef typename Tree::data_type data_type; + typedef typename Tree::value_type value_type; + typedef typename Tree::mapped_type mapped_type; + typedef typename Tree::key_compare key_compare; + typedef typename Tree::allocator_type allocator_type; + + private: + // A pointer-like object which only generates its value when + // dereferenced. Used by operator[] to avoid constructing an empty data_type + // if the key already exists in the map. + struct generate_value { + generate_value(const key_type &k) + : key(k) { + } + value_type operator*() const { + return std::make_pair(key, data_type()); + } + const key_type &key; + }; + + public: + // Default constructor. + btree_map_container(const key_compare &comp = key_compare(), + const allocator_type &alloc = allocator_type()) + : super_type(comp, alloc) { + } + + // Copy constructor. + btree_map_container(const self_type &x) + : super_type(x) { + } + + // Range constructor. + template + btree_map_container(InputIterator b, InputIterator e, + const key_compare &comp = key_compare(), + const allocator_type &alloc = allocator_type()) + : super_type(b, e, comp, alloc) { + } + + // Insertion routines. + data_type& operator[](const key_type &key) { + return this->tree_.insert_unique(key, generate_value(key)).first->second; + } +}; + +// A common base class for btree_multiset and btree_multimap. +template +class btree_multi_container : public btree_container { + typedef btree_multi_container self_type; + typedef btree_container super_type; + + public: + typedef typename Tree::key_type key_type; + typedef typename Tree::value_type value_type; + typedef typename Tree::size_type size_type; + typedef typename Tree::key_compare key_compare; + typedef typename Tree::allocator_type allocator_type; + typedef typename Tree::iterator iterator; + typedef typename Tree::const_iterator const_iterator; + + public: + // Default constructor. + btree_multi_container(const key_compare &comp = key_compare(), + const allocator_type &alloc = allocator_type()) + : super_type(comp, alloc) { + } + + // Copy constructor. + btree_multi_container(const self_type &x) + : super_type(x) { + } + + // Range constructor. + template + btree_multi_container(InputIterator b, InputIterator e, + const key_compare &comp = key_compare(), + const allocator_type &alloc = allocator_type()) + : super_type(comp, alloc) { + insert(b, e); + } + + // Lookup routines. 
+ iterator find(const key_type &key) { + return this->tree_.find_multi(key); + } + const_iterator find(const key_type &key) const { + return this->tree_.find_multi(key); + } + size_type count(const key_type &key) const { + return this->tree_.count_multi(key); + } + + // Insertion routines. + iterator insert(const value_type &x) { + return this->tree_.insert_multi(x); + } + iterator insert(iterator position, const value_type &x) { + return this->tree_.insert_multi(position, x); + } + template + void insert(InputIterator b, InputIterator e) { + this->tree_.insert_multi(b, e); + } + + // Deletion routines. + int erase(const key_type &key) { + return this->tree_.erase_multi(key); + } + // Erase the specified iterator from the btree. The iterator must be valid + // (i.e. not equal to end()). Return an iterator pointing to the node after + // the one that was erased (or end() if none exists). + iterator erase(const iterator &iter) { + return this->tree_.erase(iter); + } + void erase(const iterator &first, const iterator &last) { + this->tree_.erase(first, last); + } +}; + +} // namespace btree + +#endif // UTIL_BTREE_BTREE_CONTAINER_H__ diff --git a/c_src/gb_lru/btree_map.h b/c_src/gb_lru/btree_map.h new file mode 100644 index 0000000..b83489f --- /dev/null +++ b/c_src/gb_lru/btree_map.h @@ -0,0 +1,130 @@ +// Copyright 2013 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// A btree_map<> implements the STL unique sorted associative container +// interface and the pair associative container interface (a.k.a map<>) using a +// btree. A btree_multimap<> implements the STL multiple sorted associative +// container interface and the pair associtive container interface (a.k.a +// multimap<>) using a btree. See btree.h for details of the btree +// implementation and caveats. + +#ifndef UTIL_BTREE_BTREE_MAP_H__ +#define UTIL_BTREE_BTREE_MAP_H__ + +#include +#include +#include +#include +#include + +#include "btree.h" +#include "btree_container.h" + +namespace btree { + +// The btree_map class is needed mainly for its constructors. +template , + typename Alloc = std::allocator >, + int TargetNodeSize = 256> +class btree_map : public btree_map_container< + btree > > { + + typedef btree_map self_type; + typedef btree_map_params< + Key, Value, Compare, Alloc, TargetNodeSize> params_type; + typedef btree btree_type; + typedef btree_map_container super_type; + + public: + typedef typename btree_type::key_compare key_compare; + typedef typename btree_type::allocator_type allocator_type; + + public: + // Default constructor. + btree_map(const key_compare &comp = key_compare(), + const allocator_type &alloc = allocator_type()) + : super_type(comp, alloc) { + } + + // Copy constructor. + btree_map(const self_type &x) + : super_type(x) { + } + + // Range constructor. 
+ template + btree_map(InputIterator b, InputIterator e, + const key_compare &comp = key_compare(), + const allocator_type &alloc = allocator_type()) + : super_type(b, e, comp, alloc) { + } +}; + +template +inline void swap(btree_map &x, + btree_map &y) { + x.swap(y); +} + +// The btree_multimap class is needed mainly for its constructors. +template , + typename Alloc = std::allocator >, + int TargetNodeSize = 256> +class btree_multimap : public btree_multi_container< + btree > > { + + typedef btree_multimap self_type; + typedef btree_map_params< + Key, Value, Compare, Alloc, TargetNodeSize> params_type; + typedef btree btree_type; + typedef btree_multi_container super_type; + + public: + typedef typename btree_type::key_compare key_compare; + typedef typename btree_type::allocator_type allocator_type; + typedef typename btree_type::data_type data_type; + typedef typename btree_type::mapped_type mapped_type; + + public: + // Default constructor. + btree_multimap(const key_compare &comp = key_compare(), + const allocator_type &alloc = allocator_type()) + : super_type(comp, alloc) { + } + + // Copy constructor. + btree_multimap(const self_type &x) + : super_type(x) { + } + + // Range constructor. + template + btree_multimap(InputIterator b, InputIterator e, + const key_compare &comp = key_compare(), + const allocator_type &alloc = allocator_type()) + : super_type(b, e, comp, alloc) { + } +}; + +template +inline void swap(btree_multimap &x, + btree_multimap &y) { + x.swap(y); +} + +} // namespace btree + +#endif // UTIL_BTREE_BTREE_MAP_H__ diff --git a/c_src/gb_lru/btreelru_nif.cpp b/c_src/gb_lru/btreelru_nif.cpp new file mode 100644 index 0000000..ce0712d --- /dev/null +++ b/c_src/gb_lru/btreelru_nif.cpp @@ -0,0 +1,619 @@ +#include +#include +#include +#include "erl_nif.h" +#include "erlterm.h" +#include "lru.h" + + +using namespace std; + +namespace { /* anonymous namespace starts */ + +typedef struct _obj_resource { + bool allocated; + void *object; + ErlNifMutex *emtx; +} object_resource; + +ErlNifResourceFlags resource_flags = (ErlNifResourceFlags)(ERL_NIF_RT_CREATE | ERL_NIF_RT_TAKEOVER); + +ErlNifResourceType* lruResource; +ErlNifResourceType* iteratorResource; + +/* atoms */ +ERL_NIF_TERM atom_ok; +ERL_NIF_TERM atom_key; +ERL_NIF_TERM atom_error; +ERL_NIF_TERM atom_invalid; +ERL_NIF_TERM atom_value; +ERL_NIF_TERM atom_max_size; +ERL_NIF_TERM atom_tab; +ERL_NIF_TERM atom_lru_old; + +void lru_dtor(ErlNifEnv* env, void *lru); +void iterator_dtor(ErlNifEnv* env, void *it); + +int load(ErlNifEnv* env, void** priv_data, ERL_NIF_TERM load_info){ + lruResource = enif_open_resource_type(env, + "btreelru_nif", + "lru", + lru_dtor, + resource_flags, + NULL); + + iteratorResource = enif_open_resource_type(env, + "btreelru_nif", + "iterator", + iterator_dtor, + resource_flags, + NULL); + + atom_ok = enif_make_atom(env, "ok"); + atom_key = enif_make_atom(env, "key"); + atom_error = enif_make_atom(env, "error"); + atom_invalid = enif_make_atom(env, "invalid"); + atom_value = enif_make_atom(env, "value"); + atom_max_size = enif_make_atom(env, "max_size"); + atom_tab = enif_make_atom(env, "tab"); + atom_lru_old = enif_make_atom(env, "lru_old"); + + return 0; +} + +int reload(ErlNifEnv* env, void** priv_data, ERL_NIF_TERM load_info){ + return 0; +} + +int upgrade(ErlNifEnv* env, void** priv_data, void** old_priv_data,ERL_NIF_TERM load_info){ + return 0; +} + +void lru_dtor(ErlNifEnv* env, void* _lru_btree) { + object_resource *lru_btree = (object_resource*) _lru_btree; + if (lru_btree->allocated) + 
delete (LRUBtree*) lru_btree->object; +} + +void iterator_dtor(ErlNifEnv* env, void* _lru_iterator) { + object_resource *lru_iterator = (object_resource*) _lru_iterator; + if (lru_iterator->allocated) + delete (LRUBtree::iterator*) lru_iterator->object; +} + +void node_free(LRUBtree *bt_lru, LRUNode *node) { + enif_free_env((ErlNifEnv*)node->kvenv); + return; +} + +void node_kickout(LRUBtree *bt_lru, LRUNode *node, void *currenv) { + ErlNifEnv *env = (ErlNifEnv *) currenv; + + if (bt_lru->pid_set) { + enif_send(env, &bt_lru->pid, NULL, enif_make_tuple3(env, atom_lru_old, node->key.t, node->data.t)); + } + + return; +} + +ERL_NIF_TERM next(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { + object_resource *lru; + LRUBtree *bt_lru; + LRUNode *node; + ErlTerm key; + ErlTerm value; + + if (argc != 2) { + return enif_make_badarg(env); + } + + if (!enif_get_resource(env, argv[0], lruResource, (void **) &lru)) { + return enif_make_badarg(env); + } + + bt_lru = (LRUBtree *) lru->object; + + key.t = argv[1]; + node = bt_lru->get(key); + + if (!node) + return enif_make_tuple2(env, atom_error, atom_invalid); + + node = node->next; + if (!node) + return enif_make_tuple2(env, atom_error, atom_invalid); + + key.t = enif_make_copy(env, node->key.t); + value.t = enif_make_copy(env, node->data.t); + + return enif_make_tuple2(env, key.t, value.t); +} + +ERL_NIF_TERM prev(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { + object_resource *lru; + LRUBtree *bt_lru; + LRUNode *node; + ErlTerm key; + ErlTerm value; + + if (argc != 2) { + return enif_make_badarg(env); + } + + if (!enif_get_resource(env, argv[0], lruResource, (void **) &lru)) { + return enif_make_badarg(env); + } + + bt_lru = (LRUBtree *) lru->object; + + key.t = argv[1]; + node = bt_lru->get(key); + + if (!node) + return enif_make_tuple2(env, atom_error, atom_invalid); + + node = node->prev; + if (!node) + return enif_make_tuple2(env, atom_error, atom_invalid); + + key.t = enif_make_copy(env, node->key.t); + value.t = enif_make_copy(env, node->data.t); + + return enif_make_tuple2(env, key.t, value.t); +} + + +ERL_NIF_TERM create(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { + unsigned long max_size; + object_resource *lru; + LRUBtree *bt_lru; + ERL_NIF_TERM lru_term; + + /* get max_size */ + if (enif_get_ulong(env, argv[0], &max_size) < 1){ + return enif_make_tuple2(env, atom_error, atom_max_size); + } + + if (!(bt_lru = new LRUBtree(max_size, node_free, node_kickout))) { + return enif_make_tuple2(env, atom_error, enif_make_atom(env, "alloction")); + } + + lru = (object_resource *) enif_alloc_resource(lruResource, sizeof(object_resource)); + lru->object = bt_lru; + lru->allocated = true; + + lru_term = enif_make_resource(env, lru); + enif_release_resource(lru); + + return enif_make_tuple2(env, atom_ok, lru_term); + +} + +ERL_NIF_TERM seek(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { + object_resource *lru; + object_resource *it; + LRUBtree *bt_lru; + LRUBtree::iterator *bt_it_; + LRUBtree::iterator bt_it; + ErlTerm key; + ERL_NIF_TERM it_term; + ERL_NIF_TERM kv; + + if (!enif_get_resource(env, argv[0], lruResource, (void **) &lru)) { + return enif_make_badarg(env); + } + + key.t = argv[1]; + + bt_lru = (LRUBtree *)lru->object; + + bt_it = bt_lru->bmap.lower_bound(key); + if ( bt_it == bt_lru->bmap.end() ) { + return enif_make_tuple2(env, atom_error, atom_invalid); + } + + + bt_it_ = new LRUBtree::iterator; + *bt_it_ = bt_it; + it = (object_resource *) enif_alloc_resource(iteratorResource, sizeof(object_resource)); + 
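+  /* Hand ownership of the heap-allocated iterator to the VM: after
+     enif_make_resource() the returned term keeps the resource alive, and
+     enif_release_resource() drops our reference so iterator_dtor() runs
+     once the Erlang garbage collector reclaims the term. */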
it->object = bt_it_; + it->allocated = true; + + it_term = enif_make_resource(env, it); + enif_release_resource(it); + kv = enif_make_tuple2(env, bt_it->second->key.t, bt_it->second->data.t); + return enif_make_tuple2(env, kv, it_term); + +} + +ERL_NIF_TERM iterate_next(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { + object_resource *lru; + object_resource *it; + LRUBtree::iterator *bt_it_; + LRUBtree *bt_lru; + ERL_NIF_TERM kv; + + if (!enif_get_resource(env, argv[0], lruResource, (void **) &lru)) { + return enif_make_badarg(env); + } + + if (!enif_get_resource(env, argv[1], iteratorResource, (void **) &it)) { + return enif_make_badarg(env); + } + + bt_lru = (LRUBtree *)lru->object; + bt_it_ = (LRUBtree::iterator *) it->object; + + if (bt_it_ == NULL) + return enif_make_tuple2(env, atom_error, atom_invalid); + + (*bt_it_)++; + + if ( *bt_it_ == bt_lru->bmap.end() ) { + it->allocated = false; + delete bt_it_; + it->object = NULL; + return enif_make_tuple2(env, atom_error, atom_invalid); + } + + kv = enif_make_tuple2(env, (*bt_it_)->second->key.t, (*bt_it_)->second->data.t); + return enif_make_tuple2(env, atom_ok, kv); +} + +ERL_NIF_TERM close(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { + object_resource *lru; + LRUBtree *bt_lru; + + if (argc != 1) { + return enif_make_badarg(env); + } + + if (!enif_get_resource(env, argv[0], lruResource, (void **) &lru)) { + return enif_make_badarg(env); + } + + bt_lru = (LRUBtree *)lru->object; + lru->allocated = false; + delete bt_lru; + + + return atom_ok; +} + +ERL_NIF_TERM read(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { + object_resource *lru; + LRUBtree *bt_lru; + LRUNode *node; + ErlTerm key; + ERL_NIF_TERM kv; + + if (argc != 2) { + return enif_make_badarg(env); + } + + if (!enif_get_resource(env, argv[0], lruResource, (void **) &lru)) { + return enif_make_badarg(env); + } + + bt_lru = (LRUBtree *) lru->object; + + key.t = argv[1]; + node = bt_lru->get(key); + + if (!node) + return enif_make_tuple2(env, atom_error, atom_invalid); + + kv = enif_make_tuple2(env, enif_make_copy(env, node->key.t), enif_make_copy(env, node->data.t)); + + return enif_make_tuple2(env, atom_ok, kv); +} + +ERL_NIF_TERM remove(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { + object_resource *lru; + LRUBtree *bt_lru; + ErlTerm key; + + if (argc != 2) { + return enif_make_badarg(env); + } + + if (!enif_get_resource(env, argv[0], lruResource, (void **) &lru)) { + return enif_make_badarg(env); + } + + bt_lru = (LRUBtree *) lru->object; + + key.t = argv[1]; + bt_lru->erase(key); + + return atom_ok; +} + +ERL_NIF_TERM oldest(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { + object_resource *lru; + LRUBtree *bt_lru; + LRUNode *node; + ERL_NIF_TERM key; + ERL_NIF_TERM value; + + if (argc != 1) { + return enif_make_badarg(env); + } + + if (!enif_get_resource(env, argv[0], lruResource, (void **) &lru)) { + return enif_make_badarg(env); + } + + bt_lru = (LRUBtree *) lru->object; + + node = bt_lru->getOldest(); + + if (!node) + return enif_make_tuple2(env, atom_error, atom_invalid); + + key = enif_make_copy(env, node->key.t); + value = enif_make_copy(env, node->data.t); + + return enif_make_tuple2(env, key, value); +} + +ERL_NIF_TERM latest(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { + object_resource *lru; + LRUBtree *bt_lru; + LRUNode *node; + ERL_NIF_TERM key; + ERL_NIF_TERM value; + + if (argc != 1) { + return enif_make_badarg(env); + } + + if (!enif_get_resource(env, argv[0], lruResource, (void **) &lru)) { + return 
enif_make_badarg(env); + } + + bt_lru = (LRUBtree *) lru->object; + + // last is "last in" in the lru + node = bt_lru->getLatest(); + + if (!node) + return enif_make_tuple2(env, atom_error, atom_invalid); + + key = enif_make_copy(env, node->key.t); + value = enif_make_copy(env, node->data.t); + + return enif_make_tuple2(env, key, value); +} + +ERL_NIF_TERM last(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { + object_resource *lru; + LRUBtree *bt_lru; + LRUNode *node; + ERL_NIF_TERM key; + ERL_NIF_TERM value; + + if (argc != 1) { + return enif_make_badarg(env); + } + + if (!enif_get_resource(env, argv[0], lruResource, (void **) &lru)) { + return enif_make_badarg(env); + } + + bt_lru = (LRUBtree *) lru->object; + + node = bt_lru->bmap.rbegin()->second; + + if (!node) + return enif_make_tuple2(env, atom_error, atom_invalid); + + key = enif_make_copy(env, node->key.t); + value = enif_make_copy(env, node->data.t); + + return enif_make_tuple2(env, key, value); +} + +ERL_NIF_TERM first(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { + object_resource *lru; + LRUBtree *bt_lru; + LRUNode *node; + ERL_NIF_TERM key; + ERL_NIF_TERM value; + + if (argc != 1) { + return enif_make_badarg(env); + } + + if (!enif_get_resource(env, argv[0], lruResource, (void **) &lru)) { + return enif_make_badarg(env); + } + + bt_lru = (LRUBtree *) lru->object; + + node = bt_lru->bmap.begin()->second; + + if (!node) + return enif_make_tuple2(env, atom_error, atom_invalid); + + key = enif_make_copy(env, node->key.t); + value = enif_make_copy(env, node->data.t); + + return enif_make_tuple2(env, key, value); +} + +ERL_NIF_TERM write(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { + object_resource *lru; + LRUBtree *bt_lru; + ErlTerm key; + ErlTerm value; + ErlNifEnv *kv_env; + size_t size; + + if (argc != 3) { + return enif_make_badarg(env); + } + + if (!enif_get_resource(env, argv[0], lruResource, (void **) &lru)) { + return enif_make_badarg(env); + } + + bt_lru = (LRUBtree *) lru->object; + + kv_env = enif_alloc_env(); + key.t = enif_make_copy(kv_env, argv[1]); + value.t = enif_make_copy(kv_env, argv[2]); + + /* do not use the size of term + size = enif_size_term(key.t); + size += enif_size_term(value.t); + */ + + /* size based on entries */ + size = 1; + + bt_lru->put(key, value, kv_env, env, size); + + return atom_ok; +} + +ERL_NIF_TERM register_pid(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { + object_resource *lru; + LRUBtree *bt_lru; + + if (argc != 2) { + return enif_make_badarg(env); + } + + if (!enif_get_resource(env, argv[0], lruResource, (void **) &lru)) { + return enif_make_badarg(env); + } + bt_lru = (LRUBtree *) lru->object; + + if (!enif_get_local_pid(env, argv[1], &(bt_lru->pid))) { + return enif_make_badarg(env); + } + bt_lru->pid_set = true; + + return atom_ok; +} + +ERL_NIF_TERM unregister_pid(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { + object_resource *lru; + LRUBtree *bt_lru; + + if (argc != 1) { + return enif_make_badarg(env); + } + + if (!enif_get_resource(env, argv[0], lruResource, (void **) &lru)) { + return enif_make_badarg(env); + } + bt_lru = (LRUBtree *) lru->object; + + bt_lru->pid_set = false; + + return atom_ok; +} + +ERL_NIF_TERM get_registered_pid(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { + object_resource *lru; + LRUBtree *bt_lru; + + if (argc != 1) { + return enif_make_badarg(env); + } + + if (!enif_get_resource(env, argv[0], lruResource, (void **) &lru)) { + return enif_make_badarg(env); + } + bt_lru = (LRUBtree *) lru->object; + + 
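+  /* pid_set is only flipped by register_pid/2 and unregister_pid/1; with no
+     registered pid we return {error, invalid} rather than an uninitialized pid. */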
if (!bt_lru->pid_set) { + return enif_make_tuple2(env, atom_error, atom_invalid); + } + + return enif_make_pid(env, &(bt_lru->pid)); +} + +ERL_NIF_TERM get_size(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { + object_resource *lru; + LRUBtree *bt_lru; + + if (argc != 1) { + return enif_make_badarg(env); + } + + if (!enif_get_resource(env, argv[0], lruResource, (void **) &lru)) { + return enif_make_badarg(env); + } + bt_lru = (LRUBtree *) lru->object; + + return enif_make_ulong(env, bt_lru->getSize()); +} + +ERL_NIF_TERM get_max_size(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { + object_resource *lru; + LRUBtree *bt_lru; + + if (argc != 1) { + return enif_make_badarg(env); + } + + if (!enif_get_resource(env, argv[0], lruResource, (void **) &lru)) { + return enif_make_badarg(env); + } + bt_lru = (LRUBtree *) lru->object; + + return enif_make_ulong(env, bt_lru->getMaxSize()); +} + +ERL_NIF_TERM set_max_size(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { + object_resource *lru; + unsigned long max_size; + LRUBtree *bt_lru; + + if (argc != 2) { + return enif_make_badarg(env); + } + + if (!enif_get_resource(env, argv[0], lruResource, (void **) &lru)) { + return enif_make_badarg(env); + } + /* get max_size */ + if (enif_get_ulong(env, argv[1], &max_size) < 1){ + return enif_make_tuple2(env, atom_error, atom_max_size); + } + + bt_lru = (LRUBtree *) lru->object; + + bt_lru->setMaxSize(max_size); + + return atom_ok; +} + +ErlNifFunc nif_funcs[] = { + {"create", 1, create}, + {"close", 1, close, ERL_NIF_DIRTY_JOB_IO_BOUND}, + {"register_pid", 2, register_pid}, + {"unregister_pid", 1, unregister_pid}, + {"get_registered_pid", 1, get_registered_pid}, + {"get_size", 1, get_size}, + {"get_max_size", 1, get_max_size}, + {"set_max_size", 2, set_max_size}, + {"oldest", 1, oldest}, + {"latest", 1, latest}, + {"last", 1, last}, + {"first", 1, first}, + {"read", 2, read}, + {"next", 2, next}, + {"prev", 2, prev}, + {"seek", 2, seek}, + {"iterate_next", 2, iterate_next}, + {"remove", 2, remove}, + {"write", 3, write} +}; +} /* anonymouse namespace ends */ + + +ERL_NIF_INIT(btree_lru, nif_funcs, load, reload, upgrade, NULL) diff --git a/c_src/gb_lru/erlterm.h b/c_src/gb_lru/erlterm.h new file mode 100644 index 0000000..42627c8 --- /dev/null +++ b/c_src/gb_lru/erlterm.h @@ -0,0 +1,71 @@ +#include "erl_nif.h" + +class ErlTerm { + public: + ERL_NIF_TERM t; + + static void *operator new(size_t size) { + return enif_alloc(size); + } + + static void operator delete(void *block) { + enif_free(block); + } + + bool operator< (const ErlTerm &term) { + if (enif_compare(t, term.t) < 0) + return true; + return false; + } + + bool operator< (ErlTerm &term) { + if (enif_compare(t, term.t) < 0) + return true; + return false; + } + + bool operator> (const ErlTerm &term) { + if (enif_compare(t, term.t) > 0) + return true; + return false; + } + + bool operator> (ErlTerm &term) { + if (enif_compare(t, term.t) > 0) + return true; + return false; + } + + bool operator== (const ErlTerm &term) { + if (enif_compare(t, term.t) == 0) + return true; + return false; + } + + bool operator== (ErlTerm &term) { + if (enif_compare(t, term.t) == 0) + return true; + return false; + } +}; + +inline bool operator < (const ErlTerm &a, const ErlTerm &b) { + if (enif_compare(a.t, b.t) < 0) + return true; + return false; +} + + +#if 0 +// extend std::hash to understand ErlTerm used by hashmap not btree +namespace std { + template <> + struct hash + { + size_t operator()(const ErlTerm& term) const + { + return (size_t) 
enif_hash_term(term.t); + } + }; +} +#endif diff --git a/c_src/gb_lru/lru.h b/c_src/gb_lru/lru.h new file mode 100644 index 0000000..47567e4 --- /dev/null +++ b/c_src/gb_lru/lru.h @@ -0,0 +1,266 @@ +#include "btree_map.h" +#include +#include +#include "murmurhash2.h" +#include "binary.h" +#include "erl_nif.h" + +// extend std::hash to understand Binary type +namespace std { + template <> + struct hash + { + size_t operator()(const Binary& b) const + { + return MurmurHash2(b.bin, b.size, 4242); + } + }; +} + +template +struct LRUNode +{ + K key; + V data; + void *kvenv; + LRUNode *prev; + LRUNode *next; + size_t size; + LRUNode(void *kvenv = NULL, size_t size=0) : kvenv(kvenv), prev(NULL), next(NULL), size(size) { } + +/* + static void *LRUNode::operator new(size_t size) { + return enif_alloc(size); + } + + static void operator delete(void *block) { + enif_free(block); + } +*/ + + void printChain() { + LRUNode* node; + int i=11; + std::cout << "("; + for(node = this; node && i; node = node->next, i--) { + std::cout << node->key << " -> "; + } + if (node) { + std::cout << " loop detection end "; + } else { + std::cout << " end "; + } + std::cout << ")" << std::endl; + } + + void printNextPrevKey() { + std::cout << "("; + printNextKey(); + printPrevKey(); + std::cout << ")"; + } + + void printNextKey() { + if (next) { + std::cout << "next key " << next->key << " "; + } + } + + void printPrevKey() { + if (prev) { + std::cout << "prev key " << prev->key << " "; + } + } +}; + +template +class LRUBtree { + private: + LRUNode *oldest; + LRUNode *latest; + unsigned long size; + unsigned long max_size; + void (*node_free)(LRUBtree *lru, LRUNode *node); + void (*node_kickout)(LRUBtree *lru, LRUNode *node, void *call_env); + typedef btree::btree_map*> LRUBtree_map; + + public: + LRUBtree_map bmap; + bool pid_set = false; + ErlNifPid pid; + typedef typename LRUBtree_map::iterator iterator; + typedef typename LRUBtree_map::reverse_iterator reverse_iterator; + + void printLatest() { + if (latest) { + std::cout << " latest " << latest->key; + } else { + std::cout << " no data in lru "; + } + } + + private: + LRUNode* erase(LRUNode *node) { + if (node->next) { + node->next->prev = node->prev; + } + if (node->prev) { + node->prev->next = node->next; + } + + if (node == oldest) { + oldest = node->prev; + } + + if (node == latest) { + latest = node->next; + } + + if (node_free) { + node_free(this, node); + } + + node->next = NULL; + node->prev = NULL; + return node; + } + + void printOldest() { + if(oldest) { + std::cout << " oldest " << oldest->key; + } else { + std::cout << " no data in lru "; + } + } + + void check_size(void *call_env) { + if (size > max_size) { + if (oldest) { // remove check if oldest exist and rely on max_size always being positive + if (node_kickout) + node_kickout(this, oldest, call_env); + erase(oldest->key); + } + } + } + +#define SIZE_100MB 100*1024*1024 + public: + LRUBtree(unsigned long max_size = SIZE_100MB, + void (*node_free)(LRUBtree *lru, LRUNode *node) = NULL, + void (*node_kickout)(LRUBtree *lru, LRUNode *node, void *call_env) = NULL) + : oldest(NULL), latest(NULL), size(0), max_size(max_size), node_free(node_free), + node_kickout(node_kickout) { } + + ~LRUBtree() { + LRUNode *node; + LRUNode *next; + node = latest; + while(node) { + if (node_free) { + node_free(this, node); + } + next = node->next; + delete node; + node = next; + } + } + + void printSize() { + std::cout << "size " << size << std::endl; + } + + unsigned long getSize() { + return size; + } + + unsigned long 
getMaxSize() { + return max_size; + } + + void setMaxSize(unsigned long max_size) { + this->max_size = max_size; + } + + void erase(K key) { + LRUNode *node; + if ((node = bmap[key])) { + erase(node); + bmap.erase(key); + size -= node->size; + delete node; + } + } + + inline void put(K key, V data, + void *kvenv = NULL, void *call_env = NULL, + size_t size = 1) { + LRUNode *node; + + this->size += size; + check_size(call_env); + + // overwrite already existing key + if ((node = bmap[key])) { + this->size -= node->size; + erase(node); + node->kvenv = kvenv; + node->next = latest; + node->size = size; + if (node->next) { + node->next->prev = node; + } + if (!oldest) { + oldest = node; + } + latest = node; + node->key = key; + node->data = data; + } + + else if (!oldest) { + node = new LRUNode; + node->key = key; + node->data = data; + node->kvenv = kvenv; + node->size = size; + oldest = node; + latest = node; + bmap[node->key] = node; + } + + else { + node = new LRUNode; + node->key = key; + node->data = data; + node->kvenv = kvenv; + node->size = size; + latest->prev = node; + node->next = latest; + latest = node; + bmap[node->key] = node; + } + } + + LRUNode* get(K key) { + return bmap[key]; + } + + LRUNode* getOldest() { + return oldest; + } + + LRUNode* getLatest() { + return latest; + } + + LRUNode* getNext(LRUNode *node) { + return node->next; + } + + LRUNode* getPrev(LRUNode *node) { + return node->prev; + + } +}; + + diff --git a/c_src/gb_lru/murmurhash2.h b/c_src/gb_lru/murmurhash2.h new file mode 100644 index 0000000..12542a5 --- /dev/null +++ b/c_src/gb_lru/murmurhash2.h @@ -0,0 +1,73 @@ +//----------------------------------------------------------------------------- +// MurmurHash2, by Austin Appleby + +// Note - This code makes a few assumptions about how your machine behaves - + +// 1. We can read a 4-byte value from any address without crashing +// 2. sizeof(int) == 4 + +// And it has a few limitations - + +// 1. It will not wo +// +// rk incrementally. +// 2. It will not produce the same results on little-endian and big-endian +// machines. + +unsigned int MurmurHash2 ( const void * key, int len, unsigned int seed ) +{ + // 'm' and 'r' are mixing constants generated offline. + // They're not really 'magic', they just happen to work well. + + const unsigned int m = 0x5bd1e995; + const int r = 24; + + // Initialize the hash to a 'random' value + + unsigned int h = seed ^ len; + + // Mix 4 bytes at a time into the hash + + const unsigned char * data = (const unsigned char *)key; + + while(len >= 4) + { + + unsigned int k = *(unsigned int *)data; + + + k *= m; + k ^= k >> r; + k *= m; + + h *= m; + h ^= k; + + data += 4; + len -= 4; + } + + // Handle the last few bytes of the input array + + switch(len) + { + case 3: h ^= data[2] << 16; + case 2: h ^= data[1] << 8; + case 1: h ^= data[0]; + h *= m; + }; + + // Do a few final mixes of t + // + // + // + // he hash to ensure the last few + // bytes are well-incorporated. + + h ^= h >> 13; + h *= m; + h ^= h >> 15; + + return h; +} + diff --git a/c_src/gb_lru/rebar.config b/c_src/gb_lru/rebar.config new file mode 100644 index 0000000..0ffcccf --- /dev/null +++ b/c_src/gb_lru/rebar.config @@ -0,0 +1,7 @@ +{port_specs, [ + {"../../priv/btreelru_nif.so", ["btreelru_nif.cpp"]} +]}. 
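Taken together, the handlers and the nif_funcs table above give btree_lru a small Erlang-facing API. The sketch below (a hypothetical lru_demo/0) shows the intended call pattern as far as it can be read from this diff; the argument and return shapes of create/1 and close/1 are not visible here, so {ok, Ref} and the capacity argument are assumptions.

    %% Hypothetical usage of the btree_lru NIF (create/1 and close/1 shapes assumed).
    lru_demo() ->
        {ok, Ref} = btree_lru:create(10000),            %% assumed: returns a resource handle
        ok = btree_lru:write(Ref, key1, <<"v1">>),      %% write/3 always returns ok
        ok = btree_lru:write(Ref, key2, <<"v2">>),
        {key2, <<"v2">>} = btree_lru:latest(Ref),       %% most recently written entry
        2 = btree_lru:get_size(Ref),                    %% write/3 counts one unit per entry
        ok = btree_lru:set_max_size(Ref, 5000),
        _ = btree_lru:close(Ref),
        ok.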
+ + + + diff --git a/c_src/native_array/native_array_nif.c b/c_src/native_array/native_array_nif.c new file mode 100644 index 0000000..6ef3b5b --- /dev/null +++ b/c_src/native_array/native_array_nif.c @@ -0,0 +1,90 @@ +#include "erl_nif.h" + +#define A_OK(env) enif_make_atom(env, "ok") +#define assert_badarg(S, Env) if (! S) { return enif_make_badarg(env); } + +static ErlNifResourceType* array_handle = NULL; + +static void array_handle_cleanup(ErlNifEnv* env, void* arg) {} + +static int load(ErlNifEnv* env, void** priv, ERL_NIF_TERM load_info) +{ + ErlNifResourceFlags flags = ERL_NIF_RT_CREATE | ERL_NIF_RT_TAKEOVER; + array_handle = enif_open_resource_type(env, "native_array_nif", "array_handle", + &array_handle_cleanup, flags, 0); + // 用于存储指针的数组, 最多1000个array + *priv = enif_alloc(1000 * sizeof(void*)); + return 0; +} + +static void unload(ErlNifEnv* env, void* priv) +{ + enif_free(priv); +} + +static ERL_NIF_TERM new_nif(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + // 取参数 + int refindex; + assert_badarg(enif_get_int(env, argv[0], &refindex), env); + // 取参数length + unsigned long length; + assert_badarg(enif_get_ulong(env, argv[1], &length), env); + // 分配内存 + // unsigned char* ref = enif_alloc_resource(array_handle, length); + unsigned char* ref = enif_alloc(length); + // 保存指针 + *((unsigned char**)enif_priv_data(env) + refindex) = ref; + return A_OK(env); +} + +static ERL_NIF_TERM get_nif(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + // 取参数ref + int refindex; + assert_badarg(enif_get_int(env, argv[0], &refindex), env); + unsigned char* ref = *((unsigned char**)enif_priv_data(env) + refindex); + assert_badarg(ref, env); + // 取参数offset + unsigned long offset; + assert_badarg(enif_get_ulong(env, argv[1], &offset), env); + return enif_make_int(env, (int)(*(ref + offset - 1))); +} + +static ERL_NIF_TERM put_nif(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + // 取参数ref + int refindex; + assert_badarg(enif_get_int(env, argv[0], &refindex), env); + unsigned char* ref = *((unsigned char**)enif_priv_data(env) + refindex); + // 取参数offset + unsigned long offset; + assert_badarg(enif_get_ulong(env, argv[1], &offset), env); + // 取参数newval + unsigned int newval; + assert_badarg(enif_get_uint(env, argv[2], &newval), env); + // 赋值 + *(ref + offset - 1) = (unsigned char)newval; + return A_OK(env); +} + +static ERL_NIF_TERM delete_nif(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + // 取参数ref + int refindex; + assert_badarg(enif_get_int(env, argv[0], &refindex), env); + unsigned char* ref = *((unsigned char**)enif_priv_data(env) + refindex); + //enif_release_resource(ref); + enif_free(ref); + return A_OK(env); +} + +static ErlNifFunc nif_funcs[] = { + {"new", 2, new_nif}, + {"get", 2, get_nif}, + {"put", 3, put_nif}, + {"delete", 1, delete_nif}, +}; + +ERL_NIF_INIT(native_array, nif_funcs, &load, NULL, NULL, &unload) + diff --git a/c_src/native_array/rebar.config b/c_src/native_array/rebar.config new file mode 100644 index 0000000..c6f912b --- /dev/null +++ b/c_src/native_array/rebar.config @@ -0,0 +1,7 @@ +{port_specs, [ + {"../../priv/native_array_nif.so", ["*.c"]} +]}. + + + + diff --git a/c_src/neural/NeuralTable.cpp b/c_src/neural/NeuralTable.cpp new file mode 100644 index 0000000..b8df3ce --- /dev/null +++ b/c_src/neural/NeuralTable.cpp @@ -0,0 +1,905 @@ +#include "NeuralTable.h" + +/* !!!! A NOTE ON KEYS !!!! + * Keys should be integer values passed from the erlang emulator, + * and should be generated by a hashing function. 
There is no easy + * way to hash an erlang term from a NIF, but ERTS is more than + * capable of doing so. + * + * Additionally, this workaround means that traditional collision + * handling mechanisms for hash tables will not work without + * special consideration. For instance, to compare keys as you + * would by storing linked lists, you must retrieve the stored + * tuple and call enif_compare or enif_is_identical on the key + * elements of each tuple. + */ + +table_set NeuralTable::tables; +atomic NeuralTable::running(true); +ErlNifMutex *NeuralTable::table_mutex; + +NeuralTable::NeuralTable(unsigned int kp) { + for (int i = 0; i < BUCKET_COUNT; ++i) { + ErlNifEnv *env = enif_alloc_env(); + env_buckets[i] = env; + locks[i] = enif_rwlock_create("neural_table"); + garbage_cans[i] = 0; + reclaimable[i] = enif_make_list(env, 0); + } + + start_gc(); + start_batch(); + + key_pos = kp; +} + +NeuralTable::~NeuralTable() { + stop_batch(); + stop_gc(); + for (int i = 0; i < BUCKET_COUNT; ++i) { + enif_rwlock_destroy(locks[i]); + enif_free_env(env_buckets[i]); + } +} + +/* ================================================================ + * MakeTable + * Allocates a new table, assuming a unique atom identifier. This + * table is stored in a static container. All interactions with + * the table must be performed through the static class API. + */ +ERL_NIF_TERM NeuralTable::MakeTable(ErlNifEnv *env, ERL_NIF_TERM name, ERL_NIF_TERM key_pos) { + char *atom; + string key; + unsigned int len = 0, + pos = 0; + ERL_NIF_TERM ret; + + // Allocate space for the name of the table + enif_get_atom_length(env, name, &len, ERL_NIF_LATIN1); + atom = (char*)enif_alloc(len + 1); + + // Fetch the value of the atom and store it in a string (because I can, that's why) + enif_get_atom(env, name, atom, len + 1, ERL_NIF_LATIN1); + key = atom; + + // Deallocate that space + enif_free(atom); + + // Get the key position value + enif_get_uint(env, key_pos, &pos); + + enif_mutex_lock(table_mutex); + if (NeuralTable::tables.find(key) != NeuralTable::tables.end()) { + // Table already exists? Bad monkey! + ret = enif_make_badarg(env); + } else { + // All good. Make the table + NeuralTable::tables[key] = new NeuralTable(pos); + ret = enif_make_atom(env, "ok"); + } + enif_mutex_unlock(table_mutex); + + return ret; +} + +/* ================================================================ + * GetTable + * Retrieves a handle to the table referenced by name, assuming + * such a table exists. If not, throw badarg. + */ +NeuralTable* NeuralTable::GetTable(ErlNifEnv *env, ERL_NIF_TERM name) { + char *atom = NULL; + string key; + unsigned len = 0; + NeuralTable *ret = NULL; + table_set::const_iterator it; + + // Allocate space for the table name + enif_get_atom_length(env, name, &len, ERL_NIF_LATIN1); + atom = (char*)enif_alloc(len + 1); + + // Copy the table name into a string + enif_get_atom(env, name, atom, len + 1, ERL_NIF_LATIN1); + key = atom; + + // Deallocate that space + enif_free(atom); + + // Look for the table and return its pointer if found + it = NeuralTable::tables.find(key); + if (it != NeuralTable::tables.end()) { + ret = it->second; + } + + return ret; +} + +/* ================================================================ + * Insert + * Inserts a tuple into the table with key. + */ +ERL_NIF_TERM NeuralTable::Insert(ErlNifEnv *env, ERL_NIF_TERM table, ERL_NIF_TERM key, ERL_NIF_TERM object) { + NeuralTable *tb; + ERL_NIF_TERM ret, old; + unsigned long int entry_key = 0; + + // Grab table or bail. 
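+ // (The key itself is a pre-hashed unsigned integer produced on the Erlang side; see the note at the top of this file.)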
+ tb = GetTable(env, table); + if (tb == NULL) { + return enif_make_badarg(env); + } + + // Get key value. + enif_get_ulong(env, key, &entry_key); + + // Lock the key. + tb->rwlock(entry_key); + + // Attempt to lookup the value. If nonempty, increment + // discarded term counter and return a copy of the + // old value + if (tb->find(entry_key, old)) { + tb->reclaim(entry_key, old); + ret = enif_make_tuple2(env, enif_make_atom(env, "ok"), enif_make_copy(env, old)); + } else { + ret = enif_make_atom(env, "ok"); + } + + // Write that shit out + tb->put(entry_key, object); + + // Oh, and unlock the key if you would. + tb->rwunlock(entry_key); + + return ret; +} + +/* ================================================================ + * InsertNew + * Inserts a tuple into the table with key, assuming there is not + * a value with key already. Returns true if there was no value + * for key, or false if there was. + */ +ERL_NIF_TERM NeuralTable::InsertNew(ErlNifEnv *env, ERL_NIF_TERM table, ERL_NIF_TERM key, ERL_NIF_TERM object) { + NeuralTable *tb; + ERL_NIF_TERM ret, old; + unsigned long int entry_key = 0; + + // Get the table or bail + tb = GetTable(env, table); + if (tb == NULL) { + return enif_make_badarg(env); + } + + // Get the key value + enif_get_ulong(env, key, &entry_key); + + // Get write lock for the key + tb->rwlock(entry_key); + + if (tb->find(entry_key, old)) { + // Key was found. Return false and do not insert + ret = enif_make_atom(env, "false"); + } else { + // Key was not found. Return true and insert + tb->put(entry_key, object); + ret = enif_make_atom(env, "true"); + } + + // Release write lock for the key + tb->rwunlock(entry_key); + + return ret; +} + +/* ================================================================ + * Increment + * Processes a list of update operations. Each operation specifies + * a position in the stored tuple to update and an integer to add + * to it. + */ +ERL_NIF_TERM NeuralTable::Increment(ErlNifEnv *env, ERL_NIF_TERM table, ERL_NIF_TERM key, ERL_NIF_TERM ops) { + NeuralTable *tb; + ERL_NIF_TERM ret, old; + ERL_NIF_TERM it; + unsigned long int entry_key = 0; + + // Get table handle or bail + tb = GetTable(env, table); + if (tb == NULL) { + return enif_make_badarg(env); + } + + // Get key value + enif_get_ulong(env, key, &entry_key); + + // Acquire read/write lock for key + tb->rwlock(entry_key); + + // Try to read the value as it is + if (tb->find(entry_key, old)) { + // Value exists + ERL_NIF_TERM op_cell; + const ERL_NIF_TERM *tb_tpl; + const ERL_NIF_TERM *op_tpl; + ERL_NIF_TERM *new_tpl; + ErlNifEnv *bucket_env = tb->get_env(entry_key); + unsigned long int pos = 0; + long int incr = 0; + unsigned int ops_length = 0; + int op_arity = 0, + tb_arity = 0; + + // Expand tuple to work on elements + enif_get_tuple(bucket_env, old, &tb_arity, &tb_tpl); + + // Allocate space for a copy the contents of the table + // tuple and copy it in. All changes are to be made to + // the copy of the tuple. + new_tpl = (ERL_NIF_TERM*)enif_alloc(sizeof(ERL_NIF_TERM) * tb_arity); + memcpy(new_tpl, tb_tpl, sizeof(ERL_NIF_TERM) * tb_arity); + + // Create empty list cell for return value. 
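+ // Each op below prepends its updated value, so the caller receives the results in reverse op order.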
+ ret = enif_make_list(env, 0); + + // Set iterator to first cell of ops + it = ops; + while(!enif_is_empty_list(env, it)) { + long int value = 0; + enif_get_list_cell(env, it, &op_cell, &it); // op_cell = hd(it), it = tl(it) + enif_get_tuple(env, op_cell, &op_arity, &op_tpl); // op_arity = tuple_size(op_cell), op_tpl = [TplPos1, TplPos2] + enif_get_ulong(env, op_tpl[0], &pos); // pos = (uint64)op_tpl[0] + enif_get_long(env, op_tpl[1], &incr); // incr = (uint64)op_tpl[1] + + // Is the operation trying to modify a nonexistant + // position? + if (pos <= 0 || pos > tb_arity) { + ret = enif_make_badarg(env); + goto bailout; + } + + // Is the operation trying to add to a value that's + // not a number? + if (!enif_is_number(bucket_env, new_tpl[pos - 1])) { + ret = enif_make_badarg(env); + goto bailout; + } + + // Update the value stored in the tuple. + enif_get_long(env, new_tpl[pos - 1], &value); + tb->reclaim(entry_key, new_tpl[pos - 1]); + new_tpl[pos - 1] = enif_make_long(bucket_env, value + incr); + + // Copy the new value to the head of the return list + ret = enif_make_list_cell(env, enif_make_copy(env, new_tpl[pos - 1]), ret); + } + + tb->put(entry_key, enif_make_tuple_from_array(bucket_env, new_tpl, tb_arity)); + + // Bailout allows cancelling the update opertion + // in case something goes wrong. It must always + // come after tb->put and before enif_free and + // rwunlock +bailout: + enif_free(new_tpl); + } else { + ret = enif_make_badarg(env); + } + // Release the rwlock for entry_key + tb->rwunlock(entry_key); + + return ret; +} + +/* ================================================================ + * Unshift + * Processes a list of update operations. Each update operation is + * a tuple specifying the position of a list in the stored value to + * update and a list of values to append. Elements are shifted from + * the input list to the stored list, so: + * + * unshift([a,b,c,d]) results in [d,c,b,a] + */ +ERL_NIF_TERM NeuralTable::Unshift(ErlNifEnv *env, ERL_NIF_TERM table, ERL_NIF_TERM key, ERL_NIF_TERM ops) { + NeuralTable *tb; + ERL_NIF_TERM ret, old, it; + unsigned long int entry_key; + ErlNifEnv *bucket_env; + + tb = GetTable(env, table); + if (tb == NULL) { + return enif_make_badarg(env); + } + + enif_get_ulong(env, key, &entry_key); + + tb->rwlock(entry_key); + bucket_env = tb->get_env(entry_key); + if (tb->find(entry_key, old)) { + const ERL_NIF_TERM *old_tpl, + *op_tpl; + ERL_NIF_TERM *new_tpl; + int tb_arity = 0, + op_arity = 0; + unsigned long pos = 0; + unsigned int new_length = 0; + ERL_NIF_TERM op, + unshift, + copy_it, + copy_val; + + enif_get_tuple(bucket_env, old, &tb_arity, &old_tpl); + new_tpl = (ERL_NIF_TERM*)enif_alloc(sizeof(ERL_NIF_TERM) * tb_arity); + memcpy(new_tpl, old_tpl, sizeof(ERL_NIF_TERM) * tb_arity); + + it = ops; + ret = enif_make_list(env, 0); + + while (!enif_is_empty_list(env, it)) { + // Examine the operation. + enif_get_list_cell(env, it, &op, &it); // op = hd(it), it = tl(it) + enif_get_tuple(env, op, &op_arity, &op_tpl); // op_arity = tuple_size(op), op_tpl = [TplPos1, TplPos2] + enif_get_ulong(env, op_tpl[0], &pos); // Tuple position to modify + unshift = op_tpl[1]; // Values to unshfit + + // Argument 1 of the operation tuple is position; + // make sure it's within the bounds of the tuple + // in the table. 
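+ // (Erlang positions are 1-based; the element itself is accessed as new_tpl[pos - 1].)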
+ if (pos <= 0 || pos > tb_arity) { + ret = enif_make_badarg(env); + goto bailout; + } + + // Make sure we were passed a list of things to push + // onto the posth element of the entry + if (!enif_is_list(env, unshift)) { + ret = enif_make_badarg(env); + } + + // Now iterate over unshift, moving its values to + // the head of new_tpl[pos - 1] one by one + copy_it = unshift; + while (!enif_is_empty_list(env, copy_it)) { + enif_get_list_cell(env, copy_it, ©_val, ©_it); + new_tpl[pos - 1] = enif_make_list_cell(bucket_env, enif_make_copy(bucket_env, copy_val), new_tpl[pos - 1]); + } + enif_get_list_length(bucket_env, new_tpl[pos - 1], &new_length); + ret = enif_make_list_cell(env, enif_make_uint(env, new_length), ret); + } + + tb->put(entry_key, enif_make_tuple_from_array(bucket_env, new_tpl, tb_arity)); + +bailout: + enif_free(new_tpl); + } else { + ret = enif_make_badarg(env); + } + tb->rwunlock(entry_key); + + return ret; +} + +ERL_NIF_TERM NeuralTable::Shift(ErlNifEnv *env, ERL_NIF_TERM table, ERL_NIF_TERM key, ERL_NIF_TERM ops) { + NeuralTable *tb; + ERL_NIF_TERM ret, old, it; + unsigned long int entry_key; + ErlNifEnv *bucket_env; + + tb = GetTable(env, table); + if (tb == NULL) { + return enif_make_badarg(env); + } + + enif_get_ulong(env, key, &entry_key); + + tb->rwlock(entry_key); + bucket_env = tb->get_env(entry_key); + if (tb->find(entry_key, old)) { + const ERL_NIF_TERM *old_tpl; + const ERL_NIF_TERM *op_tpl; + ERL_NIF_TERM *new_tpl; + int tb_arity = 0, + op_arity = 0; + unsigned long pos = 0, + count = 0; + ERL_NIF_TERM op, list, shifted, reclaim; + + enif_get_tuple(bucket_env, old, &tb_arity, &old_tpl); + new_tpl = (ERL_NIF_TERM*)enif_alloc(tb_arity * sizeof(ERL_NIF_TERM)); + memcpy(new_tpl, old_tpl, sizeof(ERL_NIF_TERM) * tb_arity); + + it = ops; + ret = enif_make_list(env, 0); + reclaim = enif_make_list(bucket_env, 0); + + while(!enif_is_empty_list(env, it)) { + enif_get_list_cell(env, it, &op, &it); + enif_get_tuple(env, op, &op_arity, &op_tpl); + enif_get_ulong(env, op_tpl[0], &pos); + enif_get_ulong(env, op_tpl[1], &count); + + if (pos <= 0 || pos > tb_arity) { + ret = enif_make_badarg(env); + goto bailout; + } + + if (!enif_is_list(env, new_tpl[pos -1])) { + ret = enif_make_badarg(env); + goto bailout; + } + + shifted = enif_make_list(env, 0); + if (count > 0) { + ERL_NIF_TERM copy_it = new_tpl[pos - 1], + val; + int i = 0; + while (i < count && !enif_is_empty_list(bucket_env, copy_it)) { + enif_get_list_cell(bucket_env, copy_it, &val, ©_it); + ++i; + shifted = enif_make_list_cell(env, enif_make_copy(env, val), shifted); + reclaim = enif_make_list_cell(env, val, reclaim); + } + new_tpl[pos - 1] = copy_it; + } else if (count < 0) { + ERL_NIF_TERM copy_it = new_tpl[pos - 1], + val; + while (!enif_is_empty_list(bucket_env, copy_it)) { + enif_get_list_cell(bucket_env, copy_it, &val, ©_it); + shifted = enif_make_list_cell(env, enif_make_copy(env, val), shifted); + reclaim = enif_make_list_cell(env, val, reclaim); + } + new_tpl[pos - 1] = copy_it; + } + ret = enif_make_list_cell(env, shifted, ret); + } + + tb->put(entry_key, enif_make_tuple_from_array(bucket_env, new_tpl, tb_arity)); + tb->reclaim(entry_key, reclaim); +bailout: + enif_free(new_tpl); + } else { + ret = enif_make_badarg(env); + } + tb->rwunlock(entry_key); + + return ret; +} + +ERL_NIF_TERM NeuralTable::Swap(ErlNifEnv *env, ERL_NIF_TERM table, ERL_NIF_TERM key, ERL_NIF_TERM ops) { + NeuralTable *tb; + ERL_NIF_TERM ret, old, it; + unsigned long int entry_key; + ErlNifEnv *bucket_env; + + tb = GetTable(env, table); + 
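+ // Swap returns the previous element at each requested position and stores a copy of the replacement term.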
if (tb == NULL) { + return enif_make_badarg(env); + } + + enif_get_ulong(env, key, &entry_key); + + tb->rwlock(entry_key); + bucket_env = tb->get_env(entry_key); + if (tb->find(entry_key, old)) { + const ERL_NIF_TERM *old_tpl; + const ERL_NIF_TERM *op_tpl; + ERL_NIF_TERM *new_tpl; + int tb_arity = 0, + op_arity = 0; + unsigned long pos = 0; + ERL_NIF_TERM op, list, shifted, reclaim; + + enif_get_tuple(bucket_env, old, &tb_arity, &old_tpl); + new_tpl = (ERL_NIF_TERM*)enif_alloc(tb_arity * sizeof(ERL_NIF_TERM)); + memcpy(new_tpl, old_tpl, sizeof(ERL_NIF_TERM) * tb_arity); + + it = ops; + ret = enif_make_list(env, 0); + reclaim = enif_make_list(bucket_env, 0); + + while (!enif_is_empty_list(env, it)) { + enif_get_list_cell(env, it, &op, &it); + enif_get_tuple(env, op, &op_arity, &op_tpl); + enif_get_ulong(env, op_tpl[0], &pos); + + if (pos <= 0 || pos > tb_arity) { + ret = enif_make_badarg(env); + goto bailout; + } + + reclaim = enif_make_list_cell(bucket_env, new_tpl[pos - 1], reclaim); + ret = enif_make_list_cell(env, enif_make_copy(env, new_tpl[pos -1]), ret); + new_tpl[pos - 1] = enif_make_copy(bucket_env, op_tpl[1]); + } + + tb->put(entry_key, enif_make_tuple_from_array(bucket_env, new_tpl, tb_arity)); + tb->reclaim(entry_key, reclaim); +bailout: + enif_free(new_tpl); + } else { + ret = enif_make_badarg(env); + } + tb->rwunlock(entry_key); + + return ret; +} + +ERL_NIF_TERM NeuralTable::Delete(ErlNifEnv *env, ERL_NIF_TERM table, ERL_NIF_TERM key) { + NeuralTable *tb; + ERL_NIF_TERM val, ret; + unsigned long int entry_key; + + tb = GetTable(env, table); + if (tb == NULL) { return enif_make_badarg(env); } + + enif_get_ulong(env, key, &entry_key); + + tb->rwlock(entry_key); + + if (tb->erase(entry_key, val)) { + tb->reclaim(entry_key, val); + ret = enif_make_copy(env, val); + } else { + ret = enif_make_atom(env, "undefined"); + } + + tb->rwunlock(entry_key); + + return ret; +} + +ERL_NIF_TERM NeuralTable::Empty(ErlNifEnv *env, ERL_NIF_TERM table) { + NeuralTable *tb; + int n = 0; + + tb = GetTable(env, table); + if (tb == NULL) { return enif_make_badarg(env); } + + // First, lock EVERY bucket. We want this to be an isolated operation. + for (n = 0; n < BUCKET_COUNT; ++n) { + enif_rwlock_rwlock(tb->locks[n]); + } + + // Now clear the table + for (n = 0; n < BUCKET_COUNT; ++n) { + tb->hash_buckets[n].clear(); + enif_clear_env(tb->env_buckets[n]); + tb->garbage_cans[n] = 0; + tb->reclaimable[n] = enif_make_list(tb->env_buckets[n], 0); + } + + // Now unlock every bucket. + for (n = 0; n < BUCKET_COUNT; ++n) { + enif_rwlock_rwunlock(tb->locks[n]); + } + + return enif_make_atom(env, "ok"); +} + +ERL_NIF_TERM NeuralTable::Get(ErlNifEnv *env, ERL_NIF_TERM table, ERL_NIF_TERM key) { + NeuralTable *tb; + ERL_NIF_TERM ret, val; + unsigned long int entry_key; + + // Acquire table handle, or quit if the table doesn't exist. 
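+ // Lookups only take the read side of the bucket lock, so concurrent readers do not block one another.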
+ tb = GetTable(env, table); + if (tb == NULL) { return enif_make_badarg(env); } + + // Get key value + enif_get_ulong(env, key, &entry_key); + + // Lock the key + tb->rlock(entry_key); + + // Read current value + if (!tb->find(entry_key, val)) { + ret = enif_make_atom(env, "undefined"); + } else { + ret = enif_make_copy(env, val); + } + + tb->runlock(entry_key); + + return ret; +} + +ERL_NIF_TERM NeuralTable::Dump(ErlNifEnv *env, ERL_NIF_TERM table) { + NeuralTable *tb = GetTable(env, table); + ErlNifPid self; + ERL_NIF_TERM ret; + + if (tb == NULL) { return enif_make_badarg(env); } + + enif_self(env, &self); + + tb->add_batch_job(self, &NeuralTable::batch_dump); + + return enif_make_atom(env, "$neural_batch_wait"); +} + +ERL_NIF_TERM NeuralTable::Drain(ErlNifEnv *env, ERL_NIF_TERM table) { + NeuralTable *tb = GetTable(env, table); + ErlNifPid self; + int ret; + + if (tb == NULL) { return enif_make_badarg(env); } + + enif_self(env, &self); + + tb->add_batch_job(self, &NeuralTable::batch_drain); + + return enif_make_atom(env, "$neural_batch_wait"); +} + +ERL_NIF_TERM NeuralTable::GetKeyPosition(ErlNifEnv *env, ERL_NIF_TERM table) { + NeuralTable *tb = GetTable(env, table); + + if (tb == NULL) { return enif_make_badarg(env); } + return enif_make_uint(env, tb->key_pos); +} + +ERL_NIF_TERM NeuralTable::GarbageCollect(ErlNifEnv *env, ERL_NIF_TERM table) { + NeuralTable *tb = GetTable(env, table); + if (tb == NULL) { return enif_make_badarg(env); } + + enif_cond_signal(tb->gc_cond); + + return enif_make_atom(env, "ok"); +} + +ERL_NIF_TERM NeuralTable::GarbageSize(ErlNifEnv *env, ERL_NIF_TERM table) { + NeuralTable *tb = GetTable(env, table); + unsigned long int size = 0; + + if (tb == NULL) { return enif_make_badarg(env); } + + size = tb->garbage_size(); + + return enif_make_ulong(env, size); +} + +void* NeuralTable::DoGarbageCollection(void *table) { + NeuralTable *tb = (NeuralTable*)table; + + enif_mutex_lock(tb->gc_mutex); + + while (running.load(memory_order_acquire)) { + while (running.load(memory_order_acquire) && tb->garbage_size() < RECLAIM_THRESHOLD) { + enif_cond_wait(tb->gc_cond, tb->gc_mutex); + } + tb->gc(); + } + + enif_mutex_unlock(tb->gc_mutex); + + return NULL; +} + +void* NeuralTable::DoReclamation(void *table) { + const int max_eat = 5; + NeuralTable *tb = (NeuralTable*)table; + int i = 0, c = 0, t = 0;; + ERL_NIF_TERM tl, hd; + ErlNifEnv *env; + + while (running.load(memory_order_acquire)) { + for (i = 0; i < BUCKET_COUNT; ++i) { + c = 0; + t = 0; + tb->rwlock(i); + env = tb->get_env(i); + tl = tb->reclaimable[i]; + while (c++ < max_eat && !enif_is_empty_list(env, tl)) { + enif_get_list_cell(env, tl, &hd, &tl); + tb->garbage_cans[i] += estimate_size(env, hd); + t += tb->garbage_cans[i]; + } + tb->rwunlock(i); + + if (t >= RECLAIM_THRESHOLD) { + enif_cond_signal(tb->gc_cond); + } + } +#ifdef _WIN32 + Sleep(50); +#else + usleep(50000); +#endif + + } + + return NULL; +} + +void* NeuralTable::DoBatchOperations(void *table) { + NeuralTable *tb = (NeuralTable*)table; + + enif_mutex_lock(tb->batch_mutex); + + while (running.load(memory_order_acquire)) { + while (running.load(memory_order_acquire) && tb->batch_jobs.empty()) { + enif_cond_wait(tb->batch_cond, tb->batch_mutex); + } + BatchJob job = tb->batch_jobs.front(); + (tb->*job.fun)(job.pid); + tb->batch_jobs.pop(); + } + + enif_mutex_unlock(tb->batch_mutex); + + return NULL; +} + +void NeuralTable::start_gc() { + int ret; + + gc_mutex = enif_mutex_create("neural_table_gc"); + gc_cond = enif_cond_create("neural_table_gc"); + + 
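+ // Spawn the collector before the reclaimer; DoReclamation signals gc_cond once enough garbage has accumulated.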
ret = enif_thread_create("neural_garbage_collector", &gc_tid, NeuralTable::DoGarbageCollection, (void*)this, NULL); + if (ret != 0) { + printf("[neural_gc] Can't create GC thread. Error Code: %d\r\n", ret); + } + + // Start the reclaimer after the garbage collector. + ret = enif_thread_create("neural_reclaimer", &rc_tid, NeuralTable::DoReclamation, (void*)this, NULL); + if (ret != 0) { + printf("[neural_gc] Can't create reclamation thread. Error Code: %d\r\n", ret); + } +} + +void NeuralTable::stop_gc() { + enif_cond_signal(gc_cond); + // Join the reclaimer before the garbage collector. + enif_thread_join(rc_tid, NULL); + enif_thread_join(gc_tid, NULL); +} + +void NeuralTable::start_batch() { + int ret; + + batch_mutex = enif_mutex_create("neural_table_batch"); + batch_cond = enif_cond_create("neural_table_batch"); + + ret = enif_thread_create("neural_batcher", &batch_tid, NeuralTable::DoBatchOperations, (void*)this, NULL); + if (ret != 0) { + printf("[neural_batch] Can't create batch thread. Error Code: %d\r\n", ret); + } +} + +void NeuralTable::stop_batch() { + enif_cond_signal(batch_cond); + enif_thread_join(batch_tid, NULL); +} + +void NeuralTable::put(unsigned long int key, ERL_NIF_TERM tuple) { + ErlNifEnv *env = get_env(key); + hash_buckets[GET_BUCKET(key)][key] = enif_make_copy(env, tuple); +} + +ErlNifEnv* NeuralTable::get_env(unsigned long int key) { + return env_buckets[GET_BUCKET(key)]; +} + +bool NeuralTable::find(unsigned long int key, ERL_NIF_TERM &ret) { + hash_table *bucket = &hash_buckets[GET_BUCKET(key)]; + hash_table::iterator it = bucket->find(key); + if (bucket->end() == it) { + return false; + } else { + ret = it->second; + return true; + } +} + +bool NeuralTable::erase(unsigned long int key, ERL_NIF_TERM &val) { + hash_table *bucket = &hash_buckets[GET_BUCKET(key)]; + hash_table::iterator it = bucket->find(key); + bool ret = false; + if (it != bucket->end()) { + ret = true; + val = it->second; + bucket->erase(it); + } + return ret; +} + +void NeuralTable::add_batch_job(ErlNifPid pid, BatchFunction fun) { + BatchJob job; + job.pid = pid; + job.fun = fun; + + enif_mutex_lock(batch_mutex); + batch_jobs.push(job); + enif_mutex_unlock(batch_mutex); + + enif_cond_signal(batch_cond); +} + +void NeuralTable::batch_drain(ErlNifPid pid) { + ErlNifEnv *env = enif_alloc_env(); + ERL_NIF_TERM msg, value; + + value = enif_make_list(env, 0); + for (int i = 0; i < BUCKET_COUNT; ++i) { + enif_rwlock_rwlock(locks[i]); + + for (hash_table::iterator it = hash_buckets[i].begin(); it != hash_buckets[i].end(); ++it) { + value = enif_make_list_cell(env, enif_make_copy(env, it->second), value); + } + enif_clear_env(env_buckets[i]); + hash_buckets[i].clear(); + garbage_cans[i] = 0; + reclaimable[i] = enif_make_list(env_buckets[i], 0); + + enif_rwlock_rwunlock(locks[i]); + } + + msg = enif_make_tuple2(env, enif_make_atom(env, "$neural_batch_response"), value); + + enif_send(NULL, &pid, env, msg); + + enif_free_env(env); +} + +void NeuralTable::batch_dump(ErlNifPid pid) { + ErlNifEnv *env = enif_alloc_env(); + ERL_NIF_TERM msg, value; + + value = enif_make_list(env, 0); + for (int i = 0; i < BUCKET_COUNT; ++i) { + enif_rwlock_rlock(locks[i]); + for (hash_table::iterator it = hash_buckets[i].begin(); it != hash_buckets[i].end(); ++it) { + value = enif_make_list_cell(env, enif_make_copy(env, it->second), value); + } + enif_rwlock_runlock(locks[i]); + } + + msg = enif_make_tuple2(env, enif_make_atom(env, "$neural_batch_response"), value); + + enif_send(NULL, &pid, env, msg); + + 
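+ // The temporary env was only needed to build and send the batch response; release it here.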
enif_free_env(env); +} + +void NeuralTable::reclaim(unsigned long int key, ERL_NIF_TERM term) { + int bucket = GET_BUCKET(key); + ErlNifEnv *env = get_env(key); + reclaimable[bucket] = enif_make_list_cell(env, term, reclaimable[bucket]); +} + +void NeuralTable::gc() { + ErlNifEnv *fresh = NULL, + *old = NULL; + hash_table *bucket = NULL; + hash_table::iterator it; + unsigned int gc_curr = 0; + + for (; gc_curr < BUCKET_COUNT; ++gc_curr) { + bucket = &hash_buckets[gc_curr]; + old = env_buckets[gc_curr]; + fresh = enif_alloc_env(); + + enif_rwlock_rwlock(locks[gc_curr]); + for (it = bucket->begin(); it != bucket->end(); ++it) { + it->second = enif_make_copy(fresh, it->second); + } + + garbage_cans[gc_curr] = 0; + env_buckets[gc_curr] = fresh; + reclaimable[gc_curr] = enif_make_list(fresh, 0); + enif_free_env(old); + enif_rwlock_rwunlock(locks[gc_curr]); + } +} + +unsigned long int NeuralTable::garbage_size() { + unsigned long int size = 0; + for (int i = 0; i < BUCKET_COUNT; ++i) { + enif_rwlock_rlock(locks[i]); + size += garbage_cans[i]; + enif_rwlock_runlock(locks[i]); + } + return size; +} diff --git a/c_src/neural/NeuralTable.h b/c_src/neural/NeuralTable.h new file mode 100644 index 0000000..527c820 --- /dev/null +++ b/c_src/neural/NeuralTable.h @@ -0,0 +1,121 @@ +#ifndef NEURALTABLE_H +#define NEURALTABLE_H + +#include "erl_nif.h" +#include "neural_utils.h" +#include +#include +#include +#include +#include +#include +#ifdef _WIN32 +#include +#include +#include +#else +#include +#endif + +#define BUCKET_COUNT 64 +#define BUCKET_MASK (BUCKET_COUNT - 1) +#define GET_BUCKET(key) key & BUCKET_MASK +#define GET_LOCK(key) key & BUCKET_MASK +#define RECLAIM_THRESHOLD 1048576 + +using namespace std; + +class NeuralTable; + +typedef unordered_map table_set; +typedef unordered_map hash_table; +typedef void (NeuralTable::*BatchFunction)(ErlNifPid pid); + +class NeuralTable { + public: + static ERL_NIF_TERM MakeTable(ErlNifEnv *env, ERL_NIF_TERM name, ERL_NIF_TERM keypos); + static ERL_NIF_TERM Insert(ErlNifEnv *env, ERL_NIF_TERM table, ERL_NIF_TERM key, ERL_NIF_TERM object); + static ERL_NIF_TERM InsertNew(ErlNifEnv *env, ERL_NIF_TERM table, ERL_NIF_TERM key, ERL_NIF_TERM object); + static ERL_NIF_TERM Delete(ErlNifEnv *env, ERL_NIF_TERM table, ERL_NIF_TERM key); + static ERL_NIF_TERM Empty(ErlNifEnv *env, ERL_NIF_TERM table); + static ERL_NIF_TERM Get(ErlNifEnv *env, ERL_NIF_TERM table, ERL_NIF_TERM key); + static ERL_NIF_TERM Increment(ErlNifEnv *env, ERL_NIF_TERM table, ERL_NIF_TERM key, ERL_NIF_TERM ops); + static ERL_NIF_TERM Shift(ErlNifEnv *env, ERL_NIF_TERM table, ERL_NIF_TERM key, ERL_NIF_TERM ops); + static ERL_NIF_TERM Unshift(ErlNifEnv *env, ERL_NIF_TERM table, ERL_NIF_TERM key, ERL_NIF_TERM ops); + static ERL_NIF_TERM Swap(ErlNifEnv *env, ERL_NIF_TERM table, ERL_NIF_TERM key, ERL_NIF_TERM ops); + static ERL_NIF_TERM Dump(ErlNifEnv *env, ERL_NIF_TERM table); + static ERL_NIF_TERM Drain(ErlNifEnv *env, ERL_NIF_TERM table); + static ERL_NIF_TERM GetKeyPosition(ErlNifEnv *env, ERL_NIF_TERM table); + static ERL_NIF_TERM GarbageCollect(ErlNifEnv *env, ERL_NIF_TERM table); + static ERL_NIF_TERM GarbageSize(ErlNifEnv *env, ERL_NIF_TERM table); + static NeuralTable* GetTable(ErlNifEnv *env, ERL_NIF_TERM name); + static void* DoGarbageCollection(void *table); + static void* DoBatchOperations(void *table); + static void* DoReclamation(void *table); + static void Initialize() { + table_mutex = enif_mutex_create("neural_table_maker"); + } + static void Shutdown() { + running = false; + 
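+ // With 'running' cleared, each table's destructor can join its GC, reclamation and batch threads.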
table_set::iterator it(tables.begin()); + + while (it != tables.end()) { + delete it->second; + tables.erase(it); + it = tables.begin(); + } + + enif_mutex_destroy(table_mutex); + } + + void rlock(unsigned long int key) { enif_rwlock_rlock(locks[GET_LOCK(key)]); } + void runlock(unsigned long int key) { enif_rwlock_runlock(locks[GET_LOCK(key)]); } + void rwlock(unsigned long int key) { enif_rwlock_rwlock(locks[GET_LOCK(key)]); } + void rwunlock(unsigned long int key) { enif_rwlock_rwunlock(locks[GET_LOCK(key)]); } + + ErlNifEnv *get_env(unsigned long int key); + bool erase(unsigned long int key, ERL_NIF_TERM &ret); + bool find(unsigned long int key, ERL_NIF_TERM &ret); + void put(unsigned long int key, ERL_NIF_TERM tuple); + void batch_dump(ErlNifPid pid); + void batch_drain(ErlNifPid pid); + void start_gc(); + void stop_gc(); + void start_batch(); + void stop_batch(); + void gc(); + void reclaim(unsigned long int key, ERL_NIF_TERM reclaim); + unsigned long int garbage_size(); + void add_batch_job(ErlNifPid pid, BatchFunction fun); + + protected: + static table_set tables; + static atomic running; + static ErlNifMutex *table_mutex; + + struct BatchJob { + ErlNifPid pid; + BatchFunction fun; + }; + + NeuralTable(unsigned int kp); + ~NeuralTable(); + + unsigned int garbage_cans[BUCKET_COUNT]; + hash_table hash_buckets[BUCKET_COUNT]; + ErlNifEnv *env_buckets[BUCKET_COUNT]; + ERL_NIF_TERM reclaimable[BUCKET_COUNT]; + ErlNifRWLock *locks[BUCKET_COUNT]; + ErlNifCond *gc_cond; + ErlNifMutex *gc_mutex; + ErlNifTid gc_tid; + ErlNifTid rc_tid; + ErlNifCond *batch_cond; + ErlNifMutex *batch_mutex; + queue batch_jobs; + ErlNifTid batch_tid; + + unsigned int key_pos; +}; + +#endif diff --git a/c_src/neural/neural.cpp b/c_src/neural/neural.cpp new file mode 100644 index 0000000..0273324 --- /dev/null +++ b/c_src/neural/neural.cpp @@ -0,0 +1,134 @@ +#include "erl_nif.h" +#include "NeuralTable.h" +#include + +// Prototypes +static ERL_NIF_TERM neural_new(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); +static ERL_NIF_TERM neural_put(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); +static ERL_NIF_TERM neural_put_new(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); +static ERL_NIF_TERM neural_increment(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); +static ERL_NIF_TERM neural_unshift(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); +static ERL_NIF_TERM neural_shift(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); +static ERL_NIF_TERM neural_swap(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); +static ERL_NIF_TERM neural_get(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); +static ERL_NIF_TERM neural_delete(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); +static ERL_NIF_TERM neural_garbage(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); +static ERL_NIF_TERM neural_garbage_size(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); +static ERL_NIF_TERM neural_empty(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); +static ERL_NIF_TERM neural_drain(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); +static ERL_NIF_TERM neural_dump(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); +static ERL_NIF_TERM neural_key_pos(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); + +static ErlNifFunc nif_funcs[] = +{ + {"make_table", 2, neural_new}, + {"do_fetch", 2, neural_get}, + {"do_delete", 2, neural_delete}, + {"do_dump", 1, neural_dump}, + {"do_drain", 1, neural_drain}, + {"empty", 1, neural_empty}, + {"insert", 3, neural_put}, + {"insert_new", 3, 
neural_put_new}, + {"do_increment", 3, neural_increment}, + {"do_unshift", 3, neural_unshift}, + {"do_shift", 3, neural_shift}, + {"do_swap", 3, neural_swap}, + {"garbage", 1, neural_garbage}, + {"garbage_size", 1, neural_garbage_size}, + {"key_pos", 1, neural_key_pos} +}; + +static ERL_NIF_TERM neural_key_pos(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + // This function is directly exposed, so no strict guards or patterns protecting us. + if (argc != 1 || !enif_is_atom(env, argv[0])) { return enif_make_badarg(env); } + + return NeuralTable::GetKeyPosition(env, argv[0]); +} + +static ERL_NIF_TERM neural_new(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + return NeuralTable::MakeTable(env, argv[0], argv[1]); +} + +static ERL_NIF_TERM neural_put(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + return NeuralTable::Insert(env, argv[0], argv[1], argv[2]); +} + +static ERL_NIF_TERM neural_put_new(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + return NeuralTable::InsertNew(env, argv[0], argv[1], argv[2]); +} + +static ERL_NIF_TERM neural_increment(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + if (!enif_is_atom(env, argv[0]) || !enif_is_number(env, argv[1]) || !enif_is_list(env, argv[2])) { + return enif_make_badarg(env); + } + + return NeuralTable::Increment(env, argv[0], argv[1], argv[2]); +} + +static ERL_NIF_TERM neural_shift(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + return NeuralTable::Shift(env, argv[0], argv[1], argv[2]); +} + +static ERL_NIF_TERM neural_unshift(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + return NeuralTable::Unshift(env, argv[0], argv[1], argv[2]); +} + +static ERL_NIF_TERM neural_swap(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]){ + return NeuralTable::Swap(env, argv[0], argv[1], argv[2]); +} + +static ERL_NIF_TERM neural_get(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + return NeuralTable::Get(env, argv[0], argv[1]); +} + +static ERL_NIF_TERM neural_delete(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + return NeuralTable::Delete(env, argv[0], argv[1]); +} + +static ERL_NIF_TERM neural_empty(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + if (!enif_is_atom(env, argv[0])) { return enif_make_badarg(env); } + + return NeuralTable::Empty(env, argv[0]); +} + +static ERL_NIF_TERM neural_dump(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + if (!enif_is_atom(env, argv[0])) { return enif_make_badarg(env); } + + return NeuralTable::Dump(env, argv[0]); +} + +static ERL_NIF_TERM neural_drain(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + if (!enif_is_atom(env, argv[0])) { return enif_make_badarg(env); } + + return NeuralTable::Drain(env, argv[0]); +} + +static ERL_NIF_TERM neural_garbage(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + if (!enif_is_atom(env, argv[0])) { return enif_make_badarg(env); } + + return NeuralTable::GarbageCollect(env, argv[0]); +} + +static ERL_NIF_TERM neural_garbage_size(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + if (!enif_is_atom(env, argv[0])) { return enif_make_badarg(env); } + + return NeuralTable::GarbageSize(env, argv[0]); +} + +static void neural_resource_cleanup(ErlNifEnv* env, void* arg) +{ + /* Delete any dynamically allocated memory stored in neural_handle */ + /* neural_handle* handle = (neural_handle*)arg; */ +} + +static int on_load(ErlNifEnv* env, void** priv_data, ERL_NIF_TERM load_info) +{ + NeuralTable::Initialize(); + return 0; +} + +static void on_unload(ErlNifEnv *env, void 
*priv_data) { + NeuralTable::Shutdown(); +} + +ERL_NIF_INIT(neural, nif_funcs, &on_load, NULL, NULL, &on_unload); diff --git a/c_src/neural/neural_utils.cpp b/c_src/neural/neural_utils.cpp new file mode 100644 index 0000000..a579ef5 --- /dev/null +++ b/c_src/neural/neural_utils.cpp @@ -0,0 +1,46 @@ +#include "neural_utils.h" + +unsigned long int estimate_size(ErlNifEnv *env, ERL_NIF_TERM term) { + if (enif_is_atom(env, term)) { + return WORD_SIZE; + } + + // Treating all numbers like longs. + if (enif_is_number(env, term)) { + return 2 * WORD_SIZE; + } + + if (enif_is_binary(env, term)) { + ErlNifBinary bin; + enif_inspect_binary(env, term, &bin); + return bin.size + (6 * WORD_SIZE); + } + + if (enif_is_list(env, term)) { + unsigned long int size = 0; + ERL_NIF_TERM it, curr; + it = term; + size += WORD_SIZE; + while (!enif_is_empty_list(env, it)) { + enif_get_list_cell(env, it, &curr, &it); + size += estimate_size(env, curr) + WORD_SIZE; + } + return size; + } + + if (enif_is_tuple(env, term)) { + unsigned long int size = 0; + const ERL_NIF_TERM *tpl; + int arity; + enif_get_tuple(env, term, &arity, &tpl); + for (int i = 0; i < arity; ++i) { + size += estimate_size(env, tpl[i]); + } + return size; + } + + // Return 1 word by default + return WORD_SIZE; +} + + diff --git a/c_src/neural/neural_utils.h b/c_src/neural/neural_utils.h new file mode 100644 index 0000000..6111d0b --- /dev/null +++ b/c_src/neural/neural_utils.h @@ -0,0 +1,9 @@ +#ifndef NEURAL_UTILS_H +#define NEURAL_UTILS_H + +#include "erl_nif.h" +#define WORD_SIZE sizeof(int) + +unsigned long int estimate_size(ErlNifEnv *env, ERL_NIF_TERM term); + +#endif diff --git a/c_src/neural/rebar.config b/c_src/neural/rebar.config new file mode 100644 index 0000000..8d8e6be --- /dev/null +++ b/c_src/neural/rebar.config @@ -0,0 +1,14 @@ +{port_specs, [ + {"../../priv/neural.so", ["*.cpp"]} +]}. + +{port_env, [ + {".*", "CXXFLAGS", "$CXXFLAGS -std=c++11 -O3"}, + {".*", "LDFLAGS", "$LDFLAGS -lstdc++ -shared"} +]}. + + + + + + diff --git a/src/dataType/utTermSize.erl b/src/dataType/utTermSize.erl index c0e3d1a..0077699 100644 --- a/src/dataType/utTermSize.erl +++ b/src/dataType/utTermSize.erl @@ -85,7 +85,7 @@ internal_test() -> 32 = byteSize(<<$a, $b, $c>>, 8), 8 = byteSize([], 8), 24 = byteSize([0|[]], 8), - 24 = byteSize([1|2], 8), % improper list + 24 = byteSize([1|2], 8), % itime_tmproper list 16 = byteSize({}, 8), 24 = byteSize({0}, 8), 8 = byteSize(0, 8), diff --git a/src/nifSrc/bitmap_filter/bitmap_filter.erl b/src/nifSrc/bitmap_filter/bitmap_filter.erl new file mode 100644 index 0000000..0eb5004 --- /dev/null +++ b/src/nifSrc/bitmap_filter/bitmap_filter.erl @@ -0,0 +1,20 @@ +-module(bitmap_filter). + +-export([init/0, filter/1]). +-on_load(init/0). + +init() -> + PrivDir = case code:priv_dir(?MODULE) of + {error, _} -> + EbinDir = filename:dirname(code:which(?MODULE)), + AppPath = filename:dirname(EbinDir), + filename:join(AppPath, "priv"); + Path -> + Path + end, + erlang:load_nif(filename:join(PrivDir, "bitmap_filter"), 0). + +% Hack - overriden by init, which is called in on_load. +% I couldn't find another way that the compiler or code load didn't complain about. +filter(DefaultArgs) -> + DefaultArgs. diff --git a/src/nifSrc/bsn/bsn.erl b/src/nifSrc/bsn/bsn.erl new file mode 100644 index 0000000..ee5b01f --- /dev/null +++ b/src/nifSrc/bsn/bsn.erl @@ -0,0 +1,77 @@ +-module(bsn). + +%% API +-export([hash/2, compare/2]). +-export([new/2, add/2, all/1, chains/1, in/2, count/1, clear/2]). + +-ifdef(TEST). 
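+%% eunit (and the commented-out triq header) are only compiled in for test builds.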
+-include_lib("eunit/include/eunit.hrl"). +%-include_lib("triq/include/triq.hrl"). +-endif. + + +%% Create new resource, `CellCount' is the size of the painters' store. +new('int_quadric', CellsCount) when CellsCount > 0 -> + {'bsn_int', bsn_int:new(-CellsCount)}; + +new('int_linear', CellsCount) when CellsCount > 0 -> + {'bsn_int', bsn_int:new(CellsCount)}; + +new('ext', CellsCount) when CellsCount > 0 -> + {'bsn_ext', bsn_ext:new(CellsCount)}. + + + + +%% Add new element. +%% If the result is a negative integer +%% then object was already added. +%% We found this object with (result) steps. +%% +%% If the result is a positive integer +%% then object was added after (result) elements. +add({Type, Res}, Bin) -> + Type:add(Res, Bin). + +all({Type, Res}) -> + Type:all(Res). + +chains({Type, Res}) -> + Type:chains(Res). + +%% Add new element. +%% If the result is a negative integer +%% then object was found with (-result) steps. +%% +%% If the result is a positive integer +%% then object was not found with (result) steps. +in({Type, Res}, Bin) -> + Type:in(Res, Bin). + +clear({Type, Res}, Bin) -> + Type:clear(Res, Bin). + +%% Return the count of elements stored in this resource. +count({Type, Res}) -> + Type:count(Res). + +%% Calculate the hash of the binary +hash(Bin, Max) -> + bsn_ext:hash(Bin, Max). + +compare(Bin1, Bin2) -> + bsn_ext:compare(Bin1, Bin2). + +-ifdef(TEST). +-ifdef(FORALL). +prop_compare_test_() -> + {"Binary compare testing.", + {timeout, 60, + fun() -> triq:check(prop_compare()) end}}. + +prop_compare() -> + ?FORALL({Xs},{binary()}, + compare(Xs, Xs)). +-endif. + +-endif. diff --git a/src/nifSrc/bsn/bsn_ext.erl b/src/nifSrc/bsn/bsn_ext.erl new file mode 100644 index 0000000..6a822da --- /dev/null +++ b/src/nifSrc/bsn/bsn_ext.erl @@ -0,0 +1,56 @@ +-module(bsn_ext). + +-on_load(init/0). +-export([init/0]). + +%% API +-export([hash/2, compare/2]). +-export([new/1, add/2, all/1, chains/1, in/2, count/1, clear/2]). + +-define(NIF_NOT_LOADED, erlang:nif_error(nif_not_loaded)). + +init() -> + erlang:load_nif(code:priv_dir('bsn')++"/bsn_ext", 0). + +%% Create new resource, `CellCount' is the size of the painters' store. +new(CellsCount) -> + ?NIF_NOT_LOADED. + +%% Add new element. +%% If the result is a negative integer +%% then object was already added. +%% We found this object with (result) steps. +%% +%% If the result is a positive integer +%% then object was added after (result) elements. +add(Res, Bin) -> + ?NIF_NOT_LOADED. + +all(Res) -> + ?NIF_NOT_LOADED. + +chains(Res) -> + ?NIF_NOT_LOADED. + +%% Add new element. +%% If the result is a negative integer +%% then object was found with (-result) steps. +%% +%% If the result is a positive integer +%% then object was not found with (result) steps. +in(Res, Bin) -> + ?NIF_NOT_LOADED. + +%% Return the count of elements stored in this resource. +count(Res) -> + ?NIF_NOT_LOADED. + +%% Calculate the hash of the binary +hash(Bin, Max) -> + ?NIF_NOT_LOADED. + +compare(Bin1, Bin2) -> + ?NIF_NOT_LOADED. + +clear(Res, Bin) -> + ?NIF_NOT_LOADED. diff --git a/src/nifSrc/bsn/bsn_int.erl b/src/nifSrc/bsn/bsn_int.erl new file mode 100644 index 0000000..ad1328d --- /dev/null +++ b/src/nifSrc/bsn/bsn_int.erl @@ -0,0 +1,45 @@ +-module(bsn_int). + +-on_load(init/0). +-export([init/0]). + +%% API +-export([new/1, add/2, all/1, in/2, count/1, clear/2]). + +-define(NIF_NOT_LOADED, erlang:nif_error(nif_not_loaded)). + +init() -> + erlang:load_nif(code:priv_dir('bsn')++"/bsn_int", 0). 
+ +%% Create new resource, `CellCount' is the size of the painters' store. +new(CellsCount) -> + ?NIF_NOT_LOADED. + +%% Add new element. +%% If the result is a negative integer +%% then object was already added. +%% We found this object with (result) steps. +%% +%% If the result is a positive integer +%% then object was added after (result) elements. +add(Res, Bin) -> + ?NIF_NOT_LOADED. + +all(Res) -> + ?NIF_NOT_LOADED. + +%% Add new element. +%% If the result is a negative integer +%% then object was found with (-result) steps. +%% +%% If the result is a positive integer +%% then object was not found with (result) steps. +in(Res, Bin) -> + ?NIF_NOT_LOADED. + +%% Return the count of elements stored in this resource. +count(Res) -> + ?NIF_NOT_LOADED. + +clear(Res, Bin) -> + ?NIF_NOT_LOADED. diff --git a/src/nifSrc/bsn/bsn_measure.erl b/src/nifSrc/bsn/bsn_measure.erl new file mode 100644 index 0000000..fe2c694 --- /dev/null +++ b/src/nifSrc/bsn/bsn_measure.erl @@ -0,0 +1,236 @@ +-module(bsn_measure). +-export([test/0, test2/0, test3/0, print/0]). +-export([gen/2, check_type/4]). +-export([check_type/3, get_type/3, test_type/2]). +-export([check_degrade/0, test_filled/1]). + +-ifndef(TEST). +-define(TEST, e). +-endif. +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). +%-include_lib("triq/include/triq.hrl"). +-endif. + + +% InOutK is (success / failure) checks. +% Return {TestCases, Elements}. +gen(ElemCount, InOutK) + when ElemCount>0 -> + Nums = lists:seq(0, erlang:round(ElemCount*100)), + filter(ElemCount, InOutK, Nums, [], []). + + +filter(EC, InOutK, [H|T], AllAcc, ElemAcc) + when EC>0 -> + case random:uniform() of + X when X + filter(EC-1, InOutK, + T, [H|AllAcc], [H|ElemAcc]); + _X -> + filter(EC, InOutK, + T, [H|AllAcc], ElemAcc) + end; +filter(_ElemCount, _InOutK, _Acc, AllAcc, ElemAcc) -> + {AllAcc, ElemAcc}. + + +check_type(Type, Size, InOutK) -> + check_type(fun average/1, Type, Size, InOutK). + +get_type(Type, Size, InOutK) -> + check_type(fun(X) -> X end, Type, Size, InOutK). + +check_type(OutF, Type, Size, InOutK) -> + % Build resourse + F = fun() -> bsn:new(Type, Size) end, + + [do_check(OutF, F, Size, InOutK, 0.1), + do_check(OutF, F, Size, InOutK, 0.25), + do_check(OutF, F, Size, InOutK, 0.5), + do_check(OutF, F, Size, InOutK, 0.75), + do_check(OutF, F, Size, InOutK, 0.9), + do_check(OutF, F, Size, InOutK, 1)]. + +do_check(OutF, F, Size, InOutK, CapacityK) -> + Res = F(), + ElemCount = Size * CapacityK, + {CaseList, ElemList} = gen(ElemCount, InOutK), + fill_values(Res, ElemList), + VaList = check_values(Res, CaseList, []), + {MissList, InNegList} = lists:partition(fun(X) -> X>0 end, VaList), + InList = lists:map(fun erlang:'-'/1, InNegList), + AllList = InList ++ MissList, + {CapacityK, + {size, Size}, + {real_count, bsn:count(Res)}, + {miss, OutF(MissList)}, + {in, OutF(InList)}, + {all, OutF(AllList)}}. + + +average([]) -> + false; +average([X|Tail]) -> + average1(Tail, X, 1). +% @private +average1([X|Tail], Sum, Count) -> + average1(Tail, Sum + X, Count + 1); +average1([], Sum, Count) -> + round4(Sum / Count). + + + + +round4(X) when is_number(X) -> + erlang:round(X * 1000) / 1000; +round4(X) -> + X. + +check_values(Res, [H|T], Acc) -> + X = bsn:in(Res, integer_to_binary(H)), + check_values(Res, T, [X|Acc]); +check_values(_Res, [], Acc) -> + Acc. + +fill_values(Res, [H|T]) -> + case bsn:add(Res, integer_to_binary(H)) of + no_more -> + Res; + X -> + fill_values(Res, T) + end; +fill_values(Res, []) -> + Res. 
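+%% The 3-arity variant below also collects the keys that were actually inserted.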
+ +fill_values(Res, [H|T], Acc) -> + case bsn:add(Res, integer_to_binary(H)) of + no_more -> + Acc; + X -> + fill_values(Res, T, [H|Acc]) + end; +fill_values(_Res, [], Acc) -> + Acc. + +integer_to_binary(X) -> + erlang:list_to_binary(erlang:integer_to_list(X)). + +test() -> + [{ext, check_type(ext, 100, 0.5)} + ,{int_linear, check_type(int_linear, 100, 0.5)} + ,{int_quadric, check_type(int_quadric, 100, 0.5)}]. + +%% All values. +test2() -> + [{ext, get_type(ext, 100, 0.5)} + ,{int_linear, get_type(int_linear, 100, 0.5)} + ,{int_quadric, get_type(int_quadric, 100, 0.5)}]. + +%% Counts of values. +test3() -> + F = fun anal_values/1, + [{ext, check_type(F, ext, 100, 0.5)} + ,{int_linear, check_type(F, int_linear, 100, 0.5)} + ,{int_quadric, check_type(F, int_quadric, 100, 0.5)}]. + +print() -> + do_print(test3()). + +do_print([{Type, Vals}|T]) -> + io:format("Type ~w~n", [Type]), + lists:map(fun({K, + {real_count,RC}, + {miss, M}, + {in, I}, + {all, A}}) -> + io:format("K=~w, RC=~w~n", [K, RC]), + io:format("count,miss,in,all\n"), + + print_mia(lists:seq(1, 100), M, I, A), + io:format("\n") + end, Vals), + do_print(T); +do_print([]) -> + ok. + +print_mia([H|T], [{H,0}|T1], [{H,0}|T2], [{H,0}|T3]) -> + print_mia(T, T1, T2, T3); +print_mia([H|T], [{H,C1}|T1], [{H,C2}|T2], [{H,C3}|T3]) -> + io:format("~w,~w,~w,~w\n", [H, C1, C2, C3]), + print_mia(T, T1, T2, T3); + +print_mia([H|_]=L, [{X,_}|_]=L1, L2, L3) + when X =/= H -> + print_mia(L, [{H,0}|L1], L2, L3); +print_mia([H|_]=L, [], L2, L3) -> + print_mia(L, [{H,0}], L2, L3); + +print_mia([H|_]=L, L1, [{X,_}|_]=L2, L3) + when X =/= H -> + print_mia(L, L1, [{H,0}|L2], L3); +print_mia([H|_]=L, L1, [], L3) -> + print_mia(L, L1, [{H,0}], L3); + +print_mia([H|_]=L, L1, L2, L3) -> + print_mia(L, L1, L2, [{H,0}|L3]); +print_mia([], _, _, _) -> + ok. + + + + + + + +anal_values(L) -> + do_anal(lists:sort(L), 1, []). + +do_anal([H,H|T], C, Acc) -> + do_anal([H|T], C+1, Acc); +do_anal([OldH|T], C, Acc) -> + do_anal(T, 1, [{OldH, C}|Acc]); +do_anal([], C, Acc) -> + lists:reverse(Acc). + +avg(L) -> do_avg(L, 0, 0). +do_avg([H|T], Cnt, Sum) -> + do_avg(T, Cnt+1, Sum+H); +do_avg([], Cnt, Sum) -> + Sum / Cnt. + +check_degrade() -> + [do_check_degrade(ext) + ,do_check_degrade(int_linear) + ,do_check_degrade(int_quadric) + ]. + +do_check_degrade(Type) -> + OutF = fun avg/1, + [Type, + lists:map(fun(Size) -> + F = fun() -> bsn:new(Type, Size) end, + do_check(OutF, F, Size, 0.5, 1) + end, [10, 100, 500, 1000, 5000, 10000])]. + +test_filled(ElemCount) -> + Res = bsn:new(ext, ElemCount), + {CaseList, ElemList} = gen(ElemCount, 1), + Vals = fill_values(Res, ElemList, []), + {bsn_ext, R} = Res, + R. + +-ifdef(TEST). + +do_test_() -> + [?_assert(test_type(bsn:new(ext, 100), 100)) + ,?_assert(test_type(bsn:new(int_linear, 100), 100)) + ,?_assert(test_type(bsn:new(int_quadric, 100), 100)) + ]. +-endif. + +test_type(Res, ElemCount) -> + {CaseList, ElemList} = gen(ElemCount, 1), + Vals = fill_values(Res, ElemList, []), + %Vals = ElemList, + lists:all(fun(X) -> bsn:in(Res, integer_to_binary(X)) < 0 end, Vals). diff --git a/src/nifSrc/couchdb_hqeue/hqueue.erl b/src/nifSrc/couchdb_hqeue/hqueue.erl new file mode 100644 index 0000000..eec8b98 --- /dev/null +++ b/src/nifSrc/couchdb_hqeue/hqueue.erl @@ -0,0 +1,160 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. 
You may obtain a copy of +% the License at +% +% http:%www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(hqueue). + + +-on_load(init/0). + + +-export([ + new/0, + new/1, + + extract_max/1, + insert/3, + + from_list/1, + from_list/2, + to_list/1, + + heap_size/1, + info/1, + is_empty/1, + max_elems/1, + size/1, + + resize_heap/2, + scale_by/2, + set_max_elems/2 +]). + + +-define(NOT_LOADED, not_loaded(?LINE)). + + +-type hqueue() :: term(). +-type hqueue_priority() :: float(). %% this should be non_neg_float() +-type hqueue_val() :: term(). +-type hqueue_elem() :: {hqueue_priority(), hqueue_val()}. +-type hqueue_option() :: {max_elems, pos_integer()} + | {heap_size, pos_integer()}. +-type hqueue_stat() :: {max_elems, pos_integer()} + | {heap_size, pos_integer()} + | {size, non_neg_integer()}. + +-export_type([hqueue/0]). + + +-spec new() -> {ok, hqueue()}. +new() -> + new([]). + + +-spec new([hqueue_option()]) -> {ok, hqueue()}. +new(_Options) -> + ?NOT_LOADED. + + +%% Extraction order is undefined for entries with duplicate priorities +-spec extract_max(hqueue()) -> hqueue_elem() | {error, empty}. +extract_max(_HQ) -> + ?NOT_LOADED. + + +-spec insert(hqueue(), hqueue_priority(), hqueue_val()) -> ok | {error, full}. +insert(_HQ, _Priority, _Val) -> + ?NOT_LOADED. + + +-spec size(hqueue()) -> integer(). +size(_HQ) -> + ?NOT_LOADED. + + +-spec max_elems(hqueue()) -> integer(). +max_elems(_HQ) -> + ?NOT_LOADED. + + +%% Returns old max elems or error if NewMaxElems < size(HQ) +-spec set_max_elems(hqueue(), pos_integer()) -> pos_integer() + | {error, too_small}. +set_max_elems(_HQ, _NewMaxElems) -> + ?NOT_LOADED. + + +-spec is_empty(hqueue()) -> boolean(). +is_empty(HQ) -> + hqueue:size(HQ) =:= 0. + + +-spec to_list(hqueue()) -> [hqueue_elem()]. +to_list(_HQ) -> + ?NOT_LOADED. + + +-spec from_list([hqueue_elem()]) -> {ok, hqueue()}. +from_list(Elems) -> + from_list(Elems, []). + + +-spec from_list([hqueue_elem()], [hqueue_option()]) -> {ok, hqueue()}. +from_list(Elems, Options) -> + {ok, HQ} = ?MODULE:new(Options), + lists:foreach(fun({Priority, Val}) -> + ?MODULE:insert(HQ, Priority, Val) + end, Elems), + {ok, HQ}. + + +-spec scale_by(hqueue(), float()) -> ok. +scale_by(_HQ, _Factor) -> + ?NOT_LOADED. + + +%% Returns old heap size or error if NewHeapSize < size(HQ) +-spec resize_heap(hqueue(), pos_integer()) -> pos_integer() + | {error, too_small}. +resize_heap(_HQ, _NewHeapSize) -> + ?NOT_LOADED. + + +-spec heap_size(hqueue()) -> pos_integer(). +heap_size(_HQ) -> + ?NOT_LOADED. + + +-spec info(hqueue()) -> [hqueue_stat()]. +info(HQ) -> + [ + {heap_size, hqueue:heap_size(HQ)}, + {max_elems, hqueue:max_elems(HQ)}, + {size, hqueue:size(HQ)} + ]. + + + +init() -> + PrivDir = case code:priv_dir(?MODULE) of + {error, _} -> + EbinDir = filename:dirname(code:which(?MODULE)), + AppPath = filename:dirname(EbinDir), + filename:join(AppPath, "priv"); + Path -> + Path + end, + erlang:load_nif(filename:join(PrivDir, "hqueue"), 0). + + +not_loaded(Line) -> + erlang:nif_error({not_loaded, [{module, ?MODULE}, {line, Line}]}). 
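+%% Usage sketch (hedged, based on the specs above; priorities are floats and
+%% extraction order for equal priorities is undefined, per extract_max/1):
+%%
+%%   {ok, HQ} = hqueue:new([{max_elems, 1024}]),
+%%   ok = hqueue:insert(HQ, 2.0, job_a),
+%%   ok = hqueue:insert(HQ, 5.0, job_b),
+%%   {5.0, job_b} = hqueue:extract_max(HQ),
+%%   1 = hqueue:size(HQ).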
diff --git a/src/nifSrc/cq/cq.erl b/src/nifSrc/cq/cq.erl deleted file mode 100644 index e69de29..0000000 diff --git a/src/nifSrc/enlfq/enlfq.erl b/src/nifSrc/enlfq/enlfq.erl new file mode 100644 index 0000000..9a24cc4 --- /dev/null +++ b/src/nifSrc/enlfq/enlfq.erl @@ -0,0 +1,51 @@ +-module(enlfq). + +-on_load(load_nif/0). + +-define(NOT_LOADED, not_loaded(?LINE)). + +%% API exports +-export([new/0, push/2, pop/1]). + +%%==================================================================== +%% API functions +%%==================================================================== + + +-spec(new() -> {ok, QueueRef :: reference()} | badarg | {error, Reason :: binary()}). +new() -> + ?NOT_LOADED. + +-spec(push(QueueRef :: reference(), Data :: any()) -> + true | {error, Reason :: binary()}). +push(_QueueRef, _Data) -> + ?NOT_LOADED. + +-spec(pop(QueueRef :: reference()) -> + {ok, Data :: any()} | empty | {error, Reason :: binary()}). +pop(_QueueRef) -> + ?NOT_LOADED. + +%%==================================================================== +%% Internal functions +%%==================================================================== + + +%% nif functions + +load_nif() -> + SoName = get_priv_path(?MODULE), + io:format(<<"Loading library: ~p ~n">>, [SoName]), + ok = erlang:load_nif(SoName, 0). + +get_priv_path(File) -> + case code:priv_dir(?MODULE) of + {error, bad_name} -> + Ebin = filename:dirname(code:which(?MODULE)), + filename:join([filename:dirname(Ebin), "priv", File]); + Dir -> + filename:join(Dir, File) + end. + +not_loaded(Line) -> + erlang:nif_error({not_loaded, [{module, ?MODULE}, {line, Line}]}). \ No newline at end of file diff --git a/src/nifSrc/enlfq/testing/benchmark.erl b/src/nifSrc/enlfq/testing/benchmark.erl new file mode 100644 index 0000000..05f9e92 --- /dev/null +++ b/src/nifSrc/enlfq/testing/benchmark.erl @@ -0,0 +1,71 @@ +-module(benchmark). +-author("silviu.caragea"). + +-export([ + benchmark_serial/2, + benchmark_concurrent/3 +]). + +benchmark_serial(Elements, MaxPriority) -> + rand:uniform(), %just to init the seed + {ok, Q} = enlfq:new(), + + {T0, ok} = timer:tc(fun() -> insert_none(Elements, MaxPriority) end), + {T1, ok} = timer:tc(fun() -> insert_item(Elements, Q, MaxPriority) end), + {T2, ok} = timer:tc(fun() -> remove_item(Q) end), + + T0Ms = T0/1000, + T1Ms = T1/1000, + T2Ms = T2/1000, + + io:format(<<"insert overhead: ~p ms insert time: ~p ms pop time: ~p ms ~n">>, [T0Ms, T1Ms, T2Ms]). + +benchmark_concurrent(Procs, Elements, MaxPriority) -> + {ok, Q} = enlfq:new(), + + ElsPerProcess = round(Elements/Procs), + + InsertNoneWorkFun = fun() -> + insert_none(ElsPerProcess, MaxPriority) + end, + + InsertWorkFun = fun() -> + insert_item(ElsPerProcess, Q, MaxPriority) + end, + + RemoveWorkFun = fun() -> + remove_item(Q) + end, + + {T0, _} = timer:tc(fun()-> multi_spawn:do_work(InsertNoneWorkFun, Procs) end), + {T1, _} = timer:tc(fun()-> multi_spawn:do_work(InsertWorkFun, Procs) end), + {T2, _} = timer:tc(fun()-> multi_spawn:do_work(RemoveWorkFun, Procs) end), + + T0Ms = T0/1000, + T1Ms = T1/1000, + T2Ms = T2/1000, + + io:format(<<"insert overhead: ~p ms insert time: ~p ms pop time: ~p ms ~n">>, [T0Ms, T1Ms, T2Ms]). + +insert_item(0, _Q, _Max) -> + ok; +insert_item(N, Q, Max) -> +%% El = rand:uniform(Max), + true = enlfq:push(Q,{}), + insert_item(N-1, Q, Max). + +remove_item(Q) -> + case enlfq:pop(Q) of + empty -> + ok; + {ok, _} -> + remove_item(Q) + end. + +insert_none(0, _Max) -> + ok; +insert_none(N, Max) -> +%% rand:uniform(Max), + insert_none(N-1, Max). 
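+%% Usage sketch: compare raw loop overhead against push/pop throughput.
+%% MaxPriority is effectively unused here (the rand:uniform/1 calls above are
+%% commented out), so any positive integer will do:
+%%
+%%   benchmark:benchmark_serial(1000000, 100).
+%%   benchmark:benchmark_concurrent(8, 1000000, 100).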
+ + diff --git a/src/nifSrc/enlfq/testing/multi_spawn.erl b/src/nifSrc/enlfq/testing/multi_spawn.erl new file mode 100644 index 0000000..3348e43 --- /dev/null +++ b/src/nifSrc/enlfq/testing/multi_spawn.erl @@ -0,0 +1,23 @@ +-module(multi_spawn). +-author("silviu.caragea"). + +-export([do_work/2]). + +do_work(Fun, Count) -> + process_flag(trap_exit, true), + spawn_childrens(Fun, Count), + wait_responses(Count). + +spawn_childrens(_Fun, 0) -> + ok; +spawn_childrens(Fun, Count) -> + spawn_link(Fun), + spawn_childrens(Fun, Count -1). + +wait_responses(0) -> + ok; +wait_responses(Count) -> + receive + {'EXIT',_FromPid, _Reason} -> + wait_responses(Count -1) + end. \ No newline at end of file diff --git a/src/nifSrc/enq/enq.erl b/src/nifSrc/enq/enq.erl new file mode 100644 index 0000000..9bb068d --- /dev/null +++ b/src/nifSrc/enq/enq.erl @@ -0,0 +1,159 @@ +%%%----------------------------------------------------------------------------- +%%% @author s@shuvatov.ru +%%% @copyright 2018 Sergei Shuvatov +%%% @doc +%%% Native implemented queue with TTL. +%%% By default queue type is FIFO and TTL is 0 (disabled), size unlimited. +%%% Usage: +%%% {ok, Q} = enq:new([fifo, +%%% {ttl, 10000}, % 10 seconds +%%% {max_size, 1000}]), % maximum 1000 elements +%%% ok = enq:push(Q, test), % push atom 'test' to the queue +%%% [test] = enq:pop(Q), % pop one element from the queue +%%% [] = enq:pop(Q), % pop returns empty list if the queue is empty +%%% % pushed item can be any term +%%% ok = enq:push(Q, fun() -> io:format("some important job~n") end), +%%% 1 = enq:size(Q), % you can take length of the queue as efficiently as O(1) +%%% @end +%%%----------------------------------------------------------------------------- +-module(enq). +-author("Sergei Shuvatov"). + +%% API +-export([new/0, + new/1, + push/2, + pop/1, + size/1]). + +-export_type([queue/0, option/0, error/0]). + +-type queue() :: reference(). +-type option() :: fifo | + lifo | + {ttl, Microseconds :: non_neg_integer()} | + {max_size, Count :: non_neg_integer()}. +-type error() :: max_size. + +%%============================================================================== +%% API +%%============================================================================== + +%% Same as enq:new([fifo, {ttl, 0}]). +-spec new() -> {ok, enq:queue()} | {error, enq:error()}. +new() -> + new([]). + +%% Returns a new queue or error in case of memory allocation error. +-spec new([option()]) -> {ok, enq:queue()} | {error, enq:error()}. +new(Options) -> + enq_nif:new(Options). + +%% Pushes Item on top (LIFO) or tail (FIFO) of Queue. +-spec push(Queue :: enq:queue(), Item :: any()) -> ok | {error, enq:error()}. +push(Queue, Item) -> + enq_nif:push(Queue, erlang:term_to_binary(Item)). + +%% Returns next item from the Queue. +-spec pop(Queue :: enq:queue()) -> [] | [any()]. +pop(Queue) -> + [ erlang:binary_to_term(I) || I <- enq_nif:pop(Queue) ]. + +%% Returns Queue length. Speed does not depend on number of elements. +-spec size(Queue :: enq:queue()) -> non_neg_integer(). +size(Queue) -> + enq_nif:size(Queue). + +%%============================================================================== +%% Tests +%%============================================================================== + +-ifdef(TEST). + +-include_lib("eunit/include/eunit.hrl"). + +-define(log(F, A), io:format(standard_error, "~p:line ~p: " F "~n", [?FILE, ?LINE | A])). +-define(log(F), ?log(F, [])). + +fifo_test() -> + fifo_test(1000000). 
+ +fifo_test(N) -> + {ok, Q} = enq:new(), + T1 = erlang:timestamp(), + % fill the queue with N elements + fill(Q, N), + Diff1 = timer:now_diff(erlang:timestamp(), T1), + ?log("FIFO fill time: ~p ms", [Diff1 / 1000]), + % ensure that size of queue matches N + N = enq:size(Q), + T2 = erlang:timestamp(), + % pop all elements + fifo_pop_all(Q, N), + Diff2 = timer:now_diff(erlang:timestamp(), T2), + ?log("FIFO pop time: ~p ms", [Diff2 / 1000]), + % size of the queue must be 0 + 0 = enq:size(Q). + +fill(_Q, 0) -> + ok; +fill(Q, N) -> + ok = enq:push(Q, N), + fill(Q, N - 1). + +fifo_pop_all(Q, 0) -> + [] = enq:pop(Q); +fifo_pop_all(Q, N) -> + [N] = enq:pop(Q), + fifo_pop_all(Q, N - 1). + +ttl_test() -> + {ok, Q} = enq:new([{ttl, 100}]), + enq:push(Q, test), + timer:sleep(95), + [test] = enq:pop(Q), + [] = enq:pop(Q), + enq:push(Q, test), + timer:sleep(105), + [] = enq:pop(Q). + +lifo_test() -> + lifo_test(1000000). + +lifo_test(N) -> + {ok, Q} = enq:new([lifo]), + T1 = erlang:timestamp(), + % fill the queue with N elements + fill(Q, N), + Diff1 = timer:now_diff(erlang:timestamp(), T1), + ?log("LIFO fill time: ~p ms", [Diff1 / 1000]), + % ensure that size of queue matches N + N = enq:size(Q), + T2 = erlang:timestamp(), + % pop all elements + lifo_pop_all(Q, N), + Diff2 = timer:now_diff(erlang:timestamp(), T2), + ?log("LIFO pop time: ~p ms", [Diff2 / 1000]), + % size of the queue must be 0 + 0 = enq:size(Q). + +lifo_pop_all(Q, N) -> + lifo_pop_all(Q, 1, N). + +lifo_pop_all(Q, I, N) when I > N -> + [] = enq:pop(Q); +lifo_pop_all(Q, I, N) -> + [I] = enq:pop(Q), + lifo_pop_all(Q, I + 1, N). + +max_size_test() -> + {ok, Q} = enq:new([{ttl, 100}, {max_size, 1}]), + ok = enq:push(Q, test), + timer:sleep(50), + {error, max_size} = enq:push(Q, 123), + timer:sleep(55), + ok = enq:push(Q, 321), + [321] = enq:pop(Q), + [] = enq:pop(Q). + +-endif. % TEST \ No newline at end of file diff --git a/src/nifSrc/enq/enq_nif.erl b/src/nifSrc/enq/enq_nif.erl new file mode 100644 index 0000000..d2cf618 --- /dev/null +++ b/src/nifSrc/enq/enq_nif.erl @@ -0,0 +1,63 @@ +%%%------------------------------------------------------------------- +%%% @author s@shuvatov.ru +%%% @copyright 2018 Sergei Shuvatov +%%%------------------------------------------------------------------- +-module(enq_nif). +-author("Sergei Shuvatov"). + +%% API +-export([new/1, + push/2, + pop/1, + size/1]). + +-on_load(load_nif/0). + +-define(app, enq). +-define(log(F, A), io:format(standard_error, "~p:~p: " F, [?MODULE, ?LINE | A])). +-define(not_loaded(), not_loaded(?LINE)). + +%%============================================================================== +%% API +%%============================================================================== + +new(_Options) -> + ?not_loaded(). + +push(_Queue, _Item) -> + ?not_loaded(). + +pop(_Queue) -> + ?not_loaded(). + +size(_Queue) -> + ?not_loaded(). + +%%============================================================================== +%% Internal functions +%%============================================================================== + +load_nif() -> + SoName = get_priv_path(?MODULE), + % ?log("Loading library: ~p ~n", [SoName]), + ok = erlang:load_nif(SoName, 0). + +get_priv_path(File) -> + case code:priv_dir(get_app()) of + {error, bad_name} -> + Ebin = filename:dirname(code:which(?MODULE)), + filename:join([filename:dirname(Ebin), "priv", File]); + Dir -> + filename:join(Dir, File) + end. + +get_app() -> + case application:get_application(?MODULE) of + {ok, App} -> + App; + _ -> + ?app + end. 
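+%% Loading sketch (hedged, assuming a conventional rebar layout): the NIF is
+%% looked up under the application's priv directory, falling back to the
+%% ../priv directory next to ebin when code:priv_dir/1 fails, e.g.
+%%
+%%   _build/default/lib/enq/ebin/enq_nif.beam
+%%   _build/default/lib/enq/priv/enq_nif.so   % SoName passed to erlang:load_nif/2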
+ +not_loaded(Line) -> + erlang:nif_error({not_loaded, [{module, ?MODULE}, {line, Line}]}). diff --git a/src/nifSrc/etsq/etsq.erl b/src/nifSrc/etsq/etsq.erl new file mode 100644 index 0000000..4f34279 --- /dev/null +++ b/src/nifSrc/etsq/etsq.erl @@ -0,0 +1,103 @@ +%% @author vinod +%% @doc @todo Add description to ets_queue. + + +-module(etsq). +-on_load(load_nif/0). + +-export([load_nif/0, + new/1, + info/1, + push/2, + pop/1, + front/1]). + +%% ==================================================================== +%% API functions +%% ==================================================================== + +-define(LIB_BASE_NAME, "etsq"). +-define(LIB_NIF_VSN, 1). +-define(LIB_APP_NAME, etsq). + +-spec new(atom()) -> ok | {error, already_exists}. +new(_Name) -> + erlang:nif_error({nif_not_loaded,module,?MODULE,line,?LINE}). + +-spec info(atom()) -> ok. +info(_Name) -> + erlang:nif_error({nif_not_loaded,module,?MODULE,line,?LINE}). + +-spec push(atom(), term()) -> ok. +push(Name, Term) -> + push_back(Name, term_to_binary(Term)). + +-spec pop(atom()) -> ok | {error, empty}. +pop(Name) -> + get_val(pop_front(Name)). + +-spec front(atom()) -> ok | {error, empty}. +front(Name) -> + get_val(get_front(Name)). + + +get_val(Value) when is_binary(Value) -> + binary_to_term(Value); +get_val(Value) -> + Value. + +push_back(_Name, _Bin) -> + erlang:nif_error({nif_not_loaded,module,?MODULE,line,?LINE}). +pop_front(_Name) -> + erlang:nif_error({nif_not_loaded,module,?MODULE,line,?LINE}). +get_front(_Name) -> + erlang:nif_error({nif_not_loaded,module,?MODULE,line,?LINE}). + +-spec load_nif() -> ok | {error, term()}. +load_nif() -> + LibBaseName = ?LIB_BASE_NAME, + PrivDir = code:priv_dir(etsq), + LibName = case erlang:system_info(build_type) of + opt -> + LibBaseName; + Type -> + LibTypeName = LibBaseName ++ "." ++ atom_to_list(Type), + case (filelib:wildcard( + filename:join( + [PrivDir, + "lib", + LibTypeName ++ "*"])) /= []) orelse + (filelib:wildcard( + filename:join( + [PrivDir, + "lib", + erlang:system_info(system_architecture), + LibTypeName ++ "*"])) /= []) of + true -> LibTypeName; + false -> LibBaseName + end + end, + Lib = filename:join([PrivDir, "lib", LibName]), + Status = case erlang:load_nif(Lib, ?LIB_NIF_VSN) of + ok -> ok; + {error, {load_failed, _}}=Error1 -> + ArchLibDir = + filename:join([PrivDir, "lib", + erlang:system_info(system_architecture)]), + Candidate = + filelib:wildcard(filename:join([ArchLibDir,LibName ++ "*" ])), + case Candidate of + [] -> Error1; + _ -> + ArchLib = filename:join([ArchLibDir, LibName]), + erlang:load_nif(ArchLib, ?LIB_NIF_VSN) + end; + Error1 -> Error1 + end, + case Status of + ok -> ok; + {error, {E, Str}} -> + error_logger:error_msg("Unable to load ~p nif library. " + "Failed with error:~n\"~p, ~s\"~n", [?LIB_APP_NAME, E, Str]), + Status + end. diff --git a/src/nifSrc/etsq/etsq_tests.erl b/src/nifSrc/etsq/etsq_tests.erl new file mode 100644 index 0000000..45a2108 --- /dev/null +++ b/src/nifSrc/etsq/etsq_tests.erl @@ -0,0 +1,65 @@ +%% @author vinod +%% @doc @todo Add description to etsq_tests. + + +-module(etsq_tests). +-compile(export_all). + +-export([init/0, + time/3, + stats/3]). + +-type microseconds() :: pos_integer(). +-type milliseconds() :: pos_integer(). + +%% ==================================================================== +%% API functions +%% ==================================================================== + +init() -> + etsq:new(queue), + ets:new(tab, [named_table, public]). 
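+%% Usage sketch: initialise both containers once, then time the same number
+%% of operations against ets and against the NIF-backed queue, e.g.
+%%
+%%   etsq_tests:init(),
+%%   EtsUs   = etsq_tests:time(4, run_ets,   100000),   % microseconds
+%%   QueueUs = etsq_tests:time(4, run_queue, 100000).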
+ +-spec time(run_ets | run_queue, pos_integer()) -> microseconds(). +time(Op, NumOp) -> + {Time, _} = timer:tc(?MODULE, Op, [NumOp]), + Time. + +-spec time(pos_integer(), run_ets | run_queue, pos_integer()) -> microseconds(). +time(NumProc, Op, NumOp) -> + {Time, _} = timer:tc(?MODULE, spawn, [NumProc, Op, NumOp]), + Time. + +-spec stats(run_ets | run_queue, pos_integer()) -> milliseconds(). +stats(Op, NumOp) -> + erlang:statistics(runtime), + ?MODULE:Op(NumOp), + {_, Time} = erlang:statistics(runtime), + Time. + +-spec stats(pos_integer(), run_ets | run_queue, pos_integer()) -> milliseconds(). +stats(NumProc, Op, NumOp) -> + erlang:statistics(runtime), + ?MODULE:spawn(NumProc, Op, NumOp), + {_, Time} = erlang:statistics(runtime), + Time. + +run_ets(Num) -> + Self = self(), + Data = lists:seq(1, 100), + L = lists:seq(1, Num), + [ets:insert(tab, {{Self, K}, Data}) || K <- L], + [ets:take(tab, {Self, K}) || K <- L]. + +run_queue(Num) -> + Self = self(), + Data = lists:seq(1, 100), + L = lists:seq(1, Num), + [etsq:push(queue, {{Self, K}, Data}) || K <- L], + [etsq:pop(queue) || _ <- L]. + +spawn(NumProc, Op, NumOp) -> + Pid = self(), + L = lists:seq(1, NumProc), + [spawn_link(fun() -> ?MODULE:Op(NumOp), Pid ! done end) || _ <- L], + [receive done -> ok end || _ <- L]. diff --git a/src/nifSrc/gb_lru/btree_lru.erl b/src/nifSrc/gb_lru/btree_lru.erl new file mode 100644 index 0000000..4238d9b --- /dev/null +++ b/src/nifSrc/gb_lru/btree_lru.erl @@ -0,0 +1,102 @@ +-module(btree_lru). + +-export([create/1, + close/1, + register_pid/2, + unregister_pid/1, + get_registered_pid/1, + set_max_size/2, + get_max_size/1, + get_size/1, + write/2, + write/3, + read/2, + next/2, + prev/2, + remove/2, + seek/2, + iterate_next/2, + oldest/1, + latest/1, + last/1, + first/1]). + + + +-on_load(init/0). + +init() -> + Dir = "../priv", + PrivDir = + case code:priv_dir(?MODULE) of + {error, _} -> + case code:which(?MODULE) of + Filename when is_list(Filename) -> + filename:join([filename:dirname(Filename), Dir]); + _ -> + Dir + end; + Path -> Path + end, + Lib = filename:join(PrivDir, "btreelru_nif"), + erlang:load_nif(Lib, 0). + +write(Tab, {Key, Value}) -> + write(Tab, Key, Value). + +create(_Maxsize) -> + erlang:nif_error(nif_library_not_loaded). + +register_pid(_Tab, _Pid) -> + erlang:nif_error(nif_library_not_loaded). + +unregister_pid(_Tab) -> + erlang:nif_error(nif_library_not_loaded). + +get_registered_pid(_Tab) -> + erlang:nif_error(nif_library_not_loaded). + +set_max_size(_Tab, _MaxSize) -> + erlang:nif_error(nif_library_not_loaded). + +get_max_size(_Tab) -> + erlang:nif_error(nif_library_not_loaded). + +get_size(_Tab) -> + erlang:nif_error(nif_library_not_loaded). + +write(_Tab, _Key, _Value) -> + erlang:nif_error(nif_library_not_loaded). + +read(_Tab, _Key) -> + erlang:nif_error(nif_library_not_loaded). + +next(_Tab, _Key) -> + erlang:nif_error(nif_library_not_loaded). + +prev(_Tab, _Key) -> + erlang:nif_error(nif_library_not_loaded). + +remove(_Tab, _Key) -> + erlang:nif_error(nif_library_not_loaded). + +seek(_Tab, _Key) -> + erlang:nif_error(nif_library_not_loaded). + +iterate_next(_Tab, _It) -> + erlang:nif_error(nif_library_not_loaded). + +oldest(_Tab) -> + erlang:nif_error(nif_library_not_loaded). + +latest(_Tab) -> + erlang:nif_error(nif_library_not_loaded). + +close(_Tab) -> + erlang:nif_error(nif_library_not_loaded). + +last(_Tab) -> + erlang:nif_error(nif_library_not_loaded). + +first(_Tab) -> + erlang:nif_error(nif_library_not_loaded). 
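+%% Usage sketch (return shapes as exercised by btree_lru_test.erl; close/1's
+%% return value is not asserted anywhere, so it is left unmatched here):
+%%
+%%   {ok, Tab} = btree_lru:create(64 * 1024 * 1024),
+%%   ok = btree_lru:write(Tab, {key1, value1}),   % write/2 wraps write/3
+%%   {key1, value1} = btree_lru:read(Tab, key1),
+%%   _ = btree_lru:close(Tab).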
diff --git a/src/nifSrc/gb_lru/btree_lru_test.erl b/src/nifSrc/gb_lru/btree_lru_test.erl new file mode 100644 index 0000000..8d5cd22 --- /dev/null +++ b/src/nifSrc/gb_lru/btree_lru_test.erl @@ -0,0 +1,59 @@ +-module(btree_lru_test). + +-compile(export_all). + +-export([create/0, + create/1]). + + + +create() -> + create(1024*1024*1024*1000). + +create(Size) -> + {ok, _Tab} = btree_lru:create(Size). + + +write(Tab) -> + Objs = [{X,X} || X <- lists:seq(1,10000000)], + write(Tab, Objs). + +write(Tab, [Obj | Objs]) -> + ok = btree_lru:write(Tab, Obj), + write(Tab, Objs); +write(_Tab, []) -> + ok. + +read(Tab, [{K,D} | Objs]) -> + {K,D} = btree_lru:read(Tab, K), + read(Tab, Objs); +read(_Tab, []) -> + ok. + +timing_write(Tab) -> + Objs = [{X,X} || X <- lists:seq(1,10000000)], + timer:tc(?MODULE, write, [Tab, Objs]). +timing_read(Tab) -> + Objs = [{X,X} || X <- lists:seq(1,10000000)], + timer:tc(?MODULE, read, [Tab, Objs]). + +timing_ets_write(Tab) -> + Objs = [{X,X} || X <- lists:seq(1,10000000)], + timer:tc(?MODULE, ets_write, [Tab, Objs]). + +timing_ets_read(Tab) -> + Objs = [{X,X} || X <- lists:seq(1,10000000)], + timer:tc(?MODULE, ets_read, [Tab, Objs]). + +ets_write(Tab, [Obj | Objs]) -> + true = ets:insert(Tab, Obj), + ets_write(Tab, Objs); +ets_write(_Tab, []) -> + ok. + +ets_read(Tab, [{K,D} | Objs]) -> + [{K,D}] = ets:lookup(Tab, K), + ets_read(Tab, Objs); +ets_read(_Tab, []) -> + ok. + diff --git a/src/nifSrc/gb_lru/gb_lru.app.src b/src/nifSrc/gb_lru/gb_lru.app.src new file mode 100644 index 0000000..efff174 --- /dev/null +++ b/src/nifSrc/gb_lru/gb_lru.app.src @@ -0,0 +1,6 @@ +{application, gb_lru, + [{description, "gb_lru"}, + {vsn, "0.1"}, + {registered, []}, + {applications, []} + ]}. diff --git a/src/nifSrc/native_array/native_array.erl b/src/nifSrc/native_array/native_array.erl new file mode 100644 index 0000000..09e7ab8 --- /dev/null +++ b/src/nifSrc/native_array/native_array.erl @@ -0,0 +1,19 @@ +-module(native_array). +-export([new/2, get/2, put/3, delete/1]). +-on_load(init/0). + +init() -> + ok = erlang:load_nif("./native_array_nif", 0). + +new(_Idx, _Length) -> + exit(nif_library_not_loaded). + +get(_Idx, _Offset) -> + exit(nif_library_not_loaded). + +put(_Idx, _Offset, _NewVal) -> + exit(nif_library_not_loaded). + +delete(_Idx) -> + exit(nif_library_not_loaded). +
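+%% Usage sketch (hedged: argument meanings inferred from the stubs above,
+%% return values are whatever native_array_nif provides). Note that init/0
+%% loads "./native_array_nif" relative to the current working directory, so
+%% the shared library must sit next to where the VM is started:
+%%
+%%   native_array:new(1, 1024),      % array #1 with 1024 slots
+%%   native_array:put(1, 10, 255),
+%%   Val = native_array:get(1, 10),
+%%   native_array:delete(1).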