From 706eaa128982133ed6d8824d96ea853bb242f633 Mon Sep 17 00:00:00 2001 From: SisMaker <1713699517@qq.com> Date: Sun, 23 Feb 2020 00:05:46 +0800 Subject: [PATCH] some nif sample --- .gitignore | 5 +- c_src/.enq/enq_nif.c | 442 +++ c_src/.enq/fifo.h | 71 + c_src/.enq/lifo.h | 63 + c_src/.enq/rebar.config | 12 + c_src/bitmap_filter/bitmap_filter.c | 80 + c_src/{cq => bitmap_filter}/rebar.config | 6 +- c_src/bsn/bsn_ext.c | 448 +++ c_src/bsn/bsn_int.c | 331 ++ c_src/bsn/c_src/bsn_ext.c | 448 +++ c_src/bsn/c_src/bsn_int.c | 331 ++ c_src/{cq2 => bsn}/rebar.config | 11 +- c_src/couchdb_hqueue/c_src/hqueue.c | 318 ++ c_src/couchdb_hqueue/c_src/hqueue.d | 5 + c_src/couchdb_hqueue/c_src/hqueue.h | 60 + c_src/couchdb_hqueue/c_src/hqueue_nif.c | 601 +++ c_src/couchdb_hqueue/c_src/hqueue_nif.d | 5 + c_src/couchdb_hqueue/c_src/valgrind_sample.c | 72 + c_src/couchdb_hqueue/hqueue.c | 318 ++ c_src/couchdb_hqueue/hqueue.h | 60 + c_src/couchdb_hqueue/hqueue_nif.c | 601 +++ c_src/couchdb_hqueue/rebar.config | 13 + c_src/couchdb_hqueue/valgrind_sample.c | 72 + c_src/cq/cq_nif.c | 564 --- c_src/cq/cq_nif.h | 71 - c_src/cq1/cq_nif.c | 564 --- c_src/cq1/cq_nif.h | 71 - c_src/cq1/rebar.config | 26 - c_src/cq2/cq_nif.c | 564 --- c_src/cq2/cq_nif.h | 71 - c_src/enlfq/Makefile | 80 + c_src/enlfq/concurrentqueue.h | 3637 ++++++++++++++++++ c_src/enlfq/enlfq.cc | 84 + c_src/enlfq/enlfq.h | 10 + c_src/enlfq/enlfq_nif.cc | 57 + c_src/enlfq/enlfq_nif.h | 19 + c_src/enlfq/nif_utils.cc | 27 + c_src/enlfq/nif_utils.h | 6 + c_src/enlfq/rebar.config | 7 + c_src/etsq/etsq.cpp | 172 + c_src/etsq/etsq.h | 130 + c_src/etsq/rebar.config | 7 + c_src/gb_lru/binary.h | 103 + c_src/gb_lru/btree.h | 2394 ++++++++++++ c_src/gb_lru/btree_container.h | 349 ++ c_src/gb_lru/btree_map.h | 130 + c_src/gb_lru/btreelru_nif.cpp | 619 +++ c_src/gb_lru/erlterm.h | 71 + c_src/gb_lru/lru.h | 266 ++ c_src/gb_lru/murmurhash2.h | 73 + c_src/gb_lru/rebar.config | 7 + c_src/native_array/native_array_nif.c | 90 + c_src/native_array/rebar.config | 7 + c_src/neural/NeuralTable.cpp | 905 +++++ c_src/neural/NeuralTable.h | 121 + c_src/neural/neural.cpp | 134 + c_src/neural/neural_utils.cpp | 46 + c_src/neural/neural_utils.h | 9 + c_src/neural/rebar.config | 14 + src/dataType/utTermSize.erl | 2 +- src/nifSrc/bitmap_filter/bitmap_filter.erl | 20 + src/nifSrc/bsn/bsn.erl | 77 + src/nifSrc/bsn/bsn_ext.erl | 56 + src/nifSrc/bsn/bsn_int.erl | 45 + src/nifSrc/bsn/bsn_measure.erl | 236 ++ src/nifSrc/couchdb_hqeue/hqueue.erl | 160 + src/nifSrc/cq/cq.erl | 0 src/nifSrc/enlfq/enlfq.erl | 51 + src/nifSrc/enlfq/testing/benchmark.erl | 71 + src/nifSrc/enlfq/testing/multi_spawn.erl | 23 + src/nifSrc/enq/enq.erl | 159 + src/nifSrc/enq/enq_nif.erl | 63 + src/nifSrc/etsq/etsq.erl | 103 + src/nifSrc/etsq/etsq_tests.erl | 65 + src/nifSrc/gb_lru/btree_lru.erl | 102 + src/nifSrc/gb_lru/btree_lru_test.erl | 59 + src/nifSrc/gb_lru/gb_lru.app.src | 6 + src/nifSrc/native_array/native_array.erl | 19 + 78 files changed, 15255 insertions(+), 1940 deletions(-) create mode 100644 c_src/.enq/enq_nif.c create mode 100644 c_src/.enq/fifo.h create mode 100644 c_src/.enq/lifo.h create mode 100644 c_src/.enq/rebar.config create mode 100644 c_src/bitmap_filter/bitmap_filter.c rename c_src/{cq => bitmap_filter}/rebar.config (88%) create mode 100644 c_src/bsn/bsn_ext.c create mode 100644 c_src/bsn/bsn_int.c create mode 100644 c_src/bsn/c_src/bsn_ext.c create mode 100644 c_src/bsn/c_src/bsn_int.c rename c_src/{cq2 => bsn}/rebar.config (84%) create mode 100644 c_src/couchdb_hqueue/c_src/hqueue.c 
create mode 100644 c_src/couchdb_hqueue/c_src/hqueue.d create mode 100644 c_src/couchdb_hqueue/c_src/hqueue.h create mode 100644 c_src/couchdb_hqueue/c_src/hqueue_nif.c create mode 100644 c_src/couchdb_hqueue/c_src/hqueue_nif.d create mode 100644 c_src/couchdb_hqueue/c_src/valgrind_sample.c create mode 100644 c_src/couchdb_hqueue/hqueue.c create mode 100644 c_src/couchdb_hqueue/hqueue.h create mode 100644 c_src/couchdb_hqueue/hqueue_nif.c create mode 100644 c_src/couchdb_hqueue/rebar.config create mode 100644 c_src/couchdb_hqueue/valgrind_sample.c delete mode 100644 c_src/cq/cq_nif.c delete mode 100644 c_src/cq/cq_nif.h delete mode 100644 c_src/cq1/cq_nif.c delete mode 100644 c_src/cq1/cq_nif.h delete mode 100644 c_src/cq1/rebar.config delete mode 100644 c_src/cq2/cq_nif.c delete mode 100644 c_src/cq2/cq_nif.h create mode 100644 c_src/enlfq/Makefile create mode 100644 c_src/enlfq/concurrentqueue.h create mode 100644 c_src/enlfq/enlfq.cc create mode 100644 c_src/enlfq/enlfq.h create mode 100644 c_src/enlfq/enlfq_nif.cc create mode 100644 c_src/enlfq/enlfq_nif.h create mode 100644 c_src/enlfq/nif_utils.cc create mode 100644 c_src/enlfq/nif_utils.h create mode 100644 c_src/enlfq/rebar.config create mode 100644 c_src/etsq/etsq.cpp create mode 100644 c_src/etsq/etsq.h create mode 100644 c_src/etsq/rebar.config create mode 100644 c_src/gb_lru/binary.h create mode 100644 c_src/gb_lru/btree.h create mode 100644 c_src/gb_lru/btree_container.h create mode 100644 c_src/gb_lru/btree_map.h create mode 100644 c_src/gb_lru/btreelru_nif.cpp create mode 100644 c_src/gb_lru/erlterm.h create mode 100644 c_src/gb_lru/lru.h create mode 100644 c_src/gb_lru/murmurhash2.h create mode 100644 c_src/gb_lru/rebar.config create mode 100644 c_src/native_array/native_array_nif.c create mode 100644 c_src/native_array/rebar.config create mode 100644 c_src/neural/NeuralTable.cpp create mode 100644 c_src/neural/NeuralTable.h create mode 100644 c_src/neural/neural.cpp create mode 100644 c_src/neural/neural_utils.cpp create mode 100644 c_src/neural/neural_utils.h create mode 100644 c_src/neural/rebar.config create mode 100644 src/nifSrc/bitmap_filter/bitmap_filter.erl create mode 100644 src/nifSrc/bsn/bsn.erl create mode 100644 src/nifSrc/bsn/bsn_ext.erl create mode 100644 src/nifSrc/bsn/bsn_int.erl create mode 100644 src/nifSrc/bsn/bsn_measure.erl create mode 100644 src/nifSrc/couchdb_hqeue/hqueue.erl delete mode 100644 src/nifSrc/cq/cq.erl create mode 100644 src/nifSrc/enlfq/enlfq.erl create mode 100644 src/nifSrc/enlfq/testing/benchmark.erl create mode 100644 src/nifSrc/enlfq/testing/multi_spawn.erl create mode 100644 src/nifSrc/enq/enq.erl create mode 100644 src/nifSrc/enq/enq_nif.erl create mode 100644 src/nifSrc/etsq/etsq.erl create mode 100644 src/nifSrc/etsq/etsq_tests.erl create mode 100644 src/nifSrc/gb_lru/btree_lru.erl create mode 100644 src/nifSrc/gb_lru/btree_lru_test.erl create mode 100644 src/nifSrc/gb_lru/gb_lru.app.src create mode 100644 src/nifSrc/native_array/native_array.erl diff --git a/.gitignore b/.gitignore index 18df924..d566b05 100644 --- a/.gitignore +++ b/.gitignore @@ -22,4 +22,7 @@ priv .idea *.iml cmake-build* -CMakeLists.txt \ No newline at end of file +CMakeLists.txt + +*.pdb +compile_commands.json \ No newline at end of file diff --git a/c_src/.enq/enq_nif.c b/c_src/.enq/enq_nif.c new file mode 100644 index 0000000..a855a36 --- /dev/null +++ b/c_src/.enq/enq_nif.c @@ -0,0 +1,442 @@ +#define _GNU_SOURCE + +#include "erl_nif.h" + +#include +#include +#include +#include +#include + +// #include 
"fifo.h" +#include "lifo.h" + +typedef struct { + ERL_NIF_TERM ok; + ERL_NIF_TERM error; + ERL_NIF_TERM fifo; + ERL_NIF_TERM lifo; + ERL_NIF_TERM ttl; + ERL_NIF_TERM max_size; +} atoms_t; + +typedef struct { + ErlNifResourceType *queue; + atoms_t atoms; +} priv_t; + +typedef struct { + union { + fifo_handle_t fifo; + lifo_handle_t lifo; + } handle; + ErlNifBinary data; + struct timespec added; +} item_t; + +typedef enum { + QTYPE_FIFO = 0, + QTYPE_LIFO +} queue_type_t; + +typedef struct queue { + union { + fifo_t fifo; + lifo_t lifo; + } queue; + uint64_t ttl; + uint64_t max_size; + void (*push) (struct queue *inst, item_t *item); + item_t* (*pop) (struct queue *inst); + void (*free) (struct queue *inst); + uint64_t (*size) (struct queue *inst); + void (*cleanup) (struct queue *inst); +} queue_t; + +// returns tuple {error, atom()} +static inline ERL_NIF_TERM +make_error(ErlNifEnv* env, const char *error) { + priv_t *priv = (priv_t *) enif_priv_data(env); + + return enif_make_tuple2(env, priv->atoms.error, enif_make_atom(env, error)); +} + +// returns time diff in milliseconds +static inline int64_t +tdiff(struct timespec *t2, struct timespec *t1) { + return (t2->tv_sec * 1000 + t2->tv_nsec / 1000000UL) - + (t1->tv_sec * 1000 + t1->tv_nsec / 1000000UL); +} + +static inline void +gettime(struct timespec *tp) { + int rc = clock_gettime(CLOCK_MONOTONIC_RAW, tp); + assert(rc == 0); +} + +/******************************************************************************/ +/* FIFO callbacks */ +/******************************************************************************/ + +static void +cleanup_fifo(queue_t *inst) { + struct timespec now; + + gettime(&now); + + for (;;) { + item_t *item = NULL; + __fifo_peak(&inst->queue.fifo, item, handle.fifo); + + if (item == NULL) + return; + + int64_t diff = tdiff(&now, &item->added); + if (diff < inst->ttl) { + return; + } else { + __fifo_pop(&inst->queue.fifo, item, handle.fifo); + enif_release_binary(&item->data); + enif_free(item); + } + } +} + +static void +push_fifo(queue_t *inst, item_t *item) { + __fifo_push(&inst->queue.fifo, item, handle.fifo); +} + +static item_t * +pop_fifo(queue_t *inst) { + item_t *item = NULL; + + if (inst->ttl > 0) { + struct timespec now; + + gettime(&now); + + for (;;) { + __fifo_pop(&inst->queue.fifo, item, handle.fifo); + + if (item == NULL) + return NULL; + + int64_t diff = tdiff(&now, &item->added); + if (diff < inst->ttl) { + return item; + } else { + enif_release_binary(&item->data); + enif_free(item); + } + } + } else { + __fifo_pop(&inst->queue.fifo, item, handle.fifo); + } + + return item; +} + +static void +free_fifo(queue_t *inst) { + item_t *item; + + for(;;) { + __fifo_pop(&inst->queue.fifo, item, handle.fifo); + + if (item == NULL) + return; + + enif_release_binary(&item->data); + enif_free(item); + } +} + +static uint64_t +size_fifo(queue_t *inst) { + return fifo_length(&inst->queue.fifo); +} + +/******************************************************************************/ +/* LIFO callbacks */ +/******************************************************************************/ + +static void +cleanup_lifo(queue_t *inst) { + struct timespec now; + + gettime(&now); + + for(;;) { + item_t *item = inst->queue.lifo.tail; + + if (item == NULL) + return; + + int64_t diff = tdiff(&now, &item->added); + if (diff < inst->ttl) { + return; + } else { + item_t *prev = item->handle.lifo.prev; + + if (prev != NULL) + prev->handle.lifo.next = NULL; + + inst->queue.lifo.tail = prev; + + enif_release_binary(&item->data); + 
enif_free(item); + } + } +} + +static void +push_lifo(queue_t *inst, item_t *item) { + __lifo_push(&inst->queue.lifo, item, handle.lifo); +} + +static item_t * +pop_lifo(queue_t *inst) { + item_t *item = NULL; + + if (inst->ttl > 0) + cleanup_lifo(inst); + + __lifo_pop(&inst->queue.lifo, item, handle.lifo); + + return item; +} + +static void +free_lifo(queue_t *inst) { + item_t *item; + + for(;;) { + __lifo_pop(&inst->queue.lifo, item, handle.lifo); + + if (item == NULL) + return; + + enif_release_binary(&item->data); + enif_free(item); + } +} + +static uint64_t +size_lifo(queue_t *inst) { + return lifo_length(&inst->queue.lifo); +} + +/****************************************************************************** +** NIFs +*******************************************************************************/ + +static ERL_NIF_TERM +new_queue(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { + if (!enif_is_list(env, argv[0])) + return enif_make_badarg(env); + + priv_t *priv = (priv_t *) enif_priv_data(env); + + queue_type_t qtype = QTYPE_FIFO; + unsigned long ttl = 0; + unsigned long max_size = 0; + + ERL_NIF_TERM settings_list = argv[0]; + ERL_NIF_TERM head; + + // parses proplist [fifo, lifo, {ttl, non_neg_integer()}, {max_size, non_neg_integer()}] + while(enif_get_list_cell(env, settings_list, &head, &settings_list)) + { + const ERL_NIF_TERM *items; + int arity; + + if (enif_is_atom(env, head)) { + if (enif_is_identical(head, priv->atoms.fifo)) { + qtype = QTYPE_FIFO; + } else if (enif_is_identical(head, priv->atoms.lifo)) { + qtype = QTYPE_LIFO; + } else { + return enif_make_badarg(env); + } + } else if (enif_get_tuple(env, head, &arity, &items) && arity == 2) { + if (enif_is_identical(items[0], priv->atoms.ttl)) { + if (!enif_get_ulong(env, items[1], &ttl)) { + return enif_make_badarg(env); + } + } else if (enif_is_identical(items[0], priv->atoms.max_size)) { + if (!enif_get_ulong(env, items[1], &max_size)) { + return enif_make_badarg(env); + } + } else { + return enif_make_badarg(env); + } + } else { + return enif_make_badarg(env); + } + } + + queue_t *inst = (queue_t *) enif_alloc_resource(priv->queue, sizeof(*inst)); + + if (inst == NULL) + return make_error(env, "enif_alloc_resource"); + + inst->ttl = ttl; + inst->max_size = max_size; + + switch (qtype) { + case QTYPE_FIFO: + fifo_init(&inst->queue.fifo); + inst->push = &push_fifo; + inst->pop = &pop_fifo; + inst->free = &free_fifo; + inst->size = &size_fifo; + inst->cleanup = &cleanup_fifo; + break; + case QTYPE_LIFO: + lifo_init(&inst->queue.lifo); + inst->push = &push_lifo; + inst->pop = &pop_lifo; + inst->free = &free_lifo; + inst->size = &size_lifo; + inst->cleanup = &cleanup_lifo; + break; + } + + ERL_NIF_TERM result = enif_make_resource(env, inst); + enif_release_resource(inst); + + return enif_make_tuple2(env, priv->atoms.ok, result); +} + +static ERL_NIF_TERM +push_item(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { + priv_t *priv = (priv_t *) enif_priv_data(env); + + queue_t *inst; + + if (!enif_get_resource(env, argv[0], priv->queue, (void**) &inst)) + return enif_make_badarg(env); + + // todo: check an owner of the queue + + ErlNifBinary bin; + if (!enif_inspect_binary(env, argv[1], &bin)) + return enif_make_badarg(env); + + if (inst->ttl > 0) { + inst->cleanup(inst); + } + + if (inst->max_size > 0 && inst->size(inst) >= inst->max_size) { + return enif_make_tuple2(env, priv->atoms.error, priv->atoms.max_size); + } + + item_t *item = (item_t *) enif_alloc(sizeof(*item)); + + if (item == NULL) + return 
make_error(env, "enif_alloc"); + + if (!enif_alloc_binary(bin.size, &item->data)) { + enif_free(item); + return make_error(env, "enif_alloc_binary"); + } + + memcpy(item->data.data, bin.data, bin.size); + + if (inst->ttl > 0) { + gettime(&item->added); + } + + inst->push(inst, item); + return priv->atoms.ok; +} + +static ERL_NIF_TERM +pop_item(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { + priv_t *priv = (priv_t *) enif_priv_data(env); + + queue_t *inst; + item_t *item; + + if (!enif_get_resource(env, argv[0], priv->queue, (void**) &inst)) + return enif_make_badarg(env); + + // todo: check an owner of the queue + + item = inst->pop(inst); + + if (item == NULL) + return enif_make_list(env, 0); + + ERL_NIF_TERM result = enif_make_binary(env, &item->data); + + enif_free(item); + + return enif_make_list1(env, result); +} + +static ERL_NIF_TERM +queue_size(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { + priv_t *priv = (priv_t *) enif_priv_data(env); + + queue_t *inst; + + if (!enif_get_resource(env, argv[0], priv->queue, (void**) &inst)) + return enif_make_badarg(env); + + return enif_make_uint64(env, inst->size(inst)); +} + +/****************************************************************************** +** NIF initialization +*******************************************************************************/ + +static void +enq_queue_free(ErlNifEnv* env, void* obj) { + queue_t *inst = obj; + inst->free(inst); +} + +static priv_t * +make_priv(ErlNifEnv *env) { + priv_t *priv = enif_alloc(sizeof(*priv)); + + if (priv == NULL) + return NULL; + + ErlNifResourceFlags flags = ERL_NIF_RT_CREATE | ERL_NIF_RT_TAKEOVER; + priv->queue = enif_open_resource_type(env, NULL, "enq_queue", enq_queue_free, flags, NULL); + + priv->atoms.ok = enif_make_atom(env, "ok"); + priv->atoms.error = enif_make_atom(env, "error"); + priv->atoms.fifo = enif_make_atom(env, "fifo"); + priv->atoms.lifo = enif_make_atom(env, "lifo"); + priv->atoms.ttl = enif_make_atom(env, "ttl"); + priv->atoms.max_size = enif_make_atom(env, "max_size"); + + return priv; +} + +static int +enq_nif_load(ErlNifEnv *env, void **priv_data, ERL_NIF_TERM load_info) { + *priv_data = make_priv(env); + + return 0; +} + +static int +enq_nif_upgrade(ErlNifEnv *env, void **priv_data, void **old_priv_data, ERL_NIF_TERM load_info) { + *priv_data = make_priv(env); + + return 0; +} + +static ErlNifFunc enq_nif_funcs[] = { + {"new", 1, new_queue}, + {"push", 2, push_item}, + {"pop", 1, pop_item}, + {"size", 1, queue_size}, +}; + +ERL_NIF_INIT(enq_nif, enq_nif_funcs, enq_nif_load, NULL, enq_nif_upgrade, NULL) \ No newline at end of file diff --git a/c_src/.enq/fifo.h b/c_src/.enq/fifo.h new file mode 100644 index 0000000..ec45eed --- /dev/null +++ b/c_src/.enq/fifo.h @@ -0,0 +1,71 @@ +#ifndef _FIFO_H +#define _FIFO_H + +/* Main FIFO structure. Allocate memory for it yourself. */ +typedef struct fifo_t { + void *head; + void *tail; + unsigned long long count; +} fifo_t; + +typedef struct fifo_handle_t { + void *next; +} fifo_handle_t; + +/* Initializes fifo structure. */ +#define fifo_init(fifo) \ +do { \ + fifo_t *__q = fifo; \ + __q->head = NULL; \ + __q->tail = NULL; \ + __q->count = 0; \ +} while (0) + +#define __fifo_push(fifo, p, h) \ +do { \ + fifo_t *__q = fifo; \ + __typeof__ (p) e = p; \ + e->h.next = NULL; \ + if (__q->tail == NULL) { \ + __q->head = e; \ + } else { \ + __typeof__ (e) t = __q->tail; \ + t->h.next = e; \ + } \ + __q->tail = e; \ + __q->count++; \ +} while (0) + +/* Puts an element to the queue. 
*/ +#define fifo_push(fifo, p) __fifo_push (fifo, p, fifo_handle) + +#define __fifo_pop(fifo, p, h) \ +do { \ + fifo_t *__q = fifo; \ + p = __q->head; \ + if (p != NULL) { \ + __q->count--; \ + __q->head = p->h.next; \ + if (__q->tail == p) \ + __q->tail = NULL; \ + } \ +} while (0) + +/* Pops the first element out of the queue. */ +#define fifo_pop(fifo, p) __fifo_pop (fifo, p, fifo_handle) + +#define __fifo_peak(fifo, p, h) \ +do { \ + p = (fifo)->head; \ +} while (0) + +/* Returns the first elemnt of the queue without removing. */ +#define fifo_peak(fifo, p) __fifo_peak (fifo, p, fifo_handle) + +/* Returns the length of the queue. */ +#define fifo_length(fifo) ((fifo)->count) + +/* Returns true if the queue is empty. */ +#define fifo_empty(fifo) ((fifo)->count == 0) + +#endif /* _FIFO_H */ diff --git a/c_src/.enq/lifo.h b/c_src/.enq/lifo.h new file mode 100644 index 0000000..8e57c06 --- /dev/null +++ b/c_src/.enq/lifo.h @@ -0,0 +1,63 @@ +#ifndef _LIFO_H +#define _LIFO_H + +typedef struct lifo_t { + void *head; + void *tail; + unsigned long long count; +} lifo_t; + +typedef struct lifo_handle_t { + void *next; + void *prev; +} lifo_handle_t; + +#define lifo_init(lifo) \ +do { \ + lifo_t *__q = lifo; \ + __q->head = NULL; \ + __q->tail = NULL; \ + __q->count = 0; \ +} while (0) + +#define __lifo_push(lifo, p, h) \ +do { \ + lifo_t *__q = lifo; \ + __typeof__ (p) e = p; \ + e->h.next = __q->head; \ + e->h.prev = NULL; \ + if (__q->head == NULL) { \ + __q->tail = e; \ + } else { \ + __typeof__ (e) t = __q->head; \ + t->h.prev = e; \ + } \ + __q->head = e; \ + __q->count++; \ +} while (0) + +#define lifo_push(lifo, p) __lifo_push (lifo, p, lifo_handle) + +#define __lifo_pop(lifo, p, h) \ +do { \ + lifo_t *__q = lifo; \ + p = __q->head; \ + if (p != NULL) { \ + __q->count--; \ + __q->head = p->h.next; \ + if (__q->head != NULL) { \ + __typeof__ (p) t = __q->head; \ + t->h.prev = NULL; \ + } else { \ + __q->tail = NULL; \ + } \ + } \ +} while (0) + +#define lifo_pop(lifo, p) __lifo_pop (lifo, p, lifo_handle) + +#define lifo_length(lifo) ((lifo)->count) + +#define lifo_empty(lifo) ((lifo)->count == 0) + +#endif /* _LIFO_H */ diff --git a/c_src/.enq/rebar.config b/c_src/.enq/rebar.config new file mode 100644 index 0000000..95b16e5 --- /dev/null +++ b/c_src/.enq/rebar.config @@ -0,0 +1,12 @@ +{port_specs, [ + {"../../priv/enq_nif.so", ["*.c"]} +]}. + +% {port_env, [ +% {"LDFLAGS", "$ERL_LDFLAGS -lrt"}, +% {"CFLAGS", "$CFLAGS --std=gnu99 -Wall -O3"} +% ]}. + + + + diff --git a/c_src/bitmap_filter/bitmap_filter.c b/c_src/bitmap_filter/bitmap_filter.c new file mode 100644 index 0000000..8ea2fe7 --- /dev/null +++ b/c_src/bitmap_filter/bitmap_filter.c @@ -0,0 +1,80 @@ +#include + +/* + This function expects a list of list of tuples of type {int, _}. + It filters the tuples, using the first int field as a key, + and removing duplicating keys with precedence given the the order + in which they were seen (first given precedence). 
+*/ +static ERL_NIF_TERM +bitmap_filter(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + size_t seen_forklift_id[3000] = { 0 }; + + if(argc != 1) + { + return enif_make_badarg(env); + } + + if(!enif_is_list(env, argv[0])) + { + return enif_make_badarg(env); + } + + ERL_NIF_TERM ret = enif_make_list(env, 0); + + ERL_NIF_TERM outer_list = argv[0]; + ERL_NIF_TERM inner_list; + + ERL_NIF_TERM inner_head; + + const ERL_NIF_TERM* tuple_elems; + int num_elems; + unsigned int key; + + while(enif_get_list_cell(env, outer_list, &inner_list, &outer_list)) + { + if(!enif_is_list(env, inner_list)) + { + return enif_make_badarg(env); + } + + while(enif_get_list_cell(env, inner_list, &inner_head, &inner_list)) + { + if(!enif_get_tuple(env, inner_head, &num_elems, &tuple_elems)) + { + return enif_make_badarg(env); + } + + if(num_elems != 2) + { + return enif_make_badarg(env); + } + + if(!enif_get_uint(env, tuple_elems[0], &key)) + { + return enif_make_badarg(env); + } + + if(key >= 3000) + { + return enif_make_badarg(env); + } + + if(!seen_forklift_id[key]) + { + seen_forklift_id[key] = 1; + ret = enif_make_list_cell(env, inner_head, ret); + } + } + } + + return ret; +} + +static ErlNifFunc nif_funcs[] = +{ + {"filter", 1, bitmap_filter, 0} +}; + +ERL_NIF_INIT(bitmap_filter, nif_funcs, NULL, NULL, NULL, NULL) diff --git a/c_src/cq/rebar.config b/c_src/bitmap_filter/rebar.config similarity index 88% rename from c_src/cq/rebar.config rename to c_src/bitmap_filter/rebar.config index c55438b..0dfa1a9 100644 --- a/c_src/cq/rebar.config +++ b/c_src/bitmap_filter/rebar.config @@ -1,9 +1,9 @@ {port_specs, [ - {"../../priv/cq.so", [ - "*.c", - "*.cc" + {"../../priv/bitmap_filter.so", [ + "*.c" ]} ]}. +%{port_specs, [{"../../priv/granderl.so", []}]}. %% {port_env, [ %% {"(linux|solaris|freebsd|netbsd|openbsd|dragonfly|darwin|gnu)", diff --git a/c_src/bsn/bsn_ext.c b/c_src/bsn/bsn_ext.c new file mode 100644 index 0000000..2ea8d9a --- /dev/null +++ b/c_src/bsn/bsn_ext.c @@ -0,0 +1,448 @@ +#include "erl_nif.h" + +ErlNifResourceType* bsn_type; +ERL_NIF_TERM ATOM_TRUE, ATOM_FALSE; + +/* +typedef struct { + unsigned size; + unsigned char* data; +} ErlNifBinary; + +*/ + +struct bsn_elem_struct { + ErlNifBinary bin; + struct bsn_elem_struct* next; +}; +typedef struct bsn_elem_struct bsn_elem; + +typedef bsn_elem* bsn_list; + +typedef struct { + unsigned int count; /* count of elements */ + unsigned int max; /* count of slots */ + ErlNifMutex *mutex; + bsn_list* list; +} bsn_res; + + +inline static ERL_NIF_TERM bool_to_term(int value) { + return value ? ATOM_TRUE : ATOM_FALSE; +} + +/* Calculate the sum of chars. */ +unsigned int +private_hash(const ErlNifBinary* b, unsigned int max) +{ + unsigned char* ptr; + unsigned int i, sum = 0; + + ptr = b->data; + i = b->size; + + for (; i; i--, ptr++) + sum += *ptr; + + return sum % max; +} + +inline void +private_clear_elem(bsn_elem* el) +{ + enif_release_binary(&(el->bin)); + enif_free(el); +} + +inline void +private_chain_clear_all(bsn_elem* ptr) +{ + bsn_elem* next; + + while (ptr != NULL) { + + next = ptr->next; + private_clear_elem(ptr); + ptr = next; + } +} + +inline int +private_compare(ErlNifBinary* b1, ErlNifBinary* b2) +{ + unsigned char* p1; + unsigned char* p2; + unsigned len; + + if (b1->size != b2->size) + return 0; + + p1 = b1->data; + p2 = b2->data; + len = b1->size; + + while (len) { + if ((*p1) != (*p2)) + return 0; + + len--; p1++; p2++; + } + return 1; +} + +/* Skip existing elements. If the element bin is not found, return last element. 
+ * If el.bin == bin, return el. */ +bsn_elem* +private_chain_shift(bsn_elem* ptr, ErlNifBinary* bin, int* num_ptr) +{ + (*num_ptr)++; + if ((ptr) == NULL) + return ptr; + + while (1) { + if (private_compare(&(ptr->bin), bin)) { + /* found an equal binary. Invert num */ + (*num_ptr) *= -1; + return ptr; + } + if ((ptr->next) == NULL) + return ptr; + ptr = ptr->next; + (*num_ptr)++; + } +} + +/* Append the element `el' to the chain `chain' */ +void +private_chain_append(bsn_elem** chain, bsn_elem* el, int* num_ptr) +{ + bsn_elem* last; + + if ((*chain) == NULL) { + /* The new element is last */ + *chain = el; + } else { + last = private_chain_shift(*chain, &(el->bin), num_ptr); + if ((*num_ptr) < 0) { + /* Element was already added. */ + private_clear_elem(el); + } else { + last->next = el; + } + } +} + +bsn_elem* +private_chain_shift_clear(bsn_elem** ptr, ErlNifBinary* bin, int* num_ptr) +{ + bsn_elem** prev = NULL; + bsn_elem* el; + + while ((*ptr) != NULL) { + if (private_compare(&((*ptr)->bin), bin)) { + (*num_ptr) *= -1; + + /* found an equal binary. Delete elem. Invert num */ + if (prev == NULL) { + el = *ptr; + (*ptr) = (*ptr)->next; + return el; + } + *prev = (*ptr)->next; + return *ptr; + } + prev = ptr; + el = *ptr; + ptr = (bsn_elem**) &(el->next); + (*num_ptr)++; + } + + return NULL; +} + +static ERL_NIF_TERM +bsn_new(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + unsigned int max; + bsn_list* ptr; + bsn_res* r; + + if (!(enif_get_uint(env, argv[0], &max) && (max>0))) + return enif_make_badarg(env); + + ptr = enif_alloc(sizeof(bsn_list) * max); + if (ptr == NULL) + return enif_make_badarg(env); + + r = (bsn_res*) enif_alloc_resource(bsn_type, sizeof(bsn_res)); + r->mutex = enif_mutex_create("Mutex for the BSN writer"); + r->count = 0; + r->max = max; + r->list = ptr; + + for (; max; max--, ptr++) + *ptr = NULL; + + return enif_make_resource(env, r); +} + +static ERL_NIF_TERM +bsn_add(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + ErlNifBinary bin; + bsn_res* r; + unsigned int pos; + int num = 0; + bsn_elem* elem_ptr; + + if (!(enif_get_resource(env, argv[0], bsn_type, (void**) &r) + && enif_inspect_binary(env, argv[1], &bin))) + return enif_make_badarg(env); + + enif_realloc_binary(&bin, bin.size); + pos = private_hash(&bin, r->max); + + elem_ptr = enif_alloc(sizeof(bsn_elem)); + if (elem_ptr == NULL) + return enif_make_badarg(env); + + elem_ptr->next = NULL; + elem_ptr->bin = bin; + + enif_mutex_lock(r->mutex); + private_chain_append(&(r->list[pos]), elem_ptr, &num); + if (num >= 0) + (r->count)++; + enif_mutex_unlock(r->mutex); + + /* Already added */ + if (num < 0) + enif_release_binary(&(bin)); + + return enif_make_int(env, num); +} + +static ERL_NIF_TERM +bsn_search(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + ErlNifBinary bin; + bsn_res* r; + unsigned int pos; + int num = 0; + + if (!(enif_get_resource(env, argv[0], bsn_type, (void**) &r) + && enif_inspect_binary(env, argv[1], &bin))) + return enif_make_badarg(env); + + pos = private_hash(&bin, r->max); + + enif_mutex_lock(r->mutex); + private_chain_shift(r->list[pos], &bin, &num); + enif_mutex_unlock(r->mutex); + + return enif_make_int(env, num); +} + +static ERL_NIF_TERM +bsn_clear(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + ErlNifBinary bin; + bsn_res* r; + unsigned int pos; + int num = 0; + bsn_elem* elem_ptr; + + if (!(enif_get_resource(env, argv[0], bsn_type, (void**) &r) + && enif_inspect_binary(env, argv[1], &bin))) + return enif_make_badarg(env); + + pos = 
private_hash(&bin, r->max); + + enif_mutex_lock(r->mutex); + elem_ptr = private_chain_shift_clear(&(r->list[pos]), &bin, &num); + if (elem_ptr != NULL) { + private_clear_elem(elem_ptr); + (r->count)--; + } + enif_mutex_unlock(r->mutex); + + return enif_make_int(env, num); +} + +static ERL_NIF_TERM +bsn_all_chain(ErlNifEnv* env, bsn_elem* e, ERL_NIF_TERM tail) +{ + ERL_NIF_TERM head; + ErlNifBinary bin; + while (e != NULL) { + bin = e->bin; + enif_realloc_binary(&bin, bin.size); + head = enif_make_binary(env, &bin); + tail = enif_make_list_cell(env, head, tail); + e = e->next; + } + return tail; +} + +static ERL_NIF_TERM +bsn_chains(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + bsn_res* r; + unsigned int max; + bsn_list* ptr; + ERL_NIF_TERM tail, head; + + if (!enif_get_resource(env, argv[0], bsn_type, (void**) &r)) + return enif_make_badarg(env); + tail = enif_make_list(env, 0); + + ptr = r->list; + + enif_mutex_lock(r->mutex); + max = r->max; + + while (max) { + head = enif_make_list(env, 0); + head = bsn_all_chain(env, *ptr, head); + tail = enif_make_list_cell(env, head, tail); + + ptr++; + max--; + } + enif_mutex_unlock(r->mutex); + + return tail; +} + +static ERL_NIF_TERM +bsn_all(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + bsn_res* r; + unsigned int max; + bsn_list* ptr; + ERL_NIF_TERM list; + + if (!enif_get_resource(env, argv[0], bsn_type, (void**) &r)) + return enif_make_badarg(env); + list = enif_make_list(env, 0); + + ptr = r->list; + + enif_mutex_lock(r->mutex); + max = r->max; + + while (max) { + list = bsn_all_chain(env, *ptr, list); + ptr++; + max--; + } + enif_mutex_unlock(r->mutex); + + return list; +} + + +static ERL_NIF_TERM +bsn_count(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + bsn_res* r; + + if (!enif_get_resource(env, argv[0], bsn_type, (void**) &r)) + return enif_make_badarg(env); + + return enif_make_int(env, r->count); +} + + +static ERL_NIF_TERM +bsn_hash(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + ErlNifBinary bin; + unsigned int max; + + if (!(enif_inspect_binary(env, argv[0], &bin) + && enif_get_uint(env, argv[1], &max) && (max>0))) + return enif_make_badarg(env); + + return enif_make_uint(env, + private_hash(&bin, max)); +} + + +static ERL_NIF_TERM +bsn_compare(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + ErlNifBinary b1, b2; + + if (!(enif_inspect_binary(env, argv[0], &b1) + && enif_inspect_binary(env, argv[1], &b2))) + return enif_make_badarg(env); + + return bool_to_term(private_compare(&b1, &b2)); +} + +void private_clear_all(bsn_res* r) +{ + unsigned int max; + bsn_list* ptr; + max = r->max; + ptr = r->list; + + while (max) { + private_chain_clear_all(*ptr); + ptr++; + max--; + } +} + +void +bsn_type_dtor(ErlNifEnv* env, void* obj) +{ + bsn_res* r = (bsn_res*) obj; + private_clear_all(r); + enif_mutex_destroy(r->mutex); + enif_free(r->list); +} + + + +int +on_load(ErlNifEnv* env, void** priv, ERL_NIF_TERM info) +{ + ATOM_TRUE = enif_make_atom(env, "true"); + ATOM_FALSE = enif_make_atom(env, "false"); + + ErlNifResourceFlags flags = (ErlNifResourceFlags)(ERL_NIF_RT_CREATE | + ERL_NIF_RT_TAKEOVER); + + bsn_type = enif_open_resource_type(env, NULL, "bsn_type", + bsn_type_dtor, flags, NULL); + + if (bsn_type == NULL) return 1; + + return 0; +} + + +int +on_upgrade(ErlNifEnv* env, void** priv, void** old_priv, ERL_NIF_TERM info) +{ + return 0; +} + + +static ErlNifFunc nif_functions[] = { + {"new", 1, bsn_new}, + {"add", 2, bsn_add}, + {"all", 1, bsn_all}, + {"chains", 1, bsn_chains}, + {"in", 
2, bsn_search}, + {"clear", 2, bsn_clear}, + {"count", 1, bsn_count}, + + {"hash", 2, bsn_hash}, + {"compare", 2, bsn_compare}, +}; + + +ERL_NIF_INIT(bsn_ext, nif_functions, &on_load, &on_load, &on_upgrade, NULL); diff --git a/c_src/bsn/bsn_int.c b/c_src/bsn/bsn_int.c new file mode 100644 index 0000000..30e2944 --- /dev/null +++ b/c_src/bsn/bsn_int.c @@ -0,0 +1,331 @@ +#include "erl_nif.h" + + +ErlNifResourceType* bsn_type; +ERL_NIF_TERM ATOM_TRUE, ATOM_FALSE, ATOM_NO_MORE; + +struct bsn_elem_struct { + ErlNifBinary bin; + unsigned int hash; +}; +typedef struct bsn_elem_struct bsn_elem; + + +typedef struct { + unsigned int count; /* count of elements */ + unsigned int max; /* count of slots */ + ErlNifMutex *mutex; + bsn_elem* list; + unsigned int (*next_pos) + (void*, unsigned int, unsigned int); +} bsn_res; + + +inline static ERL_NIF_TERM bool_to_term(int value) { + return value ? ATOM_TRUE : ATOM_FALSE; +} + +unsigned int next_pos_linear(bsn_res* r, unsigned int hash, unsigned int step) { + return (hash + step) % (r->max); +} + +unsigned int next_pos_quadric(bsn_res* r, unsigned int hash, unsigned int step) { + return (hash + (step*step)) % (r->max); +} + +/* Calculate the sum of chars. */ +unsigned int +private_hash(const ErlNifBinary* b, unsigned int max) +{ + unsigned char* ptr; + unsigned int i, sum = 0; + + ptr = b->data; + i = b->size; + + for (; i; i--, ptr++) + sum += *ptr; + + return sum % max; +} + + + +inline int +private_compare(ErlNifBinary* b1, ErlNifBinary* b2) +{ + unsigned char* p1; + unsigned char* p2; + unsigned len; + + if (b1->size != b2->size) + return 0; + + p1 = b1->data; + p2 = b2->data; + len = b1->size; + + while (len) { + if ((*p1) != (*p2)) + return 0; + + len--; p1++; p2++; + } + return 1; +} + + +static ERL_NIF_TERM +bsn_new(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + int max; /* This value will be set by a client: + if (max<0) -> use quadric algorithm */ + bsn_elem* ptr; + bsn_res* r; + + if (!enif_get_int(env, argv[0], &max) || (max == 0)) + return enif_make_badarg(env); + + + r = (bsn_res*) enif_alloc_resource(bsn_type, sizeof(bsn_res)); + r->mutex = enif_mutex_create("Mutex for the BSN writer"); + r->count = 0; + + /* Select an algorithm */ + if (max>0) { + r->next_pos = &next_pos_linear; + } else if (max<0) { + r->next_pos = &next_pos_quadric; + max *= -1; + } + /* Now max is cells' count in the array. 
*/ + r->max = (unsigned int) max; + + ptr = enif_alloc(sizeof(bsn_elem) * max); + if (ptr == NULL) + return enif_make_badarg(env); + r->list = ptr; + + for (; max; max--, ptr++) + ptr->hash = r->max; + + + return enif_make_resource(env, r); +} + +static ERL_NIF_TERM +bsn_add(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + ErlNifBinary bin; + bsn_res* r; + unsigned int pos, hash, max; + int num = 0; + bsn_elem* elem_ptr; + + if (!(enif_get_resource(env, argv[0], bsn_type, (void**) &r) + && enif_inspect_binary(env, argv[1], &bin))) + return enif_make_badarg(env); + + enif_realloc_binary(&bin, bin.size); + hash = pos = private_hash(&bin, r->max); + + + enif_mutex_lock(r->mutex); + max = r->max; + + while (num < max) { + elem_ptr = &(r->list[pos]); + /* Found free space */ + if (elem_ptr->hash == max) { + elem_ptr->bin = bin; + elem_ptr->hash = hash; + break; + } + + + /* Found elem */ + if ((elem_ptr->hash == hash) + && private_compare(&bin, &(elem_ptr->bin))) { + num *= -1; + break; + } + + pos = (r->next_pos)(r, hash, num); + num++; + } + if ((num >= 0) && (num < max)) + (r->count)++; + + enif_mutex_unlock(r->mutex); + + /* Error: already added or owerflow */ + if (!((num >= 0) && (num < max))) + enif_release_binary(&bin); + + if (num >= max) + return ATOM_NO_MORE; + + return enif_make_int(env, num); +} + +static ERL_NIF_TERM +bsn_search(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + ErlNifBinary bin; + bsn_res* r; + unsigned int pos, max, hash; + int num = 1; + bsn_elem* elem_ptr; + + if (!(enif_get_resource(env, argv[0], bsn_type, (void**) &r) + && enif_inspect_binary(env, argv[1], &bin))) + return enif_make_badarg(env); + + hash = pos = private_hash(&bin, r->max); + + enif_mutex_lock(r->mutex); + max = r->max; + + while (num < max) { + elem_ptr = &(r->list[pos]); + /* Found free space */ + if (elem_ptr->hash == max) { + break; + } + + + /* Found elem */ + if ((elem_ptr->hash == hash) + && private_compare(&bin, &(elem_ptr->bin))) { + num *= -1; + break; + } + + pos = (r->next_pos)(r, hash, num); + num++; + } + enif_mutex_unlock(r->mutex); + + return enif_make_int(env, num); +} + +static ERL_NIF_TERM +bsn_clear(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + return enif_make_badarg(env); +} + + +static ERL_NIF_TERM +bsn_all(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + bsn_res* r; + unsigned int max, pos = 0; + ERL_NIF_TERM head, tail; + ErlNifBinary bin; + bsn_elem* elem_ptr; + + if (!enif_get_resource(env, argv[0], bsn_type, (void**) &r)) + return enif_make_badarg(env); + tail = enif_make_list(env, 0); + + enif_mutex_lock(r->mutex); + max = r->max; + elem_ptr = r->list; + + do { + + if (elem_ptr->hash != max) { + bin = elem_ptr->bin; + enif_realloc_binary(&bin, bin.size); + head = enif_make_binary(env, &bin); + tail = enif_make_list_cell(env, head, tail); + } + + elem_ptr++; + pos++; + } while (pos < max); + + enif_mutex_unlock(r->mutex); + + return tail; +} + + +static ERL_NIF_TERM +bsn_count(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + bsn_res* r; + + if (!enif_get_resource(env, argv[0], bsn_type, (void**) &r)) + return enif_make_badarg(env); + + return enif_make_int(env, r->count); +} + + +void private_clear_all(bsn_res* r) +{ + unsigned int max, num; + bsn_elem* ptr; + num = max = r->max; + ptr = r->list; + + while (num) { + if (ptr->hash != max) { + enif_release_binary(&(ptr->bin)); + } + ptr++; + num--; + } +} + +void +bsn_type_dtor(ErlNifEnv* env, void* obj) +{ + bsn_res* r = (bsn_res*) obj; + private_clear_all(r); + 
enif_mutex_destroy(r->mutex); + enif_free(r->list); +} + + + +int +on_load(ErlNifEnv* env, void** priv, ERL_NIF_TERM info) +{ + ATOM_TRUE = enif_make_atom(env, "true"); + ATOM_FALSE = enif_make_atom(env, "false"); + ATOM_NO_MORE = enif_make_atom(env, "no_more"); + + + ErlNifResourceFlags flags = (ErlNifResourceFlags)(ERL_NIF_RT_CREATE | + ERL_NIF_RT_TAKEOVER); + + bsn_type = enif_open_resource_type(env, NULL, "bsn_type", + bsn_type_dtor, flags, NULL); + + if (bsn_type == NULL) return 1; + + return 0; +} + + +int +on_upgrade(ErlNifEnv* env, void** priv, void** old_priv, ERL_NIF_TERM info) +{ + return 0; +} + + +static ErlNifFunc nif_functions[] = { + {"new", 1, bsn_new}, + {"add", 2, bsn_add}, + {"all", 1, bsn_all}, + {"in", 2, bsn_search}, + {"clear", 2, bsn_clear}, + {"count", 1, bsn_count}, +}; + + +ERL_NIF_INIT(bsn_int, nif_functions, &on_load, &on_load, &on_upgrade, NULL); diff --git a/c_src/bsn/c_src/bsn_ext.c b/c_src/bsn/c_src/bsn_ext.c new file mode 100644 index 0000000..2ea8d9a --- /dev/null +++ b/c_src/bsn/c_src/bsn_ext.c @@ -0,0 +1,448 @@ +#include "erl_nif.h" + +ErlNifResourceType* bsn_type; +ERL_NIF_TERM ATOM_TRUE, ATOM_FALSE; + +/* +typedef struct { + unsigned size; + unsigned char* data; +} ErlNifBinary; + +*/ + +struct bsn_elem_struct { + ErlNifBinary bin; + struct bsn_elem_struct* next; +}; +typedef struct bsn_elem_struct bsn_elem; + +typedef bsn_elem* bsn_list; + +typedef struct { + unsigned int count; /* count of elements */ + unsigned int max; /* count of slots */ + ErlNifMutex *mutex; + bsn_list* list; +} bsn_res; + + +inline static ERL_NIF_TERM bool_to_term(int value) { + return value ? ATOM_TRUE : ATOM_FALSE; +} + +/* Calculate the sum of chars. */ +unsigned int +private_hash(const ErlNifBinary* b, unsigned int max) +{ + unsigned char* ptr; + unsigned int i, sum = 0; + + ptr = b->data; + i = b->size; + + for (; i; i--, ptr++) + sum += *ptr; + + return sum % max; +} + +inline void +private_clear_elem(bsn_elem* el) +{ + enif_release_binary(&(el->bin)); + enif_free(el); +} + +inline void +private_chain_clear_all(bsn_elem* ptr) +{ + bsn_elem* next; + + while (ptr != NULL) { + + next = ptr->next; + private_clear_elem(ptr); + ptr = next; + } +} + +inline int +private_compare(ErlNifBinary* b1, ErlNifBinary* b2) +{ + unsigned char* p1; + unsigned char* p2; + unsigned len; + + if (b1->size != b2->size) + return 0; + + p1 = b1->data; + p2 = b2->data; + len = b1->size; + + while (len) { + if ((*p1) != (*p2)) + return 0; + + len--; p1++; p2++; + } + return 1; +} + +/* Skip existing elements. If the element bin is not found, return last element. + * If el.bin == bin, return el. */ +bsn_elem* +private_chain_shift(bsn_elem* ptr, ErlNifBinary* bin, int* num_ptr) +{ + (*num_ptr)++; + if ((ptr) == NULL) + return ptr; + + while (1) { + if (private_compare(&(ptr->bin), bin)) { + /* found an equal binary. Invert num */ + (*num_ptr) *= -1; + return ptr; + } + if ((ptr->next) == NULL) + return ptr; + ptr = ptr->next; + (*num_ptr)++; + } +} + +/* Append the element `el' to the chain `chain' */ +void +private_chain_append(bsn_elem** chain, bsn_elem* el, int* num_ptr) +{ + bsn_elem* last; + + if ((*chain) == NULL) { + /* The new element is last */ + *chain = el; + } else { + last = private_chain_shift(*chain, &(el->bin), num_ptr); + if ((*num_ptr) < 0) { + /* Element was already added. 
*/ + private_clear_elem(el); + } else { + last->next = el; + } + } +} + +bsn_elem* +private_chain_shift_clear(bsn_elem** ptr, ErlNifBinary* bin, int* num_ptr) +{ + bsn_elem** prev = NULL; + bsn_elem* el; + + while ((*ptr) != NULL) { + if (private_compare(&((*ptr)->bin), bin)) { + (*num_ptr) *= -1; + + /* found an equal binary. Delete elem. Invert num */ + if (prev == NULL) { + el = *ptr; + (*ptr) = (*ptr)->next; + return el; + } + *prev = (*ptr)->next; + return *ptr; + } + prev = ptr; + el = *ptr; + ptr = (bsn_elem**) &(el->next); + (*num_ptr)++; + } + + return NULL; +} + +static ERL_NIF_TERM +bsn_new(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + unsigned int max; + bsn_list* ptr; + bsn_res* r; + + if (!(enif_get_uint(env, argv[0], &max) && (max>0))) + return enif_make_badarg(env); + + ptr = enif_alloc(sizeof(bsn_list) * max); + if (ptr == NULL) + return enif_make_badarg(env); + + r = (bsn_res*) enif_alloc_resource(bsn_type, sizeof(bsn_res)); + r->mutex = enif_mutex_create("Mutex for the BSN writer"); + r->count = 0; + r->max = max; + r->list = ptr; + + for (; max; max--, ptr++) + *ptr = NULL; + + return enif_make_resource(env, r); +} + +static ERL_NIF_TERM +bsn_add(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + ErlNifBinary bin; + bsn_res* r; + unsigned int pos; + int num = 0; + bsn_elem* elem_ptr; + + if (!(enif_get_resource(env, argv[0], bsn_type, (void**) &r) + && enif_inspect_binary(env, argv[1], &bin))) + return enif_make_badarg(env); + + enif_realloc_binary(&bin, bin.size); + pos = private_hash(&bin, r->max); + + elem_ptr = enif_alloc(sizeof(bsn_elem)); + if (elem_ptr == NULL) + return enif_make_badarg(env); + + elem_ptr->next = NULL; + elem_ptr->bin = bin; + + enif_mutex_lock(r->mutex); + private_chain_append(&(r->list[pos]), elem_ptr, &num); + if (num >= 0) + (r->count)++; + enif_mutex_unlock(r->mutex); + + /* Already added */ + if (num < 0) + enif_release_binary(&(bin)); + + return enif_make_int(env, num); +} + +static ERL_NIF_TERM +bsn_search(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + ErlNifBinary bin; + bsn_res* r; + unsigned int pos; + int num = 0; + + if (!(enif_get_resource(env, argv[0], bsn_type, (void**) &r) + && enif_inspect_binary(env, argv[1], &bin))) + return enif_make_badarg(env); + + pos = private_hash(&bin, r->max); + + enif_mutex_lock(r->mutex); + private_chain_shift(r->list[pos], &bin, &num); + enif_mutex_unlock(r->mutex); + + return enif_make_int(env, num); +} + +static ERL_NIF_TERM +bsn_clear(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + ErlNifBinary bin; + bsn_res* r; + unsigned int pos; + int num = 0; + bsn_elem* elem_ptr; + + if (!(enif_get_resource(env, argv[0], bsn_type, (void**) &r) + && enif_inspect_binary(env, argv[1], &bin))) + return enif_make_badarg(env); + + pos = private_hash(&bin, r->max); + + enif_mutex_lock(r->mutex); + elem_ptr = private_chain_shift_clear(&(r->list[pos]), &bin, &num); + if (elem_ptr != NULL) { + private_clear_elem(elem_ptr); + (r->count)--; + } + enif_mutex_unlock(r->mutex); + + return enif_make_int(env, num); +} + +static ERL_NIF_TERM +bsn_all_chain(ErlNifEnv* env, bsn_elem* e, ERL_NIF_TERM tail) +{ + ERL_NIF_TERM head; + ErlNifBinary bin; + while (e != NULL) { + bin = e->bin; + enif_realloc_binary(&bin, bin.size); + head = enif_make_binary(env, &bin); + tail = enif_make_list_cell(env, head, tail); + e = e->next; + } + return tail; +} + +static ERL_NIF_TERM +bsn_chains(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + bsn_res* r; + unsigned int max; + bsn_list* ptr; + 
ERL_NIF_TERM tail, head; + + if (!enif_get_resource(env, argv[0], bsn_type, (void**) &r)) + return enif_make_badarg(env); + tail = enif_make_list(env, 0); + + ptr = r->list; + + enif_mutex_lock(r->mutex); + max = r->max; + + while (max) { + head = enif_make_list(env, 0); + head = bsn_all_chain(env, *ptr, head); + tail = enif_make_list_cell(env, head, tail); + + ptr++; + max--; + } + enif_mutex_unlock(r->mutex); + + return tail; +} + +static ERL_NIF_TERM +bsn_all(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + bsn_res* r; + unsigned int max; + bsn_list* ptr; + ERL_NIF_TERM list; + + if (!enif_get_resource(env, argv[0], bsn_type, (void**) &r)) + return enif_make_badarg(env); + list = enif_make_list(env, 0); + + ptr = r->list; + + enif_mutex_lock(r->mutex); + max = r->max; + + while (max) { + list = bsn_all_chain(env, *ptr, list); + ptr++; + max--; + } + enif_mutex_unlock(r->mutex); + + return list; +} + + +static ERL_NIF_TERM +bsn_count(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + bsn_res* r; + + if (!enif_get_resource(env, argv[0], bsn_type, (void**) &r)) + return enif_make_badarg(env); + + return enif_make_int(env, r->count); +} + + +static ERL_NIF_TERM +bsn_hash(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + ErlNifBinary bin; + unsigned int max; + + if (!(enif_inspect_binary(env, argv[0], &bin) + && enif_get_uint(env, argv[1], &max) && (max>0))) + return enif_make_badarg(env); + + return enif_make_uint(env, + private_hash(&bin, max)); +} + + +static ERL_NIF_TERM +bsn_compare(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + ErlNifBinary b1, b2; + + if (!(enif_inspect_binary(env, argv[0], &b1) + && enif_inspect_binary(env, argv[1], &b2))) + return enif_make_badarg(env); + + return bool_to_term(private_compare(&b1, &b2)); +} + +void private_clear_all(bsn_res* r) +{ + unsigned int max; + bsn_list* ptr; + max = r->max; + ptr = r->list; + + while (max) { + private_chain_clear_all(*ptr); + ptr++; + max--; + } +} + +void +bsn_type_dtor(ErlNifEnv* env, void* obj) +{ + bsn_res* r = (bsn_res*) obj; + private_clear_all(r); + enif_mutex_destroy(r->mutex); + enif_free(r->list); +} + + + +int +on_load(ErlNifEnv* env, void** priv, ERL_NIF_TERM info) +{ + ATOM_TRUE = enif_make_atom(env, "true"); + ATOM_FALSE = enif_make_atom(env, "false"); + + ErlNifResourceFlags flags = (ErlNifResourceFlags)(ERL_NIF_RT_CREATE | + ERL_NIF_RT_TAKEOVER); + + bsn_type = enif_open_resource_type(env, NULL, "bsn_type", + bsn_type_dtor, flags, NULL); + + if (bsn_type == NULL) return 1; + + return 0; +} + + +int +on_upgrade(ErlNifEnv* env, void** priv, void** old_priv, ERL_NIF_TERM info) +{ + return 0; +} + + +static ErlNifFunc nif_functions[] = { + {"new", 1, bsn_new}, + {"add", 2, bsn_add}, + {"all", 1, bsn_all}, + {"chains", 1, bsn_chains}, + {"in", 2, bsn_search}, + {"clear", 2, bsn_clear}, + {"count", 1, bsn_count}, + + {"hash", 2, bsn_hash}, + {"compare", 2, bsn_compare}, +}; + + +ERL_NIF_INIT(bsn_ext, nif_functions, &on_load, &on_load, &on_upgrade, NULL); diff --git a/c_src/bsn/c_src/bsn_int.c b/c_src/bsn/c_src/bsn_int.c new file mode 100644 index 0000000..30e2944 --- /dev/null +++ b/c_src/bsn/c_src/bsn_int.c @@ -0,0 +1,331 @@ +#include "erl_nif.h" + + +ErlNifResourceType* bsn_type; +ERL_NIF_TERM ATOM_TRUE, ATOM_FALSE, ATOM_NO_MORE; + +struct bsn_elem_struct { + ErlNifBinary bin; + unsigned int hash; +}; +typedef struct bsn_elem_struct bsn_elem; + + +typedef struct { + unsigned int count; /* count of elements */ + unsigned int max; /* count of slots */ + ErlNifMutex *mutex; + 
bsn_elem* list; + unsigned int (*next_pos) + (void*, unsigned int, unsigned int); +} bsn_res; + + +inline static ERL_NIF_TERM bool_to_term(int value) { + return value ? ATOM_TRUE : ATOM_FALSE; +} + +unsigned int next_pos_linear(bsn_res* r, unsigned int hash, unsigned int step) { + return (hash + step) % (r->max); +} + +unsigned int next_pos_quadric(bsn_res* r, unsigned int hash, unsigned int step) { + return (hash + (step*step)) % (r->max); +} + +/* Calculate the sum of chars. */ +unsigned int +private_hash(const ErlNifBinary* b, unsigned int max) +{ + unsigned char* ptr; + unsigned int i, sum = 0; + + ptr = b->data; + i = b->size; + + for (; i; i--, ptr++) + sum += *ptr; + + return sum % max; +} + + + +inline int +private_compare(ErlNifBinary* b1, ErlNifBinary* b2) +{ + unsigned char* p1; + unsigned char* p2; + unsigned len; + + if (b1->size != b2->size) + return 0; + + p1 = b1->data; + p2 = b2->data; + len = b1->size; + + while (len) { + if ((*p1) != (*p2)) + return 0; + + len--; p1++; p2++; + } + return 1; +} + + +static ERL_NIF_TERM +bsn_new(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + int max; /* This value will be set by a client: + if (max<0) -> use quadric algorithm */ + bsn_elem* ptr; + bsn_res* r; + + if (!enif_get_int(env, argv[0], &max) || (max == 0)) + return enif_make_badarg(env); + + + r = (bsn_res*) enif_alloc_resource(bsn_type, sizeof(bsn_res)); + r->mutex = enif_mutex_create("Mutex for the BSN writer"); + r->count = 0; + + /* Select an algorithm */ + if (max>0) { + r->next_pos = &next_pos_linear; + } else if (max<0) { + r->next_pos = &next_pos_quadric; + max *= -1; + } + /* Now max is cells' count in the array. */ + r->max = (unsigned int) max; + + ptr = enif_alloc(sizeof(bsn_elem) * max); + if (ptr == NULL) + return enif_make_badarg(env); + r->list = ptr; + + for (; max; max--, ptr++) + ptr->hash = r->max; + + + return enif_make_resource(env, r); +} + +static ERL_NIF_TERM +bsn_add(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + ErlNifBinary bin; + bsn_res* r; + unsigned int pos, hash, max; + int num = 0; + bsn_elem* elem_ptr; + + if (!(enif_get_resource(env, argv[0], bsn_type, (void**) &r) + && enif_inspect_binary(env, argv[1], &bin))) + return enif_make_badarg(env); + + enif_realloc_binary(&bin, bin.size); + hash = pos = private_hash(&bin, r->max); + + + enif_mutex_lock(r->mutex); + max = r->max; + + while (num < max) { + elem_ptr = &(r->list[pos]); + /* Found free space */ + if (elem_ptr->hash == max) { + elem_ptr->bin = bin; + elem_ptr->hash = hash; + break; + } + + + /* Found elem */ + if ((elem_ptr->hash == hash) + && private_compare(&bin, &(elem_ptr->bin))) { + num *= -1; + break; + } + + pos = (r->next_pos)(r, hash, num); + num++; + } + if ((num >= 0) && (num < max)) + (r->count)++; + + enif_mutex_unlock(r->mutex); + + /* Error: already added or owerflow */ + if (!((num >= 0) && (num < max))) + enif_release_binary(&bin); + + if (num >= max) + return ATOM_NO_MORE; + + return enif_make_int(env, num); +} + +static ERL_NIF_TERM +bsn_search(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + ErlNifBinary bin; + bsn_res* r; + unsigned int pos, max, hash; + int num = 1; + bsn_elem* elem_ptr; + + if (!(enif_get_resource(env, argv[0], bsn_type, (void**) &r) + && enif_inspect_binary(env, argv[1], &bin))) + return enif_make_badarg(env); + + hash = pos = private_hash(&bin, r->max); + + enif_mutex_lock(r->mutex); + max = r->max; + + while (num < max) { + elem_ptr = &(r->list[pos]); + /* Found free space */ + if (elem_ptr->hash == max) { + break; + 
} + + + /* Found elem */ + if ((elem_ptr->hash == hash) + && private_compare(&bin, &(elem_ptr->bin))) { + num *= -1; + break; + } + + pos = (r->next_pos)(r, hash, num); + num++; + } + enif_mutex_unlock(r->mutex); + + return enif_make_int(env, num); +} + +static ERL_NIF_TERM +bsn_clear(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + return enif_make_badarg(env); +} + + +static ERL_NIF_TERM +bsn_all(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + bsn_res* r; + unsigned int max, pos = 0; + ERL_NIF_TERM head, tail; + ErlNifBinary bin; + bsn_elem* elem_ptr; + + if (!enif_get_resource(env, argv[0], bsn_type, (void**) &r)) + return enif_make_badarg(env); + tail = enif_make_list(env, 0); + + enif_mutex_lock(r->mutex); + max = r->max; + elem_ptr = r->list; + + do { + + if (elem_ptr->hash != max) { + bin = elem_ptr->bin; + enif_realloc_binary(&bin, bin.size); + head = enif_make_binary(env, &bin); + tail = enif_make_list_cell(env, head, tail); + } + + elem_ptr++; + pos++; + } while (pos < max); + + enif_mutex_unlock(r->mutex); + + return tail; +} + + +static ERL_NIF_TERM +bsn_count(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + bsn_res* r; + + if (!enif_get_resource(env, argv[0], bsn_type, (void**) &r)) + return enif_make_badarg(env); + + return enif_make_int(env, r->count); +} + + +void private_clear_all(bsn_res* r) +{ + unsigned int max, num; + bsn_elem* ptr; + num = max = r->max; + ptr = r->list; + + while (num) { + if (ptr->hash != max) { + enif_release_binary(&(ptr->bin)); + } + ptr++; + num--; + } +} + +void +bsn_type_dtor(ErlNifEnv* env, void* obj) +{ + bsn_res* r = (bsn_res*) obj; + private_clear_all(r); + enif_mutex_destroy(r->mutex); + enif_free(r->list); +} + + + +int +on_load(ErlNifEnv* env, void** priv, ERL_NIF_TERM info) +{ + ATOM_TRUE = enif_make_atom(env, "true"); + ATOM_FALSE = enif_make_atom(env, "false"); + ATOM_NO_MORE = enif_make_atom(env, "no_more"); + + + ErlNifResourceFlags flags = (ErlNifResourceFlags)(ERL_NIF_RT_CREATE | + ERL_NIF_RT_TAKEOVER); + + bsn_type = enif_open_resource_type(env, NULL, "bsn_type", + bsn_type_dtor, flags, NULL); + + if (bsn_type == NULL) return 1; + + return 0; +} + + +int +on_upgrade(ErlNifEnv* env, void** priv, void** old_priv, ERL_NIF_TERM info) +{ + return 0; +} + + +static ErlNifFunc nif_functions[] = { + {"new", 1, bsn_new}, + {"add", 2, bsn_add}, + {"all", 1, bsn_all}, + {"in", 2, bsn_search}, + {"clear", 2, bsn_clear}, + {"count", 1, bsn_count}, +}; + + +ERL_NIF_INIT(bsn_int, nif_functions, &on_load, &on_load, &on_upgrade, NULL); diff --git a/c_src/cq2/rebar.config b/c_src/bsn/rebar.config similarity index 84% rename from c_src/cq2/rebar.config rename to c_src/bsn/rebar.config index 9bbec13..77bc6d8 100644 --- a/c_src/cq2/rebar.config +++ b/c_src/bsn/rebar.config @@ -1,9 +1,8 @@ {port_specs, [ - {"../../priv/cq2.so", [ - "*.c", - "*.cc" - ]} + {"../../priv/bsn_ext.so", ["bsn_ext.c"]}, + {"../../priv/bsn_int.so", ["bsn_int.c"]} ]}. +%{port_specs, [{"../../priv/granderl.so", []}]}. %% {port_env, [ %% {"(linux|solaris|freebsd|netbsd|openbsd|dragonfly|darwin|gnu)", @@ -24,3 +23,7 @@ %% %% {"win32", "CXXFLAGS", "$CXXFLAGS /O2 /DNDEBUG"} %% ]}. + + + + diff --git a/c_src/couchdb_hqueue/c_src/hqueue.c b/c_src/couchdb_hqueue/c_src/hqueue.c new file mode 100644 index 0000000..f02f251 --- /dev/null +++ b/c_src/couchdb_hqueue/c_src/hqueue.c @@ -0,0 +1,318 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. 
You may obtain a copy of +// the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations under +// the License. + +#include +#include +#include +#include +#include + +#include "hqueue.h" + + +struct hqueue +{ + int version; + uint32_t idx; + uint32_t max_elems; + uint32_t heap_size; + hqnode_t* heap; // one based index +}; + + +struct hqnode +{ + double priority; + void* value; +}; + + +static inline void +hqueue_exchange(hqueue_t* hqueue, int i, int j) +{ + hqnode_t tmp; + + tmp = hqueue->heap[i]; + hqueue->heap[i] = hqueue->heap[j]; + hqueue->heap[j] = tmp; + return; +} + + +static inline int +hqueue_less(hqueue_t* hqueue, int i, int j) +{ + return hqueue->heap[i].priority < hqueue->heap[j].priority; +} + + +static void +hqueue_fix_up(hqueue_t* hqueue, int k) +{ + while(k > 1 && hqueue_less(hqueue, k/2, k)) { + hqueue_exchange(hqueue, k/2, k); + k = k/2; + } + return; +} + + +static void +hqueue_fix_down(hqueue_t* hqueue, int k) +{ + int j; + int n = hqueue->idx; + + while(2*k <= n) { + j = 2*k; + if(j < n && hqueue_less(hqueue, j, j+1)) { + j++; + } + if(!hqueue_less(hqueue, k, j)) { + break; + } + hqueue_exchange(hqueue, k, j); + k = j; + } + return; +} + + +hqueue_t* +hqueue_new(uint32_t max_elems, uint32_t heap_size) +{ + hqueue_t* hqueue = NULL; + size_t total_heap_size; + + if(max_elems == 0 || heap_size == 0) { + return NULL; + } + + if(max_elems < heap_size) { + heap_size = max_elems; + } + + hqueue = HQUEUE_ALLOC(sizeof(hqueue_t)); + if(hqueue == NULL) { + return NULL; + } + + memset(hqueue, '\0', sizeof(hqueue_t)); + hqueue->version = HQ_VERSION; + hqueue->max_elems = max_elems; + hqueue->heap_size = heap_size; + hqueue->idx = 0; + + total_heap_size = sizeof(hqnode_t) * (hqueue->heap_size+1); + + hqueue->heap = (hqnode_t*) HQUEUE_ALLOC(total_heap_size); + + if(hqueue->heap == NULL ) { + HQUEUE_FREE(hqueue); + return NULL; + } + + memset(hqueue->heap, '\0', total_heap_size); + + return hqueue; +} + + +void +hqueue_free(hqueue_t* hqueue) +{ + HQUEUE_FREE(hqueue->heap); + HQUEUE_FREE(hqueue); + + return; +} + + +void +hqueue_free2(hqueue_t* hqueue, void (*free_node)(void* node)) +{ + uint32_t i; + + for(i = 1; i < hqueue->heap_size + 1; i++) { + if(i <= hqueue->idx) { + free_node(hqueue->heap[i].value); + } else { + assert(hqueue->heap[i].value == NULL && "inactive elements must be NULL"); + } + } + + hqueue_free(hqueue); + + return; +} + + +// Extraction order is undefined for entries with duplicate priorities +int +hqueue_extract_max(hqueue_t* hqueue, double* priority, void** value) +{ + if(hqueue->idx <= 0) { + return 0; + } + + hqueue_exchange(hqueue, 1, hqueue->idx); + + *priority = hqueue->heap[hqueue->idx].priority; + *value = hqueue->heap[hqueue->idx].value; + + hqueue->heap[hqueue->idx].value = NULL; + + hqueue->idx--; // heap uses one based index, so we decrement after + hqueue_fix_down(hqueue, 1); + + return 1; +} + + +void +hqueue_get_elem(hqueue_t* hqueue, uint32_t idx, double *priority, void** value) +{ + *priority = hqueue->heap[idx].priority; + *value = hqueue->heap[idx].value; + + return; +} + + +static int +hqueue_maybe_resize(hqueue_t* hqueue) +{ + uint32_t min_resize; + + if(hqueue->idx + 1 > hqueue->heap_size) { + if(hqueue->idx * HQ_SCALE_FACTOR > 
hqueue->max_elems) { + min_resize = hqueue->max_elems; + } else { + min_resize = hqueue->idx * HQ_SCALE_FACTOR; + } + return hqueue_resize_heap(hqueue, min_resize); + } + + return 1; +} + + +int +hqueue_insert(hqueue_t* hqueue, double priority, void* value) +{ + if(hqueue->idx >= hqueue->max_elems) { + return 0; + } + + if(!hqueue_maybe_resize(hqueue)) { + return 0; + } + + hqueue->idx++; // heap uses one based index, so we increment first + hqueue->heap[hqueue->idx].priority = priority; + hqueue->heap[hqueue->idx].value = value; + + hqueue_fix_up(hqueue, hqueue->idx); + + return 1; +} + + +uint32_t +hqueue_size(hqueue_t* hqueue) +{ + return hqueue->idx; +} + + +uint32_t +hqueue_heap_size(hqueue_t* hqueue) +{ + return hqueue->heap_size; +} + + +uint32_t +hqueue_max_elems(hqueue_t* hqueue) +{ + return hqueue->max_elems; +} + + +void +hqueue_scale_by(hqueue_t* hqueue, double factor) +{ + uint32_t i; + + for(i = 1; i <= hqueue->idx && i <= hqueue->heap_size; i++) { + hqueue->heap[i].priority *= factor; + } + + return; +} + + +uint32_t +hqueue_resize_heap(hqueue_t* hqueue, uint32_t new_heap_size) +{ + uint32_t old_heap_size; + size_t total_heap_size; + hqnode_t* tmp_heap; + uint32_t i; + + if(hqueue->idx > new_heap_size) { + return 0; + } + + total_heap_size = sizeof(hqnode_t) * (new_heap_size+1); + old_heap_size = hqueue->heap_size; + + if((tmp_heap = (hqnode_t*) HQUEUE_ALLOC(total_heap_size)) == NULL) { + return 0; + } + + memset(tmp_heap, '\0', total_heap_size); + + for(i = 1; i <= hqueue->idx && i <= old_heap_size; i++) { + if(i <= hqueue->idx) { + tmp_heap[i] = hqueue->heap[i]; + hqueue->heap[i].value = NULL; + } else { + assert(hqueue->heap[i].value == NULL && + "unexpected NULL element during heap resize"); + } + } + + HQUEUE_FREE(hqueue->heap); + hqueue->heap = tmp_heap; + hqueue->heap_size = new_heap_size; + + return old_heap_size; +} + + +int +hqueue_set_max_elems(hqueue_t* hqueue, uint32_t new_max_elems) +{ + uint32_t old_max_elems; + + if(hqueue->heap_size > new_max_elems) { + if(!hqueue_resize_heap(hqueue, new_max_elems)) { + return 0; + } + } + + old_max_elems = hqueue->max_elems; + hqueue->max_elems = new_max_elems; + + return old_max_elems; +} diff --git a/c_src/couchdb_hqueue/c_src/hqueue.d b/c_src/couchdb_hqueue/c_src/hqueue.d new file mode 100644 index 0000000..b8094c8 --- /dev/null +++ b/c_src/couchdb_hqueue/c_src/hqueue.d @@ -0,0 +1,5 @@ +c_src/hqueue.o: c_src/hqueue.c c_src/hqueue.h \ + /usr/lib/erlang/erts-10.6.2/include/erl_nif.h \ + /usr/lib/erlang/erts-10.6.2/include/erl_drv_nif.h \ + /usr/lib/erlang/erts-10.6.2/include/erl_int_sizes_config.h \ + /usr/lib/erlang/erts-10.6.2/include/erl_nif_api_funcs.h diff --git a/c_src/couchdb_hqueue/c_src/hqueue.h b/c_src/couchdb_hqueue/c_src/hqueue.h new file mode 100644 index 0000000..4e422e4 --- /dev/null +++ b/c_src/couchdb_hqueue/c_src/hqueue.h @@ -0,0 +1,60 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. You may obtain a copy of +// the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations under +// the License. 
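+// hqueue is a fixed-capacity binary max-heap keyed by a double priority and
+// holding opaque void* values. The backing array is one-based (slot 0 is
+// unused), grows by HQ_SCALE_FACTOR on demand up to max_elems, and
+// hqueue_insert()/hqueue_extract_max() return 1 on success and 0 when the
+// queue is full or empty. Extraction order is undefined for equal
+// priorities. A minimal usage sketch, assuming the default malloc/free
+// allocator (i.e. HQ_ENIF_ALLOC not defined):
+//
+//     hqueue_t* hq = hqueue_new(1024 /* max_elems */, 16 /* heap_size */);
+//     hqueue_insert(hq, 2.0, "low");
+//     hqueue_insert(hq, 5.0, "high");
+//     double p; void* v;
+//     hqueue_extract_max(hq, &p, &v);  /* p == 5.0, v == "high" */
+//     hqueue_free(hq);                 /* stored values are not freed here */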
+ +#pragma once + + +#include + +#define HQ_VERSION 0 +#define HQ_SCALE_FACTOR 2 // heap expansion scale factor + + +// Override the default memory allocator to use the Erlang versions. +// This bubbles up memory usage for the NIF into Erlang stats. +#ifdef HQ_ENIF_ALLOC + +#include "erl_nif.h" + +#define HQUEUE_ALLOC enif_alloc +#define HQUEUE_FREE enif_free + +#else + +#define HQUEUE_ALLOC malloc +#define HQUEUE_FREE free + +#endif + + +typedef struct hqnode hqnode_t; +typedef struct hqueue hqueue_t; + + +hqueue_t* hqueue_new(uint32_t max_elems, uint32_t heap_size); + +void hqueue_free(hqueue_t* hqueue); +void hqueue_free2(hqueue_t* hqueue, void (*free_node)(void* node)); + +int hqueue_insert(hqueue_t* hqueue, double priority, void* val); +int hqueue_extract_max(hqueue_t* hqueue, double* priority, void** value); +void hqueue_get_elem(hqueue_t* hqueue, uint32_t idx, double *priority, + void** value); + +uint32_t hqueue_size(hqueue_t* hqueue); +uint32_t hqueue_heap_size(hqueue_t* hqueue); + +uint32_t hqueue_max_elems(hqueue_t* hqueue); +int hqueue_set_max_elems(hqueue_t* hqueue, uint32_t new_max_elems); + +void hqueue_scale_by(hqueue_t* hqueue, double factor); +uint32_t hqueue_resize_heap(hqueue_t* hqueue, uint32_t new_heap_size); diff --git a/c_src/couchdb_hqueue/c_src/hqueue_nif.c b/c_src/couchdb_hqueue/c_src/hqueue_nif.c new file mode 100644 index 0000000..7cbc5e2 --- /dev/null +++ b/c_src/couchdb_hqueue/c_src/hqueue_nif.c @@ -0,0 +1,601 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. You may obtain a copy of +// the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations under +// the License. 
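+// Implementation notes, summarising the code below: every inserted Erlang
+// term is copied into a private ErlNifEnv owned by an hqnode_nif_t node, and
+// that env is released when the term is extracted or when the queue resource
+// is destroyed; the resource records the pid of the creating process and
+// check_pid() rejects calls from any other process with badarg; priorities
+// must be non-negative doubles; soft failures come back as
+// {error, empty | full | too_small}. Going by the funcs table at the end of
+// this file, the Erlang-side call shape is roughly:
+//
+//     {ok, Q} = hqueue:new([{max_elems, 1024}, {heap_size, 64}]),
+//     ok = hqueue:insert(Q, 1.0, foo),
+//     {1.0, foo} = hqueue:extract_max(Q),
+//     {error, empty} = hqueue:extract_max(Q).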
+ +#include +#include +#include + +#include "hqueue.h" + + +typedef struct +{ + ERL_NIF_TERM atom_ok; + ERL_NIF_TERM atom_error; + ERL_NIF_TERM atom_value; + ERL_NIF_TERM atom_empty; + ERL_NIF_TERM atom_full; + ERL_NIF_TERM atom_max_elems; + ERL_NIF_TERM atom_heap_size; + ERL_NIF_TERM atom_too_small; + ErlNifResourceType* res_hqueue; +} hqueue_priv; + + +typedef struct +{ + ErlNifEnv* env; + ERL_NIF_TERM value; +} hqnode_nif_t; + + +typedef struct +{ + int version; + uint64_t gen; + hqueue_t* hqueue; + ErlNifPid p; +} hqueue_nif_t; + + +static const uint32_t default_max_elems = UINT32_MAX-1; +static const uint32_t default_heap_size = 1024; + + +static inline ERL_NIF_TERM +make_atom(ErlNifEnv* env, const char* name) +{ + ERL_NIF_TERM ret; + if(enif_make_existing_atom(env, name, &ret, ERL_NIF_LATIN1)) { + return ret; + } + return enif_make_atom(env, name); +} + + +static inline ERL_NIF_TERM +make_ok(ErlNifEnv* env, hqueue_priv* priv, ERL_NIF_TERM value) +{ + return enif_make_tuple2(env, priv->atom_ok, value); +} + + +static inline ERL_NIF_TERM +make_error(ErlNifEnv* env, hqueue_priv* priv, ERL_NIF_TERM reason) +{ + return enif_make_tuple2(env, priv->atom_error, reason); +} + + +static inline int +check_pid(ErlNifEnv* env, hqueue_nif_t* hqueue_nif) +{ + ErlNifPid pid; + enif_self(env, &pid); + + if(enif_compare(pid.pid, hqueue_nif->p.pid) == 0) { + return 1; + } + + return 0; +} + + +void +hqueue_nif_node_free(hqnode_nif_t* hqnode_nif) +{ + enif_free_env(hqnode_nif->env); + enif_free(hqnode_nif); + + return; +} + + +void +hqueue_nif_node_free_ext(void* node) +{ + hqueue_nif_node_free((hqnode_nif_t*) node); + + return; +} + + +hqnode_nif_t* +hqueue_nif_node_alloc() +{ + hqnode_nif_t* node = (hqnode_nif_t*) enif_alloc(sizeof(hqnode_nif_t*)); + + memset(node, 0, sizeof(hqnode_nif_t)); + + node->env = enif_alloc_env(); + + return node; +} + + +static int +get_uint_param(ErlNifEnv* env, ERL_NIF_TERM value, ERL_NIF_TERM atom, uint32_t* p) +{ + const ERL_NIF_TERM* tuple; + int arity; + + if(!enif_get_tuple(env, value, &arity, &tuple)) { + return 0; + } + + if(arity != 2) { + return 0; + } + + if(enif_compare(tuple[0], atom) != 0) { + return 0; + } + + if(!enif_get_uint(env, tuple[1], p)) { + return 0; + } + + return 1; +} + + +static inline hqueue_nif_t* +hqueue_nif_create_int(ErlNifEnv* env, hqueue_priv* priv, uint32_t max_elems, + uint32_t heap_size) +{ + hqueue_nif_t* hqueue_nif = NULL; + + assert(priv != NULL && "missing private data member"); + + hqueue_nif = (hqueue_nif_t*) enif_alloc_resource( + priv->res_hqueue, sizeof(hqueue_nif_t)); + memset(hqueue_nif, 0, sizeof(hqueue_nif_t)); + hqueue_nif->version = HQ_VERSION; + + hqueue_nif->hqueue = hqueue_new(max_elems, heap_size); + + if(hqueue_nif->hqueue == NULL ) { + enif_release_resource(hqueue_nif); + return NULL; + } + + enif_self(env, &(hqueue_nif->p)); + + return hqueue_nif; +} + + +static ERL_NIF_TERM +hqueue_nif_new(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + hqueue_priv* priv = enif_priv_data(env); + hqueue_nif_t* hqueue_nif; + ERL_NIF_TERM ret; + ERL_NIF_TERM opts; + ERL_NIF_TERM value; + uint32_t max_elems = default_max_elems; + uint32_t heap_size = default_heap_size; + + if(argc != 1) { + return enif_make_badarg(env); + } + + opts = argv[0]; + if(!enif_is_list(env, opts)) { + return enif_make_badarg(env); + } + + while(enif_get_list_cell(env, opts, &value, &opts)) { + if(get_uint_param(env, value, priv->atom_max_elems, &max_elems)) { + continue; + } else if(get_uint_param(env, value, priv->atom_heap_size, &heap_size)) { 
+ continue; + } else { + return enif_make_badarg(env); + } + } + + hqueue_nif = hqueue_nif_create_int(env, priv, max_elems, heap_size); + if(hqueue_nif == NULL) { + return enif_make_badarg(env); + } + + ret = enif_make_resource(env, hqueue_nif); + enif_release_resource(hqueue_nif); + + return make_ok(env, priv, ret); +} + + +static void +hqueue_nif_free(ErlNifEnv* env, void* obj) +{ + hqueue_nif_t* hqueue_nif = (hqueue_nif_t*) obj; + + hqueue_free2(hqueue_nif->hqueue, hqueue_nif_node_free_ext); + + return; +} + + +static ERL_NIF_TERM +hqueue_nif_extract_max(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + hqueue_priv* priv = enif_priv_data(env); + hqueue_nif_t* hqueue_nif; + hqnode_nif_t* hqnode_nif; + double tmp_priority; + ERL_NIF_TERM ret; + ERL_NIF_TERM priority; + ERL_NIF_TERM value; + + if(argc != 1) { + return enif_make_badarg(env); + } + + if(!enif_get_resource(env, argv[0], priv->res_hqueue, (void**) &hqueue_nif)) { + return enif_make_badarg(env); + } + + if(!check_pid(env, hqueue_nif)) { + return enif_make_badarg(env); + } + + if (!hqueue_extract_max(hqueue_nif->hqueue, &tmp_priority, (void**) &hqnode_nif)) { + return make_error(env, priv, priv->atom_empty); + } + + priority = enif_make_double(env, tmp_priority); + value = enif_make_copy(env, hqnode_nif->value); + ret = enif_make_tuple2(env, priority, value); + + hqueue_nif_node_free(hqnode_nif); + + return ret; +} + + +static ERL_NIF_TERM +hqueue_nif_insert(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + hqueue_priv* priv = enif_priv_data(env); + hqueue_nif_t* hqueue_nif; + hqnode_nif_t* hqnode_nif; + ERL_NIF_TERM ret; + double priority; + + if(argc != 3) { + return enif_make_badarg(env); + } + + if(!enif_get_resource(env, argv[0], priv->res_hqueue, (void**) &hqueue_nif)) { + return enif_make_badarg(env); + } + + if(!check_pid(env, hqueue_nif)) { + return enif_make_badarg(env); + } + + if(!enif_get_double(env, argv[1], &priority)) { + return enif_make_badarg(env); + } + + if(priority < 0.0) { + return enif_make_badarg(env); + } + + hqnode_nif = hqueue_nif_node_alloc(); + hqnode_nif->value = enif_make_copy(hqnode_nif->env, argv[2]); + + if (!hqueue_insert(hqueue_nif->hqueue, priority, (void*) hqnode_nif)) { + return make_error(env, priv, priv->atom_full); + } + + ret = priv->atom_ok; + + return ret; +} + + +static ERL_NIF_TERM +hqueue_nif_size(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + hqueue_priv* priv = enif_priv_data(env); + hqueue_nif_t* hqueue_nif; + ERL_NIF_TERM ret; + + if(argc != 1) { + return enif_make_badarg(env); + } + + if(!enif_get_resource(env, argv[0], priv->res_hqueue, (void**) &hqueue_nif)) { + return enif_make_badarg(env); + } + + if(!check_pid(env, hqueue_nif)) { + return enif_make_badarg(env); + } + + ret = enif_make_uint64(env, hqueue_size(hqueue_nif->hqueue)); + + return ret; +} + + +static ERL_NIF_TERM +hqueue_nif_heap_size(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + hqueue_priv* priv = enif_priv_data(env); + hqueue_nif_t* hqueue_nif; + ERL_NIF_TERM ret; + + if(argc != 1) { + return enif_make_badarg(env); + } + + if(!enif_get_resource(env, argv[0], priv->res_hqueue, (void**) &hqueue_nif)) { + return enif_make_badarg(env); + } + + if(!check_pid(env, hqueue_nif)) { + return enif_make_badarg(env); + } + + ret = enif_make_uint64(env, hqueue_heap_size(hqueue_nif->hqueue)); + + return ret; +} + + +static ERL_NIF_TERM +hqueue_nif_max_elems(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + hqueue_priv* priv = enif_priv_data(env); + hqueue_nif_t* hqueue_nif; + 
ERL_NIF_TERM ret; + + if(argc != 1) { + return enif_make_badarg(env); + } + + if(!enif_get_resource(env, argv[0], priv->res_hqueue, (void**) &hqueue_nif)) { + return enif_make_badarg(env); + } + + if(!check_pid(env, hqueue_nif)) { + return enif_make_badarg(env); + } + + ret = enif_make_uint64(env, hqueue_max_elems(hqueue_nif->hqueue)); + + return ret; +} + + +static ERL_NIF_TERM +hqueue_nif_to_list(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + hqueue_priv* priv = enif_priv_data(env); + hqueue_nif_t* hqueue_nif; + hqueue_t* hqueue; + hqnode_nif_t* hqnode_nif; + double tmp_priority; + ERL_NIF_TERM ret = enif_make_list(env, 0); + ERL_NIF_TERM priority; + ERL_NIF_TERM value; + ERL_NIF_TERM tuple; + uint32_t i; + + if(argc != 1) { + return enif_make_badarg(env); + } + + if(!enif_get_resource(env, argv[0], priv->res_hqueue, (void**) &hqueue_nif)) { + return enif_make_badarg(env); + } + + if(!check_pid(env, hqueue_nif)) { + return enif_make_badarg(env); + } + + hqueue = hqueue_nif->hqueue; + + for (i = 1; i <= hqueue_size(hqueue); i++) { + hqueue_get_elem(hqueue, i, &tmp_priority, (void **) &hqnode_nif); + priority = enif_make_double(env, tmp_priority); + value = enif_make_copy(env, hqnode_nif->value); + tuple = enif_make_tuple2(env, priority, value); + ret = enif_make_list_cell(env, tuple, ret); + } + + return ret; +} + + +static ERL_NIF_TERM +hqueue_nif_scale_by(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + hqueue_priv* priv = enif_priv_data(env); + hqueue_nif_t* hqueue_nif; + ERL_NIF_TERM ret; + double factor; + + if(argc != 2) { + return enif_make_badarg(env); + } + + if(!enif_get_resource(env, argv[0], priv->res_hqueue, (void**) &hqueue_nif)) { + return enif_make_badarg(env); + } + + if(!check_pid(env, hqueue_nif)) { + return enif_make_badarg(env); + } + + if(!enif_get_double(env, argv[1], &factor)) { + return enif_make_badarg(env); + } + + if(factor < 0.0) { + return enif_make_badarg(env); + } + + hqueue_scale_by(hqueue_nif->hqueue, factor); + + ret = priv->atom_ok; + + return ret; +} + + +static ERL_NIF_TERM +hqueue_nif_resize_heap(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + hqueue_priv* priv = enif_priv_data(env); + hqueue_nif_t* hqueue_nif; + ERL_NIF_TERM ret; + uint32_t new_heap_size; + uint32_t old_heap_size; + + if(argc != 2) { + return enif_make_badarg(env); + } + + if(!enif_get_resource(env, argv[0], priv->res_hqueue, (void**) &hqueue_nif)) { + return enif_make_badarg(env); + } + + if(!check_pid(env, hqueue_nif)) { + return enif_make_badarg(env); + } + + if(!enif_get_uint(env, argv[1], &new_heap_size)) { + return enif_make_badarg(env); + } + + if(hqueue_size(hqueue_nif->hqueue) > new_heap_size) { + return make_error(env, priv, priv->atom_too_small); + } + + if((old_heap_size = hqueue_resize_heap(hqueue_nif->hqueue, new_heap_size)) == 0) { + return enif_make_badarg(env); + } + + ret = enif_make_uint64(env, old_heap_size); + + return ret; +} + + +static ERL_NIF_TERM +hqueue_nif_set_max_elems(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + hqueue_priv* priv = enif_priv_data(env); + hqueue_nif_t* hqueue_nif; + ERL_NIF_TERM ret; + uint32_t new_max_elems; + uint32_t old_max_elems; + + if(argc != 2) { + return enif_make_badarg(env); + } + + if(!enif_get_resource(env, argv[0], priv->res_hqueue, (void**) &hqueue_nif)) { + return enif_make_badarg(env); + } + + if(!check_pid(env, hqueue_nif)) { + return enif_make_badarg(env); + } + + if(!enif_get_uint(env, argv[1], &new_max_elems)) { + return enif_make_badarg(env); + } + + 
if(hqueue_size(hqueue_nif->hqueue) > new_max_elems) { + return make_error(env, priv, priv->atom_too_small); + } + + if ((old_max_elems = hqueue_set_max_elems(hqueue_nif->hqueue, new_max_elems)) == 0) { + return enif_make_badarg(env); + } + + ret = enif_make_uint64(env, old_max_elems); + + return ret; +} + + +static int +load(ErlNifEnv* env, void** priv, ERL_NIF_TERM info) +{ + int flags = ERL_NIF_RT_CREATE | ERL_NIF_RT_TAKEOVER; + ErlNifResourceType* res; + + hqueue_priv* new_priv = (hqueue_priv*) enif_alloc(sizeof(hqueue_priv)); + if(new_priv == NULL) { + return 1; + } + + res = enif_open_resource_type( + env, NULL, "hqueue", hqueue_nif_free, flags, NULL); + if(res == NULL) { + enif_free(new_priv); + return 1; + } + new_priv->res_hqueue = res; + + new_priv->atom_ok = make_atom(env, "ok"); + new_priv->atom_error = make_atom(env, "error"); + new_priv->atom_value = make_atom(env, "value"); + new_priv->atom_empty = make_atom(env, "empty"); + new_priv->atom_full = make_atom(env, "full"); + new_priv->atom_max_elems = make_atom(env, "max_elems"); + new_priv->atom_heap_size = make_atom(env, "heap_size"); + new_priv->atom_too_small = make_atom(env, "too_small"); + + *priv = (void*) new_priv; + + return 0; +} + + +static int +upgrade(ErlNifEnv* env, void** priv, void** old_priv, ERL_NIF_TERM info) +{ + return load(env, priv, info); +} + + +static void +unload(ErlNifEnv* env, void* priv) +{ + enif_free(priv); + return; +} + + +static ErlNifFunc funcs[] = { + {"new", 1, hqueue_nif_new}, + {"extract_max", 1, hqueue_nif_extract_max}, + {"insert", 3, hqueue_nif_insert}, + {"size", 1, hqueue_nif_size}, + {"heap_size", 1, hqueue_nif_heap_size}, + {"max_elems", 1, hqueue_nif_max_elems}, + {"set_max_elems", 2, hqueue_nif_set_max_elems}, + {"to_list", 1, hqueue_nif_to_list}, + {"scale_by", 2, hqueue_nif_scale_by}, + {"resize_heap", 2, hqueue_nif_resize_heap} +}; + + +ERL_NIF_INIT(hqueue, funcs, &load, NULL, &upgrade, &unload); diff --git a/c_src/couchdb_hqueue/c_src/hqueue_nif.d b/c_src/couchdb_hqueue/c_src/hqueue_nif.d new file mode 100644 index 0000000..d8b20c2 --- /dev/null +++ b/c_src/couchdb_hqueue/c_src/hqueue_nif.d @@ -0,0 +1,5 @@ +c_src/hqueue_nif.o: c_src/hqueue_nif.c c_src/hqueue.h \ + /usr/lib/erlang/erts-10.6.2/include/erl_nif.h \ + /usr/lib/erlang/erts-10.6.2/include/erl_drv_nif.h \ + /usr/lib/erlang/erts-10.6.2/include/erl_int_sizes_config.h \ + /usr/lib/erlang/erts-10.6.2/include/erl_nif_api_funcs.h diff --git a/c_src/couchdb_hqueue/c_src/valgrind_sample.c b/c_src/couchdb_hqueue/c_src/valgrind_sample.c new file mode 100644 index 0000000..3c78da5 --- /dev/null +++ b/c_src/couchdb_hqueue/c_src/valgrind_sample.c @@ -0,0 +1,72 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. You may obtain a copy of +// the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations under +// the License. + +#include +#include +#include + +#include "hqueue.h" + + +// Simple test script to stress the public HQueue API. +// Primary use case is for running this under Valgrind. 
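+// One possible way to build and run this sample standalone (the exact
+// command lines are an assumption, not part of the build scripts in this
+// patch): compile it together with hqueue.c, leaving HQ_ENIF_ALLOC undefined
+// so plain malloc/free are used, then run the binary under Valgrind:
+//
+//     cc -g -O0 -I. -o valgrind_sample valgrind_sample.c hqueue.c
+//     valgrind --leak-check=full ./valgrind_sample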
+int main(void) +{ + int str_len = 100; + int iterations = 1000; + uint32_t max_elems = 1024; + uint32_t heap_size = 64; + hqueue_t* hq = hqueue_new(max_elems, heap_size); + double priority; + double priority_res; + char* val; + char* val_res; + int i; + + assert(max_elems == hqueue_max_elems(hq)); + assert(heap_size == hqueue_heap_size(hq)); + + for(i = 0; i < iterations; i++) { + priority = 1234.4321 * i; + val = (char*) malloc(str_len + 1); + + if(val == NULL) { + return 1; + } + + assert(hqueue_size(hq) == i); + + if(snprintf(val, str_len + 1, "Fun string #%d\n", i)) { + if(!hqueue_insert(hq, priority, val)) { + return 1; + } + } else { + return 1; + } + } + + hqueue_scale_by(hq, 3.7); + + // Added 1000 elements, so heap size should have expanded to 1024 + assert(max_elems == hqueue_max_elems(hq)); + assert(max_elems == hqueue_heap_size(hq)); + + if(!hqueue_extract_max(hq, &priority_res, (void**) &val_res)) { + return 1; + } + free(val_res); + + hqueue_free2(hq, free); + + return 0; +} + diff --git a/c_src/couchdb_hqueue/hqueue.c b/c_src/couchdb_hqueue/hqueue.c new file mode 100644 index 0000000..f02f251 --- /dev/null +++ b/c_src/couchdb_hqueue/hqueue.c @@ -0,0 +1,318 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. You may obtain a copy of +// the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations under +// the License. + +#include +#include +#include +#include +#include + +#include "hqueue.h" + + +struct hqueue +{ + int version; + uint32_t idx; + uint32_t max_elems; + uint32_t heap_size; + hqnode_t* heap; // one based index +}; + + +struct hqnode +{ + double priority; + void* value; +}; + + +static inline void +hqueue_exchange(hqueue_t* hqueue, int i, int j) +{ + hqnode_t tmp; + + tmp = hqueue->heap[i]; + hqueue->heap[i] = hqueue->heap[j]; + hqueue->heap[j] = tmp; + return; +} + + +static inline int +hqueue_less(hqueue_t* hqueue, int i, int j) +{ + return hqueue->heap[i].priority < hqueue->heap[j].priority; +} + + +static void +hqueue_fix_up(hqueue_t* hqueue, int k) +{ + while(k > 1 && hqueue_less(hqueue, k/2, k)) { + hqueue_exchange(hqueue, k/2, k); + k = k/2; + } + return; +} + + +static void +hqueue_fix_down(hqueue_t* hqueue, int k) +{ + int j; + int n = hqueue->idx; + + while(2*k <= n) { + j = 2*k; + if(j < n && hqueue_less(hqueue, j, j+1)) { + j++; + } + if(!hqueue_less(hqueue, k, j)) { + break; + } + hqueue_exchange(hqueue, k, j); + k = j; + } + return; +} + + +hqueue_t* +hqueue_new(uint32_t max_elems, uint32_t heap_size) +{ + hqueue_t* hqueue = NULL; + size_t total_heap_size; + + if(max_elems == 0 || heap_size == 0) { + return NULL; + } + + if(max_elems < heap_size) { + heap_size = max_elems; + } + + hqueue = HQUEUE_ALLOC(sizeof(hqueue_t)); + if(hqueue == NULL) { + return NULL; + } + + memset(hqueue, '\0', sizeof(hqueue_t)); + hqueue->version = HQ_VERSION; + hqueue->max_elems = max_elems; + hqueue->heap_size = heap_size; + hqueue->idx = 0; + + total_heap_size = sizeof(hqnode_t) * (hqueue->heap_size+1); + + hqueue->heap = (hqnode_t*) HQUEUE_ALLOC(total_heap_size); + + if(hqueue->heap == NULL ) { + HQUEUE_FREE(hqueue); + return NULL; + } + + memset(hqueue->heap, '\0', 
total_heap_size); + + return hqueue; +} + + +void +hqueue_free(hqueue_t* hqueue) +{ + HQUEUE_FREE(hqueue->heap); + HQUEUE_FREE(hqueue); + + return; +} + + +void +hqueue_free2(hqueue_t* hqueue, void (*free_node)(void* node)) +{ + uint32_t i; + + for(i = 1; i < hqueue->heap_size + 1; i++) { + if(i <= hqueue->idx) { + free_node(hqueue->heap[i].value); + } else { + assert(hqueue->heap[i].value == NULL && "inactive elements must be NULL"); + } + } + + hqueue_free(hqueue); + + return; +} + + +// Extraction order is undefined for entries with duplicate priorities +int +hqueue_extract_max(hqueue_t* hqueue, double* priority, void** value) +{ + if(hqueue->idx <= 0) { + return 0; + } + + hqueue_exchange(hqueue, 1, hqueue->idx); + + *priority = hqueue->heap[hqueue->idx].priority; + *value = hqueue->heap[hqueue->idx].value; + + hqueue->heap[hqueue->idx].value = NULL; + + hqueue->idx--; // heap uses one based index, so we decrement after + hqueue_fix_down(hqueue, 1); + + return 1; +} + + +void +hqueue_get_elem(hqueue_t* hqueue, uint32_t idx, double *priority, void** value) +{ + *priority = hqueue->heap[idx].priority; + *value = hqueue->heap[idx].value; + + return; +} + + +static int +hqueue_maybe_resize(hqueue_t* hqueue) +{ + uint32_t min_resize; + + if(hqueue->idx + 1 > hqueue->heap_size) { + if(hqueue->idx * HQ_SCALE_FACTOR > hqueue->max_elems) { + min_resize = hqueue->max_elems; + } else { + min_resize = hqueue->idx * HQ_SCALE_FACTOR; + } + return hqueue_resize_heap(hqueue, min_resize); + } + + return 1; +} + + +int +hqueue_insert(hqueue_t* hqueue, double priority, void* value) +{ + if(hqueue->idx >= hqueue->max_elems) { + return 0; + } + + if(!hqueue_maybe_resize(hqueue)) { + return 0; + } + + hqueue->idx++; // heap uses one based index, so we increment first + hqueue->heap[hqueue->idx].priority = priority; + hqueue->heap[hqueue->idx].value = value; + + hqueue_fix_up(hqueue, hqueue->idx); + + return 1; +} + + +uint32_t +hqueue_size(hqueue_t* hqueue) +{ + return hqueue->idx; +} + + +uint32_t +hqueue_heap_size(hqueue_t* hqueue) +{ + return hqueue->heap_size; +} + + +uint32_t +hqueue_max_elems(hqueue_t* hqueue) +{ + return hqueue->max_elems; +} + + +void +hqueue_scale_by(hqueue_t* hqueue, double factor) +{ + uint32_t i; + + for(i = 1; i <= hqueue->idx && i <= hqueue->heap_size; i++) { + hqueue->heap[i].priority *= factor; + } + + return; +} + + +uint32_t +hqueue_resize_heap(hqueue_t* hqueue, uint32_t new_heap_size) +{ + uint32_t old_heap_size; + size_t total_heap_size; + hqnode_t* tmp_heap; + uint32_t i; + + if(hqueue->idx > new_heap_size) { + return 0; + } + + total_heap_size = sizeof(hqnode_t) * (new_heap_size+1); + old_heap_size = hqueue->heap_size; + + if((tmp_heap = (hqnode_t*) HQUEUE_ALLOC(total_heap_size)) == NULL) { + return 0; + } + + memset(tmp_heap, '\0', total_heap_size); + + for(i = 1; i <= hqueue->idx && i <= old_heap_size; i++) { + if(i <= hqueue->idx) { + tmp_heap[i] = hqueue->heap[i]; + hqueue->heap[i].value = NULL; + } else { + assert(hqueue->heap[i].value == NULL && + "unexpected NULL element during heap resize"); + } + } + + HQUEUE_FREE(hqueue->heap); + hqueue->heap = tmp_heap; + hqueue->heap_size = new_heap_size; + + return old_heap_size; +} + + +int +hqueue_set_max_elems(hqueue_t* hqueue, uint32_t new_max_elems) +{ + uint32_t old_max_elems; + + if(hqueue->heap_size > new_max_elems) { + if(!hqueue_resize_heap(hqueue, new_max_elems)) { + return 0; + } + } + + old_max_elems = hqueue->max_elems; + hqueue->max_elems = new_max_elems; + + return old_max_elems; +} diff --git 
a/c_src/couchdb_hqueue/hqueue.h b/c_src/couchdb_hqueue/hqueue.h new file mode 100644 index 0000000..4e422e4 --- /dev/null +++ b/c_src/couchdb_hqueue/hqueue.h @@ -0,0 +1,60 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. You may obtain a copy of +// the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations under +// the License. + +#pragma once + + +#include + +#define HQ_VERSION 0 +#define HQ_SCALE_FACTOR 2 // heap expansion scale factor + + +// Override the default memory allocator to use the Erlang versions. +// This bubbles up memory usage for the NIF into Erlang stats. +#ifdef HQ_ENIF_ALLOC + +#include "erl_nif.h" + +#define HQUEUE_ALLOC enif_alloc +#define HQUEUE_FREE enif_free + +#else + +#define HQUEUE_ALLOC malloc +#define HQUEUE_FREE free + +#endif + + +typedef struct hqnode hqnode_t; +typedef struct hqueue hqueue_t; + + +hqueue_t* hqueue_new(uint32_t max_elems, uint32_t heap_size); + +void hqueue_free(hqueue_t* hqueue); +void hqueue_free2(hqueue_t* hqueue, void (*free_node)(void* node)); + +int hqueue_insert(hqueue_t* hqueue, double priority, void* val); +int hqueue_extract_max(hqueue_t* hqueue, double* priority, void** value); +void hqueue_get_elem(hqueue_t* hqueue, uint32_t idx, double *priority, + void** value); + +uint32_t hqueue_size(hqueue_t* hqueue); +uint32_t hqueue_heap_size(hqueue_t* hqueue); + +uint32_t hqueue_max_elems(hqueue_t* hqueue); +int hqueue_set_max_elems(hqueue_t* hqueue, uint32_t new_max_elems); + +void hqueue_scale_by(hqueue_t* hqueue, double factor); +uint32_t hqueue_resize_heap(hqueue_t* hqueue, uint32_t new_heap_size); diff --git a/c_src/couchdb_hqueue/hqueue_nif.c b/c_src/couchdb_hqueue/hqueue_nif.c new file mode 100644 index 0000000..7cbc5e2 --- /dev/null +++ b/c_src/couchdb_hqueue/hqueue_nif.c @@ -0,0 +1,601 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. You may obtain a copy of +// the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations under +// the License. 
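+// Allocation note: the accompanying rebar.config passes -DHQ_ENIF_ALLOC, so
+// HQUEUE_ALLOC/HQUEUE_FREE in hqueue.h resolve to enif_alloc/enif_free and
+// the heap storage is accounted to the Erlang VM's allocators; the per-term
+// ErlNifEnv and hqnode_nif_t bookkeeping below always use enif_alloc_env/
+// enif_alloc regardless of that flag. Defaults when new/1 gets an empty
+// option list: max_elems = UINT32_MAX - 1, heap_size = 1024.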
+ +#include +#include +#include + +#include "hqueue.h" + + +typedef struct +{ + ERL_NIF_TERM atom_ok; + ERL_NIF_TERM atom_error; + ERL_NIF_TERM atom_value; + ERL_NIF_TERM atom_empty; + ERL_NIF_TERM atom_full; + ERL_NIF_TERM atom_max_elems; + ERL_NIF_TERM atom_heap_size; + ERL_NIF_TERM atom_too_small; + ErlNifResourceType* res_hqueue; +} hqueue_priv; + + +typedef struct +{ + ErlNifEnv* env; + ERL_NIF_TERM value; +} hqnode_nif_t; + + +typedef struct +{ + int version; + uint64_t gen; + hqueue_t* hqueue; + ErlNifPid p; +} hqueue_nif_t; + + +static const uint32_t default_max_elems = UINT32_MAX-1; +static const uint32_t default_heap_size = 1024; + + +static inline ERL_NIF_TERM +make_atom(ErlNifEnv* env, const char* name) +{ + ERL_NIF_TERM ret; + if(enif_make_existing_atom(env, name, &ret, ERL_NIF_LATIN1)) { + return ret; + } + return enif_make_atom(env, name); +} + + +static inline ERL_NIF_TERM +make_ok(ErlNifEnv* env, hqueue_priv* priv, ERL_NIF_TERM value) +{ + return enif_make_tuple2(env, priv->atom_ok, value); +} + + +static inline ERL_NIF_TERM +make_error(ErlNifEnv* env, hqueue_priv* priv, ERL_NIF_TERM reason) +{ + return enif_make_tuple2(env, priv->atom_error, reason); +} + + +static inline int +check_pid(ErlNifEnv* env, hqueue_nif_t* hqueue_nif) +{ + ErlNifPid pid; + enif_self(env, &pid); + + if(enif_compare(pid.pid, hqueue_nif->p.pid) == 0) { + return 1; + } + + return 0; +} + + +void +hqueue_nif_node_free(hqnode_nif_t* hqnode_nif) +{ + enif_free_env(hqnode_nif->env); + enif_free(hqnode_nif); + + return; +} + + +void +hqueue_nif_node_free_ext(void* node) +{ + hqueue_nif_node_free((hqnode_nif_t*) node); + + return; +} + + +hqnode_nif_t* +hqueue_nif_node_alloc() +{ + hqnode_nif_t* node = (hqnode_nif_t*) enif_alloc(sizeof(hqnode_nif_t*)); + + memset(node, 0, sizeof(hqnode_nif_t)); + + node->env = enif_alloc_env(); + + return node; +} + + +static int +get_uint_param(ErlNifEnv* env, ERL_NIF_TERM value, ERL_NIF_TERM atom, uint32_t* p) +{ + const ERL_NIF_TERM* tuple; + int arity; + + if(!enif_get_tuple(env, value, &arity, &tuple)) { + return 0; + } + + if(arity != 2) { + return 0; + } + + if(enif_compare(tuple[0], atom) != 0) { + return 0; + } + + if(!enif_get_uint(env, tuple[1], p)) { + return 0; + } + + return 1; +} + + +static inline hqueue_nif_t* +hqueue_nif_create_int(ErlNifEnv* env, hqueue_priv* priv, uint32_t max_elems, + uint32_t heap_size) +{ + hqueue_nif_t* hqueue_nif = NULL; + + assert(priv != NULL && "missing private data member"); + + hqueue_nif = (hqueue_nif_t*) enif_alloc_resource( + priv->res_hqueue, sizeof(hqueue_nif_t)); + memset(hqueue_nif, 0, sizeof(hqueue_nif_t)); + hqueue_nif->version = HQ_VERSION; + + hqueue_nif->hqueue = hqueue_new(max_elems, heap_size); + + if(hqueue_nif->hqueue == NULL ) { + enif_release_resource(hqueue_nif); + return NULL; + } + + enif_self(env, &(hqueue_nif->p)); + + return hqueue_nif; +} + + +static ERL_NIF_TERM +hqueue_nif_new(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + hqueue_priv* priv = enif_priv_data(env); + hqueue_nif_t* hqueue_nif; + ERL_NIF_TERM ret; + ERL_NIF_TERM opts; + ERL_NIF_TERM value; + uint32_t max_elems = default_max_elems; + uint32_t heap_size = default_heap_size; + + if(argc != 1) { + return enif_make_badarg(env); + } + + opts = argv[0]; + if(!enif_is_list(env, opts)) { + return enif_make_badarg(env); + } + + while(enif_get_list_cell(env, opts, &value, &opts)) { + if(get_uint_param(env, value, priv->atom_max_elems, &max_elems)) { + continue; + } else if(get_uint_param(env, value, priv->atom_heap_size, &heap_size)) { 
+ continue; + } else { + return enif_make_badarg(env); + } + } + + hqueue_nif = hqueue_nif_create_int(env, priv, max_elems, heap_size); + if(hqueue_nif == NULL) { + return enif_make_badarg(env); + } + + ret = enif_make_resource(env, hqueue_nif); + enif_release_resource(hqueue_nif); + + return make_ok(env, priv, ret); +} + + +static void +hqueue_nif_free(ErlNifEnv* env, void* obj) +{ + hqueue_nif_t* hqueue_nif = (hqueue_nif_t*) obj; + + hqueue_free2(hqueue_nif->hqueue, hqueue_nif_node_free_ext); + + return; +} + + +static ERL_NIF_TERM +hqueue_nif_extract_max(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + hqueue_priv* priv = enif_priv_data(env); + hqueue_nif_t* hqueue_nif; + hqnode_nif_t* hqnode_nif; + double tmp_priority; + ERL_NIF_TERM ret; + ERL_NIF_TERM priority; + ERL_NIF_TERM value; + + if(argc != 1) { + return enif_make_badarg(env); + } + + if(!enif_get_resource(env, argv[0], priv->res_hqueue, (void**) &hqueue_nif)) { + return enif_make_badarg(env); + } + + if(!check_pid(env, hqueue_nif)) { + return enif_make_badarg(env); + } + + if (!hqueue_extract_max(hqueue_nif->hqueue, &tmp_priority, (void**) &hqnode_nif)) { + return make_error(env, priv, priv->atom_empty); + } + + priority = enif_make_double(env, tmp_priority); + value = enif_make_copy(env, hqnode_nif->value); + ret = enif_make_tuple2(env, priority, value); + + hqueue_nif_node_free(hqnode_nif); + + return ret; +} + + +static ERL_NIF_TERM +hqueue_nif_insert(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + hqueue_priv* priv = enif_priv_data(env); + hqueue_nif_t* hqueue_nif; + hqnode_nif_t* hqnode_nif; + ERL_NIF_TERM ret; + double priority; + + if(argc != 3) { + return enif_make_badarg(env); + } + + if(!enif_get_resource(env, argv[0], priv->res_hqueue, (void**) &hqueue_nif)) { + return enif_make_badarg(env); + } + + if(!check_pid(env, hqueue_nif)) { + return enif_make_badarg(env); + } + + if(!enif_get_double(env, argv[1], &priority)) { + return enif_make_badarg(env); + } + + if(priority < 0.0) { + return enif_make_badarg(env); + } + + hqnode_nif = hqueue_nif_node_alloc(); + hqnode_nif->value = enif_make_copy(hqnode_nif->env, argv[2]); + + if (!hqueue_insert(hqueue_nif->hqueue, priority, (void*) hqnode_nif)) { + return make_error(env, priv, priv->atom_full); + } + + ret = priv->atom_ok; + + return ret; +} + + +static ERL_NIF_TERM +hqueue_nif_size(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + hqueue_priv* priv = enif_priv_data(env); + hqueue_nif_t* hqueue_nif; + ERL_NIF_TERM ret; + + if(argc != 1) { + return enif_make_badarg(env); + } + + if(!enif_get_resource(env, argv[0], priv->res_hqueue, (void**) &hqueue_nif)) { + return enif_make_badarg(env); + } + + if(!check_pid(env, hqueue_nif)) { + return enif_make_badarg(env); + } + + ret = enif_make_uint64(env, hqueue_size(hqueue_nif->hqueue)); + + return ret; +} + + +static ERL_NIF_TERM +hqueue_nif_heap_size(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + hqueue_priv* priv = enif_priv_data(env); + hqueue_nif_t* hqueue_nif; + ERL_NIF_TERM ret; + + if(argc != 1) { + return enif_make_badarg(env); + } + + if(!enif_get_resource(env, argv[0], priv->res_hqueue, (void**) &hqueue_nif)) { + return enif_make_badarg(env); + } + + if(!check_pid(env, hqueue_nif)) { + return enif_make_badarg(env); + } + + ret = enif_make_uint64(env, hqueue_heap_size(hqueue_nif->hqueue)); + + return ret; +} + + +static ERL_NIF_TERM +hqueue_nif_max_elems(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + hqueue_priv* priv = enif_priv_data(env); + hqueue_nif_t* hqueue_nif; + 
ERL_NIF_TERM ret; + + if(argc != 1) { + return enif_make_badarg(env); + } + + if(!enif_get_resource(env, argv[0], priv->res_hqueue, (void**) &hqueue_nif)) { + return enif_make_badarg(env); + } + + if(!check_pid(env, hqueue_nif)) { + return enif_make_badarg(env); + } + + ret = enif_make_uint64(env, hqueue_max_elems(hqueue_nif->hqueue)); + + return ret; +} + + +static ERL_NIF_TERM +hqueue_nif_to_list(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + hqueue_priv* priv = enif_priv_data(env); + hqueue_nif_t* hqueue_nif; + hqueue_t* hqueue; + hqnode_nif_t* hqnode_nif; + double tmp_priority; + ERL_NIF_TERM ret = enif_make_list(env, 0); + ERL_NIF_TERM priority; + ERL_NIF_TERM value; + ERL_NIF_TERM tuple; + uint32_t i; + + if(argc != 1) { + return enif_make_badarg(env); + } + + if(!enif_get_resource(env, argv[0], priv->res_hqueue, (void**) &hqueue_nif)) { + return enif_make_badarg(env); + } + + if(!check_pid(env, hqueue_nif)) { + return enif_make_badarg(env); + } + + hqueue = hqueue_nif->hqueue; + + for (i = 1; i <= hqueue_size(hqueue); i++) { + hqueue_get_elem(hqueue, i, &tmp_priority, (void **) &hqnode_nif); + priority = enif_make_double(env, tmp_priority); + value = enif_make_copy(env, hqnode_nif->value); + tuple = enif_make_tuple2(env, priority, value); + ret = enif_make_list_cell(env, tuple, ret); + } + + return ret; +} + + +static ERL_NIF_TERM +hqueue_nif_scale_by(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + hqueue_priv* priv = enif_priv_data(env); + hqueue_nif_t* hqueue_nif; + ERL_NIF_TERM ret; + double factor; + + if(argc != 2) { + return enif_make_badarg(env); + } + + if(!enif_get_resource(env, argv[0], priv->res_hqueue, (void**) &hqueue_nif)) { + return enif_make_badarg(env); + } + + if(!check_pid(env, hqueue_nif)) { + return enif_make_badarg(env); + } + + if(!enif_get_double(env, argv[1], &factor)) { + return enif_make_badarg(env); + } + + if(factor < 0.0) { + return enif_make_badarg(env); + } + + hqueue_scale_by(hqueue_nif->hqueue, factor); + + ret = priv->atom_ok; + + return ret; +} + + +static ERL_NIF_TERM +hqueue_nif_resize_heap(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + hqueue_priv* priv = enif_priv_data(env); + hqueue_nif_t* hqueue_nif; + ERL_NIF_TERM ret; + uint32_t new_heap_size; + uint32_t old_heap_size; + + if(argc != 2) { + return enif_make_badarg(env); + } + + if(!enif_get_resource(env, argv[0], priv->res_hqueue, (void**) &hqueue_nif)) { + return enif_make_badarg(env); + } + + if(!check_pid(env, hqueue_nif)) { + return enif_make_badarg(env); + } + + if(!enif_get_uint(env, argv[1], &new_heap_size)) { + return enif_make_badarg(env); + } + + if(hqueue_size(hqueue_nif->hqueue) > new_heap_size) { + return make_error(env, priv, priv->atom_too_small); + } + + if((old_heap_size = hqueue_resize_heap(hqueue_nif->hqueue, new_heap_size)) == 0) { + return enif_make_badarg(env); + } + + ret = enif_make_uint64(env, old_heap_size); + + return ret; +} + + +static ERL_NIF_TERM +hqueue_nif_set_max_elems(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + hqueue_priv* priv = enif_priv_data(env); + hqueue_nif_t* hqueue_nif; + ERL_NIF_TERM ret; + uint32_t new_max_elems; + uint32_t old_max_elems; + + if(argc != 2) { + return enif_make_badarg(env); + } + + if(!enif_get_resource(env, argv[0], priv->res_hqueue, (void**) &hqueue_nif)) { + return enif_make_badarg(env); + } + + if(!check_pid(env, hqueue_nif)) { + return enif_make_badarg(env); + } + + if(!enif_get_uint(env, argv[1], &new_max_elems)) { + return enif_make_badarg(env); + } + + 
if(hqueue_size(hqueue_nif->hqueue) > new_max_elems) { + return make_error(env, priv, priv->atom_too_small); + } + + if ((old_max_elems = hqueue_set_max_elems(hqueue_nif->hqueue, new_max_elems)) == 0) { + return enif_make_badarg(env); + } + + ret = enif_make_uint64(env, old_max_elems); + + return ret; +} + + +static int +load(ErlNifEnv* env, void** priv, ERL_NIF_TERM info) +{ + int flags = ERL_NIF_RT_CREATE | ERL_NIF_RT_TAKEOVER; + ErlNifResourceType* res; + + hqueue_priv* new_priv = (hqueue_priv*) enif_alloc(sizeof(hqueue_priv)); + if(new_priv == NULL) { + return 1; + } + + res = enif_open_resource_type( + env, NULL, "hqueue", hqueue_nif_free, flags, NULL); + if(res == NULL) { + enif_free(new_priv); + return 1; + } + new_priv->res_hqueue = res; + + new_priv->atom_ok = make_atom(env, "ok"); + new_priv->atom_error = make_atom(env, "error"); + new_priv->atom_value = make_atom(env, "value"); + new_priv->atom_empty = make_atom(env, "empty"); + new_priv->atom_full = make_atom(env, "full"); + new_priv->atom_max_elems = make_atom(env, "max_elems"); + new_priv->atom_heap_size = make_atom(env, "heap_size"); + new_priv->atom_too_small = make_atom(env, "too_small"); + + *priv = (void*) new_priv; + + return 0; +} + + +static int +upgrade(ErlNifEnv* env, void** priv, void** old_priv, ERL_NIF_TERM info) +{ + return load(env, priv, info); +} + + +static void +unload(ErlNifEnv* env, void* priv) +{ + enif_free(priv); + return; +} + + +static ErlNifFunc funcs[] = { + {"new", 1, hqueue_nif_new}, + {"extract_max", 1, hqueue_nif_extract_max}, + {"insert", 3, hqueue_nif_insert}, + {"size", 1, hqueue_nif_size}, + {"heap_size", 1, hqueue_nif_heap_size}, + {"max_elems", 1, hqueue_nif_max_elems}, + {"set_max_elems", 2, hqueue_nif_set_max_elems}, + {"to_list", 1, hqueue_nif_to_list}, + {"scale_by", 2, hqueue_nif_scale_by}, + {"resize_heap", 2, hqueue_nif_resize_heap} +}; + + +ERL_NIF_INIT(hqueue, funcs, &load, NULL, &upgrade, &unload); diff --git a/c_src/couchdb_hqueue/rebar.config b/c_src/couchdb_hqueue/rebar.config new file mode 100644 index 0000000..82d6eaf --- /dev/null +++ b/c_src/couchdb_hqueue/rebar.config @@ -0,0 +1,13 @@ +{port_specs, [ + {"../../priv/hqueue.so", ["hqueue*.c"]} +]}. + + +{port_env, [ + {"(linux|solaris|darwin|freebsd)", "CFLAGS", "$CFLAGS -g -Wall -Werror -DHQ_ENIF_ALLOC -O3"}, + {"win32", "CFLAGS", "$CFLAGS /O2 /DNDEBUG /DHQ_ENIF_ALLOC /Dinline=__inline /Wall"} + %% {".*", "CFLAGS", "$CFLAGS -g -Wall -Werror -Wextra"} +]}. + + + diff --git a/c_src/couchdb_hqueue/valgrind_sample.c b/c_src/couchdb_hqueue/valgrind_sample.c new file mode 100644 index 0000000..3c78da5 --- /dev/null +++ b/c_src/couchdb_hqueue/valgrind_sample.c @@ -0,0 +1,72 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. You may obtain a copy of +// the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations under +// the License. + +#include +#include +#include + +#include "hqueue.h" + + +// Simple test script to stress the public HQueue API. +// Primary use case is for running this under Valgrind. 
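+// Sizing note for the asserts after the insert loop below: the heap starts
+// at heap_size 64 and hqueue_maybe_resize() doubles the backing array on
+// demand (64 -> 128 -> 256 -> 512 -> 1024, capped at max_elems), so after
+// the 1000 insertions the heap has grown to max_elems == 1024, which is
+// exactly what those asserts check.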
+int main(void) +{ + int str_len = 100; + int iterations = 1000; + uint32_t max_elems = 1024; + uint32_t heap_size = 64; + hqueue_t* hq = hqueue_new(max_elems, heap_size); + double priority; + double priority_res; + char* val; + char* val_res; + int i; + + assert(max_elems == hqueue_max_elems(hq)); + assert(heap_size == hqueue_heap_size(hq)); + + for(i = 0; i < iterations; i++) { + priority = 1234.4321 * i; + val = (char*) malloc(str_len + 1); + + if(val == NULL) { + return 1; + } + + assert(hqueue_size(hq) == i); + + if(snprintf(val, str_len + 1, "Fun string #%d\n", i)) { + if(!hqueue_insert(hq, priority, val)) { + return 1; + } + } else { + return 1; + } + } + + hqueue_scale_by(hq, 3.7); + + // Added 1000 elements, so heap size should have expanded to 1024 + assert(max_elems == hqueue_max_elems(hq)); + assert(max_elems == hqueue_heap_size(hq)); + + if(!hqueue_extract_max(hq, &priority_res, (void**) &val_res)) { + return 1; + } + free(val_res); + + hqueue_free2(hq, free); + + return 0; +} + diff --git a/c_src/cq/cq_nif.c b/c_src/cq/cq_nif.c deleted file mode 100644 index 2f26a20..0000000 --- a/c_src/cq/cq_nif.c +++ /dev/null @@ -1,564 +0,0 @@ -#include -#include - -#include "erl_nif.h" -#include "cq_nif.h" - - -/* #ifndef ERL_NIF_DIRTY_SCHEDULER_SUPPORT -# error Requires dirty schedulers -#endif */ - - - - - -ERL_NIF_TERM -mk_atom(ErlNifEnv* env, const char* atom) -{ - ERL_NIF_TERM ret; - - if(!enif_make_existing_atom(env, atom, &ret, ERL_NIF_LATIN1)) - return enif_make_atom(env, atom); - - return ret; -} - -ERL_NIF_TERM -mk_error(ErlNifEnv* env, const char* mesg) -{ - return enif_make_tuple2(env, mk_atom(env, "error"), mk_atom(env, mesg)); -} - - -static ERL_NIF_TERM -queue_new(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) -{ - cq_t *q = enif_alloc_resource(CQ_RESOURCE, sizeof(cq_t)); - if (q == NULL) - return mk_error(env, "priv_alloc_error"); - - ERL_NIF_TERM ret = enif_make_resource(env, q); - /* enif_release_resource(ret); */ - - uint32_t queue_id = 0; - uint32_t queue_size = 0; - uint32_t overflow_size = 0; - - if (!enif_get_uint(env, argv[0], &queue_id) || - !enif_get_uint(env, argv[1], &queue_size) || - !enif_get_uint(env, argv[2], &overflow_size)) - return mk_error(env, "badarg"); - - if (queue_id > 8) - return mk_error(env, "bad_queue_id"); - - /* TODO: Check that queue_size is power of 2 */ - - if (QUEUES[queue_id] != NULL) - return mk_error(env, "queue_id_already_exists"); - - q->id = queue_id; - q->queue_size = queue_size; - q->overflow_size = overflow_size; - q->tail = 0; - q->head = 0; - q->slots_states = calloc(q->queue_size, CACHE_LINE_SIZE); - q->slots_terms = calloc(q->queue_size, CACHE_LINE_SIZE); - q->slots_envs = calloc(q->queue_size, CACHE_LINE_SIZE); - q->overflow_terms = calloc(q->overflow_size, CACHE_LINE_SIZE); - q->overflow_envs = calloc(q->queue_size, CACHE_LINE_SIZE); - - q->push_queue = new_queue(); - q->pop_queue = new_queue(); - - /* TODO: Check calloc return */ - - - for (int i = 0; i < q->queue_size; i++) { - ErlNifEnv *slot_env = enif_alloc_env(); - - q->slots_envs[i*CACHE_LINE_SIZE] = slot_env; - //q->overflow_envs[i*CACHE_LINE_SIZE] = (ErlNifEnv *) enif_alloc_env(); - } - - QUEUES[q->id] = q; - - return enif_make_tuple2(env, mk_atom(env, "ok"), ret); -} - - -static ERL_NIF_TERM -queue_free(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) -{ - uint32_t queue_id = 0; - - if (!enif_get_uint(env, argv[0], &queue_id)) - return mk_error(env, "badarg"); - - if (queue_id > 8) - return mk_error(env, "badarg"); - - cq_t *q = QUEUES[queue_id]; - if (q 
== NULL) - return mk_error(env, "bad_queue_id"); - - - /* TODO: Free all the things! */ - QUEUES[queue_id] = NULL; - - return enif_make_atom(env, "ok"); - -} - -/* Push to the head of the queue. */ -static ERL_NIF_TERM -queue_push(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) -{ - uint32_t queue_id = 0; - - if (!enif_get_uint(env, argv[0], &queue_id)) - return mk_error(env, "badarg"); - - if (queue_id > 8) - return mk_error(env, "badarg"); - - /* Load the queue */ - cq_t *q = QUEUES[queue_id]; - if (q == NULL) - return mk_error(env, "bad_queue_id"); - - if (q->id != queue_id) - return mk_error(env, "not_identical_queue_id"); - - - for (int i = 0; i < q->queue_size; i++) { - fprintf(stderr, "queue slot %d, index %d, state %d\n", - i, i*CACHE_LINE_SIZE, q->slots_states[i*CACHE_LINE_SIZE]); - } - - /* If there's consumers waiting, the queue must be empty and we - should directly pick a consumer to notify. */ - - ErlNifPid *waiting_consumer; - int dequeue_ret = dequeue(q->pop_queue, &waiting_consumer); - if (dequeue_ret) { - ErlNifEnv *msg_env = enif_alloc_env(); - ERL_NIF_TERM copy = enif_make_copy(msg_env, argv[1]); - ERL_NIF_TERM tuple = enif_make_tuple2(msg_env, mk_atom(env, "pop"), copy); - - if (enif_send(env, waiting_consumer, msg_env, tuple)) { - enif_free_env(msg_env); - return mk_atom(env, "ok"); - } else { - return mk_error(env, "notify_failed"); - } - } - - - - /* Increment head and attempt to claim the slot by marking it as - busy. This ensures no other thread will attempt to modify this - slot. If we cannot lock it, another thread must have */ - - uint64_t head = __sync_add_and_fetch(&q->head, 1); - size_t size = q->queue_size; - - while (1) { - uint64_t index = SLOT_INDEX(head, size); - uint64_t ret = __sync_val_compare_and_swap(&q->slots_states[index], - STATE_EMPTY, - STATE_WRITE); - - switch (ret) { - - case STATE_EMPTY: - head = __sync_add_and_fetch(&q->head, 1); - - case STATE_WRITE: - /* We acquired the write lock, go ahead with the write. */ - break; - - case STATE_FULL: - /* We have caught up with the tail and the buffer is - full. Block the producer until a consumer reads the - item. */ - return mk_error(env, "full_not_implemented"); - } - } - - /* If head catches up with tail, the queue is full. Add to - overflow instead */ - - - /* Copy term to slot-specific temporary process env. */ - ERL_NIF_TERM copy = enif_make_copy(q->slots_envs[SLOT_INDEX(head, size)], argv[1]); - q->slots_terms[SLOT_INDEX(head, size)] = copy; - - __sync_synchronize(); /* Or compiler memory barrier? */ - - - /* TODO: Do we need to collect garbage? */ - - - /* Mark the slot ready to be consumed */ - if (__sync_bool_compare_and_swap(&q->slots_states[SLOT_INDEX(head, size)], - STATE_WRITE, - STATE_FULL)) { - return mk_atom(env, "ok"); - } else { - return mk_error(env, "could_not_update_slots_after_insert"); - } - -} - - - -static ERL_NIF_TERM -queue_async_pop(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) -{ - /* Load queue */ - - uint32_t queue_id = 0; - - if (!enif_get_uint(env, argv[0], &queue_id)) - return mk_error(env, "badarg"); - - if (queue_id > 8) - return mk_error(env, "badarg"); - - cq_t *q = QUEUES[queue_id]; - if (q == NULL) - return mk_error(env, "bad_queue_id"); - - if (q->id != queue_id) - return mk_error(env, "not_identical_queue_id"); - - uint64_t qsize = q->queue_size; - uint64_t tail = q->tail; - uint64_t num_busy = 0; - - /* Walk the buffer starting the tail position until we are either - able to consume a term or find an empty slot. 
*/ - while (1) { - uint64_t index = SLOT_INDEX(tail, qsize); - uint64_t ret = __sync_val_compare_and_swap(&q->slots_states[index], - STATE_FULL, - STATE_READ); - - if (ret == STATE_READ) { - /* We were able to mark the term as read in progress. We - now have an exclusive lock. */ - break; - - } else if (ret == STATE_WRITE) { - /* We found an item with a write in progress. If that - thread progresses, it will eventually mark the slot as - full. We can spin until that happens. - - This can take an arbitrary amount of time and multiple - reading threads will compete for the same slot. - - Instead we add the caller to the queue of blocking - consumers. When the next producer comes it will "help" - this thread by calling enif_send on the current - in-progress term *and* handle it's own terms. If - there's no new push to the queue, this will block - forever. */ - return mk_atom(env, "write_in_progress_not_implemented"); - - } else if (ret == STATE_EMPTY) { - /* We found an empty item. Queue must be empty. Add - calling Erlang consumer process to queue of waiting - processes. When the next producer comes along, it first - checks the waiting consumers and calls enif_send - instead of writing to the slots. */ - - ErlNifPid *pid = enif_alloc(sizeof(ErlNifPid)); - pid = enif_self(env, pid); - enqueue(q->pop_queue, pid); - - return mk_atom(env, "wait_for_msg"); - - } else { - tail = __sync_add_and_fetch(&q->tail, 1); - } - } - - - /* Copy term into calling process env. The NIF env can now be - gargbage collected. */ - ERL_NIF_TERM copy = enif_make_copy(env, q->slots_terms[SLOT_INDEX(tail, qsize)]); - - - /* Mark the slot as free. Note: We don't increment the tail - position here, as another thread also walking the buffer might - have incremented it multiple times */ - q->slots_terms[SLOT_INDEX(tail, qsize)] = 0; - if (__sync_bool_compare_and_swap(&q->slots_states[SLOT_INDEX(tail, qsize)], - STATE_READ, - STATE_EMPTY)) { - return enif_make_tuple2(env, mk_atom(env, "ok"), copy); - } else { - return mk_error(env, "could_not_update_slots_after_pop"); - } -} - - -static ERL_NIF_TERM -queue_debug(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) -{ - - uint32_t queue_id = 0; - - if (!enif_get_uint(env, argv[0], &queue_id)) - return mk_error(env, "badarg"); - - if (queue_id > 8) - return mk_error(env, "badarg"); - - cq_t *q = QUEUES[queue_id]; - if (q == NULL) - return mk_error(env, "bad_queue_id"); - - - - ERL_NIF_TERM *slots_states = enif_alloc(sizeof(ERL_NIF_TERM) * q->queue_size); - ERL_NIF_TERM *slots_terms = enif_alloc(sizeof(ERL_NIF_TERM) * q->queue_size); - for (int i = 0; i < q->queue_size; i++) { - slots_states[i] = enif_make_int(env, q->slots_states[i * CACHE_LINE_SIZE]); - - if (q->slots_terms[i * CACHE_LINE_SIZE] == 0) { - slots_terms[i] = mk_atom(env, "null"); - } else { - slots_terms[i] = enif_make_copy(env, q->slots_terms[i * CACHE_LINE_SIZE]); - } - } - return enif_make_tuple4(env, - enif_make_uint64(env, q->tail), - enif_make_uint64(env, q->head), - enif_make_list_from_array(env, slots_states, q->queue_size), - enif_make_list_from_array(env, slots_terms, q->queue_size)); -} - -static ERL_NIF_TERM -queue_debug_poppers(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) -{ - uint32_t queue_id = 0; - - if (!enif_get_uint(env, argv[0], &queue_id)) - return mk_error(env, "badarg"); - - if (queue_id > 8) - return mk_error(env, "badarg"); - - cq_t *q = QUEUES[queue_id]; - if (q == NULL) - return mk_error(env, "bad_queue_id"); - - - uint64_t pop_queue_size = 0; - cq_node_t *node = 
q->pop_queue->head; - if (node->value == NULL) { - node = node->next; - node = Q_PTR(node); - } - - while (node != NULL) { - pop_queue_size++; - node = node->next; - node = Q_PTR(node); - } - - ERL_NIF_TERM *pop_queue_pids = enif_alloc(sizeof(ERL_NIF_TERM) * pop_queue_size); - - node = q->pop_queue->head; - node = Q_PTR(node); - if (node->value == NULL) { - node = node->next; - node = Q_PTR(node); - } - - uint64_t i = 0; - while (node != NULL) { - if (node->value == 0) { - pop_queue_pids[i] = mk_atom(env, "null"); - } - else { - pop_queue_pids[i] = enif_make_pid(env, node->value); - } - - i++; - node = node->next; - node = Q_PTR(node); - } - - ERL_NIF_TERM list = enif_make_list_from_array(env, pop_queue_pids, pop_queue_size); - enif_free(pop_queue_pids); - - return list; -} - - - -static ERL_NIF_TERM -print_bits(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) -{ - - uint64_t *p1 = malloc(8); - *p1 = 0; - - - for (int bit = 63; bit >= 0; bit--) { - uint64_t power = 1 << bit; - //uint64_t byte = *p1; - uint64_t byte = p1; - fprintf(stderr, "%d", (byte & power) >> bit); - } - fprintf(stderr, "\n"); - - //enif_free(p1); - - return mk_atom(env, "ok"); -} - -void free_resource(ErlNifEnv* env, void* arg) -{ - //cq_t *cq = (cq_t *) arg; - - fprintf(stderr, "free_resource\n"); -} - - -cq_queue_t * new_queue() -{ - cq_queue_t *queue = enif_alloc(sizeof(cq_queue_t)); - cq_node_t *node = enif_alloc(sizeof(cq_node_t)); - node->next = NULL; - //node->env = NULL; - node->value = NULL; - queue->head = node; - queue->tail = node; - - return queue; -} - - - -void enqueue(cq_queue_t *queue, ErlNifPid *pid) -{ - cq_node_t *node = enif_alloc(sizeof(cq_node_t)); - //node->env = enif_alloc_env(); - //node->term = enif_make_copy(node->env, term); - node->value = pid; - node->next = NULL; - fprintf(stderr, "node %lu\n", node); - - cq_node_t *tail = NULL; - uint64_t tail_count = 0; - while (1) { - tail = queue->tail; - cq_node_t *tail_ptr = Q_PTR(tail); - tail_count = Q_COUNT(tail); - - cq_node_t *next = tail->next; - cq_node_t *next_ptr = Q_PTR(next); - uint64_t next_count = Q_COUNT(next); - - if (tail == queue->tail) { - fprintf(stderr, "tail == queue->tail\n"); - if (next_ptr == NULL) { - fprintf(stderr, "next_ptr == NULL\n"); - if (__sync_bool_compare_and_swap(&tail_ptr->next, - next, - Q_SET_COUNT(node, next_count+1))) - fprintf(stderr, "CAS(tail_ptr->next, next, (node, next_count+1)) -> true\n"); - break; - } else { - __sync_bool_compare_and_swap(&queue->tail, - tail, - Q_SET_COUNT(next_ptr, next_count+1)); - fprintf(stderr, "CAS(queue->tail, tail, (next_ptr, next_count+1))\n"); - } - } - } - - cq_node_t *node_with_count = Q_SET_COUNT(node, tail_count+1); - int ret = __sync_bool_compare_and_swap(&queue->tail, - tail, - node_with_count); - fprintf(stderr, "CAS(queue->tail, tail, %lu) -> %d\n", node_with_count, ret); -} - - -int dequeue(cq_queue_t *queue, ErlNifPid **pid) -{ - fprintf(stderr, "dequeue\n"); - cq_node_t *head, *head_ptr, *tail, *tail_ptr, *next, *next_ptr; - - while (1) { - head = queue->head; - head_ptr = Q_PTR(head); - tail = queue->tail; - tail_ptr = Q_PTR(tail); - next = head->next; - next_ptr = Q_PTR(next); - fprintf(stderr, "head %lu, tail %lu, next %lu\n", head, tail, next); - - if (head == queue->head) { - if (head_ptr == tail_ptr) { - if (next_ptr == NULL) { - return 0; /* Queue is empty */ - } - fprintf(stderr, "CAS(queue->tail, tail, (next_ptr, tail+1))\n"); - __sync_bool_compare_and_swap(&queue->tail, - tail, - Q_SET_COUNT(next_ptr, Q_COUNT(tail)+1)); - } else { - fprintf(stderr, 
"next->value %lu\n", next_ptr->value); - *pid = next_ptr->value; - fprintf(stderr, "CAS(queue->head, head, (next_ptr, head+1))\n"); - if (__sync_bool_compare_and_swap(&queue->head, - head, - Q_SET_COUNT(next_ptr, Q_COUNT(head)+1))) - break; - } - } - } - // free pid - //enif_free(Q_PTR(head)); - return 1; -} - - - - -int load(ErlNifEnv* env, void** priv_data, ERL_NIF_TERM load_info) { - /* Initialize global array mapping id to cq_t ptr */ - QUEUES = (cq_t **) calloc(8, sizeof(cq_t **)); - if (QUEUES == NULL) - return -1; - - - ErlNifResourceFlags flags = (ErlNifResourceFlags)(ERL_NIF_RT_CREATE | ERL_NIF_RT_TAKEOVER); - CQ_RESOURCE = enif_open_resource_type(env, "cq", "cq", - &free_resource, flags, NULL); - - if (CQ_RESOURCE == NULL) - return -1; - - return 0; -} - - -static ErlNifFunc nif_funcs[] = { - {"new" , 3, queue_new}, - {"free" , 1, queue_free}, - {"push" , 2, queue_push}, - {"async_pop", 1, queue_async_pop}, - {"debug" , 1, queue_debug}, - {"debug_poppers", 1, queue_debug_poppers}, - {"print_bits", 0, print_bits} -}; - -ERL_NIF_INIT(cq, nif_funcs, load, NULL, NULL, NULL); diff --git a/c_src/cq/cq_nif.h b/c_src/cq/cq_nif.h deleted file mode 100644 index 75f8891..0000000 --- a/c_src/cq/cq_nif.h +++ /dev/null @@ -1,71 +0,0 @@ -#include -#include "erl_nif.h" - - -#define CACHE_LINE_SIZE 64 - -#define SLOT_INDEX(__index, __size) __index & (__size - 1) - -#define Q_MASK 3L -#define Q_PTR(__ptr) (cq_node_t *) (((uint64_t)__ptr) & (~Q_MASK)) -#define Q_COUNT(__ptr) ((uint64_t) __ptr & Q_MASK) -#define Q_SET_COUNT(__ptr, __val) (cq_node_t *) ((uint64_t) __ptr | (__val & Q_MASK)) - - -#define STATE_EMPTY 0 -#define STATE_WRITE 1 -#define STATE_READ 2 -#define STATE_FULL 3 - - -ErlNifResourceType* CQ_RESOURCE; - -typedef struct cq_node cq_node_t; - -struct cq_node { - ErlNifEnv *env; - //ERL_NIF_TERM term; - ErlNifPid *value; - cq_node_t *next; -}; - - - -typedef struct cq_queue { - cq_node_t *head; - cq_node_t *tail; -} cq_queue_t; - - -// TODO: Add padding between the fields -typedef struct cq { - uint32_t id; - uint64_t queue_size; - uint64_t overflow_size; - uint64_t head; - uint64_t tail; - - uint8_t *slots_states; - ERL_NIF_TERM *slots_terms; - ErlNifEnv **slots_envs; - - cq_queue_t *push_queue; - cq_queue_t *pop_queue; - - uint8_t *overflow_states; - ERL_NIF_TERM *overflow_terms; - ErlNifEnv **overflow_envs; - -} cq_t; - -cq_t **QUEUES = NULL; /* Initialized on nif load */ - - -ERL_NIF_TERM mk_atom(ErlNifEnv* env, const char* atom); -ERL_NIF_TERM mk_error(ErlNifEnv* env, const char* msg); -int load(ErlNifEnv* env, void** priv_data, ERL_NIF_TERM load_info); -void free_resource(ErlNifEnv*, void*); - - -cq_queue_t* new_queue(void); -void enqueue(cq_queue_t *q, ErlNifPid *pid); diff --git a/c_src/cq1/cq_nif.c b/c_src/cq1/cq_nif.c deleted file mode 100644 index 2f26a20..0000000 --- a/c_src/cq1/cq_nif.c +++ /dev/null @@ -1,564 +0,0 @@ -#include -#include - -#include "erl_nif.h" -#include "cq_nif.h" - - -/* #ifndef ERL_NIF_DIRTY_SCHEDULER_SUPPORT -# error Requires dirty schedulers -#endif */ - - - - - -ERL_NIF_TERM -mk_atom(ErlNifEnv* env, const char* atom) -{ - ERL_NIF_TERM ret; - - if(!enif_make_existing_atom(env, atom, &ret, ERL_NIF_LATIN1)) - return enif_make_atom(env, atom); - - return ret; -} - -ERL_NIF_TERM -mk_error(ErlNifEnv* env, const char* mesg) -{ - return enif_make_tuple2(env, mk_atom(env, "error"), mk_atom(env, mesg)); -} - - -static ERL_NIF_TERM -queue_new(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) -{ - cq_t *q = enif_alloc_resource(CQ_RESOURCE, sizeof(cq_t)); - if 
(q == NULL) - return mk_error(env, "priv_alloc_error"); - - ERL_NIF_TERM ret = enif_make_resource(env, q); - /* enif_release_resource(ret); */ - - uint32_t queue_id = 0; - uint32_t queue_size = 0; - uint32_t overflow_size = 0; - - if (!enif_get_uint(env, argv[0], &queue_id) || - !enif_get_uint(env, argv[1], &queue_size) || - !enif_get_uint(env, argv[2], &overflow_size)) - return mk_error(env, "badarg"); - - if (queue_id > 8) - return mk_error(env, "bad_queue_id"); - - /* TODO: Check that queue_size is power of 2 */ - - if (QUEUES[queue_id] != NULL) - return mk_error(env, "queue_id_already_exists"); - - q->id = queue_id; - q->queue_size = queue_size; - q->overflow_size = overflow_size; - q->tail = 0; - q->head = 0; - q->slots_states = calloc(q->queue_size, CACHE_LINE_SIZE); - q->slots_terms = calloc(q->queue_size, CACHE_LINE_SIZE); - q->slots_envs = calloc(q->queue_size, CACHE_LINE_SIZE); - q->overflow_terms = calloc(q->overflow_size, CACHE_LINE_SIZE); - q->overflow_envs = calloc(q->queue_size, CACHE_LINE_SIZE); - - q->push_queue = new_queue(); - q->pop_queue = new_queue(); - - /* TODO: Check calloc return */ - - - for (int i = 0; i < q->queue_size; i++) { - ErlNifEnv *slot_env = enif_alloc_env(); - - q->slots_envs[i*CACHE_LINE_SIZE] = slot_env; - //q->overflow_envs[i*CACHE_LINE_SIZE] = (ErlNifEnv *) enif_alloc_env(); - } - - QUEUES[q->id] = q; - - return enif_make_tuple2(env, mk_atom(env, "ok"), ret); -} - - -static ERL_NIF_TERM -queue_free(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) -{ - uint32_t queue_id = 0; - - if (!enif_get_uint(env, argv[0], &queue_id)) - return mk_error(env, "badarg"); - - if (queue_id > 8) - return mk_error(env, "badarg"); - - cq_t *q = QUEUES[queue_id]; - if (q == NULL) - return mk_error(env, "bad_queue_id"); - - - /* TODO: Free all the things! */ - QUEUES[queue_id] = NULL; - - return enif_make_atom(env, "ok"); - -} - -/* Push to the head of the queue. */ -static ERL_NIF_TERM -queue_push(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) -{ - uint32_t queue_id = 0; - - if (!enif_get_uint(env, argv[0], &queue_id)) - return mk_error(env, "badarg"); - - if (queue_id > 8) - return mk_error(env, "badarg"); - - /* Load the queue */ - cq_t *q = QUEUES[queue_id]; - if (q == NULL) - return mk_error(env, "bad_queue_id"); - - if (q->id != queue_id) - return mk_error(env, "not_identical_queue_id"); - - - for (int i = 0; i < q->queue_size; i++) { - fprintf(stderr, "queue slot %d, index %d, state %d\n", - i, i*CACHE_LINE_SIZE, q->slots_states[i*CACHE_LINE_SIZE]); - } - - /* If there's consumers waiting, the queue must be empty and we - should directly pick a consumer to notify. */ - - ErlNifPid *waiting_consumer; - int dequeue_ret = dequeue(q->pop_queue, &waiting_consumer); - if (dequeue_ret) { - ErlNifEnv *msg_env = enif_alloc_env(); - ERL_NIF_TERM copy = enif_make_copy(msg_env, argv[1]); - ERL_NIF_TERM tuple = enif_make_tuple2(msg_env, mk_atom(env, "pop"), copy); - - if (enif_send(env, waiting_consumer, msg_env, tuple)) { - enif_free_env(msg_env); - return mk_atom(env, "ok"); - } else { - return mk_error(env, "notify_failed"); - } - } - - - - /* Increment head and attempt to claim the slot by marking it as - busy. This ensures no other thread will attempt to modify this - slot. 
If we cannot lock it, another thread must have */ - - uint64_t head = __sync_add_and_fetch(&q->head, 1); - size_t size = q->queue_size; - - while (1) { - uint64_t index = SLOT_INDEX(head, size); - uint64_t ret = __sync_val_compare_and_swap(&q->slots_states[index], - STATE_EMPTY, - STATE_WRITE); - - switch (ret) { - - case STATE_EMPTY: - head = __sync_add_and_fetch(&q->head, 1); - - case STATE_WRITE: - /* We acquired the write lock, go ahead with the write. */ - break; - - case STATE_FULL: - /* We have caught up with the tail and the buffer is - full. Block the producer until a consumer reads the - item. */ - return mk_error(env, "full_not_implemented"); - } - } - - /* If head catches up with tail, the queue is full. Add to - overflow instead */ - - - /* Copy term to slot-specific temporary process env. */ - ERL_NIF_TERM copy = enif_make_copy(q->slots_envs[SLOT_INDEX(head, size)], argv[1]); - q->slots_terms[SLOT_INDEX(head, size)] = copy; - - __sync_synchronize(); /* Or compiler memory barrier? */ - - - /* TODO: Do we need to collect garbage? */ - - - /* Mark the slot ready to be consumed */ - if (__sync_bool_compare_and_swap(&q->slots_states[SLOT_INDEX(head, size)], - STATE_WRITE, - STATE_FULL)) { - return mk_atom(env, "ok"); - } else { - return mk_error(env, "could_not_update_slots_after_insert"); - } - -} - - - -static ERL_NIF_TERM -queue_async_pop(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) -{ - /* Load queue */ - - uint32_t queue_id = 0; - - if (!enif_get_uint(env, argv[0], &queue_id)) - return mk_error(env, "badarg"); - - if (queue_id > 8) - return mk_error(env, "badarg"); - - cq_t *q = QUEUES[queue_id]; - if (q == NULL) - return mk_error(env, "bad_queue_id"); - - if (q->id != queue_id) - return mk_error(env, "not_identical_queue_id"); - - uint64_t qsize = q->queue_size; - uint64_t tail = q->tail; - uint64_t num_busy = 0; - - /* Walk the buffer starting the tail position until we are either - able to consume a term or find an empty slot. */ - while (1) { - uint64_t index = SLOT_INDEX(tail, qsize); - uint64_t ret = __sync_val_compare_and_swap(&q->slots_states[index], - STATE_FULL, - STATE_READ); - - if (ret == STATE_READ) { - /* We were able to mark the term as read in progress. We - now have an exclusive lock. */ - break; - - } else if (ret == STATE_WRITE) { - /* We found an item with a write in progress. If that - thread progresses, it will eventually mark the slot as - full. We can spin until that happens. - - This can take an arbitrary amount of time and multiple - reading threads will compete for the same slot. - - Instead we add the caller to the queue of blocking - consumers. When the next producer comes it will "help" - this thread by calling enif_send on the current - in-progress term *and* handle it's own terms. If - there's no new push to the queue, this will block - forever. */ - return mk_atom(env, "write_in_progress_not_implemented"); - - } else if (ret == STATE_EMPTY) { - /* We found an empty item. Queue must be empty. Add - calling Erlang consumer process to queue of waiting - processes. When the next producer comes along, it first - checks the waiting consumers and calls enif_send - instead of writing to the slots. */ - - ErlNifPid *pid = enif_alloc(sizeof(ErlNifPid)); - pid = enif_self(env, pid); - enqueue(q->pop_queue, pid); - - return mk_atom(env, "wait_for_msg"); - - } else { - tail = __sync_add_and_fetch(&q->tail, 1); - } - } - - - /* Copy term into calling process env. The NIF env can now be - gargbage collected. 
*/ - ERL_NIF_TERM copy = enif_make_copy(env, q->slots_terms[SLOT_INDEX(tail, qsize)]); - - - /* Mark the slot as free. Note: We don't increment the tail - position here, as another thread also walking the buffer might - have incremented it multiple times */ - q->slots_terms[SLOT_INDEX(tail, qsize)] = 0; - if (__sync_bool_compare_and_swap(&q->slots_states[SLOT_INDEX(tail, qsize)], - STATE_READ, - STATE_EMPTY)) { - return enif_make_tuple2(env, mk_atom(env, "ok"), copy); - } else { - return mk_error(env, "could_not_update_slots_after_pop"); - } -} - - -static ERL_NIF_TERM -queue_debug(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) -{ - - uint32_t queue_id = 0; - - if (!enif_get_uint(env, argv[0], &queue_id)) - return mk_error(env, "badarg"); - - if (queue_id > 8) - return mk_error(env, "badarg"); - - cq_t *q = QUEUES[queue_id]; - if (q == NULL) - return mk_error(env, "bad_queue_id"); - - - - ERL_NIF_TERM *slots_states = enif_alloc(sizeof(ERL_NIF_TERM) * q->queue_size); - ERL_NIF_TERM *slots_terms = enif_alloc(sizeof(ERL_NIF_TERM) * q->queue_size); - for (int i = 0; i < q->queue_size; i++) { - slots_states[i] = enif_make_int(env, q->slots_states[i * CACHE_LINE_SIZE]); - - if (q->slots_terms[i * CACHE_LINE_SIZE] == 0) { - slots_terms[i] = mk_atom(env, "null"); - } else { - slots_terms[i] = enif_make_copy(env, q->slots_terms[i * CACHE_LINE_SIZE]); - } - } - return enif_make_tuple4(env, - enif_make_uint64(env, q->tail), - enif_make_uint64(env, q->head), - enif_make_list_from_array(env, slots_states, q->queue_size), - enif_make_list_from_array(env, slots_terms, q->queue_size)); -} - -static ERL_NIF_TERM -queue_debug_poppers(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) -{ - uint32_t queue_id = 0; - - if (!enif_get_uint(env, argv[0], &queue_id)) - return mk_error(env, "badarg"); - - if (queue_id > 8) - return mk_error(env, "badarg"); - - cq_t *q = QUEUES[queue_id]; - if (q == NULL) - return mk_error(env, "bad_queue_id"); - - - uint64_t pop_queue_size = 0; - cq_node_t *node = q->pop_queue->head; - if (node->value == NULL) { - node = node->next; - node = Q_PTR(node); - } - - while (node != NULL) { - pop_queue_size++; - node = node->next; - node = Q_PTR(node); - } - - ERL_NIF_TERM *pop_queue_pids = enif_alloc(sizeof(ERL_NIF_TERM) * pop_queue_size); - - node = q->pop_queue->head; - node = Q_PTR(node); - if (node->value == NULL) { - node = node->next; - node = Q_PTR(node); - } - - uint64_t i = 0; - while (node != NULL) { - if (node->value == 0) { - pop_queue_pids[i] = mk_atom(env, "null"); - } - else { - pop_queue_pids[i] = enif_make_pid(env, node->value); - } - - i++; - node = node->next; - node = Q_PTR(node); - } - - ERL_NIF_TERM list = enif_make_list_from_array(env, pop_queue_pids, pop_queue_size); - enif_free(pop_queue_pids); - - return list; -} - - - -static ERL_NIF_TERM -print_bits(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) -{ - - uint64_t *p1 = malloc(8); - *p1 = 0; - - - for (int bit = 63; bit >= 0; bit--) { - uint64_t power = 1 << bit; - //uint64_t byte = *p1; - uint64_t byte = p1; - fprintf(stderr, "%d", (byte & power) >> bit); - } - fprintf(stderr, "\n"); - - //enif_free(p1); - - return mk_atom(env, "ok"); -} - -void free_resource(ErlNifEnv* env, void* arg) -{ - //cq_t *cq = (cq_t *) arg; - - fprintf(stderr, "free_resource\n"); -} - - -cq_queue_t * new_queue() -{ - cq_queue_t *queue = enif_alloc(sizeof(cq_queue_t)); - cq_node_t *node = enif_alloc(sizeof(cq_node_t)); - node->next = NULL; - //node->env = NULL; - node->value = NULL; - queue->head = node; - queue->tail = 
node; - - return queue; -} - - - -void enqueue(cq_queue_t *queue, ErlNifPid *pid) -{ - cq_node_t *node = enif_alloc(sizeof(cq_node_t)); - //node->env = enif_alloc_env(); - //node->term = enif_make_copy(node->env, term); - node->value = pid; - node->next = NULL; - fprintf(stderr, "node %lu\n", node); - - cq_node_t *tail = NULL; - uint64_t tail_count = 0; - while (1) { - tail = queue->tail; - cq_node_t *tail_ptr = Q_PTR(tail); - tail_count = Q_COUNT(tail); - - cq_node_t *next = tail->next; - cq_node_t *next_ptr = Q_PTR(next); - uint64_t next_count = Q_COUNT(next); - - if (tail == queue->tail) { - fprintf(stderr, "tail == queue->tail\n"); - if (next_ptr == NULL) { - fprintf(stderr, "next_ptr == NULL\n"); - if (__sync_bool_compare_and_swap(&tail_ptr->next, - next, - Q_SET_COUNT(node, next_count+1))) - fprintf(stderr, "CAS(tail_ptr->next, next, (node, next_count+1)) -> true\n"); - break; - } else { - __sync_bool_compare_and_swap(&queue->tail, - tail, - Q_SET_COUNT(next_ptr, next_count+1)); - fprintf(stderr, "CAS(queue->tail, tail, (next_ptr, next_count+1))\n"); - } - } - } - - cq_node_t *node_with_count = Q_SET_COUNT(node, tail_count+1); - int ret = __sync_bool_compare_and_swap(&queue->tail, - tail, - node_with_count); - fprintf(stderr, "CAS(queue->tail, tail, %lu) -> %d\n", node_with_count, ret); -} - - -int dequeue(cq_queue_t *queue, ErlNifPid **pid) -{ - fprintf(stderr, "dequeue\n"); - cq_node_t *head, *head_ptr, *tail, *tail_ptr, *next, *next_ptr; - - while (1) { - head = queue->head; - head_ptr = Q_PTR(head); - tail = queue->tail; - tail_ptr = Q_PTR(tail); - next = head->next; - next_ptr = Q_PTR(next); - fprintf(stderr, "head %lu, tail %lu, next %lu\n", head, tail, next); - - if (head == queue->head) { - if (head_ptr == tail_ptr) { - if (next_ptr == NULL) { - return 0; /* Queue is empty */ - } - fprintf(stderr, "CAS(queue->tail, tail, (next_ptr, tail+1))\n"); - __sync_bool_compare_and_swap(&queue->tail, - tail, - Q_SET_COUNT(next_ptr, Q_COUNT(tail)+1)); - } else { - fprintf(stderr, "next->value %lu\n", next_ptr->value); - *pid = next_ptr->value; - fprintf(stderr, "CAS(queue->head, head, (next_ptr, head+1))\n"); - if (__sync_bool_compare_and_swap(&queue->head, - head, - Q_SET_COUNT(next_ptr, Q_COUNT(head)+1))) - break; - } - } - } - // free pid - //enif_free(Q_PTR(head)); - return 1; -} - - - - -int load(ErlNifEnv* env, void** priv_data, ERL_NIF_TERM load_info) { - /* Initialize global array mapping id to cq_t ptr */ - QUEUES = (cq_t **) calloc(8, sizeof(cq_t **)); - if (QUEUES == NULL) - return -1; - - - ErlNifResourceFlags flags = (ErlNifResourceFlags)(ERL_NIF_RT_CREATE | ERL_NIF_RT_TAKEOVER); - CQ_RESOURCE = enif_open_resource_type(env, "cq", "cq", - &free_resource, flags, NULL); - - if (CQ_RESOURCE == NULL) - return -1; - - return 0; -} - - -static ErlNifFunc nif_funcs[] = { - {"new" , 3, queue_new}, - {"free" , 1, queue_free}, - {"push" , 2, queue_push}, - {"async_pop", 1, queue_async_pop}, - {"debug" , 1, queue_debug}, - {"debug_poppers", 1, queue_debug_poppers}, - {"print_bits", 0, print_bits} -}; - -ERL_NIF_INIT(cq, nif_funcs, load, NULL, NULL, NULL); diff --git a/c_src/cq1/cq_nif.h b/c_src/cq1/cq_nif.h deleted file mode 100644 index 75f8891..0000000 --- a/c_src/cq1/cq_nif.h +++ /dev/null @@ -1,71 +0,0 @@ -#include -#include "erl_nif.h" - - -#define CACHE_LINE_SIZE 64 - -#define SLOT_INDEX(__index, __size) __index & (__size - 1) - -#define Q_MASK 3L -#define Q_PTR(__ptr) (cq_node_t *) (((uint64_t)__ptr) & (~Q_MASK)) -#define Q_COUNT(__ptr) ((uint64_t) __ptr & Q_MASK) -#define 
Q_SET_COUNT(__ptr, __val) (cq_node_t *) ((uint64_t) __ptr | (__val & Q_MASK)) - - -#define STATE_EMPTY 0 -#define STATE_WRITE 1 -#define STATE_READ 2 -#define STATE_FULL 3 - - -ErlNifResourceType* CQ_RESOURCE; - -typedef struct cq_node cq_node_t; - -struct cq_node { - ErlNifEnv *env; - //ERL_NIF_TERM term; - ErlNifPid *value; - cq_node_t *next; -}; - - - -typedef struct cq_queue { - cq_node_t *head; - cq_node_t *tail; -} cq_queue_t; - - -// TODO: Add padding between the fields -typedef struct cq { - uint32_t id; - uint64_t queue_size; - uint64_t overflow_size; - uint64_t head; - uint64_t tail; - - uint8_t *slots_states; - ERL_NIF_TERM *slots_terms; - ErlNifEnv **slots_envs; - - cq_queue_t *push_queue; - cq_queue_t *pop_queue; - - uint8_t *overflow_states; - ERL_NIF_TERM *overflow_terms; - ErlNifEnv **overflow_envs; - -} cq_t; - -cq_t **QUEUES = NULL; /* Initialized on nif load */ - - -ERL_NIF_TERM mk_atom(ErlNifEnv* env, const char* atom); -ERL_NIF_TERM mk_error(ErlNifEnv* env, const char* msg); -int load(ErlNifEnv* env, void** priv_data, ERL_NIF_TERM load_info); -void free_resource(ErlNifEnv*, void*); - - -cq_queue_t* new_queue(void); -void enqueue(cq_queue_t *q, ErlNifPid *pid); diff --git a/c_src/cq1/rebar.config b/c_src/cq1/rebar.config deleted file mode 100644 index 6fd2f2c..0000000 --- a/c_src/cq1/rebar.config +++ /dev/null @@ -1,26 +0,0 @@ -{port_specs, [ - {"../../priv/cq1.so", [ - "*.c", - "*.cc" - ]} -]}. - -%% {port_env, [ -%% {"(linux|solaris|freebsd|netbsd|openbsd|dragonfly|darwin|gnu)", -%% "CFLAGS", "$CFLAGS -Ic_src/ -g -Wall -flto -Werror -O3"}, -%% {"(linux|solaris|freebsd|netbsd|openbsd|dragonfly|darwin|gnu)", -%% "CXXFLAGS", "$CXXFLAGS -Ic_src/ -g -Wall -flto -Werror -O3"}, -%% -%% {"(linux|solaris|freebsd|netbsd|openbsd|dragonfly|darwin|gnu)", -%% "LDFLAGS", "$LDFLAGS -flto -lstdc++"}, -%% -%% %% OS X Leopard flags for 64-bit -%% {"darwin9.*-64$", "CXXFLAGS", "-m64"}, -%% {"darwin9.*-64$", "LDFLAGS", "-arch x86_64"}, -%% -%% %% OS X Snow Leopard flags for 32-bit -%% {"darwin10.*-32$", "CXXFLAGS", "-m32"}, -%% {"darwin10.*-32$", "LDFLAGS", "-arch i386"}, -%% -%% {"win32", "CXXFLAGS", "$CXXFLAGS /O2 /DNDEBUG"} -%% ]}. 
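
The cq/cq1/cq2 NIFs deleted above implement a bounded multi-producer/multi-consumer ring buffer: each slot carries a state that moves EMPTY -> WRITE -> FULL on push and FULL -> READ -> EMPTY on pop, with every transition claimed by a compare-and-swap, plus a Michael-Scott style linked list of waiting consumer pids. The following is only a minimal sketch of that slot protocol, with illustrative names (Ring, ring_push, ring_pop), C++11 std::atomic in place of the GCC __sync builtins, and none of the original's retry, overflow or waiting-consumer handling:

// Sketch of the four-state slot protocol from the deleted cq_nif.c.
// Ring/ring_push/ring_pop and the fixed SIZE are illustrative names only;
// the original also retries on contention and has an overflow area and a
// queue of blocked consumers, all omitted here.
#include <atomic>
#include <cstdint>
#include <cstdio>

enum SlotState { SLOT_EMPTY, SLOT_WRITE, SLOT_READ, SLOT_FULL };

constexpr std::size_t SIZE = 8;                       // must be a power of two
inline std::size_t slot_index(std::uint64_t i) { return i & (SIZE - 1); }

struct Ring {
    std::atomic<int> state[SIZE];
    long term[SIZE];                                  // stands in for the copied ERL_NIF_TERM
    std::atomic<std::uint64_t> head;                  // next slot to produce into
    std::atomic<std::uint64_t> tail;                  // next slot to consume from
    Ring() : term{}, head(0), tail(0) {
        for (auto& s : state) s.store(SLOT_EMPTY);
    }
};

// Claim the head slot (EMPTY -> WRITE), publish the value, mark it FULL.
bool ring_push(Ring& q, long v) {
    std::uint64_t h = q.head.fetch_add(1);
    int expected = SLOT_EMPTY;
    if (!q.state[slot_index(h)].compare_exchange_strong(expected, SLOT_WRITE))
        return false;                                 // slot busy: treated as "queue full" here
    q.term[slot_index(h)] = v;
    q.state[slot_index(h)].store(SLOT_FULL);
    return true;
}

// Claim the tail slot (FULL -> READ), take the value, mark it EMPTY again.
bool ring_pop(Ring& q, long& out) {
    std::uint64_t t = q.tail.fetch_add(1);
    int expected = SLOT_FULL;
    if (!q.state[slot_index(t)].compare_exchange_strong(expected, SLOT_READ))
        return false;                                 // nothing ready at this slot
    out = q.term[slot_index(t)];
    q.state[slot_index(t)].store(SLOT_EMPTY);
    return true;
}

int main() {
    Ring q;
    for (long i = 1; i <= 4; ++i) ring_push(q, i);
    long v;
    while (ring_pop(q, v)) std::printf("popped %ld\n", v);
}

A failed compare-and-swap in this sketch simply reports failure, whereas the deleted code spins, parks the calling pid on pop_queue, or is meant to fall back to the overflow area; the sketch is only intended to make the four-state slot machine easier to read.
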
diff --git a/c_src/cq2/cq_nif.c b/c_src/cq2/cq_nif.c deleted file mode 100644 index 2f26a20..0000000 --- a/c_src/cq2/cq_nif.c +++ /dev/null @@ -1,564 +0,0 @@ -#include -#include - -#include "erl_nif.h" -#include "cq_nif.h" - - -/* #ifndef ERL_NIF_DIRTY_SCHEDULER_SUPPORT -# error Requires dirty schedulers -#endif */ - - - - - -ERL_NIF_TERM -mk_atom(ErlNifEnv* env, const char* atom) -{ - ERL_NIF_TERM ret; - - if(!enif_make_existing_atom(env, atom, &ret, ERL_NIF_LATIN1)) - return enif_make_atom(env, atom); - - return ret; -} - -ERL_NIF_TERM -mk_error(ErlNifEnv* env, const char* mesg) -{ - return enif_make_tuple2(env, mk_atom(env, "error"), mk_atom(env, mesg)); -} - - -static ERL_NIF_TERM -queue_new(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) -{ - cq_t *q = enif_alloc_resource(CQ_RESOURCE, sizeof(cq_t)); - if (q == NULL) - return mk_error(env, "priv_alloc_error"); - - ERL_NIF_TERM ret = enif_make_resource(env, q); - /* enif_release_resource(ret); */ - - uint32_t queue_id = 0; - uint32_t queue_size = 0; - uint32_t overflow_size = 0; - - if (!enif_get_uint(env, argv[0], &queue_id) || - !enif_get_uint(env, argv[1], &queue_size) || - !enif_get_uint(env, argv[2], &overflow_size)) - return mk_error(env, "badarg"); - - if (queue_id > 8) - return mk_error(env, "bad_queue_id"); - - /* TODO: Check that queue_size is power of 2 */ - - if (QUEUES[queue_id] != NULL) - return mk_error(env, "queue_id_already_exists"); - - q->id = queue_id; - q->queue_size = queue_size; - q->overflow_size = overflow_size; - q->tail = 0; - q->head = 0; - q->slots_states = calloc(q->queue_size, CACHE_LINE_SIZE); - q->slots_terms = calloc(q->queue_size, CACHE_LINE_SIZE); - q->slots_envs = calloc(q->queue_size, CACHE_LINE_SIZE); - q->overflow_terms = calloc(q->overflow_size, CACHE_LINE_SIZE); - q->overflow_envs = calloc(q->queue_size, CACHE_LINE_SIZE); - - q->push_queue = new_queue(); - q->pop_queue = new_queue(); - - /* TODO: Check calloc return */ - - - for (int i = 0; i < q->queue_size; i++) { - ErlNifEnv *slot_env = enif_alloc_env(); - - q->slots_envs[i*CACHE_LINE_SIZE] = slot_env; - //q->overflow_envs[i*CACHE_LINE_SIZE] = (ErlNifEnv *) enif_alloc_env(); - } - - QUEUES[q->id] = q; - - return enif_make_tuple2(env, mk_atom(env, "ok"), ret); -} - - -static ERL_NIF_TERM -queue_free(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) -{ - uint32_t queue_id = 0; - - if (!enif_get_uint(env, argv[0], &queue_id)) - return mk_error(env, "badarg"); - - if (queue_id > 8) - return mk_error(env, "badarg"); - - cq_t *q = QUEUES[queue_id]; - if (q == NULL) - return mk_error(env, "bad_queue_id"); - - - /* TODO: Free all the things! */ - QUEUES[queue_id] = NULL; - - return enif_make_atom(env, "ok"); - -} - -/* Push to the head of the queue. */ -static ERL_NIF_TERM -queue_push(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) -{ - uint32_t queue_id = 0; - - if (!enif_get_uint(env, argv[0], &queue_id)) - return mk_error(env, "badarg"); - - if (queue_id > 8) - return mk_error(env, "badarg"); - - /* Load the queue */ - cq_t *q = QUEUES[queue_id]; - if (q == NULL) - return mk_error(env, "bad_queue_id"); - - if (q->id != queue_id) - return mk_error(env, "not_identical_queue_id"); - - - for (int i = 0; i < q->queue_size; i++) { - fprintf(stderr, "queue slot %d, index %d, state %d\n", - i, i*CACHE_LINE_SIZE, q->slots_states[i*CACHE_LINE_SIZE]); - } - - /* If there's consumers waiting, the queue must be empty and we - should directly pick a consumer to notify. 
*/ - - ErlNifPid *waiting_consumer; - int dequeue_ret = dequeue(q->pop_queue, &waiting_consumer); - if (dequeue_ret) { - ErlNifEnv *msg_env = enif_alloc_env(); - ERL_NIF_TERM copy = enif_make_copy(msg_env, argv[1]); - ERL_NIF_TERM tuple = enif_make_tuple2(msg_env, mk_atom(env, "pop"), copy); - - if (enif_send(env, waiting_consumer, msg_env, tuple)) { - enif_free_env(msg_env); - return mk_atom(env, "ok"); - } else { - return mk_error(env, "notify_failed"); - } - } - - - - /* Increment head and attempt to claim the slot by marking it as - busy. This ensures no other thread will attempt to modify this - slot. If we cannot lock it, another thread must have */ - - uint64_t head = __sync_add_and_fetch(&q->head, 1); - size_t size = q->queue_size; - - while (1) { - uint64_t index = SLOT_INDEX(head, size); - uint64_t ret = __sync_val_compare_and_swap(&q->slots_states[index], - STATE_EMPTY, - STATE_WRITE); - - switch (ret) { - - case STATE_EMPTY: - head = __sync_add_and_fetch(&q->head, 1); - - case STATE_WRITE: - /* We acquired the write lock, go ahead with the write. */ - break; - - case STATE_FULL: - /* We have caught up with the tail and the buffer is - full. Block the producer until a consumer reads the - item. */ - return mk_error(env, "full_not_implemented"); - } - } - - /* If head catches up with tail, the queue is full. Add to - overflow instead */ - - - /* Copy term to slot-specific temporary process env. */ - ERL_NIF_TERM copy = enif_make_copy(q->slots_envs[SLOT_INDEX(head, size)], argv[1]); - q->slots_terms[SLOT_INDEX(head, size)] = copy; - - __sync_synchronize(); /* Or compiler memory barrier? */ - - - /* TODO: Do we need to collect garbage? */ - - - /* Mark the slot ready to be consumed */ - if (__sync_bool_compare_and_swap(&q->slots_states[SLOT_INDEX(head, size)], - STATE_WRITE, - STATE_FULL)) { - return mk_atom(env, "ok"); - } else { - return mk_error(env, "could_not_update_slots_after_insert"); - } - -} - - - -static ERL_NIF_TERM -queue_async_pop(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) -{ - /* Load queue */ - - uint32_t queue_id = 0; - - if (!enif_get_uint(env, argv[0], &queue_id)) - return mk_error(env, "badarg"); - - if (queue_id > 8) - return mk_error(env, "badarg"); - - cq_t *q = QUEUES[queue_id]; - if (q == NULL) - return mk_error(env, "bad_queue_id"); - - if (q->id != queue_id) - return mk_error(env, "not_identical_queue_id"); - - uint64_t qsize = q->queue_size; - uint64_t tail = q->tail; - uint64_t num_busy = 0; - - /* Walk the buffer starting the tail position until we are either - able to consume a term or find an empty slot. */ - while (1) { - uint64_t index = SLOT_INDEX(tail, qsize); - uint64_t ret = __sync_val_compare_and_swap(&q->slots_states[index], - STATE_FULL, - STATE_READ); - - if (ret == STATE_READ) { - /* We were able to mark the term as read in progress. We - now have an exclusive lock. */ - break; - - } else if (ret == STATE_WRITE) { - /* We found an item with a write in progress. If that - thread progresses, it will eventually mark the slot as - full. We can spin until that happens. - - This can take an arbitrary amount of time and multiple - reading threads will compete for the same slot. - - Instead we add the caller to the queue of blocking - consumers. When the next producer comes it will "help" - this thread by calling enif_send on the current - in-progress term *and* handle it's own terms. If - there's no new push to the queue, this will block - forever. 
*/ - return mk_atom(env, "write_in_progress_not_implemented"); - - } else if (ret == STATE_EMPTY) { - /* We found an empty item. Queue must be empty. Add - calling Erlang consumer process to queue of waiting - processes. When the next producer comes along, it first - checks the waiting consumers and calls enif_send - instead of writing to the slots. */ - - ErlNifPid *pid = enif_alloc(sizeof(ErlNifPid)); - pid = enif_self(env, pid); - enqueue(q->pop_queue, pid); - - return mk_atom(env, "wait_for_msg"); - - } else { - tail = __sync_add_and_fetch(&q->tail, 1); - } - } - - - /* Copy term into calling process env. The NIF env can now be - gargbage collected. */ - ERL_NIF_TERM copy = enif_make_copy(env, q->slots_terms[SLOT_INDEX(tail, qsize)]); - - - /* Mark the slot as free. Note: We don't increment the tail - position here, as another thread also walking the buffer might - have incremented it multiple times */ - q->slots_terms[SLOT_INDEX(tail, qsize)] = 0; - if (__sync_bool_compare_and_swap(&q->slots_states[SLOT_INDEX(tail, qsize)], - STATE_READ, - STATE_EMPTY)) { - return enif_make_tuple2(env, mk_atom(env, "ok"), copy); - } else { - return mk_error(env, "could_not_update_slots_after_pop"); - } -} - - -static ERL_NIF_TERM -queue_debug(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) -{ - - uint32_t queue_id = 0; - - if (!enif_get_uint(env, argv[0], &queue_id)) - return mk_error(env, "badarg"); - - if (queue_id > 8) - return mk_error(env, "badarg"); - - cq_t *q = QUEUES[queue_id]; - if (q == NULL) - return mk_error(env, "bad_queue_id"); - - - - ERL_NIF_TERM *slots_states = enif_alloc(sizeof(ERL_NIF_TERM) * q->queue_size); - ERL_NIF_TERM *slots_terms = enif_alloc(sizeof(ERL_NIF_TERM) * q->queue_size); - for (int i = 0; i < q->queue_size; i++) { - slots_states[i] = enif_make_int(env, q->slots_states[i * CACHE_LINE_SIZE]); - - if (q->slots_terms[i * CACHE_LINE_SIZE] == 0) { - slots_terms[i] = mk_atom(env, "null"); - } else { - slots_terms[i] = enif_make_copy(env, q->slots_terms[i * CACHE_LINE_SIZE]); - } - } - return enif_make_tuple4(env, - enif_make_uint64(env, q->tail), - enif_make_uint64(env, q->head), - enif_make_list_from_array(env, slots_states, q->queue_size), - enif_make_list_from_array(env, slots_terms, q->queue_size)); -} - -static ERL_NIF_TERM -queue_debug_poppers(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) -{ - uint32_t queue_id = 0; - - if (!enif_get_uint(env, argv[0], &queue_id)) - return mk_error(env, "badarg"); - - if (queue_id > 8) - return mk_error(env, "badarg"); - - cq_t *q = QUEUES[queue_id]; - if (q == NULL) - return mk_error(env, "bad_queue_id"); - - - uint64_t pop_queue_size = 0; - cq_node_t *node = q->pop_queue->head; - if (node->value == NULL) { - node = node->next; - node = Q_PTR(node); - } - - while (node != NULL) { - pop_queue_size++; - node = node->next; - node = Q_PTR(node); - } - - ERL_NIF_TERM *pop_queue_pids = enif_alloc(sizeof(ERL_NIF_TERM) * pop_queue_size); - - node = q->pop_queue->head; - node = Q_PTR(node); - if (node->value == NULL) { - node = node->next; - node = Q_PTR(node); - } - - uint64_t i = 0; - while (node != NULL) { - if (node->value == 0) { - pop_queue_pids[i] = mk_atom(env, "null"); - } - else { - pop_queue_pids[i] = enif_make_pid(env, node->value); - } - - i++; - node = node->next; - node = Q_PTR(node); - } - - ERL_NIF_TERM list = enif_make_list_from_array(env, pop_queue_pids, pop_queue_size); - enif_free(pop_queue_pids); - - return list; -} - - - -static ERL_NIF_TERM -print_bits(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) 
-{ - - uint64_t *p1 = malloc(8); - *p1 = 0; - - - for (int bit = 63; bit >= 0; bit--) { - uint64_t power = 1 << bit; - //uint64_t byte = *p1; - uint64_t byte = p1; - fprintf(stderr, "%d", (byte & power) >> bit); - } - fprintf(stderr, "\n"); - - //enif_free(p1); - - return mk_atom(env, "ok"); -} - -void free_resource(ErlNifEnv* env, void* arg) -{ - //cq_t *cq = (cq_t *) arg; - - fprintf(stderr, "free_resource\n"); -} - - -cq_queue_t * new_queue() -{ - cq_queue_t *queue = enif_alloc(sizeof(cq_queue_t)); - cq_node_t *node = enif_alloc(sizeof(cq_node_t)); - node->next = NULL; - //node->env = NULL; - node->value = NULL; - queue->head = node; - queue->tail = node; - - return queue; -} - - - -void enqueue(cq_queue_t *queue, ErlNifPid *pid) -{ - cq_node_t *node = enif_alloc(sizeof(cq_node_t)); - //node->env = enif_alloc_env(); - //node->term = enif_make_copy(node->env, term); - node->value = pid; - node->next = NULL; - fprintf(stderr, "node %lu\n", node); - - cq_node_t *tail = NULL; - uint64_t tail_count = 0; - while (1) { - tail = queue->tail; - cq_node_t *tail_ptr = Q_PTR(tail); - tail_count = Q_COUNT(tail); - - cq_node_t *next = tail->next; - cq_node_t *next_ptr = Q_PTR(next); - uint64_t next_count = Q_COUNT(next); - - if (tail == queue->tail) { - fprintf(stderr, "tail == queue->tail\n"); - if (next_ptr == NULL) { - fprintf(stderr, "next_ptr == NULL\n"); - if (__sync_bool_compare_and_swap(&tail_ptr->next, - next, - Q_SET_COUNT(node, next_count+1))) - fprintf(stderr, "CAS(tail_ptr->next, next, (node, next_count+1)) -> true\n"); - break; - } else { - __sync_bool_compare_and_swap(&queue->tail, - tail, - Q_SET_COUNT(next_ptr, next_count+1)); - fprintf(stderr, "CAS(queue->tail, tail, (next_ptr, next_count+1))\n"); - } - } - } - - cq_node_t *node_with_count = Q_SET_COUNT(node, tail_count+1); - int ret = __sync_bool_compare_and_swap(&queue->tail, - tail, - node_with_count); - fprintf(stderr, "CAS(queue->tail, tail, %lu) -> %d\n", node_with_count, ret); -} - - -int dequeue(cq_queue_t *queue, ErlNifPid **pid) -{ - fprintf(stderr, "dequeue\n"); - cq_node_t *head, *head_ptr, *tail, *tail_ptr, *next, *next_ptr; - - while (1) { - head = queue->head; - head_ptr = Q_PTR(head); - tail = queue->tail; - tail_ptr = Q_PTR(tail); - next = head->next; - next_ptr = Q_PTR(next); - fprintf(stderr, "head %lu, tail %lu, next %lu\n", head, tail, next); - - if (head == queue->head) { - if (head_ptr == tail_ptr) { - if (next_ptr == NULL) { - return 0; /* Queue is empty */ - } - fprintf(stderr, "CAS(queue->tail, tail, (next_ptr, tail+1))\n"); - __sync_bool_compare_and_swap(&queue->tail, - tail, - Q_SET_COUNT(next_ptr, Q_COUNT(tail)+1)); - } else { - fprintf(stderr, "next->value %lu\n", next_ptr->value); - *pid = next_ptr->value; - fprintf(stderr, "CAS(queue->head, head, (next_ptr, head+1))\n"); - if (__sync_bool_compare_and_swap(&queue->head, - head, - Q_SET_COUNT(next_ptr, Q_COUNT(head)+1))) - break; - } - } - } - // free pid - //enif_free(Q_PTR(head)); - return 1; -} - - - - -int load(ErlNifEnv* env, void** priv_data, ERL_NIF_TERM load_info) { - /* Initialize global array mapping id to cq_t ptr */ - QUEUES = (cq_t **) calloc(8, sizeof(cq_t **)); - if (QUEUES == NULL) - return -1; - - - ErlNifResourceFlags flags = (ErlNifResourceFlags)(ERL_NIF_RT_CREATE | ERL_NIF_RT_TAKEOVER); - CQ_RESOURCE = enif_open_resource_type(env, "cq", "cq", - &free_resource, flags, NULL); - - if (CQ_RESOURCE == NULL) - return -1; - - return 0; -} - - -static ErlNifFunc nif_funcs[] = { - {"new" , 3, queue_new}, - {"free" , 1, queue_free}, - {"push" 
, 2, queue_push}, - {"async_pop", 1, queue_async_pop}, - {"debug" , 1, queue_debug}, - {"debug_poppers", 1, queue_debug_poppers}, - {"print_bits", 0, print_bits} -}; - -ERL_NIF_INIT(cq, nif_funcs, load, NULL, NULL, NULL); diff --git a/c_src/cq2/cq_nif.h b/c_src/cq2/cq_nif.h deleted file mode 100644 index 75f8891..0000000 --- a/c_src/cq2/cq_nif.h +++ /dev/null @@ -1,71 +0,0 @@ -#include -#include "erl_nif.h" - - -#define CACHE_LINE_SIZE 64 - -#define SLOT_INDEX(__index, __size) __index & (__size - 1) - -#define Q_MASK 3L -#define Q_PTR(__ptr) (cq_node_t *) (((uint64_t)__ptr) & (~Q_MASK)) -#define Q_COUNT(__ptr) ((uint64_t) __ptr & Q_MASK) -#define Q_SET_COUNT(__ptr, __val) (cq_node_t *) ((uint64_t) __ptr | (__val & Q_MASK)) - - -#define STATE_EMPTY 0 -#define STATE_WRITE 1 -#define STATE_READ 2 -#define STATE_FULL 3 - - -ErlNifResourceType* CQ_RESOURCE; - -typedef struct cq_node cq_node_t; - -struct cq_node { - ErlNifEnv *env; - //ERL_NIF_TERM term; - ErlNifPid *value; - cq_node_t *next; -}; - - - -typedef struct cq_queue { - cq_node_t *head; - cq_node_t *tail; -} cq_queue_t; - - -// TODO: Add padding between the fields -typedef struct cq { - uint32_t id; - uint64_t queue_size; - uint64_t overflow_size; - uint64_t head; - uint64_t tail; - - uint8_t *slots_states; - ERL_NIF_TERM *slots_terms; - ErlNifEnv **slots_envs; - - cq_queue_t *push_queue; - cq_queue_t *pop_queue; - - uint8_t *overflow_states; - ERL_NIF_TERM *overflow_terms; - ErlNifEnv **overflow_envs; - -} cq_t; - -cq_t **QUEUES = NULL; /* Initialized on nif load */ - - -ERL_NIF_TERM mk_atom(ErlNifEnv* env, const char* atom); -ERL_NIF_TERM mk_error(ErlNifEnv* env, const char* msg); -int load(ErlNifEnv* env, void** priv_data, ERL_NIF_TERM load_info); -void free_resource(ErlNifEnv*, void*); - - -cq_queue_t* new_queue(void); -void enqueue(cq_queue_t *q, ErlNifPid *pid); diff --git a/c_src/enlfq/Makefile b/c_src/enlfq/Makefile new file mode 100644 index 0000000..d85d904 --- /dev/null +++ b/c_src/enlfq/Makefile @@ -0,0 +1,80 @@ + +PROJECT = enlfq +CXXFLAGS = -std=c++11 -O2 -Wextra -Werror -Wno-missing-field-initializers -fno-rtti -fno-exceptions +LDLIBS = -lstdc++ + + +# Based on c_src.mk from erlang.mk by Loic Hoguin + +CURDIR := $(shell pwd) +BASEDIR := $(abspath $(CURDIR)/..) + +PROJECT ?= $(notdir $(BASEDIR)) +PROJECT := $(strip $(PROJECT)) + +ERTS_INCLUDE_DIR ?= $(shell erl -noshell -s init stop -eval "io:format(\"~ts/erts-~ts/include/\", [code:root_dir(), erlang:system_info(version)]).") +ERL_INTERFACE_INCLUDE_DIR ?= $(shell erl -noshell -s init stop -eval "io:format(\"~ts\", [code:lib_dir(erl_interface, include)]).") +ERL_INTERFACE_LIB_DIR ?= $(shell erl -noshell -s init stop -eval "io:format(\"~ts\", [code:lib_dir(erl_interface, lib)]).") + +C_SRC_DIR = $(CURDIR) +C_SRC_OUTPUT ?= $(CURDIR)/../priv/$(PROJECT).so + +# System type and C compiler/flags. 
+ +UNAME_SYS := $(shell uname -s) +ifeq ($(UNAME_SYS), Darwin) + CC ?= cc + CFLAGS ?= -O3 -std=c99 -arch x86_64 -finline-functions -Wall -Wmissing-prototypes + CXXFLAGS ?= -O3 -arch x86_64 -finline-functions -Wall + LDFLAGS ?= -arch x86_64 -flat_namespace -undefined suppress +else ifeq ($(UNAME_SYS), FreeBSD) + CC ?= cc + CFLAGS ?= -O3 -std=c99 -finline-functions -Wall -Wmissing-prototypes + CXXFLAGS ?= -O3 -finline-functions -Wall +else ifeq ($(UNAME_SYS), Linux) + CC ?= gcc + CFLAGS ?= -O3 -std=c99 -finline-functions -Wall -Wmissing-prototypes + CXXFLAGS ?= -O3 -finline-functions -Wall +endif + +CFLAGS += -fPIC -I $(ERTS_INCLUDE_DIR) -I $(ERL_INTERFACE_INCLUDE_DIR) +CXXFLAGS += -fPIC -I $(ERTS_INCLUDE_DIR) -I $(ERL_INTERFACE_INCLUDE_DIR) + +LDLIBS += -L $(ERL_INTERFACE_LIB_DIR) -lerl_interface -lei +LDFLAGS += -shared + +# Verbosity. + +c_verbose_0 = @echo " C " $(?F); +c_verbose = $(c_verbose_$(V)) + +cpp_verbose_0 = @echo " CPP " $(?F); +cpp_verbose = $(cpp_verbose_$(V)) + +link_verbose_0 = @echo " LD " $(@F); +link_verbose = $(link_verbose_$(V)) + +SOURCES := $(shell find $(C_SRC_DIR) -type f \( -name "*.c" -o -name "*.C" -o -name "*.cc" -o -name "*.cpp" \)) +OBJECTS = $(addsuffix .o, $(basename $(SOURCES))) + +COMPILE_C = $(c_verbose) $(CC) $(CFLAGS) $(CPPFLAGS) -c +COMPILE_CPP = $(cpp_verbose) $(CXX) $(CXXFLAGS) $(CPPFLAGS) -c + +$(C_SRC_OUTPUT): $(OBJECTS) + @mkdir -p $(BASEDIR)/priv/ + $(link_verbose) $(CC) $(OBJECTS) $(LDFLAGS) $(LDLIBS) -o $(C_SRC_OUTPUT) + +%.o: %.c + $(COMPILE_C) $(OUTPUT_OPTION) $< + +%.o: %.cc + $(COMPILE_CPP) $(OUTPUT_OPTION) $< + +%.o: %.C + $(COMPILE_CPP) $(OUTPUT_OPTION) $< + +%.o: %.cpp + $(COMPILE_CPP) $(OUTPUT_OPTION) $< + +clean: + @rm -f $(C_SRC_OUTPUT) $(OBJECTS) diff --git a/c_src/enlfq/concurrentqueue.h b/c_src/enlfq/concurrentqueue.h new file mode 100644 index 0000000..68f66df --- /dev/null +++ b/c_src/enlfq/concurrentqueue.h @@ -0,0 +1,3637 @@ +// Provides a C++11 implementation of a multi-producer, multi-consumer lock-free queue. +// An overview, including benchmark results, is provided here: +// http://moodycamel.com/blog/2014/a-fast-general-purpose-lock-free-queue-for-c++ +// The full design is also described in excruciating detail at: +// http://moodycamel.com/blog/2014/detailed-design-of-a-lock-free-queue + +// Simplified BSD license: +// Copyright (c) 2013-2016, Cameron Desrochers. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, this list of +// conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, this list of +// conditions and the following disclaimer in the documentation and/or other materials +// provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL +// THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +// OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR +// TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +#pragma once + +#if defined(__GNUC__) +// Disable -Wconversion warnings (spuriously triggered when Traits::size_t and +// Traits::index_t are set to < 32 bits, causing integer promotion, causing warnings +// upon assigning any computed values) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wconversion" + +#ifdef MCDBGQ_USE_RELACY +#pragma GCC diagnostic ignored "-Wint-to-pointer-cast" +#endif +#endif + +#if defined(__APPLE__) +#include "TargetConditionals.h" +#endif + +#ifdef MCDBGQ_USE_RELACY +#include "relacy/relacy_std.hpp" +#include "relacy_shims.h" +// We only use malloc/free anyway, and the delete macro messes up `= delete` method declarations. +// We'll override the default trait malloc ourselves without a macro. +#undef new +#undef delete +#undef malloc +#undef free +#else +#include // Requires C++11. Sorry VS2010. +#include +#endif +#include // for max_align_t +#include +#include +#include +#include +#include +#include +#include // for CHAR_BIT +#include +#include // partly for __WINPTHREADS_VERSION if on MinGW-w64 w/ POSIX threading + +// Platform-specific definitions of a numeric thread ID type and an invalid value +namespace moodycamel { namespace details { + template struct thread_id_converter { + typedef thread_id_t thread_id_numeric_size_t; + typedef thread_id_t thread_id_hash_t; + static thread_id_hash_t prehash(thread_id_t const& x) { return x; } + }; +} } +#if defined(MCDBGQ_USE_RELACY) +namespace moodycamel { namespace details { + typedef std::uint32_t thread_id_t; + static const thread_id_t invalid_thread_id = 0xFFFFFFFFU; + static const thread_id_t invalid_thread_id2 = 0xFFFFFFFEU; + static inline thread_id_t thread_id() { return rl::thread_index(); } +} } +#elif defined(_WIN32) || defined(__WINDOWS__) || defined(__WIN32__) +// No sense pulling in windows.h in a header, we'll manually declare the function +// we use and rely on backwards-compatibility for this not to break +extern "C" __declspec(dllimport) unsigned long __stdcall GetCurrentThreadId(void); +namespace moodycamel { namespace details { + static_assert(sizeof(unsigned long) == sizeof(std::uint32_t), "Expected size of unsigned long to be 32 bits on Windows"); + typedef std::uint32_t thread_id_t; + static const thread_id_t invalid_thread_id = 0; // See http://blogs.msdn.com/b/oldnewthing/archive/2004/02/23/78395.aspx + static const thread_id_t invalid_thread_id2 = 0xFFFFFFFFU; // Not technically guaranteed to be invalid, but is never used in practice. Note that all Win32 thread IDs are presently multiples of 4. 
+ static inline thread_id_t thread_id() { return static_cast(::GetCurrentThreadId()); } +} } +#elif defined(__arm__) || defined(_M_ARM) || defined(__aarch64__) || (defined(__APPLE__) && TARGET_OS_IPHONE) +namespace moodycamel { namespace details { + static_assert(sizeof(std::thread::id) == 4 || sizeof(std::thread::id) == 8, "std::thread::id is expected to be either 4 or 8 bytes"); + + typedef std::thread::id thread_id_t; + static const thread_id_t invalid_thread_id; // Default ctor creates invalid ID + + // Note we don't define a invalid_thread_id2 since std::thread::id doesn't have one; it's + // only used if MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED is defined anyway, which it won't + // be. + static inline thread_id_t thread_id() { return std::this_thread::get_id(); } + + template struct thread_id_size { }; + template<> struct thread_id_size<4> { typedef std::uint32_t numeric_t; }; + template<> struct thread_id_size<8> { typedef std::uint64_t numeric_t; }; + + template<> struct thread_id_converter { + typedef thread_id_size::numeric_t thread_id_numeric_size_t; +#ifndef __APPLE__ + typedef std::size_t thread_id_hash_t; +#else + typedef thread_id_numeric_size_t thread_id_hash_t; +#endif + + static thread_id_hash_t prehash(thread_id_t const& x) + { +#ifndef __APPLE__ + return std::hash()(x); +#else + return *reinterpret_cast(&x); +#endif + } + }; +} } +#else +// Use a nice trick from this answer: http://stackoverflow.com/a/8438730/21475 +// In order to get a numeric thread ID in a platform-independent way, we use a thread-local +// static variable's address as a thread identifier :-) +#if defined(__GNUC__) || defined(__INTEL_COMPILER) +#define MOODYCAMEL_THREADLOCAL __thread +#elif defined(_MSC_VER) +#define MOODYCAMEL_THREADLOCAL __declspec(thread) +#else +// Assume C++11 compliant compiler +#define MOODYCAMEL_THREADLOCAL thread_local +#endif +namespace moodycamel { namespace details { + typedef std::uintptr_t thread_id_t; + static const thread_id_t invalid_thread_id = 0; // Address can't be nullptr + static const thread_id_t invalid_thread_id2 = 1; // Member accesses off a null pointer are also generally invalid. Plus it's not aligned. + static inline thread_id_t thread_id() { static MOODYCAMEL_THREADLOCAL int x; return reinterpret_cast(&x); } +} } +#endif + +// Exceptions +#ifndef MOODYCAMEL_EXCEPTIONS_ENABLED +#if (defined(_MSC_VER) && defined(_CPPUNWIND)) || (defined(__GNUC__) && defined(__EXCEPTIONS)) || (!defined(_MSC_VER) && !defined(__GNUC__)) +#define MOODYCAMEL_EXCEPTIONS_ENABLED +#endif +#endif +#ifdef MOODYCAMEL_EXCEPTIONS_ENABLED +#define MOODYCAMEL_TRY try +#define MOODYCAMEL_CATCH(...) catch(__VA_ARGS__) +#define MOODYCAMEL_RETHROW throw +#define MOODYCAMEL_THROW(expr) throw (expr) +#else +#define MOODYCAMEL_TRY if (true) +#define MOODYCAMEL_CATCH(...) else if (false) +#define MOODYCAMEL_RETHROW +#define MOODYCAMEL_THROW(expr) +#endif + +#ifndef MOODYCAMEL_NOEXCEPT +#if !defined(MOODYCAMEL_EXCEPTIONS_ENABLED) +#define MOODYCAMEL_NOEXCEPT +#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) true +#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) true +#elif defined(_MSC_VER) && defined(_NOEXCEPT) && _MSC_VER < 1800 +// VS2012's std::is_nothrow_[move_]constructible is broken and returns true when it shouldn't :-( +// We have to assume *all* non-trivial constructors may throw on VS2012! +#define MOODYCAMEL_NOEXCEPT _NOEXCEPT +#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) (std::is_rvalue_reference::value && std::is_move_constructible::value ? 
std::is_trivially_move_constructible::value : std::is_trivially_copy_constructible::value) +#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) ((std::is_rvalue_reference::value && std::is_move_assignable::value ? std::is_trivially_move_assignable::value || std::is_nothrow_move_assignable::value : std::is_trivially_copy_assignable::value || std::is_nothrow_copy_assignable::value) && MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr)) +#elif defined(_MSC_VER) && defined(_NOEXCEPT) && _MSC_VER < 1900 +#define MOODYCAMEL_NOEXCEPT _NOEXCEPT +#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) (std::is_rvalue_reference::value && std::is_move_constructible::value ? std::is_trivially_move_constructible::value || std::is_nothrow_move_constructible::value : std::is_trivially_copy_constructible::value || std::is_nothrow_copy_constructible::value) +#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) ((std::is_rvalue_reference::value && std::is_move_assignable::value ? std::is_trivially_move_assignable::value || std::is_nothrow_move_assignable::value : std::is_trivially_copy_assignable::value || std::is_nothrow_copy_assignable::value) && MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr)) +#else +#define MOODYCAMEL_NOEXCEPT noexcept +#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) noexcept(expr) +#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) noexcept(expr) +#endif +#endif + +#ifndef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED +#ifdef MCDBGQ_USE_RELACY +#define MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED +#else +// VS2013 doesn't support `thread_local`, and MinGW-w64 w/ POSIX threading has a crippling bug: http://sourceforge.net/p/mingw-w64/bugs/445 +// g++ <=4.7 doesn't support thread_local either. +// Finally, iOS/ARM doesn't have support for it either, and g++/ARM allows it to compile but it's unconfirmed to actually work +#if (!defined(_MSC_VER) || _MSC_VER >= 1900) && (!defined(__MINGW32__) && !defined(__MINGW64__) || !defined(__WINPTHREADS_VERSION)) && (!defined(__GNUC__) || __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)) && (!defined(__APPLE__) || !TARGET_OS_IPHONE) && !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) +// Assume `thread_local` is fully supported in all other C++11 compilers/platforms +//#define MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED // always disabled for now since several users report having problems with it on +#endif +#endif +#endif + +// VS2012 doesn't support deleted functions. +// In this case, we declare the function normally but don't define it. A link error will be generated if the function is called. +#ifndef MOODYCAMEL_DELETE_FUNCTION +#if defined(_MSC_VER) && _MSC_VER < 1800 +#define MOODYCAMEL_DELETE_FUNCTION +#else +#define MOODYCAMEL_DELETE_FUNCTION = delete +#endif +#endif + +// Compiler-specific likely/unlikely hints +namespace moodycamel { namespace details { +#if defined(__GNUC__) + static inline bool (likely)(bool x) { return __builtin_expect((x), true); } + static inline bool (unlikely)(bool x) { return __builtin_expect((x), false); } +#else + static inline bool (likely)(bool x) { return x; } + static inline bool (unlikely)(bool x) { return x; } +#endif +} } + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG +#include "internal/concurrentqueue_internal_debug.h" +#endif + +namespace moodycamel { +namespace details { + template + struct const_numeric_max { + static_assert(std::is_integral::value, "const_numeric_max can only be used with integers"); + static const T value = std::numeric_limits::is_signed + ? 
(static_cast(1) << (sizeof(T) * CHAR_BIT - 1)) - static_cast(1) + : static_cast(-1); + }; + +#if defined(__GLIBCXX__) + typedef ::max_align_t std_max_align_t; // libstdc++ forgot to add it to std:: for a while +#else + typedef std::max_align_t std_max_align_t; // Others (e.g. MSVC) insist it can *only* be accessed via std:: +#endif + + // Some platforms have incorrectly set max_align_t to a type with <8 bytes alignment even while supporting + // 8-byte aligned scalar values (*cough* 32-bit iOS). Work around this with our own union. See issue #64. + typedef union { + std_max_align_t x; + long long y; + void* z; + } max_align_t; +} + +// Default traits for the ConcurrentQueue. To change some of the +// traits without re-implementing all of them, inherit from this +// struct and shadow the declarations you wish to be different; +// since the traits are used as a template type parameter, the +// shadowed declarations will be used where defined, and the defaults +// otherwise. +struct ConcurrentQueueDefaultTraits +{ + // General-purpose size type. std::size_t is strongly recommended. + typedef std::size_t size_t; + + // The type used for the enqueue and dequeue indices. Must be at least as + // large as size_t. Should be significantly larger than the number of elements + // you expect to hold at once, especially if you have a high turnover rate; + // for example, on 32-bit x86, if you expect to have over a hundred million + // elements or pump several million elements through your queue in a very + // short space of time, using a 32-bit type *may* trigger a race condition. + // A 64-bit int type is recommended in that case, and in practice will + // prevent a race condition no matter the usage of the queue. Note that + // whether the queue is lock-free with a 64-int type depends on the whether + // std::atomic is lock-free, which is platform-specific. + typedef std::size_t index_t; + + // Internally, all elements are enqueued and dequeued from multi-element + // blocks; this is the smallest controllable unit. If you expect few elements + // but many producers, a smaller block size should be favoured. For few producers + // and/or many elements, a larger block size is preferred. A sane default + // is provided. Must be a power of 2. + static const size_t BLOCK_SIZE = 32; + + // For explicit producers (i.e. when using a producer token), the block is + // checked for being empty by iterating through a list of flags, one per element. + // For large block sizes, this is too inefficient, and switching to an atomic + // counter-based approach is faster. The switch is made for block sizes strictly + // larger than this threshold. + static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = 32; + + // How many full blocks can be expected for a single explicit producer? This should + // reflect that number's maximum for optimal performance. Must be a power of 2. + static const size_t EXPLICIT_INITIAL_INDEX_SIZE = 32; + + // How many full blocks can be expected for a single implicit producer? This should + // reflect that number's maximum for optimal performance. Must be a power of 2. + static const size_t IMPLICIT_INITIAL_INDEX_SIZE = 32; + + // The initial size of the hash table mapping thread IDs to implicit producers. + // Note that the hash is resized every time it becomes half full. + // Must be a power of two, and either 0 or at least 1. If 0, implicit production + // (using the enqueue methods without an explicit producer token) is disabled. 
+ static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = 32; + + // Controls the number of items that an explicit consumer (i.e. one with a token) + // must consume before it causes all consumers to rotate and move on to the next + // internal queue. + static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE = 256; + + // The maximum number of elements (inclusive) that can be enqueued to a sub-queue. + // Enqueue operations that would cause this limit to be surpassed will fail. Note + // that this limit is enforced at the block level (for performance reasons), i.e. + // it's rounded up to the nearest block size. + static const size_t MAX_SUBQUEUE_SIZE = details::const_numeric_max::value; + + +#ifndef MCDBGQ_USE_RELACY + // Memory allocation can be customized if needed. + // malloc should return nullptr on failure, and handle alignment like std::malloc. +#if defined(malloc) || defined(free) + // Gah, this is 2015, stop defining macros that break standard code already! + // Work around malloc/free being special macros: + static inline void* WORKAROUND_malloc(size_t size) { return malloc(size); } + static inline void WORKAROUND_free(void* ptr) { return free(ptr); } + static inline void* (malloc)(size_t size) { return WORKAROUND_malloc(size); } + static inline void (free)(void* ptr) { return WORKAROUND_free(ptr); } +#else + static inline void* malloc(size_t size) { return std::malloc(size); } + static inline void free(void* ptr) { return std::free(ptr); } +#endif +#else + // Debug versions when running under the Relacy race detector (ignore + // these in user code) + static inline void* malloc(size_t size) { return rl::rl_malloc(size, $); } + static inline void free(void* ptr) { return rl::rl_free(ptr, $); } +#endif +}; + + +// When producing or consuming many elements, the most efficient way is to: +// 1) Use one of the bulk-operation methods of the queue with a token +// 2) Failing that, use the bulk-operation methods without a token +// 3) Failing that, create a token and use that with the single-item methods +// 4) Failing that, use the single-parameter methods of the queue +// Having said that, don't create tokens willy-nilly -- ideally there should be +// a maximum of one token per thread (of each kind). 
+struct ProducerToken; +struct ConsumerToken; + +template class ConcurrentQueue; +template class BlockingConcurrentQueue; +class ConcurrentQueueTests; + + +namespace details +{ + struct ConcurrentQueueProducerTypelessBase + { + ConcurrentQueueProducerTypelessBase* next; + std::atomic inactive; + ProducerToken* token; + + ConcurrentQueueProducerTypelessBase() + : next(nullptr), inactive(false), token(nullptr) + { + } + }; + + template struct _hash_32_or_64 { + static inline std::uint32_t hash(std::uint32_t h) + { + // MurmurHash3 finalizer -- see https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp + // Since the thread ID is already unique, all we really want to do is propagate that + // uniqueness evenly across all the bits, so that we can use a subset of the bits while + // reducing collisions significantly + h ^= h >> 16; + h *= 0x85ebca6b; + h ^= h >> 13; + h *= 0xc2b2ae35; + return h ^ (h >> 16); + } + }; + template<> struct _hash_32_or_64<1> { + static inline std::uint64_t hash(std::uint64_t h) + { + h ^= h >> 33; + h *= 0xff51afd7ed558ccd; + h ^= h >> 33; + h *= 0xc4ceb9fe1a85ec53; + return h ^ (h >> 33); + } + }; + template struct hash_32_or_64 : public _hash_32_or_64<(size > 4)> { }; + + static inline size_t hash_thread_id(thread_id_t id) + { + static_assert(sizeof(thread_id_t) <= 8, "Expected a platform where thread IDs are at most 64-bit values"); + return static_cast(hash_32_or_64::thread_id_hash_t)>::hash( + thread_id_converter::prehash(id))); + } + + template + static inline bool circular_less_than(T a, T b) + { +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable: 4554) +#endif + static_assert(std::is_integral::value && !std::numeric_limits::is_signed, "circular_less_than is intended to be used only with unsigned integer types"); + return static_cast(a - b) > static_cast(static_cast(1) << static_cast(sizeof(T) * CHAR_BIT - 1)); +#ifdef _MSC_VER +#pragma warning(pop) +#endif + } + + template + static inline char* align_for(char* ptr) + { + const std::size_t alignment = std::alignment_of::value; + return ptr + (alignment - (reinterpret_cast(ptr) % alignment)) % alignment; + } + + template + static inline T ceil_to_pow_2(T x) + { + static_assert(std::is_integral::value && !std::numeric_limits::is_signed, "ceil_to_pow_2 is intended to be used only with unsigned integer types"); + + // Adapted from http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 + --x; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + for (std::size_t i = 1; i < sizeof(T); i <<= 1) { + x |= x >> (i << 3); + } + ++x; + return x; + } + + template + static inline void swap_relaxed(std::atomic& left, std::atomic& right) + { + T temp = std::move(left.load(std::memory_order_relaxed)); + left.store(std::move(right.load(std::memory_order_relaxed)), std::memory_order_relaxed); + right.store(std::move(temp), std::memory_order_relaxed); + } + + template + static inline T const& nomove(T const& x) + { + return x; + } + + template + struct nomove_if + { + template + static inline T const& eval(T const& x) + { + return x; + } + }; + + template<> + struct nomove_if + { + template + static inline auto eval(U&& x) + -> decltype(std::forward(x)) + { + return std::forward(x); + } + }; + + template + static inline auto deref_noexcept(It& it) MOODYCAMEL_NOEXCEPT -> decltype(*it) + { + return *it; + } + +#if defined(__clang__) || !defined(__GNUC__) || __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8) + template struct is_trivially_destructible : std::is_trivially_destructible { }; 
+#else + template struct is_trivially_destructible : std::has_trivial_destructor { }; +#endif + +#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED +#ifdef MCDBGQ_USE_RELACY + typedef RelacyThreadExitListener ThreadExitListener; + typedef RelacyThreadExitNotifier ThreadExitNotifier; +#else + struct ThreadExitListener + { + typedef void (*callback_t)(void*); + callback_t callback; + void* userData; + + ThreadExitListener* next; // reserved for use by the ThreadExitNotifier + }; + + + class ThreadExitNotifier + { + public: + static void subscribe(ThreadExitListener* listener) + { + auto& tlsInst = instance(); + listener->next = tlsInst.tail; + tlsInst.tail = listener; + } + + static void unsubscribe(ThreadExitListener* listener) + { + auto& tlsInst = instance(); + ThreadExitListener** prev = &tlsInst.tail; + for (auto ptr = tlsInst.tail; ptr != nullptr; ptr = ptr->next) { + if (ptr == listener) { + *prev = ptr->next; + break; + } + prev = &ptr->next; + } + } + + private: + ThreadExitNotifier() : tail(nullptr) { } + ThreadExitNotifier(ThreadExitNotifier const&) MOODYCAMEL_DELETE_FUNCTION; + ThreadExitNotifier& operator=(ThreadExitNotifier const&) MOODYCAMEL_DELETE_FUNCTION; + + ~ThreadExitNotifier() + { + // This thread is about to exit, let everyone know! + assert(this == &instance() && "If this assert fails, you likely have a buggy compiler! Change the preprocessor conditions such that MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED is no longer defined."); + for (auto ptr = tail; ptr != nullptr; ptr = ptr->next) { + ptr->callback(ptr->userData); + } + } + + // Thread-local + static inline ThreadExitNotifier& instance() + { + static thread_local ThreadExitNotifier notifier; + return notifier; + } + + private: + ThreadExitListener* tail; + }; +#endif +#endif + + template struct static_is_lock_free_num { enum { value = 0 }; }; + template<> struct static_is_lock_free_num { enum { value = ATOMIC_CHAR_LOCK_FREE }; }; + template<> struct static_is_lock_free_num { enum { value = ATOMIC_SHORT_LOCK_FREE }; }; + template<> struct static_is_lock_free_num { enum { value = ATOMIC_INT_LOCK_FREE }; }; + template<> struct static_is_lock_free_num { enum { value = ATOMIC_LONG_LOCK_FREE }; }; + template<> struct static_is_lock_free_num { enum { value = ATOMIC_LLONG_LOCK_FREE }; }; + template struct static_is_lock_free : static_is_lock_free_num::type> { }; + template<> struct static_is_lock_free { enum { value = ATOMIC_BOOL_LOCK_FREE }; }; + template struct static_is_lock_free { enum { value = ATOMIC_POINTER_LOCK_FREE }; }; +} + + +struct ProducerToken +{ + template + explicit ProducerToken(ConcurrentQueue& queue); + + template + explicit ProducerToken(BlockingConcurrentQueue& queue); + + ProducerToken(ProducerToken&& other) MOODYCAMEL_NOEXCEPT + : producer(other.producer) + { + other.producer = nullptr; + if (producer != nullptr) { + producer->token = this; + } + } + + inline ProducerToken& operator=(ProducerToken&& other) MOODYCAMEL_NOEXCEPT + { + swap(other); + return *this; + } + + void swap(ProducerToken& other) MOODYCAMEL_NOEXCEPT + { + std::swap(producer, other.producer); + if (producer != nullptr) { + producer->token = this; + } + if (other.producer != nullptr) { + other.producer->token = &other; + } + } + + // A token is always valid unless: + // 1) Memory allocation failed during construction + // 2) It was moved via the move constructor + // (Note: assignment does a swap, leaving both potentially valid) + // 3) The associated queue was destroyed + // Note that if valid() returns true, that only indicates + // 
that the token is valid for use with a specific queue, + // but not which one; that's up to the user to track. + inline bool valid() const { return producer != nullptr; } + + ~ProducerToken() + { + if (producer != nullptr) { + producer->token = nullptr; + producer->inactive.store(true, std::memory_order_release); + } + } + + // Disable copying and assignment + ProducerToken(ProducerToken const&) MOODYCAMEL_DELETE_FUNCTION; + ProducerToken& operator=(ProducerToken const&) MOODYCAMEL_DELETE_FUNCTION; + +private: + template friend class ConcurrentQueue; + friend class ConcurrentQueueTests; + +protected: + details::ConcurrentQueueProducerTypelessBase* producer; +}; + + +struct ConsumerToken +{ + template + explicit ConsumerToken(ConcurrentQueue& q); + + template + explicit ConsumerToken(BlockingConcurrentQueue& q); + + ConsumerToken(ConsumerToken&& other) MOODYCAMEL_NOEXCEPT + : initialOffset(other.initialOffset), lastKnownGlobalOffset(other.lastKnownGlobalOffset), itemsConsumedFromCurrent(other.itemsConsumedFromCurrent), currentProducer(other.currentProducer), desiredProducer(other.desiredProducer) + { + } + + inline ConsumerToken& operator=(ConsumerToken&& other) MOODYCAMEL_NOEXCEPT + { + swap(other); + return *this; + } + + void swap(ConsumerToken& other) MOODYCAMEL_NOEXCEPT + { + std::swap(initialOffset, other.initialOffset); + std::swap(lastKnownGlobalOffset, other.lastKnownGlobalOffset); + std::swap(itemsConsumedFromCurrent, other.itemsConsumedFromCurrent); + std::swap(currentProducer, other.currentProducer); + std::swap(desiredProducer, other.desiredProducer); + } + + // Disable copying and assignment + ConsumerToken(ConsumerToken const&) MOODYCAMEL_DELETE_FUNCTION; + ConsumerToken& operator=(ConsumerToken const&) MOODYCAMEL_DELETE_FUNCTION; + +private: + template friend class ConcurrentQueue; + friend class ConcurrentQueueTests; + +private: // but shared with ConcurrentQueue + std::uint32_t initialOffset; + std::uint32_t lastKnownGlobalOffset; + std::uint32_t itemsConsumedFromCurrent; + details::ConcurrentQueueProducerTypelessBase* currentProducer; + details::ConcurrentQueueProducerTypelessBase* desiredProducer; +}; + +// Need to forward-declare this swap because it's in a namespace. 
+// See http://stackoverflow.com/questions/4492062/why-does-a-c-friend-class-need-a-forward-declaration-only-in-other-namespaces +template +inline void swap(typename ConcurrentQueue::ImplicitProducerKVP& a, typename ConcurrentQueue::ImplicitProducerKVP& b) MOODYCAMEL_NOEXCEPT; + + +template +class ConcurrentQueue +{ +public: + typedef ::moodycamel::ProducerToken producer_token_t; + typedef ::moodycamel::ConsumerToken consumer_token_t; + + typedef typename Traits::index_t index_t; + typedef typename Traits::size_t size_t; + + static const size_t BLOCK_SIZE = static_cast(Traits::BLOCK_SIZE); + static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = static_cast(Traits::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD); + static const size_t EXPLICIT_INITIAL_INDEX_SIZE = static_cast(Traits::EXPLICIT_INITIAL_INDEX_SIZE); + static const size_t IMPLICIT_INITIAL_INDEX_SIZE = static_cast(Traits::IMPLICIT_INITIAL_INDEX_SIZE); + static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = static_cast(Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE); + static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE = static_cast(Traits::EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE); +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable: 4307) // + integral constant overflow (that's what the ternary expression is for!) +#pragma warning(disable: 4309) // static_cast: Truncation of constant value +#endif + static const size_t MAX_SUBQUEUE_SIZE = (details::const_numeric_max::value - static_cast(Traits::MAX_SUBQUEUE_SIZE) < BLOCK_SIZE) ? details::const_numeric_max::value : ((static_cast(Traits::MAX_SUBQUEUE_SIZE) + (BLOCK_SIZE - 1)) / BLOCK_SIZE * BLOCK_SIZE); +#ifdef _MSC_VER +#pragma warning(pop) +#endif + + static_assert(!std::numeric_limits::is_signed && std::is_integral::value, "Traits::size_t must be an unsigned integral type"); + static_assert(!std::numeric_limits::is_signed && std::is_integral::value, "Traits::index_t must be an unsigned integral type"); + static_assert(sizeof(index_t) >= sizeof(size_t), "Traits::index_t must be at least as wide as Traits::size_t"); + static_assert((BLOCK_SIZE > 1) && !(BLOCK_SIZE & (BLOCK_SIZE - 1)), "Traits::BLOCK_SIZE must be a power of 2 (and at least 2)"); + static_assert((EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD > 1) && !(EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD & (EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD - 1)), "Traits::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD must be a power of 2 (and greater than 1)"); + static_assert((EXPLICIT_INITIAL_INDEX_SIZE > 1) && !(EXPLICIT_INITIAL_INDEX_SIZE & (EXPLICIT_INITIAL_INDEX_SIZE - 1)), "Traits::EXPLICIT_INITIAL_INDEX_SIZE must be a power of 2 (and greater than 1)"); + static_assert((IMPLICIT_INITIAL_INDEX_SIZE > 1) && !(IMPLICIT_INITIAL_INDEX_SIZE & (IMPLICIT_INITIAL_INDEX_SIZE - 1)), "Traits::IMPLICIT_INITIAL_INDEX_SIZE must be a power of 2 (and greater than 1)"); + static_assert((INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) || !(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE & (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE - 1)), "Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE must be a power of 2"); + static_assert(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0 || INITIAL_IMPLICIT_PRODUCER_HASH_SIZE >= 1, "Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE must be at least 1 (or 0 to disable implicit enqueueing)"); + +public: + // Creates a queue with at least `capacity` element slots; note that the + // actual number of elements that can be inserted without additional memory + // allocation depends on the number of producers and the block size 
(e.g. if + // the block size is equal to `capacity`, only a single block will be allocated + // up-front, which means only a single producer will be able to enqueue elements + // without an extra allocation -- blocks aren't shared between producers). + // This method is not thread safe -- it is up to the user to ensure that the + // queue is fully constructed before it starts being used by other threads (this + // includes making the memory effects of construction visible, possibly with a + // memory barrier). + explicit ConcurrentQueue(size_t capacity = 6 * BLOCK_SIZE) + : producerListTail(nullptr), + producerCount(0), + initialBlockPoolIndex(0), + nextExplicitConsumerId(0), + globalExplicitConsumerOffset(0) + { + implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed); + populate_initial_implicit_producer_hash(); + populate_initial_block_list(capacity / BLOCK_SIZE + ((capacity & (BLOCK_SIZE - 1)) == 0 ? 0 : 1)); + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + // Track all the producers using a fully-resolved typed list for + // each kind; this makes it possible to debug them starting from + // the root queue object (otherwise wacky casts are needed that + // don't compile in the debugger's expression evaluator). + explicitProducers.store(nullptr, std::memory_order_relaxed); + implicitProducers.store(nullptr, std::memory_order_relaxed); +#endif + } + + // Computes the correct amount of pre-allocated blocks for you based + // on the minimum number of elements you want available at any given + // time, and the maximum concurrent number of each type of producer. + ConcurrentQueue(size_t minCapacity, size_t maxExplicitProducers, size_t maxImplicitProducers) + : producerListTail(nullptr), + producerCount(0), + initialBlockPoolIndex(0), + nextExplicitConsumerId(0), + globalExplicitConsumerOffset(0) + { + implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed); + populate_initial_implicit_producer_hash(); + size_t blocks = (((minCapacity + BLOCK_SIZE - 1) / BLOCK_SIZE) - 1) * (maxExplicitProducers + 1) + 2 * (maxExplicitProducers + maxImplicitProducers); + populate_initial_block_list(blocks); + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + explicitProducers.store(nullptr, std::memory_order_relaxed); + implicitProducers.store(nullptr, std::memory_order_relaxed); +#endif + } + + // Note: The queue should not be accessed concurrently while it's + // being deleted. It's up to the user to synchronize this. + // This method is not thread safe. 
+ ~ConcurrentQueue() + { + // Destroy producers + auto ptr = producerListTail.load(std::memory_order_relaxed); + while (ptr != nullptr) { + auto next = ptr->next_prod(); + if (ptr->token != nullptr) { + ptr->token->producer = nullptr; + } + destroy(ptr); + ptr = next; + } + + // Destroy implicit producer hash tables + if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE != 0) { + auto hash = implicitProducerHash.load(std::memory_order_relaxed); + while (hash != nullptr) { + auto prev = hash->prev; + if (prev != nullptr) { // The last hash is part of this object and was not allocated dynamically + for (size_t i = 0; i != hash->capacity; ++i) { + hash->entries[i].~ImplicitProducerKVP(); + } + hash->~ImplicitProducerHash(); + (Traits::free)(hash); + } + hash = prev; + } + } + + // Destroy global free list + auto block = freeList.head_unsafe(); + while (block != nullptr) { + auto next = block->freeListNext.load(std::memory_order_relaxed); + if (block->dynamicallyAllocated) { + destroy(block); + } + block = next; + } + + // Destroy initial free list + destroy_array(initialBlockPool, initialBlockPoolSize); + } + + // Disable copying and copy assignment + ConcurrentQueue(ConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION; + ConcurrentQueue& operator=(ConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION; + + // Moving is supported, but note that it is *not* a thread-safe operation. + // Nobody can use the queue while it's being moved, and the memory effects + // of that move must be propagated to other threads before they can use it. + // Note: When a queue is moved, its tokens are still valid but can only be + // used with the destination queue (i.e. semantically they are moved along + // with the queue itself). + ConcurrentQueue(ConcurrentQueue&& other) MOODYCAMEL_NOEXCEPT + : producerListTail(other.producerListTail.load(std::memory_order_relaxed)), + producerCount(other.producerCount.load(std::memory_order_relaxed)), + initialBlockPoolIndex(other.initialBlockPoolIndex.load(std::memory_order_relaxed)), + initialBlockPool(other.initialBlockPool), + initialBlockPoolSize(other.initialBlockPoolSize), + freeList(std::move(other.freeList)), + nextExplicitConsumerId(other.nextExplicitConsumerId.load(std::memory_order_relaxed)), + globalExplicitConsumerOffset(other.globalExplicitConsumerOffset.load(std::memory_order_relaxed)) + { + // Move the other one into this, and leave the other one as an empty queue + implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed); + populate_initial_implicit_producer_hash(); + swap_implicit_producer_hashes(other); + + other.producerListTail.store(nullptr, std::memory_order_relaxed); + other.producerCount.store(0, std::memory_order_relaxed); + other.nextExplicitConsumerId.store(0, std::memory_order_relaxed); + other.globalExplicitConsumerOffset.store(0, std::memory_order_relaxed); + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + explicitProducers.store(other.explicitProducers.load(std::memory_order_relaxed), std::memory_order_relaxed); + other.explicitProducers.store(nullptr, std::memory_order_relaxed); + implicitProducers.store(other.implicitProducers.load(std::memory_order_relaxed), std::memory_order_relaxed); + other.implicitProducers.store(nullptr, std::memory_order_relaxed); +#endif + + other.initialBlockPoolIndex.store(0, std::memory_order_relaxed); + other.initialBlockPoolSize = 0; + other.initialBlockPool = nullptr; + + reown_producers(); + } + + inline ConcurrentQueue& operator=(ConcurrentQueue&& other) MOODYCAMEL_NOEXCEPT + { + return swap_internal(other); + } + + // 
Swaps this queue's state with the other's. Not thread-safe. + // Swapping two queues does not invalidate their tokens, however + // the tokens that were created for one queue must be used with + // only the swapped queue (i.e. the tokens are tied to the + // queue's movable state, not the object itself). + inline void swap(ConcurrentQueue& other) MOODYCAMEL_NOEXCEPT + { + swap_internal(other); + } + +private: + ConcurrentQueue& swap_internal(ConcurrentQueue& other) + { + if (this == &other) { + return *this; + } + + details::swap_relaxed(producerListTail, other.producerListTail); + details::swap_relaxed(producerCount, other.producerCount); + details::swap_relaxed(initialBlockPoolIndex, other.initialBlockPoolIndex); + std::swap(initialBlockPool, other.initialBlockPool); + std::swap(initialBlockPoolSize, other.initialBlockPoolSize); + freeList.swap(other.freeList); + details::swap_relaxed(nextExplicitConsumerId, other.nextExplicitConsumerId); + details::swap_relaxed(globalExplicitConsumerOffset, other.globalExplicitConsumerOffset); + + swap_implicit_producer_hashes(other); + + reown_producers(); + other.reown_producers(); + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + details::swap_relaxed(explicitProducers, other.explicitProducers); + details::swap_relaxed(implicitProducers, other.implicitProducers); +#endif + + return *this; + } + +public: + // Enqueues a single item (by copying it). + // Allocates memory if required. Only fails if memory allocation fails (or implicit + // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0, + // or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Thread-safe. + inline bool enqueue(T const& item) + { + if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; + return inner_enqueue(item); + } + + // Enqueues a single item (by moving it, if possible). + // Allocates memory if required. Only fails if memory allocation fails (or implicit + // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0, + // or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Thread-safe. + inline bool enqueue(T&& item) + { + if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; + return inner_enqueue(std::move(item)); + } + + // Enqueues a single item (by copying it) using an explicit producer token. + // Allocates memory if required. Only fails if memory allocation fails (or + // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Thread-safe. + inline bool enqueue(producer_token_t const& token, T const& item) + { + return inner_enqueue(token, item); + } + + // Enqueues a single item (by moving it, if possible) using an explicit producer token. + // Allocates memory if required. Only fails if memory allocation fails (or + // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Thread-safe. + inline bool enqueue(producer_token_t const& token, T&& item) + { + return inner_enqueue(token, std::move(item)); + } + + // Enqueues several items. + // Allocates memory if required. Only fails if memory allocation fails (or + // implicit production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE + // is 0, or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Note: Use std::make_move_iterator if the elements should be moved instead of copied. + // Thread-safe. 
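+	// [Editor's note -- illustrative only.] For example, assuming a std::vector<T> named
+	// `batch` (hypothetical) whose elements may be moved from:
+	//     q.enqueue_bulk(std::make_move_iterator(batch.begin()), batch.size());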
+ template + bool enqueue_bulk(It itemFirst, size_t count) + { + if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; + return inner_enqueue_bulk(itemFirst, count); + } + + // Enqueues several items using an explicit producer token. + // Allocates memory if required. Only fails if memory allocation fails + // (or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Note: Use std::make_move_iterator if the elements should be moved + // instead of copied. + // Thread-safe. + template + bool enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count) + { + return inner_enqueue_bulk(token, itemFirst, count); + } + + // Enqueues a single item (by copying it). + // Does not allocate memory. Fails if not enough room to enqueue (or implicit + // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE + // is 0). + // Thread-safe. + inline bool try_enqueue(T const& item) + { + if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; + return inner_enqueue(item); + } + + // Enqueues a single item (by moving it, if possible). + // Does not allocate memory (except for one-time implicit producer). + // Fails if not enough room to enqueue (or implicit production is + // disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0). + // Thread-safe. + inline bool try_enqueue(T&& item) + { + if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; + return inner_enqueue(std::move(item)); + } + + // Enqueues a single item (by copying it) using an explicit producer token. + // Does not allocate memory. Fails if not enough room to enqueue. + // Thread-safe. + inline bool try_enqueue(producer_token_t const& token, T const& item) + { + return inner_enqueue(token, item); + } + + // Enqueues a single item (by moving it, if possible) using an explicit producer token. + // Does not allocate memory. Fails if not enough room to enqueue. + // Thread-safe. + inline bool try_enqueue(producer_token_t const& token, T&& item) + { + return inner_enqueue(token, std::move(item)); + } + + // Enqueues several items. + // Does not allocate memory (except for one-time implicit producer). + // Fails if not enough room to enqueue (or implicit production is + // disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0). + // Note: Use std::make_move_iterator if the elements should be moved + // instead of copied. + // Thread-safe. + template + bool try_enqueue_bulk(It itemFirst, size_t count) + { + if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; + return inner_enqueue_bulk(itemFirst, count); + } + + // Enqueues several items using an explicit producer token. + // Does not allocate memory. Fails if not enough room to enqueue. + // Note: Use std::make_move_iterator if the elements should be moved + // instead of copied. + // Thread-safe. + template + bool try_enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count) + { + return inner_enqueue_bulk(token, itemFirst, count); + } + + + + // Attempts to dequeue from the queue. + // Returns false if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. + template + bool try_dequeue(U& item) + { + // Instead of simply trying each producer in turn (which could cause needless contention on the first + // producer), we score them heuristically. 
+ size_t nonEmptyCount = 0; + ProducerBase* best = nullptr; + size_t bestSize = 0; + for (auto ptr = producerListTail.load(std::memory_order_acquire); nonEmptyCount < 3 && ptr != nullptr; ptr = ptr->next_prod()) { + auto size = ptr->size_approx(); + if (size > 0) { + if (size > bestSize) { + bestSize = size; + best = ptr; + } + ++nonEmptyCount; + } + } + + // If there was at least one non-empty queue but it appears empty at the time + // we try to dequeue from it, we need to make sure every queue's been tried + if (nonEmptyCount > 0) { + if ((details::likely)(best->dequeue(item))) { + return true; + } + for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) { + if (ptr != best && ptr->dequeue(item)) { + return true; + } + } + } + return false; + } + + // Attempts to dequeue from the queue. + // Returns false if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // This differs from the try_dequeue(item) method in that this one does + // not attempt to reduce contention by interleaving the order that producer + // streams are dequeued from. So, using this method can reduce overall throughput + // under contention, but will give more predictable results in single-threaded + // consumer scenarios. This is mostly only useful for internal unit tests. + // Never allocates. Thread-safe. + template + bool try_dequeue_non_interleaved(U& item) + { + for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) { + if (ptr->dequeue(item)) { + return true; + } + } + return false; + } + + // Attempts to dequeue from the queue using an explicit consumer token. + // Returns false if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. 
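+	// [Editor's note -- illustrative only.] For example, a consumer thread that owns a
+	// ConsumerToken `ctok` for a queue `q` (hypothetical names) can drain it with:
+	//     T item;  // T = the queue's element type
+	//     while (q.try_dequeue(ctok, item)) { /* process item */ }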
+ template + bool try_dequeue(consumer_token_t& token, U& item) + { + // The idea is roughly as follows: + // Every 256 items from one producer, make everyone rotate (increase the global offset) -> this means the highest efficiency consumer dictates the rotation speed of everyone else, more or less + // If you see that the global offset has changed, you must reset your consumption counter and move to your designated place + // If there's no items where you're supposed to be, keep moving until you find a producer with some items + // If the global offset has not changed but you've run out of items to consume, move over from your current position until you find an producer with something in it + + if (token.desiredProducer == nullptr || token.lastKnownGlobalOffset != globalExplicitConsumerOffset.load(std::memory_order_relaxed)) { + if (!update_current_producer_after_rotation(token)) { + return false; + } + } + + // If there was at least one non-empty queue but it appears empty at the time + // we try to dequeue from it, we need to make sure every queue's been tried + if (static_cast(token.currentProducer)->dequeue(item)) { + if (++token.itemsConsumedFromCurrent == EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE) { + globalExplicitConsumerOffset.fetch_add(1, std::memory_order_relaxed); + } + return true; + } + + auto tail = producerListTail.load(std::memory_order_acquire); + auto ptr = static_cast(token.currentProducer)->next_prod(); + if (ptr == nullptr) { + ptr = tail; + } + while (ptr != static_cast(token.currentProducer)) { + if (ptr->dequeue(item)) { + token.currentProducer = ptr; + token.itemsConsumedFromCurrent = 1; + return true; + } + ptr = ptr->next_prod(); + if (ptr == nullptr) { + ptr = tail; + } + } + return false; + } + + // Attempts to dequeue several elements from the queue. + // Returns the number of items actually dequeued. + // Returns 0 if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. + template + size_t try_dequeue_bulk(It itemFirst, size_t max) + { + size_t count = 0; + for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) { + count += ptr->dequeue_bulk(itemFirst, max - count); + if (count == max) { + break; + } + } + return count; + } + + // Attempts to dequeue several elements from the queue using an explicit consumer token. + // Returns the number of items actually dequeued. + // Returns 0 if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. 
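+	// [Editor's note -- illustrative only.] For example, consuming up to 64 items at a time
+	// into a local buffer (names hypothetical):
+	//     T buf[64];
+	//     size_t n = q.try_dequeue_bulk(ctok, buf, 64);
+	//     for (size_t i = 0; i != n; ++i) { /* process buf[i] */ }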
+ template + size_t try_dequeue_bulk(consumer_token_t& token, It itemFirst, size_t max) + { + if (token.desiredProducer == nullptr || token.lastKnownGlobalOffset != globalExplicitConsumerOffset.load(std::memory_order_relaxed)) { + if (!update_current_producer_after_rotation(token)) { + return 0; + } + } + + size_t count = static_cast(token.currentProducer)->dequeue_bulk(itemFirst, max); + if (count == max) { + if ((token.itemsConsumedFromCurrent += static_cast(max)) >= EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE) { + globalExplicitConsumerOffset.fetch_add(1, std::memory_order_relaxed); + } + return max; + } + token.itemsConsumedFromCurrent += static_cast(count); + max -= count; + + auto tail = producerListTail.load(std::memory_order_acquire); + auto ptr = static_cast(token.currentProducer)->next_prod(); + if (ptr == nullptr) { + ptr = tail; + } + while (ptr != static_cast(token.currentProducer)) { + auto dequeued = ptr->dequeue_bulk(itemFirst, max); + count += dequeued; + if (dequeued != 0) { + token.currentProducer = ptr; + token.itemsConsumedFromCurrent = static_cast(dequeued); + } + if (dequeued == max) { + break; + } + max -= dequeued; + ptr = ptr->next_prod(); + if (ptr == nullptr) { + ptr = tail; + } + } + return count; + } + + + + // Attempts to dequeue from a specific producer's inner queue. + // If you happen to know which producer you want to dequeue from, this + // is significantly faster than using the general-case try_dequeue methods. + // Returns false if the producer's queue appeared empty at the time it + // was checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. + template + inline bool try_dequeue_from_producer(producer_token_t const& producer, U& item) + { + return static_cast(producer.producer)->dequeue(item); + } + + // Attempts to dequeue several elements from a specific producer's inner queue. + // Returns the number of items actually dequeued. + // If you happen to know which producer you want to dequeue from, this + // is significantly faster than using the general-case try_dequeue methods. + // Returns 0 if the producer's queue appeared empty at the time it + // was checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. + template + inline size_t try_dequeue_bulk_from_producer(producer_token_t const& producer, It itemFirst, size_t max) + { + return static_cast(producer.producer)->dequeue_bulk(itemFirst, max); + } + + + // Returns an estimate of the total number of elements currently in the queue. This + // estimate is only accurate if the queue has completely stabilized before it is called + // (i.e. all enqueue and dequeue operations have completed and their memory effects are + // visible on the calling thread, and no further operations start while this method is + // being called). + // Thread-safe. + size_t size_approx() const + { + size_t size = 0; + for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) { + size += ptr->size_approx(); + } + return size; + } + + + // Returns true if the underlying atomic variables used by + // the queue are lock-free (they should be on most platforms). + // Thread-safe. 
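+	// [Editor's note -- illustrative only.] is_lock_free() is a static member, so it can be
+	// checked without constructing a queue, e.g. to choose a fallback at startup:
+	//     if (!moodycamel::ConcurrentQueue<int>::is_lock_free()) {
+	//         /* e.g. log a warning or fall back to a mutex-based queue */
+	//     }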
+ static bool is_lock_free() + { + return + details::static_is_lock_free::value == 2 && + details::static_is_lock_free::value == 2 && + details::static_is_lock_free::value == 2 && + details::static_is_lock_free::value == 2 && + details::static_is_lock_free::value == 2 && + details::static_is_lock_free::thread_id_numeric_size_t>::value == 2; + } + + +private: + friend struct ProducerToken; + friend struct ConsumerToken; + struct ExplicitProducer; + friend struct ExplicitProducer; + struct ImplicitProducer; + friend struct ImplicitProducer; + friend class ConcurrentQueueTests; + + enum AllocationMode { CanAlloc, CannotAlloc }; + + + /////////////////////////////// + // Queue methods + /////////////////////////////// + + template + inline bool inner_enqueue(producer_token_t const& token, U&& element) + { + return static_cast(token.producer)->ConcurrentQueue::ExplicitProducer::template enqueue(std::forward(element)); + } + + template + inline bool inner_enqueue(U&& element) + { + auto producer = get_or_add_implicit_producer(); + return producer == nullptr ? false : producer->ConcurrentQueue::ImplicitProducer::template enqueue(std::forward(element)); + } + + template + inline bool inner_enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count) + { + return static_cast(token.producer)->ConcurrentQueue::ExplicitProducer::template enqueue_bulk(itemFirst, count); + } + + template + inline bool inner_enqueue_bulk(It itemFirst, size_t count) + { + auto producer = get_or_add_implicit_producer(); + return producer == nullptr ? false : producer->ConcurrentQueue::ImplicitProducer::template enqueue_bulk(itemFirst, count); + } + + inline bool update_current_producer_after_rotation(consumer_token_t& token) + { + // Ah, there's been a rotation, figure out where we should be! + auto tail = producerListTail.load(std::memory_order_acquire); + if (token.desiredProducer == nullptr && tail == nullptr) { + return false; + } + auto prodCount = producerCount.load(std::memory_order_relaxed); + auto globalOffset = globalExplicitConsumerOffset.load(std::memory_order_relaxed); + if ((details::unlikely)(token.desiredProducer == nullptr)) { + // Aha, first time we're dequeueing anything. + // Figure out our local position + // Note: offset is from start, not end, but we're traversing from end -- subtract from count first + std::uint32_t offset = prodCount - 1 - (token.initialOffset % prodCount); + token.desiredProducer = tail; + for (std::uint32_t i = 0; i != offset; ++i) { + token.desiredProducer = static_cast(token.desiredProducer)->next_prod(); + if (token.desiredProducer == nullptr) { + token.desiredProducer = tail; + } + } + } + + std::uint32_t delta = globalOffset - token.lastKnownGlobalOffset; + if (delta >= prodCount) { + delta = delta % prodCount; + } + for (std::uint32_t i = 0; i != delta; ++i) { + token.desiredProducer = static_cast(token.desiredProducer)->next_prod(); + if (token.desiredProducer == nullptr) { + token.desiredProducer = tail; + } + } + + token.lastKnownGlobalOffset = globalOffset; + token.currentProducer = token.desiredProducer; + token.itemsConsumedFromCurrent = 0; + return true; + } + + + /////////////////////////// + // Free list + /////////////////////////// + + template + struct FreeListNode + { + FreeListNode() : freeListRefs(0), freeListNext(nullptr) { } + + std::atomic freeListRefs; + std::atomic freeListNext; + }; + + // A simple CAS-based lock-free free list. 
Not the fastest thing in the world under heavy contention, but + // simple and correct (assuming nodes are never freed until after the free list is destroyed), and fairly + // speedy under low contention. + template // N must inherit FreeListNode or have the same fields (and initialization of them) + struct FreeList + { + FreeList() : freeListHead(nullptr) { } + FreeList(FreeList&& other) : freeListHead(other.freeListHead.load(std::memory_order_relaxed)) { other.freeListHead.store(nullptr, std::memory_order_relaxed); } + void swap(FreeList& other) { details::swap_relaxed(freeListHead, other.freeListHead); } + + FreeList(FreeList const&) MOODYCAMEL_DELETE_FUNCTION; + FreeList& operator=(FreeList const&) MOODYCAMEL_DELETE_FUNCTION; + + inline void add(N* node) + { +#if MCDBGQ_NOLOCKFREE_FREELIST + debug::DebugLock lock(mutex); +#endif + // We know that the should-be-on-freelist bit is 0 at this point, so it's safe to + // set it using a fetch_add + if (node->freeListRefs.fetch_add(SHOULD_BE_ON_FREELIST, std::memory_order_acq_rel) == 0) { + // Oh look! We were the last ones referencing this node, and we know + // we want to add it to the free list, so let's do it! + add_knowing_refcount_is_zero(node); + } + } + + inline N* try_get() + { +#if MCDBGQ_NOLOCKFREE_FREELIST + debug::DebugLock lock(mutex); +#endif + auto head = freeListHead.load(std::memory_order_acquire); + while (head != nullptr) { + auto prevHead = head; + auto refs = head->freeListRefs.load(std::memory_order_relaxed); + if ((refs & REFS_MASK) == 0 || !head->freeListRefs.compare_exchange_strong(refs, refs + 1, std::memory_order_acquire, std::memory_order_relaxed)) { + head = freeListHead.load(std::memory_order_acquire); + continue; + } + + // Good, reference count has been incremented (it wasn't at zero), which means we can read the + // next and not worry about it changing between now and the time we do the CAS + auto next = head->freeListNext.load(std::memory_order_relaxed); + if (freeListHead.compare_exchange_strong(head, next, std::memory_order_acquire, std::memory_order_relaxed)) { + // Yay, got the node. This means it was on the list, which means shouldBeOnFreeList must be false no + // matter the refcount (because nobody else knows it's been taken off yet, it can't have been put back on). + assert((head->freeListRefs.load(std::memory_order_relaxed) & SHOULD_BE_ON_FREELIST) == 0); + + // Decrease refcount twice, once for our ref, and once for the list's ref + head->freeListRefs.fetch_sub(2, std::memory_order_release); + return head; + } + + // OK, the head must have changed on us, but we still need to decrease the refcount we increased. + // Note that we don't need to release any memory effects, but we do need to ensure that the reference + // count decrement happens-after the CAS on the head. + refs = prevHead->freeListRefs.fetch_sub(1, std::memory_order_acq_rel); + if (refs == SHOULD_BE_ON_FREELIST + 1) { + add_knowing_refcount_is_zero(prevHead); + } + } + + return nullptr; + } + + // Useful for traversing the list when there's no contention (e.g. to destroy remaining nodes) + N* head_unsafe() const { return freeListHead.load(std::memory_order_relaxed); } + + private: + inline void add_knowing_refcount_is_zero(N* node) + { + // Since the refcount is zero, and nobody can increase it once it's zero (except us, and we run + // only one copy of this method per node at a time, i.e. 
the single thread case), then we know + // we can safely change the next pointer of the node; however, once the refcount is back above + // zero, then other threads could increase it (happens under heavy contention, when the refcount + // goes to zero in between a load and a refcount increment of a node in try_get, then back up to + // something non-zero, then the refcount increment is done by the other thread) -- so, if the CAS + // to add the node to the actual list fails, decrease the refcount and leave the add operation to + // the next thread who puts the refcount back at zero (which could be us, hence the loop). + auto head = freeListHead.load(std::memory_order_relaxed); + while (true) { + node->freeListNext.store(head, std::memory_order_relaxed); + node->freeListRefs.store(1, std::memory_order_release); + if (!freeListHead.compare_exchange_strong(head, node, std::memory_order_release, std::memory_order_relaxed)) { + // Hmm, the add failed, but we can only try again when the refcount goes back to zero + if (node->freeListRefs.fetch_add(SHOULD_BE_ON_FREELIST - 1, std::memory_order_release) == 1) { + continue; + } + } + return; + } + } + + private: + // Implemented like a stack, but where node order doesn't matter (nodes are inserted out of order under contention) + std::atomic freeListHead; + + static const std::uint32_t REFS_MASK = 0x7FFFFFFF; + static const std::uint32_t SHOULD_BE_ON_FREELIST = 0x80000000; + +#if MCDBGQ_NOLOCKFREE_FREELIST + debug::DebugMutex mutex; +#endif + }; + + + /////////////////////////// + // Block + /////////////////////////// + + enum InnerQueueContext { implicit_context = 0, explicit_context = 1 }; + + struct Block + { + Block() + : next(nullptr), elementsCompletelyDequeued(0), freeListRefs(0), freeListNext(nullptr), shouldBeOnFreeList(false), dynamicallyAllocated(true) + { +#if MCDBGQ_TRACKMEM + owner = nullptr; +#endif + } + + template + inline bool is_empty() const + { + if (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { + // Check flags + for (size_t i = 0; i < BLOCK_SIZE; ++i) { + if (!emptyFlags[i].load(std::memory_order_relaxed)) { + return false; + } + } + + // Aha, empty; make sure we have all other memory effects that happened before the empty flags were set + std::atomic_thread_fence(std::memory_order_acquire); + return true; + } + else { + // Check counter + if (elementsCompletelyDequeued.load(std::memory_order_relaxed) == BLOCK_SIZE) { + std::atomic_thread_fence(std::memory_order_acquire); + return true; + } + assert(elementsCompletelyDequeued.load(std::memory_order_relaxed) <= BLOCK_SIZE); + return false; + } + } + + // Returns true if the block is now empty (does not apply in explicit context) + template + inline bool set_empty(index_t i) + { + if (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { + // Set flag + assert(!emptyFlags[BLOCK_SIZE - 1 - static_cast(i & static_cast(BLOCK_SIZE - 1))].load(std::memory_order_relaxed)); + emptyFlags[BLOCK_SIZE - 1 - static_cast(i & static_cast(BLOCK_SIZE - 1))].store(true, std::memory_order_release); + return false; + } + else { + // Increment counter + auto prevVal = elementsCompletelyDequeued.fetch_add(1, std::memory_order_release); + assert(prevVal < BLOCK_SIZE); + return prevVal == BLOCK_SIZE - 1; + } + } + + // Sets multiple contiguous item statuses to 'empty' (assumes no wrapping and count > 0). + // Returns true if the block is now empty (does not apply in explicit context). 
+ template + inline bool set_many_empty(index_t i, size_t count) + { + if (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { + // Set flags + std::atomic_thread_fence(std::memory_order_release); + i = BLOCK_SIZE - 1 - static_cast(i & static_cast(BLOCK_SIZE - 1)) - count + 1; + for (size_t j = 0; j != count; ++j) { + assert(!emptyFlags[i + j].load(std::memory_order_relaxed)); + emptyFlags[i + j].store(true, std::memory_order_relaxed); + } + return false; + } + else { + // Increment counter + auto prevVal = elementsCompletelyDequeued.fetch_add(count, std::memory_order_release); + assert(prevVal + count <= BLOCK_SIZE); + return prevVal + count == BLOCK_SIZE; + } + } + + template + inline void set_all_empty() + { + if (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { + // Set all flags + for (size_t i = 0; i != BLOCK_SIZE; ++i) { + emptyFlags[i].store(true, std::memory_order_relaxed); + } + } + else { + // Reset counter + elementsCompletelyDequeued.store(BLOCK_SIZE, std::memory_order_relaxed); + } + } + + template + inline void reset_empty() + { + if (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { + // Reset flags + for (size_t i = 0; i != BLOCK_SIZE; ++i) { + emptyFlags[i].store(false, std::memory_order_relaxed); + } + } + else { + // Reset counter + elementsCompletelyDequeued.store(0, std::memory_order_relaxed); + } + } + + inline T* operator[](index_t idx) MOODYCAMEL_NOEXCEPT { return static_cast(static_cast(elements)) + static_cast(idx & static_cast(BLOCK_SIZE - 1)); } + inline T const* operator[](index_t idx) const MOODYCAMEL_NOEXCEPT { return static_cast(static_cast(elements)) + static_cast(idx & static_cast(BLOCK_SIZE - 1)); } + + private: + // IMPORTANT: This must be the first member in Block, so that if T depends on the alignment of + // addresses returned by malloc, that alignment will be preserved. Apparently clang actually + // generates code that uses this assumption for AVX instructions in some cases. Ideally, we + // should also align Block to the alignment of T in case it's higher than malloc's 16-byte + // alignment, but this is hard to do in a cross-platform way. Assert for this case: + static_assert(std::alignment_of::value <= std::alignment_of::value, "The queue does not support super-aligned types at this time"); + // Additionally, we need the alignment of Block itself to be a multiple of max_align_t since + // otherwise the appropriate padding will not be added at the end of Block in order to make + // arrays of Blocks all be properly aligned (not just the first one). We use a union to force + // this. + union { + char elements[sizeof(T) * BLOCK_SIZE]; + details::max_align_t dummy; + }; + public: + Block* next; + std::atomic elementsCompletelyDequeued; + std::atomic emptyFlags[BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD ? 
BLOCK_SIZE : 1]; + public: + std::atomic freeListRefs; + std::atomic freeListNext; + std::atomic shouldBeOnFreeList; + bool dynamicallyAllocated; // Perhaps a better name for this would be 'isNotPartOfInitialBlockPool' + +#if MCDBGQ_TRACKMEM + void* owner; +#endif + }; + static_assert(std::alignment_of::value >= std::alignment_of::value, "Internal error: Blocks must be at least as aligned as the type they are wrapping"); + + +#if MCDBGQ_TRACKMEM +public: + struct MemStats; +private: +#endif + + /////////////////////////// + // Producer base + /////////////////////////// + + struct ProducerBase : public details::ConcurrentQueueProducerTypelessBase + { + ProducerBase(ConcurrentQueue* parent_, bool isExplicit_) : + tailIndex(0), + headIndex(0), + dequeueOptimisticCount(0), + dequeueOvercommit(0), + tailBlock(nullptr), + isExplicit(isExplicit_), + parent(parent_) + { + } + + virtual ~ProducerBase() { }; + + template + inline bool dequeue(U& element) + { + if (isExplicit) { + return static_cast(this)->dequeue(element); + } + else { + return static_cast(this)->dequeue(element); + } + } + + template + inline size_t dequeue_bulk(It& itemFirst, size_t max) + { + if (isExplicit) { + return static_cast(this)->dequeue_bulk(itemFirst, max); + } + else { + return static_cast(this)->dequeue_bulk(itemFirst, max); + } + } + + inline ProducerBase* next_prod() const { return static_cast(next); } + + inline size_t size_approx() const + { + auto tail = tailIndex.load(std::memory_order_relaxed); + auto head = headIndex.load(std::memory_order_relaxed); + return details::circular_less_than(head, tail) ? static_cast(tail - head) : 0; + } + + inline index_t getTail() const { return tailIndex.load(std::memory_order_relaxed); } + protected: + std::atomic tailIndex; // Where to enqueue to next + std::atomic headIndex; // Where to dequeue from next + + std::atomic dequeueOptimisticCount; + std::atomic dequeueOvercommit; + + Block* tailBlock; + + public: + bool isExplicit; + ConcurrentQueue* parent; + + protected: +#if MCDBGQ_TRACKMEM + friend struct MemStats; +#endif + }; + + + /////////////////////////// + // Explicit queue + /////////////////////////// + + struct ExplicitProducer : public ProducerBase + { + explicit ExplicitProducer(ConcurrentQueue* parent) : + ProducerBase(parent, true), + blockIndex(nullptr), + pr_blockIndexSlotsUsed(0), + pr_blockIndexSize(EXPLICIT_INITIAL_INDEX_SIZE >> 1), + pr_blockIndexFront(0), + pr_blockIndexEntries(nullptr), + pr_blockIndexRaw(nullptr) + { + size_t poolBasedIndexSize = details::ceil_to_pow_2(parent->initialBlockPoolSize) >> 1; + if (poolBasedIndexSize > pr_blockIndexSize) { + pr_blockIndexSize = poolBasedIndexSize; + } + + new_block_index(0); // This creates an index with double the number of current entries, i.e. EXPLICIT_INITIAL_INDEX_SIZE + } + + ~ExplicitProducer() + { + // Destruct any elements not yet dequeued. + // Since we're in the destructor, we can assume all elements + // are either completely dequeued or completely not (no halfways). 
+ if (this->tailBlock != nullptr) { // Note this means there must be a block index too + // First find the block that's partially dequeued, if any + Block* halfDequeuedBlock = nullptr; + if ((this->headIndex.load(std::memory_order_relaxed) & static_cast(BLOCK_SIZE - 1)) != 0) { + // The head's not on a block boundary, meaning a block somewhere is partially dequeued + // (or the head block is the tail block and was fully dequeued, but the head/tail are still not on a boundary) + size_t i = (pr_blockIndexFront - pr_blockIndexSlotsUsed) & (pr_blockIndexSize - 1); + while (details::circular_less_than(pr_blockIndexEntries[i].base + BLOCK_SIZE, this->headIndex.load(std::memory_order_relaxed))) { + i = (i + 1) & (pr_blockIndexSize - 1); + } + assert(details::circular_less_than(pr_blockIndexEntries[i].base, this->headIndex.load(std::memory_order_relaxed))); + halfDequeuedBlock = pr_blockIndexEntries[i].block; + } + + // Start at the head block (note the first line in the loop gives us the head from the tail on the first iteration) + auto block = this->tailBlock; + do { + block = block->next; + if (block->ConcurrentQueue::Block::template is_empty()) { + continue; + } + + size_t i = 0; // Offset into block + if (block == halfDequeuedBlock) { + i = static_cast(this->headIndex.load(std::memory_order_relaxed) & static_cast(BLOCK_SIZE - 1)); + } + + // Walk through all the items in the block; if this is the tail block, we need to stop when we reach the tail index + auto lastValidIndex = (this->tailIndex.load(std::memory_order_relaxed) & static_cast(BLOCK_SIZE - 1)) == 0 ? BLOCK_SIZE : static_cast(this->tailIndex.load(std::memory_order_relaxed) & static_cast(BLOCK_SIZE - 1)); + while (i != BLOCK_SIZE && (block != this->tailBlock || i != lastValidIndex)) { + (*block)[i++]->~T(); + } + } while (block != this->tailBlock); + } + + // Destroy all blocks that we own + if (this->tailBlock != nullptr) { + auto block = this->tailBlock; + do { + auto nextBlock = block->next; + if (block->dynamicallyAllocated) { + destroy(block); + } + else { + this->parent->add_block_to_free_list(block); + } + block = nextBlock; + } while (block != this->tailBlock); + } + + // Destroy the block indices + auto header = static_cast(pr_blockIndexRaw); + while (header != nullptr) { + auto prev = static_cast(header->prev); + header->~BlockIndexHeader(); + (Traits::free)(header); + header = prev; + } + } + + template + inline bool enqueue(U&& element) + { + index_t currentTailIndex = this->tailIndex.load(std::memory_order_relaxed); + index_t newTailIndex = 1 + currentTailIndex; + if ((currentTailIndex & static_cast(BLOCK_SIZE - 1)) == 0) { + // We reached the end of a block, start a new one + auto startBlock = this->tailBlock; + auto originalBlockIndexSlotsUsed = pr_blockIndexSlotsUsed; + if (this->tailBlock != nullptr && this->tailBlock->next->ConcurrentQueue::Block::template is_empty()) { + // We can re-use the block ahead of us, it's empty! + this->tailBlock = this->tailBlock->next; + this->tailBlock->ConcurrentQueue::Block::template reset_empty(); + + // We'll put the block on the block index (guaranteed to be room since we're conceptually removing the + // last block from it first -- except instead of removing then adding, we can just overwrite). + // Note that there must be a valid block index here, since even if allocation failed in the ctor, + // it would have been re-attempted when adding the first block to the queue; since there is such + // a block, a block index must have been successfully allocated. 
+ } + else { + // Whatever head value we see here is >= the last value we saw here (relatively), + // and <= its current value. Since we have the most recent tail, the head must be + // <= to it. + auto head = this->headIndex.load(std::memory_order_relaxed); + assert(!details::circular_less_than(currentTailIndex, head)); + if (!details::circular_less_than(head, currentTailIndex + BLOCK_SIZE) + || (MAX_SUBQUEUE_SIZE != details::const_numeric_max::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head))) { + // We can't enqueue in another block because there's not enough leeway -- the + // tail could surpass the head by the time the block fills up! (Or we'll exceed + // the size limit, if the second part of the condition was true.) + return false; + } + // We're going to need a new block; check that the block index has room + if (pr_blockIndexRaw == nullptr || pr_blockIndexSlotsUsed == pr_blockIndexSize) { + // Hmm, the circular block index is already full -- we'll need + // to allocate a new index. Note pr_blockIndexRaw can only be nullptr if + // the initial allocation failed in the constructor. + + if (allocMode == CannotAlloc || !new_block_index(pr_blockIndexSlotsUsed)) { + return false; + } + } + + // Insert a new block in the circular linked list + auto newBlock = this->parent->ConcurrentQueue::template requisition_block(); + if (newBlock == nullptr) { + return false; + } +#if MCDBGQ_TRACKMEM + newBlock->owner = this; +#endif + newBlock->ConcurrentQueue::Block::template reset_empty(); + if (this->tailBlock == nullptr) { + newBlock->next = newBlock; + } + else { + newBlock->next = this->tailBlock->next; + this->tailBlock->next = newBlock; + } + this->tailBlock = newBlock; + ++pr_blockIndexSlotsUsed; + } + + if (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (nullptr) T(std::forward(element)))) { + // The constructor may throw. We want the element not to appear in the queue in + // that case (without corrupting the queue): + MOODYCAMEL_TRY { + new ((*this->tailBlock)[currentTailIndex]) T(std::forward(element)); + } + MOODYCAMEL_CATCH (...) { + // Revert change to the current block, but leave the new block available + // for next time + pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; + this->tailBlock = startBlock == nullptr ? 
this->tailBlock : startBlock; + MOODYCAMEL_RETHROW; + } + } + else { + (void)startBlock; + (void)originalBlockIndexSlotsUsed; + } + + // Add block to block index + auto& entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront]; + entry.base = currentTailIndex; + entry.block = this->tailBlock; + blockIndex.load(std::memory_order_relaxed)->front.store(pr_blockIndexFront, std::memory_order_release); + pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1); + + if (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (nullptr) T(std::forward(element)))) { + this->tailIndex.store(newTailIndex, std::memory_order_release); + return true; + } + } + + // Enqueue + new ((*this->tailBlock)[currentTailIndex]) T(std::forward(element)); + + this->tailIndex.store(newTailIndex, std::memory_order_release); + return true; + } + + template + bool dequeue(U& element) + { + auto tail = this->tailIndex.load(std::memory_order_relaxed); + auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed); + if (details::circular_less_than(this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit, tail)) { + // Might be something to dequeue, let's give it a try + + // Note that this if is purely for performance purposes in the common case when the queue is + // empty and the values are eventually consistent -- we may enter here spuriously. + + // Note that whatever the values of overcommit and tail are, they are not going to change (unless we + // change them) and must be the same value at this point (inside the if) as when the if condition was + // evaluated. + + // We insert an acquire fence here to synchronize-with the release upon incrementing dequeueOvercommit below. + // This ensures that whatever the value we got loaded into overcommit, the load of dequeueOptisticCount in + // the fetch_add below will result in a value at least as recent as that (and therefore at least as large). + // Note that I believe a compiler (signal) fence here would be sufficient due to the nature of fetch_add (all + // read-modify-write operations are guaranteed to work on the latest value in the modification order), but + // unfortunately that can't be shown to be correct using only the C++11 standard. + // See http://stackoverflow.com/questions/18223161/what-are-the-c11-memory-ordering-guarantees-in-this-corner-case + std::atomic_thread_fence(std::memory_order_acquire); + + // Increment optimistic counter, then check if it went over the boundary + auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(1, std::memory_order_relaxed); + + // Note that since dequeueOvercommit must be <= dequeueOptimisticCount (because dequeueOvercommit is only ever + // incremented after dequeueOptimisticCount -- this is enforced in the `else` block below), and since we now + // have a version of dequeueOptimisticCount that is at least as recent as overcommit (due to the release upon + // incrementing dequeueOvercommit and the acquire above that synchronizes with it), overcommit <= myDequeueCount. + assert(overcommit <= myDequeueCount); + + // Note that we reload tail here in case it changed; it will be the same value as before or greater, since + // this load is sequenced after (happens after) the earlier load above. 
This is supported by read-read + // coherency (as defined in the standard), explained here: http://en.cppreference.com/w/cpp/atomic/memory_order + tail = this->tailIndex.load(std::memory_order_acquire); + if ((details::likely)(details::circular_less_than(myDequeueCount - overcommit, tail))) { + // Guaranteed to be at least one element to dequeue! + + // Get the index. Note that since there's guaranteed to be at least one element, this + // will never exceed tail. We need to do an acquire-release fence here since it's possible + // that whatever condition got us to this point was for an earlier enqueued element (that + // we already see the memory effects for), but that by the time we increment somebody else + // has incremented it, and we need to see the memory effects for *that* element, which is + // in such a case is necessarily visible on the thread that incremented it in the first + // place with the more current condition (they must have acquired a tail that is at least + // as recent). + auto index = this->headIndex.fetch_add(1, std::memory_order_acq_rel); + + + // Determine which block the element is in + + auto localBlockIndex = blockIndex.load(std::memory_order_acquire); + auto localBlockIndexHead = localBlockIndex->front.load(std::memory_order_acquire); + + // We need to be careful here about subtracting and dividing because of index wrap-around. + // When an index wraps, we need to preserve the sign of the offset when dividing it by the + // block size (in order to get a correct signed block count offset in all cases): + auto headBase = localBlockIndex->entries[localBlockIndexHead].base; + auto blockBaseIndex = index & ~static_cast(BLOCK_SIZE - 1); + auto offset = static_cast(static_cast::type>(blockBaseIndex - headBase) / BLOCK_SIZE); + auto block = localBlockIndex->entries[(localBlockIndexHead + offset) & (localBlockIndex->size - 1)].block; + + // Dequeue + auto& el = *((*block)[index]); + if (!MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, element = std::move(el))) { + // Make sure the element is still fully dequeued and destroyed even if the assignment + // throws + struct Guard { + Block* block; + index_t index; + + ~Guard() + { + (*block)[index]->~T(); + block->ConcurrentQueue::Block::template set_empty(index); + } + } guard = { block, index }; + + element = std::move(el); + } + else { + element = std::move(el); + el.~T(); + block->ConcurrentQueue::Block::template set_empty(index); + } + + return true; + } + else { + // Wasn't anything to dequeue after all; make the effective dequeue count eventually consistent + this->dequeueOvercommit.fetch_add(1, std::memory_order_release); // Release so that the fetch_add on dequeueOptimisticCount is guaranteed to happen before this write + } + } + + return false; + } + + template + bool enqueue_bulk(It itemFirst, size_t count) + { + // First, we need to make sure we have enough room to enqueue all of the elements; + // this means pre-allocating blocks and putting them in the block index (but only if + // all the allocations succeeded). 
+ index_t startTailIndex = this->tailIndex.load(std::memory_order_relaxed); + auto startBlock = this->tailBlock; + auto originalBlockIndexFront = pr_blockIndexFront; + auto originalBlockIndexSlotsUsed = pr_blockIndexSlotsUsed; + + Block* firstAllocatedBlock = nullptr; + + // Figure out how many blocks we'll need to allocate, and do so + size_t blockBaseDiff = ((startTailIndex + count - 1) & ~static_cast(BLOCK_SIZE - 1)) - ((startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1)); + index_t currentTailIndex = (startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1); + if (blockBaseDiff > 0) { + // Allocate as many blocks as possible from ahead + while (blockBaseDiff > 0 && this->tailBlock != nullptr && this->tailBlock->next != firstAllocatedBlock && this->tailBlock->next->ConcurrentQueue::Block::template is_empty()) { + blockBaseDiff -= static_cast(BLOCK_SIZE); + currentTailIndex += static_cast(BLOCK_SIZE); + + this->tailBlock = this->tailBlock->next; + firstAllocatedBlock = firstAllocatedBlock == nullptr ? this->tailBlock : firstAllocatedBlock; + + auto& entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront]; + entry.base = currentTailIndex; + entry.block = this->tailBlock; + pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1); + } + + // Now allocate as many blocks as necessary from the block pool + while (blockBaseDiff > 0) { + blockBaseDiff -= static_cast(BLOCK_SIZE); + currentTailIndex += static_cast(BLOCK_SIZE); + + auto head = this->headIndex.load(std::memory_order_relaxed); + assert(!details::circular_less_than(currentTailIndex, head)); + bool full = !details::circular_less_than(head, currentTailIndex + BLOCK_SIZE) || (MAX_SUBQUEUE_SIZE != details::const_numeric_max::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head)); + if (pr_blockIndexRaw == nullptr || pr_blockIndexSlotsUsed == pr_blockIndexSize || full) { + if (allocMode == CannotAlloc || full || !new_block_index(originalBlockIndexSlotsUsed)) { + // Failed to allocate, undo changes (but keep injected blocks) + pr_blockIndexFront = originalBlockIndexFront; + pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; + this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock; + return false; + } + + // pr_blockIndexFront is updated inside new_block_index, so we need to + // update our fallback value too (since we keep the new index even if we + // later fail) + originalBlockIndexFront = originalBlockIndexSlotsUsed; + } + + // Insert a new block in the circular linked list + auto newBlock = this->parent->ConcurrentQueue::template requisition_block(); + if (newBlock == nullptr) { + pr_blockIndexFront = originalBlockIndexFront; + pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; + this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock; + return false; + } + +#if MCDBGQ_TRACKMEM + newBlock->owner = this; +#endif + newBlock->ConcurrentQueue::Block::template set_all_empty(); + if (this->tailBlock == nullptr) { + newBlock->next = newBlock; + } + else { + newBlock->next = this->tailBlock->next; + this->tailBlock->next = newBlock; + } + this->tailBlock = newBlock; + firstAllocatedBlock = firstAllocatedBlock == nullptr ? 
this->tailBlock : firstAllocatedBlock; + + ++pr_blockIndexSlotsUsed; + + auto& entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront]; + entry.base = currentTailIndex; + entry.block = this->tailBlock; + pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1); + } + + // Excellent, all allocations succeeded. Reset each block's emptiness before we fill them up, and + // publish the new block index front + auto block = firstAllocatedBlock; + while (true) { + block->ConcurrentQueue::Block::template reset_empty(); + if (block == this->tailBlock) { + break; + } + block = block->next; + } + + if (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (nullptr) T(details::deref_noexcept(itemFirst)))) { + blockIndex.load(std::memory_order_relaxed)->front.store((pr_blockIndexFront - 1) & (pr_blockIndexSize - 1), std::memory_order_release); + } + } + + // Enqueue, one block at a time + index_t newTailIndex = startTailIndex + static_cast(count); + currentTailIndex = startTailIndex; + auto endBlock = this->tailBlock; + this->tailBlock = startBlock; + assert((startTailIndex & static_cast(BLOCK_SIZE - 1)) != 0 || firstAllocatedBlock != nullptr || count == 0); + if ((startTailIndex & static_cast(BLOCK_SIZE - 1)) == 0 && firstAllocatedBlock != nullptr) { + this->tailBlock = firstAllocatedBlock; + } + while (true) { + auto stopIndex = (currentTailIndex & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + if (details::circular_less_than(newTailIndex, stopIndex)) { + stopIndex = newTailIndex; + } + if (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (nullptr) T(details::deref_noexcept(itemFirst)))) { + while (currentTailIndex != stopIndex) { + new ((*this->tailBlock)[currentTailIndex++]) T(*itemFirst++); + } + } + else { + MOODYCAMEL_TRY { + while (currentTailIndex != stopIndex) { + // Must use copy constructor even if move constructor is available + // because we may have to revert if there's an exception. + // Sorry about the horrible templated next line, but it was the only way + // to disable moving *at compile time*, which is important because a type + // may only define a (noexcept) move constructor, and so calls to the + // cctor will not compile, even if they are in an if branch that will never + // be executed + new ((*this->tailBlock)[currentTailIndex]) T(details::nomove_if<(bool)!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (nullptr) T(details::deref_noexcept(itemFirst)))>::eval(*itemFirst)); + ++currentTailIndex; + ++itemFirst; + } + } + MOODYCAMEL_CATCH (...) { + // Oh dear, an exception's been thrown -- destroy the elements that + // were enqueued so far and revert the entire bulk operation (we'll keep + // any allocated blocks in our linked list for later, though). + auto constructedStopIndex = currentTailIndex; + auto lastBlockEnqueued = this->tailBlock; + + pr_blockIndexFront = originalBlockIndexFront; + pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; + this->tailBlock = startBlock == nullptr ? 
firstAllocatedBlock : startBlock; + + if (!details::is_trivially_destructible::value) { + auto block = startBlock; + if ((startTailIndex & static_cast(BLOCK_SIZE - 1)) == 0) { + block = firstAllocatedBlock; + } + currentTailIndex = startTailIndex; + while (true) { + stopIndex = (currentTailIndex & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + if (details::circular_less_than(constructedStopIndex, stopIndex)) { + stopIndex = constructedStopIndex; + } + while (currentTailIndex != stopIndex) { + (*block)[currentTailIndex++]->~T(); + } + if (block == lastBlockEnqueued) { + break; + } + block = block->next; + } + } + MOODYCAMEL_RETHROW; + } + } + + if (this->tailBlock == endBlock) { + assert(currentTailIndex == newTailIndex); + break; + } + this->tailBlock = this->tailBlock->next; + } + + if (!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (nullptr) T(details::deref_noexcept(itemFirst))) && firstAllocatedBlock != nullptr) { + blockIndex.load(std::memory_order_relaxed)->front.store((pr_blockIndexFront - 1) & (pr_blockIndexSize - 1), std::memory_order_release); + } + + this->tailIndex.store(newTailIndex, std::memory_order_release); + return true; + } + + template + size_t dequeue_bulk(It& itemFirst, size_t max) + { + auto tail = this->tailIndex.load(std::memory_order_relaxed); + auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed); + auto desiredCount = static_cast(tail - (this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit)); + if (details::circular_less_than(0, desiredCount)) { + desiredCount = desiredCount < max ? desiredCount : max; + std::atomic_thread_fence(std::memory_order_acquire); + + auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(desiredCount, std::memory_order_relaxed); + assert(overcommit <= myDequeueCount); + + tail = this->tailIndex.load(std::memory_order_acquire); + auto actualCount = static_cast(tail - (myDequeueCount - overcommit)); + if (details::circular_less_than(0, actualCount)) { + actualCount = desiredCount < actualCount ? desiredCount : actualCount; + if (actualCount < desiredCount) { + this->dequeueOvercommit.fetch_add(desiredCount - actualCount, std::memory_order_release); + } + + // Get the first index. Note that since there's guaranteed to be at least actualCount elements, this + // will never exceed tail. + auto firstIndex = this->headIndex.fetch_add(actualCount, std::memory_order_acq_rel); + + // Determine which block the first element is in + auto localBlockIndex = blockIndex.load(std::memory_order_acquire); + auto localBlockIndexHead = localBlockIndex->front.load(std::memory_order_acquire); + + auto headBase = localBlockIndex->entries[localBlockIndexHead].base; + auto firstBlockBaseIndex = firstIndex & ~static_cast(BLOCK_SIZE - 1); + auto offset = static_cast(static_cast::type>(firstBlockBaseIndex - headBase) / BLOCK_SIZE); + auto indexIndex = (localBlockIndexHead + offset) & (localBlockIndex->size - 1); + + // Iterate the blocks and dequeue + auto index = firstIndex; + do { + auto firstIndexInBlock = index; + auto endIndex = (index & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + endIndex = details::circular_less_than(firstIndex + static_cast(actualCount), endIndex) ? 
firstIndex + static_cast(actualCount) : endIndex; + auto block = localBlockIndex->entries[indexIndex].block; + if (MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, details::deref_noexcept(itemFirst) = std::move((*(*block)[index])))) { + while (index != endIndex) { + auto& el = *((*block)[index]); + *itemFirst++ = std::move(el); + el.~T(); + ++index; + } + } + else { + MOODYCAMEL_TRY { + while (index != endIndex) { + auto& el = *((*block)[index]); + *itemFirst = std::move(el); + ++itemFirst; + el.~T(); + ++index; + } + } + MOODYCAMEL_CATCH (...) { + // It's too late to revert the dequeue, but we can make sure that all + // the dequeued objects are properly destroyed and the block index + // (and empty count) are properly updated before we propagate the exception + do { + block = localBlockIndex->entries[indexIndex].block; + while (index != endIndex) { + (*block)[index++]->~T(); + } + block->ConcurrentQueue::Block::template set_many_empty(firstIndexInBlock, static_cast(endIndex - firstIndexInBlock)); + indexIndex = (indexIndex + 1) & (localBlockIndex->size - 1); + + firstIndexInBlock = index; + endIndex = (index & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + endIndex = details::circular_less_than(firstIndex + static_cast(actualCount), endIndex) ? firstIndex + static_cast(actualCount) : endIndex; + } while (index != firstIndex + actualCount); + + MOODYCAMEL_RETHROW; + } + } + block->ConcurrentQueue::Block::template set_many_empty(firstIndexInBlock, static_cast(endIndex - firstIndexInBlock)); + indexIndex = (indexIndex + 1) & (localBlockIndex->size - 1); + } while (index != firstIndex + actualCount); + + return actualCount; + } + else { + // Wasn't anything to dequeue after all; make the effective dequeue count eventually consistent + this->dequeueOvercommit.fetch_add(desiredCount, std::memory_order_release); + } + } + + return 0; + } + + private: + struct BlockIndexEntry + { + index_t base; + Block* block; + }; + + struct BlockIndexHeader + { + size_t size; + std::atomic front; // Current slot (not next, like pr_blockIndexFront) + BlockIndexEntry* entries; + void* prev; + }; + + + bool new_block_index(size_t numberOfFilledSlotsToExpose) + { + auto prevBlockSizeMask = pr_blockIndexSize - 1; + + // Create the new block + pr_blockIndexSize <<= 1; + auto newRawPtr = static_cast((Traits::malloc)(sizeof(BlockIndexHeader) + std::alignment_of::value - 1 + sizeof(BlockIndexEntry) * pr_blockIndexSize)); + if (newRawPtr == nullptr) { + pr_blockIndexSize >>= 1; // Reset to allow graceful retry + return false; + } + + auto newBlockIndexEntries = reinterpret_cast(details::align_for(newRawPtr + sizeof(BlockIndexHeader))); + + // Copy in all the old indices, if any + size_t j = 0; + if (pr_blockIndexSlotsUsed != 0) { + auto i = (pr_blockIndexFront - pr_blockIndexSlotsUsed) & prevBlockSizeMask; + do { + newBlockIndexEntries[j++] = pr_blockIndexEntries[i]; + i = (i + 1) & prevBlockSizeMask; + } while (i != pr_blockIndexFront); + } + + // Update everything + auto header = new (newRawPtr) BlockIndexHeader; + header->size = pr_blockIndexSize; + header->front.store(numberOfFilledSlotsToExpose - 1, std::memory_order_relaxed); + header->entries = newBlockIndexEntries; + header->prev = pr_blockIndexRaw; // we link the new block to the old one so we can free it later + + pr_blockIndexFront = j; + pr_blockIndexEntries = newBlockIndexEntries; + pr_blockIndexRaw = newRawPtr; + blockIndex.store(header, std::memory_order_release); + + return true; + } + + private: + std::atomic blockIndex; + + // To be used by producer 
only -- consumer must use the ones in referenced by blockIndex + size_t pr_blockIndexSlotsUsed; + size_t pr_blockIndexSize; + size_t pr_blockIndexFront; // Next slot (not current) + BlockIndexEntry* pr_blockIndexEntries; + void* pr_blockIndexRaw; + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + public: + ExplicitProducer* nextExplicitProducer; + private: +#endif + +#if MCDBGQ_TRACKMEM + friend struct MemStats; +#endif + }; + + + ////////////////////////////////// + // Implicit queue + ////////////////////////////////// + + struct ImplicitProducer : public ProducerBase + { + ImplicitProducer(ConcurrentQueue* parent) : + ProducerBase(parent, false), + nextBlockIndexCapacity(IMPLICIT_INITIAL_INDEX_SIZE), + blockIndex(nullptr) + { + new_block_index(); + } + + ~ImplicitProducer() + { + // Note that since we're in the destructor we can assume that all enqueue/dequeue operations + // completed already; this means that all undequeued elements are placed contiguously across + // contiguous blocks, and that only the first and last remaining blocks can be only partially + // empty (all other remaining blocks must be completely full). + +#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED + // Unregister ourselves for thread termination notification + if (!this->inactive.load(std::memory_order_relaxed)) { + details::ThreadExitNotifier::unsubscribe(&threadExitListener); + } +#endif + + // Destroy all remaining elements! + auto tail = this->tailIndex.load(std::memory_order_relaxed); + auto index = this->headIndex.load(std::memory_order_relaxed); + Block* block = nullptr; + assert(index == tail || details::circular_less_than(index, tail)); + bool forceFreeLastBlock = index != tail; // If we enter the loop, then the last (tail) block will not be freed + while (index != tail) { + if ((index & static_cast(BLOCK_SIZE - 1)) == 0 || block == nullptr) { + if (block != nullptr) { + // Free the old block + this->parent->add_block_to_free_list(block); + } + + block = get_block_index_entry_for_index(index)->value.load(std::memory_order_relaxed); + } + + ((*block)[index])->~T(); + ++index; + } + // Even if the queue is empty, there's still one block that's not on the free list + // (unless the head index reached the end of it, in which case the tail will be poised + // to create a new block). 
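+ // Concretely, the tail block is handed back to the free list either because the loop
+ // above ran at all (forceFreeLastBlock), or because tail does not sit exactly on a block
+ // boundary (the low bits of the tail index are non-zero), meaning the block is still
+ // partially in use by this producer rather than poised to be replaced.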
+ if (this->tailBlock != nullptr && (forceFreeLastBlock || (tail & static_cast(BLOCK_SIZE - 1)) != 0)) { + this->parent->add_block_to_free_list(this->tailBlock); + } + + // Destroy block index + auto localBlockIndex = blockIndex.load(std::memory_order_relaxed); + if (localBlockIndex != nullptr) { + for (size_t i = 0; i != localBlockIndex->capacity; ++i) { + localBlockIndex->index[i]->~BlockIndexEntry(); + } + do { + auto prev = localBlockIndex->prev; + localBlockIndex->~BlockIndexHeader(); + (Traits::free)(localBlockIndex); + localBlockIndex = prev; + } while (localBlockIndex != nullptr); + } + } + + template + inline bool enqueue(U&& element) + { + index_t currentTailIndex = this->tailIndex.load(std::memory_order_relaxed); + index_t newTailIndex = 1 + currentTailIndex; + if ((currentTailIndex & static_cast(BLOCK_SIZE - 1)) == 0) { + // We reached the end of a block, start a new one + auto head = this->headIndex.load(std::memory_order_relaxed); + assert(!details::circular_less_than(currentTailIndex, head)); + if (!details::circular_less_than(head, currentTailIndex + BLOCK_SIZE) || (MAX_SUBQUEUE_SIZE != details::const_numeric_max::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head))) { + return false; + } +#if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + debug::DebugLock lock(mutex); +#endif + // Find out where we'll be inserting this block in the block index + BlockIndexEntry* idxEntry; + if (!insert_block_index_entry(idxEntry, currentTailIndex)) { + return false; + } + + // Get ahold of a new block + auto newBlock = this->parent->ConcurrentQueue::template requisition_block(); + if (newBlock == nullptr) { + rewind_block_index_tail(); + idxEntry->value.store(nullptr, std::memory_order_relaxed); + return false; + } +#if MCDBGQ_TRACKMEM + newBlock->owner = this; +#endif + newBlock->ConcurrentQueue::Block::template reset_empty(); + + if (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (nullptr) T(std::forward(element)))) { + // May throw, try to insert now before we publish the fact that we have this new block + MOODYCAMEL_TRY { + new ((*newBlock)[currentTailIndex]) T(std::forward(element)); + } + MOODYCAMEL_CATCH (...) 
{ + rewind_block_index_tail(); + idxEntry->value.store(nullptr, std::memory_order_relaxed); + this->parent->add_block_to_free_list(newBlock); + MOODYCAMEL_RETHROW; + } + } + + // Insert the new block into the index + idxEntry->value.store(newBlock, std::memory_order_relaxed); + + this->tailBlock = newBlock; + + if (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (nullptr) T(std::forward(element)))) { + this->tailIndex.store(newTailIndex, std::memory_order_release); + return true; + } + } + + // Enqueue + new ((*this->tailBlock)[currentTailIndex]) T(std::forward(element)); + + this->tailIndex.store(newTailIndex, std::memory_order_release); + return true; + } + + template + bool dequeue(U& element) + { + // See ExplicitProducer::dequeue for rationale and explanation + index_t tail = this->tailIndex.load(std::memory_order_relaxed); + index_t overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed); + if (details::circular_less_than(this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit, tail)) { + std::atomic_thread_fence(std::memory_order_acquire); + + index_t myDequeueCount = this->dequeueOptimisticCount.fetch_add(1, std::memory_order_relaxed); + assert(overcommit <= myDequeueCount); + tail = this->tailIndex.load(std::memory_order_acquire); + if ((details::likely)(details::circular_less_than(myDequeueCount - overcommit, tail))) { + index_t index = this->headIndex.fetch_add(1, std::memory_order_acq_rel); + + // Determine which block the element is in + auto entry = get_block_index_entry_for_index(index); + + // Dequeue + auto block = entry->value.load(std::memory_order_relaxed); + auto& el = *((*block)[index]); + + if (!MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, element = std::move(el))) { +#if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + // Note: Acquiring the mutex with every dequeue instead of only when a block + // is released is very sub-optimal, but it is, after all, purely debug code. + debug::DebugLock lock(producer->mutex); +#endif + struct Guard { + Block* block; + index_t index; + BlockIndexEntry* entry; + ConcurrentQueue* parent; + + ~Guard() + { + (*block)[index]->~T(); + if (block->ConcurrentQueue::Block::template set_empty(index)) { + entry->value.store(nullptr, std::memory_order_relaxed); + parent->add_block_to_free_list(block); + } + } + } guard = { block, index, entry, this->parent }; + + element = std::move(el); + } + else { + element = std::move(el); + el.~T(); + + if (block->ConcurrentQueue::Block::template set_empty(index)) { + { +#if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + debug::DebugLock lock(mutex); +#endif + // Add the block back into the global free pool (and remove from block index) + entry->value.store(nullptr, std::memory_order_relaxed); + } + this->parent->add_block_to_free_list(block); // releases the above store + } + } + + return true; + } + else { + this->dequeueOvercommit.fetch_add(1, std::memory_order_release); + } + } + + return false; + } + + template + bool enqueue_bulk(It itemFirst, size_t count) + { + // First, we need to make sure we have enough room to enqueue all of the elements; + // this means pre-allocating blocks and putting them in the block index (but only if + // all the allocations succeeded). + + // Note that the tailBlock we start off with may not be owned by us any more; + // this happens if it was filled up exactly to the top (setting tailIndex to + // the first index of the next block which is not yet allocated), then dequeued + // completely (putting it on the free list) before we enqueue again. 
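+ // For example, with a block size of, say, 32 elements: enqueue 32 items (tailIndex ends
+ // up on the first slot of a block that has not been allocated yet), dequeue all 32 (the
+ // old block is returned to the free pool), then enqueue again -- the cached tailBlock
+ // pointer now names a block this producer no longer owns, so the code below is careful
+ // not to link new blocks onto it when starting on a fresh block boundary.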
+ + index_t startTailIndex = this->tailIndex.load(std::memory_order_relaxed); + auto startBlock = this->tailBlock; + Block* firstAllocatedBlock = nullptr; + auto endBlock = this->tailBlock; + + // Figure out how many blocks we'll need to allocate, and do so + size_t blockBaseDiff = ((startTailIndex + count - 1) & ~static_cast(BLOCK_SIZE - 1)) - ((startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1)); + index_t currentTailIndex = (startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1); + if (blockBaseDiff > 0) { +#if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + debug::DebugLock lock(mutex); +#endif + do { + blockBaseDiff -= static_cast(BLOCK_SIZE); + currentTailIndex += static_cast(BLOCK_SIZE); + + // Find out where we'll be inserting this block in the block index + BlockIndexEntry* idxEntry = nullptr; // initialization here unnecessary but compiler can't always tell + Block* newBlock; + bool indexInserted = false; + auto head = this->headIndex.load(std::memory_order_relaxed); + assert(!details::circular_less_than(currentTailIndex, head)); + bool full = !details::circular_less_than(head, currentTailIndex + BLOCK_SIZE) || (MAX_SUBQUEUE_SIZE != details::const_numeric_max::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head)); + if (full || !(indexInserted = insert_block_index_entry(idxEntry, currentTailIndex)) || (newBlock = this->parent->ConcurrentQueue::template requisition_block()) == nullptr) { + // Index allocation or block allocation failed; revert any other allocations + // and index insertions done so far for this operation + if (indexInserted) { + rewind_block_index_tail(); + idxEntry->value.store(nullptr, std::memory_order_relaxed); + } + currentTailIndex = (startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1); + for (auto block = firstAllocatedBlock; block != nullptr; block = block->next) { + currentTailIndex += static_cast(BLOCK_SIZE); + idxEntry = get_block_index_entry_for_index(currentTailIndex); + idxEntry->value.store(nullptr, std::memory_order_relaxed); + rewind_block_index_tail(); + } + this->parent->add_blocks_to_free_list(firstAllocatedBlock); + this->tailBlock = startBlock; + + return false; + } + +#if MCDBGQ_TRACKMEM + newBlock->owner = this; +#endif + newBlock->ConcurrentQueue::Block::template reset_empty(); + newBlock->next = nullptr; + + // Insert the new block into the index + idxEntry->value.store(newBlock, std::memory_order_relaxed); + + // Store the chain of blocks so that we can undo if later allocations fail, + // and so that we can find the blocks when we do the actual enqueueing + if ((startTailIndex & static_cast(BLOCK_SIZE - 1)) != 0 || firstAllocatedBlock != nullptr) { + assert(this->tailBlock != nullptr); + this->tailBlock->next = newBlock; + } + this->tailBlock = newBlock; + endBlock = newBlock; + firstAllocatedBlock = firstAllocatedBlock == nullptr ? 
newBlock : firstAllocatedBlock; + } while (blockBaseDiff > 0); + } + + // Enqueue, one block at a time + index_t newTailIndex = startTailIndex + static_cast(count); + currentTailIndex = startTailIndex; + this->tailBlock = startBlock; + assert((startTailIndex & static_cast(BLOCK_SIZE - 1)) != 0 || firstAllocatedBlock != nullptr || count == 0); + if ((startTailIndex & static_cast(BLOCK_SIZE - 1)) == 0 && firstAllocatedBlock != nullptr) { + this->tailBlock = firstAllocatedBlock; + } + while (true) { + auto stopIndex = (currentTailIndex & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + if (details::circular_less_than(newTailIndex, stopIndex)) { + stopIndex = newTailIndex; + } + if (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (nullptr) T(details::deref_noexcept(itemFirst)))) { + while (currentTailIndex != stopIndex) { + new ((*this->tailBlock)[currentTailIndex++]) T(*itemFirst++); + } + } + else { + MOODYCAMEL_TRY { + while (currentTailIndex != stopIndex) { + new ((*this->tailBlock)[currentTailIndex]) T(details::nomove_if<(bool)!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (nullptr) T(details::deref_noexcept(itemFirst)))>::eval(*itemFirst)); + ++currentTailIndex; + ++itemFirst; + } + } + MOODYCAMEL_CATCH (...) { + auto constructedStopIndex = currentTailIndex; + auto lastBlockEnqueued = this->tailBlock; + + if (!details::is_trivially_destructible::value) { + auto block = startBlock; + if ((startTailIndex & static_cast(BLOCK_SIZE - 1)) == 0) { + block = firstAllocatedBlock; + } + currentTailIndex = startTailIndex; + while (true) { + stopIndex = (currentTailIndex & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + if (details::circular_less_than(constructedStopIndex, stopIndex)) { + stopIndex = constructedStopIndex; + } + while (currentTailIndex != stopIndex) { + (*block)[currentTailIndex++]->~T(); + } + if (block == lastBlockEnqueued) { + break; + } + block = block->next; + } + } + + currentTailIndex = (startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1); + for (auto block = firstAllocatedBlock; block != nullptr; block = block->next) { + currentTailIndex += static_cast(BLOCK_SIZE); + auto idxEntry = get_block_index_entry_for_index(currentTailIndex); + idxEntry->value.store(nullptr, std::memory_order_relaxed); + rewind_block_index_tail(); + } + this->parent->add_blocks_to_free_list(firstAllocatedBlock); + this->tailBlock = startBlock; + MOODYCAMEL_RETHROW; + } + } + + if (this->tailBlock == endBlock) { + assert(currentTailIndex == newTailIndex); + break; + } + this->tailBlock = this->tailBlock->next; + } + this->tailIndex.store(newTailIndex, std::memory_order_release); + return true; + } + + template + size_t dequeue_bulk(It& itemFirst, size_t max) + { + auto tail = this->tailIndex.load(std::memory_order_relaxed); + auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed); + auto desiredCount = static_cast(tail - (this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit)); + if (details::circular_less_than(0, desiredCount)) { + desiredCount = desiredCount < max ? desiredCount : max; + std::atomic_thread_fence(std::memory_order_acquire); + + auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(desiredCount, std::memory_order_relaxed); + assert(overcommit <= myDequeueCount); + + tail = this->tailIndex.load(std::memory_order_acquire); + auto actualCount = static_cast(tail - (myDequeueCount - overcommit)); + if (details::circular_less_than(0, actualCount)) { + actualCount = desiredCount < actualCount ? 
desiredCount : actualCount; + if (actualCount < desiredCount) { + this->dequeueOvercommit.fetch_add(desiredCount - actualCount, std::memory_order_release); + } + + // Get the first index. Note that since there's guaranteed to be at least actualCount elements, this + // will never exceed tail. + auto firstIndex = this->headIndex.fetch_add(actualCount, std::memory_order_acq_rel); + + // Iterate the blocks and dequeue + auto index = firstIndex; + BlockIndexHeader* localBlockIndex; + auto indexIndex = get_block_index_index_for_index(index, localBlockIndex); + do { + auto blockStartIndex = index; + auto endIndex = (index & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + endIndex = details::circular_less_than(firstIndex + static_cast(actualCount), endIndex) ? firstIndex + static_cast(actualCount) : endIndex; + + auto entry = localBlockIndex->index[indexIndex]; + auto block = entry->value.load(std::memory_order_relaxed); + if (MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, details::deref_noexcept(itemFirst) = std::move((*(*block)[index])))) { + while (index != endIndex) { + auto& el = *((*block)[index]); + *itemFirst++ = std::move(el); + el.~T(); + ++index; + } + } + else { + MOODYCAMEL_TRY { + while (index != endIndex) { + auto& el = *((*block)[index]); + *itemFirst = std::move(el); + ++itemFirst; + el.~T(); + ++index; + } + } + MOODYCAMEL_CATCH (...) { + do { + entry = localBlockIndex->index[indexIndex]; + block = entry->value.load(std::memory_order_relaxed); + while (index != endIndex) { + (*block)[index++]->~T(); + } + + if (block->ConcurrentQueue::Block::template set_many_empty(blockStartIndex, static_cast(endIndex - blockStartIndex))) { +#if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + debug::DebugLock lock(mutex); +#endif + entry->value.store(nullptr, std::memory_order_relaxed); + this->parent->add_block_to_free_list(block); + } + indexIndex = (indexIndex + 1) & (localBlockIndex->capacity - 1); + + blockStartIndex = index; + endIndex = (index & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + endIndex = details::circular_less_than(firstIndex + static_cast(actualCount), endIndex) ? firstIndex + static_cast(actualCount) : endIndex; + } while (index != firstIndex + actualCount); + + MOODYCAMEL_RETHROW; + } + } + if (block->ConcurrentQueue::Block::template set_many_empty(blockStartIndex, static_cast(endIndex - blockStartIndex))) { + { +#if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + debug::DebugLock lock(mutex); +#endif + // Note that the set_many_empty above did a release, meaning that anybody who acquires the block + // we're about to free can use it safely since our writes (and reads!) will have happened-before then. 
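+ // The nullptr store below can therefore be relaxed: it only needs to become visible
+ // together with the block itself, and add_block_to_free_list (see its comment below)
+ // performs the release that publishes both.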
+ entry->value.store(nullptr, std::memory_order_relaxed); + } + this->parent->add_block_to_free_list(block); // releases the above store + } + indexIndex = (indexIndex + 1) & (localBlockIndex->capacity - 1); + } while (index != firstIndex + actualCount); + + return actualCount; + } + else { + this->dequeueOvercommit.fetch_add(desiredCount, std::memory_order_release); + } + } + + return 0; + } + + private: + // The block size must be > 1, so any number with the low bit set is an invalid block base index + static const index_t INVALID_BLOCK_BASE = 1; + + struct BlockIndexEntry + { + std::atomic key; + std::atomic value; + }; + + struct BlockIndexHeader + { + size_t capacity; + std::atomic tail; + BlockIndexEntry* entries; + BlockIndexEntry** index; + BlockIndexHeader* prev; + }; + + template + inline bool insert_block_index_entry(BlockIndexEntry*& idxEntry, index_t blockStartIndex) + { + auto localBlockIndex = blockIndex.load(std::memory_order_relaxed); // We're the only writer thread, relaxed is OK + if (localBlockIndex == nullptr) { + return false; // this can happen if new_block_index failed in the constructor + } + auto newTail = (localBlockIndex->tail.load(std::memory_order_relaxed) + 1) & (localBlockIndex->capacity - 1); + idxEntry = localBlockIndex->index[newTail]; + if (idxEntry->key.load(std::memory_order_relaxed) == INVALID_BLOCK_BASE || + idxEntry->value.load(std::memory_order_relaxed) == nullptr) { + + idxEntry->key.store(blockStartIndex, std::memory_order_relaxed); + localBlockIndex->tail.store(newTail, std::memory_order_release); + return true; + } + + // No room in the old block index, try to allocate another one! + if (allocMode == CannotAlloc || !new_block_index()) { + return false; + } + localBlockIndex = blockIndex.load(std::memory_order_relaxed); + newTail = (localBlockIndex->tail.load(std::memory_order_relaxed) + 1) & (localBlockIndex->capacity - 1); + idxEntry = localBlockIndex->index[newTail]; + assert(idxEntry->key.load(std::memory_order_relaxed) == INVALID_BLOCK_BASE); + idxEntry->key.store(blockStartIndex, std::memory_order_relaxed); + localBlockIndex->tail.store(newTail, std::memory_order_release); + return true; + } + + inline void rewind_block_index_tail() + { + auto localBlockIndex = blockIndex.load(std::memory_order_relaxed); + localBlockIndex->tail.store((localBlockIndex->tail.load(std::memory_order_relaxed) - 1) & (localBlockIndex->capacity - 1), std::memory_order_relaxed); + } + + inline BlockIndexEntry* get_block_index_entry_for_index(index_t index) const + { + BlockIndexHeader* localBlockIndex; + auto idx = get_block_index_index_for_index(index, localBlockIndex); + return localBlockIndex->index[idx]; + } + + inline size_t get_block_index_index_for_index(index_t index, BlockIndexHeader*& localBlockIndex) const + { +#if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + debug::DebugLock lock(mutex); +#endif + index &= ~static_cast(BLOCK_SIZE - 1); + localBlockIndex = blockIndex.load(std::memory_order_acquire); + auto tail = localBlockIndex->tail.load(std::memory_order_acquire); + auto tailBase = localBlockIndex->index[tail]->key.load(std::memory_order_relaxed); + assert(tailBase != INVALID_BLOCK_BASE); + // Note: Must use division instead of shift because the index may wrap around, causing a negative + // offset, whose negativity we want to preserve + auto offset = static_cast(static_cast::type>(index - tailBase) / BLOCK_SIZE); + size_t idx = (tail + offset) & (localBlockIndex->capacity - 1); + assert(localBlockIndex->index[idx]->key.load(std::memory_order_relaxed) 
== index && localBlockIndex->index[idx]->value.load(std::memory_order_relaxed) != nullptr); + return idx; + } + + bool new_block_index() + { + auto prev = blockIndex.load(std::memory_order_relaxed); + size_t prevCapacity = prev == nullptr ? 0 : prev->capacity; + auto entryCount = prev == nullptr ? nextBlockIndexCapacity : prevCapacity; + auto raw = static_cast((Traits::malloc)( + sizeof(BlockIndexHeader) + + std::alignment_of::value - 1 + sizeof(BlockIndexEntry) * entryCount + + std::alignment_of::value - 1 + sizeof(BlockIndexEntry*) * nextBlockIndexCapacity)); + if (raw == nullptr) { + return false; + } + + auto header = new (raw) BlockIndexHeader; + auto entries = reinterpret_cast(details::align_for(raw + sizeof(BlockIndexHeader))); + auto index = reinterpret_cast(details::align_for(reinterpret_cast(entries) + sizeof(BlockIndexEntry) * entryCount)); + if (prev != nullptr) { + auto prevTail = prev->tail.load(std::memory_order_relaxed); + auto prevPos = prevTail; + size_t i = 0; + do { + prevPos = (prevPos + 1) & (prev->capacity - 1); + index[i++] = prev->index[prevPos]; + } while (prevPos != prevTail); + assert(i == prevCapacity); + } + for (size_t i = 0; i != entryCount; ++i) { + new (entries + i) BlockIndexEntry; + entries[i].key.store(INVALID_BLOCK_BASE, std::memory_order_relaxed); + index[prevCapacity + i] = entries + i; + } + header->prev = prev; + header->entries = entries; + header->index = index; + header->capacity = nextBlockIndexCapacity; + header->tail.store((prevCapacity - 1) & (nextBlockIndexCapacity - 1), std::memory_order_relaxed); + + blockIndex.store(header, std::memory_order_release); + + nextBlockIndexCapacity <<= 1; + + return true; + } + + private: + size_t nextBlockIndexCapacity; + std::atomic blockIndex; + +#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED + public: + details::ThreadExitListener threadExitListener; + private: +#endif + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + public: + ImplicitProducer* nextImplicitProducer; + private: +#endif + +#if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + mutable debug::DebugMutex mutex; +#endif +#if MCDBGQ_TRACKMEM + friend struct MemStats; +#endif + }; + + + ////////////////////////////////// + // Block pool manipulation + ////////////////////////////////// + + void populate_initial_block_list(size_t blockCount) + { + initialBlockPoolSize = blockCount; + if (initialBlockPoolSize == 0) { + initialBlockPool = nullptr; + return; + } + + initialBlockPool = create_array(blockCount); + if (initialBlockPool == nullptr) { + initialBlockPoolSize = 0; + } + for (size_t i = 0; i < initialBlockPoolSize; ++i) { + initialBlockPool[i].dynamicallyAllocated = false; + } + } + + inline Block* try_get_block_from_initial_pool() + { + if (initialBlockPoolIndex.load(std::memory_order_relaxed) >= initialBlockPoolSize) { + return nullptr; + } + + auto index = initialBlockPoolIndex.fetch_add(1, std::memory_order_relaxed); + + return index < initialBlockPoolSize ? 
(initialBlockPool + index) : nullptr; + } + + inline void add_block_to_free_list(Block* block) + { +#if MCDBGQ_TRACKMEM + block->owner = nullptr; +#endif + freeList.add(block); + } + + inline void add_blocks_to_free_list(Block* block) + { + while (block != nullptr) { + auto next = block->next; + add_block_to_free_list(block); + block = next; + } + } + + inline Block* try_get_block_from_free_list() + { + return freeList.try_get(); + } + + // Gets a free block from one of the memory pools, or allocates a new one (if applicable) + template + Block* requisition_block() + { + auto block = try_get_block_from_initial_pool(); + if (block != nullptr) { + return block; + } + + block = try_get_block_from_free_list(); + if (block != nullptr) { + return block; + } + + if (canAlloc == CanAlloc) { + return create(); + } + + return nullptr; + } + + +#if MCDBGQ_TRACKMEM + public: + struct MemStats { + size_t allocatedBlocks; + size_t usedBlocks; + size_t freeBlocks; + size_t ownedBlocksExplicit; + size_t ownedBlocksImplicit; + size_t implicitProducers; + size_t explicitProducers; + size_t elementsEnqueued; + size_t blockClassBytes; + size_t queueClassBytes; + size_t implicitBlockIndexBytes; + size_t explicitBlockIndexBytes; + + friend class ConcurrentQueue; + + private: + static MemStats getFor(ConcurrentQueue* q) + { + MemStats stats = { 0 }; + + stats.elementsEnqueued = q->size_approx(); + + auto block = q->freeList.head_unsafe(); + while (block != nullptr) { + ++stats.allocatedBlocks; + ++stats.freeBlocks; + block = block->freeListNext.load(std::memory_order_relaxed); + } + + for (auto ptr = q->producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) { + bool implicit = dynamic_cast(ptr) != nullptr; + stats.implicitProducers += implicit ? 1 : 0; + stats.explicitProducers += implicit ? 
0 : 1; + + if (implicit) { + auto prod = static_cast(ptr); + stats.queueClassBytes += sizeof(ImplicitProducer); + auto head = prod->headIndex.load(std::memory_order_relaxed); + auto tail = prod->tailIndex.load(std::memory_order_relaxed); + auto hash = prod->blockIndex.load(std::memory_order_relaxed); + if (hash != nullptr) { + for (size_t i = 0; i != hash->capacity; ++i) { + if (hash->index[i]->key.load(std::memory_order_relaxed) != ImplicitProducer::INVALID_BLOCK_BASE && hash->index[i]->value.load(std::memory_order_relaxed) != nullptr) { + ++stats.allocatedBlocks; + ++stats.ownedBlocksImplicit; + } + } + stats.implicitBlockIndexBytes += hash->capacity * sizeof(typename ImplicitProducer::BlockIndexEntry); + for (; hash != nullptr; hash = hash->prev) { + stats.implicitBlockIndexBytes += sizeof(typename ImplicitProducer::BlockIndexHeader) + hash->capacity * sizeof(typename ImplicitProducer::BlockIndexEntry*); + } + } + for (; details::circular_less_than(head, tail); head += BLOCK_SIZE) { + //auto block = prod->get_block_index_entry_for_index(head); + ++stats.usedBlocks; + } + } + else { + auto prod = static_cast(ptr); + stats.queueClassBytes += sizeof(ExplicitProducer); + auto tailBlock = prod->tailBlock; + bool wasNonEmpty = false; + if (tailBlock != nullptr) { + auto block = tailBlock; + do { + ++stats.allocatedBlocks; + if (!block->ConcurrentQueue::Block::template is_empty() || wasNonEmpty) { + ++stats.usedBlocks; + wasNonEmpty = wasNonEmpty || block != tailBlock; + } + ++stats.ownedBlocksExplicit; + block = block->next; + } while (block != tailBlock); + } + auto index = prod->blockIndex.load(std::memory_order_relaxed); + while (index != nullptr) { + stats.explicitBlockIndexBytes += sizeof(typename ExplicitProducer::BlockIndexHeader) + index->size * sizeof(typename ExplicitProducer::BlockIndexEntry); + index = static_cast(index->prev); + } + } + } + + auto freeOnInitialPool = q->initialBlockPoolIndex.load(std::memory_order_relaxed) >= q->initialBlockPoolSize ? 0 : q->initialBlockPoolSize - q->initialBlockPoolIndex.load(std::memory_order_relaxed); + stats.allocatedBlocks += freeOnInitialPool; + stats.freeBlocks += freeOnInitialPool; + + stats.blockClassBytes = sizeof(Block) * stats.allocatedBlocks; + stats.queueClassBytes += sizeof(ConcurrentQueue); + + return stats; + } + }; + + // For debugging only. Not thread-safe. + MemStats getMemStats() + { + return MemStats::getFor(this); + } + private: + friend struct MemStats; +#endif + + + ////////////////////////////////// + // Producer list manipulation + ////////////////////////////////// + + ProducerBase* recycle_or_create_producer(bool isExplicit) + { + bool recycled; + return recycle_or_create_producer(isExplicit, recycled); + } + + ProducerBase* recycle_or_create_producer(bool isExplicit, bool& recycled) + { +#if MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH + debug::DebugLock lock(implicitProdMutex); +#endif + // Try to re-use one first + for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) { + if (ptr->inactive.load(std::memory_order_relaxed) && ptr->isExplicit == isExplicit) { + bool expected = true; + if (ptr->inactive.compare_exchange_strong(expected, /* desired */ false, std::memory_order_acquire, std::memory_order_relaxed)) { + // We caught one! It's been marked as activated, the caller can have it + recycled = true; + return ptr; + } + } + } + + recycled = false; + return add_producer(isExplicit ? 
static_cast(create(this)) : create(this)); + } + + ProducerBase* add_producer(ProducerBase* producer) + { + // Handle failed memory allocation + if (producer == nullptr) { + return nullptr; + } + + producerCount.fetch_add(1, std::memory_order_relaxed); + + // Add it to the lock-free list + auto prevTail = producerListTail.load(std::memory_order_relaxed); + do { + producer->next = prevTail; + } while (!producerListTail.compare_exchange_weak(prevTail, producer, std::memory_order_release, std::memory_order_relaxed)); + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + if (producer->isExplicit) { + auto prevTailExplicit = explicitProducers.load(std::memory_order_relaxed); + do { + static_cast(producer)->nextExplicitProducer = prevTailExplicit; + } while (!explicitProducers.compare_exchange_weak(prevTailExplicit, static_cast(producer), std::memory_order_release, std::memory_order_relaxed)); + } + else { + auto prevTailImplicit = implicitProducers.load(std::memory_order_relaxed); + do { + static_cast(producer)->nextImplicitProducer = prevTailImplicit; + } while (!implicitProducers.compare_exchange_weak(prevTailImplicit, static_cast(producer), std::memory_order_release, std::memory_order_relaxed)); + } +#endif + + return producer; + } + + void reown_producers() + { + // After another instance is moved-into/swapped-with this one, all the + // producers we stole still think their parents are the other queue. + // So fix them up! + for (auto ptr = producerListTail.load(std::memory_order_relaxed); ptr != nullptr; ptr = ptr->next_prod()) { + ptr->parent = this; + } + } + + + ////////////////////////////////// + // Implicit producer hash + ////////////////////////////////// + + struct ImplicitProducerKVP + { + std::atomic key; + ImplicitProducer* value; // No need for atomicity since it's only read by the thread that sets it in the first place + + ImplicitProducerKVP() : value(nullptr) { } + + ImplicitProducerKVP(ImplicitProducerKVP&& other) MOODYCAMEL_NOEXCEPT + { + key.store(other.key.load(std::memory_order_relaxed), std::memory_order_relaxed); + value = other.value; + } + + inline ImplicitProducerKVP& operator=(ImplicitProducerKVP&& other) MOODYCAMEL_NOEXCEPT + { + swap(other); + return *this; + } + + inline void swap(ImplicitProducerKVP& other) MOODYCAMEL_NOEXCEPT + { + if (this != &other) { + details::swap_relaxed(key, other.key); + std::swap(value, other.value); + } + } + }; + + template + friend void moodycamel::swap(typename ConcurrentQueue::ImplicitProducerKVP&, typename ConcurrentQueue::ImplicitProducerKVP&) MOODYCAMEL_NOEXCEPT; + + struct ImplicitProducerHash + { + size_t capacity; + ImplicitProducerKVP* entries; + ImplicitProducerHash* prev; + }; + + inline void populate_initial_implicit_producer_hash() + { + if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return; + + implicitProducerHashCount.store(0, std::memory_order_relaxed); + auto hash = &initialImplicitProducerHash; + hash->capacity = INITIAL_IMPLICIT_PRODUCER_HASH_SIZE; + hash->entries = &initialImplicitProducerHashEntries[0]; + for (size_t i = 0; i != INITIAL_IMPLICIT_PRODUCER_HASH_SIZE; ++i) { + initialImplicitProducerHashEntries[i].key.store(details::invalid_thread_id, std::memory_order_relaxed); + } + hash->prev = nullptr; + implicitProducerHash.store(hash, std::memory_order_relaxed); + } + + void swap_implicit_producer_hashes(ConcurrentQueue& other) + { + if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return; + + // Swap (assumes our implicit producer hash is initialized) + 
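+ // Each queue embeds its initial hash table (and its entry array) by value, so after
+ // swapping the entry arrays and the atomic head pointers we have to re-point any
+ // entries/prev links that still refer to the other instance's embedded table back to
+ // our own embedded table, and vice versa -- that is what the fix-up branches below do.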
initialImplicitProducerHashEntries.swap(other.initialImplicitProducerHashEntries); + initialImplicitProducerHash.entries = &initialImplicitProducerHashEntries[0]; + other.initialImplicitProducerHash.entries = &other.initialImplicitProducerHashEntries[0]; + + details::swap_relaxed(implicitProducerHashCount, other.implicitProducerHashCount); + + details::swap_relaxed(implicitProducerHash, other.implicitProducerHash); + if (implicitProducerHash.load(std::memory_order_relaxed) == &other.initialImplicitProducerHash) { + implicitProducerHash.store(&initialImplicitProducerHash, std::memory_order_relaxed); + } + else { + ImplicitProducerHash* hash; + for (hash = implicitProducerHash.load(std::memory_order_relaxed); hash->prev != &other.initialImplicitProducerHash; hash = hash->prev) { + continue; + } + hash->prev = &initialImplicitProducerHash; + } + if (other.implicitProducerHash.load(std::memory_order_relaxed) == &initialImplicitProducerHash) { + other.implicitProducerHash.store(&other.initialImplicitProducerHash, std::memory_order_relaxed); + } + else { + ImplicitProducerHash* hash; + for (hash = other.implicitProducerHash.load(std::memory_order_relaxed); hash->prev != &initialImplicitProducerHash; hash = hash->prev) { + continue; + } + hash->prev = &other.initialImplicitProducerHash; + } + } + + // Only fails (returns nullptr) if memory allocation fails + ImplicitProducer* get_or_add_implicit_producer() + { + // Note that since the data is essentially thread-local (key is thread ID), + // there's a reduced need for fences (memory ordering is already consistent + // for any individual thread), except for the current table itself. + + // Start by looking for the thread ID in the current and all previous hash tables. + // If it's not found, it must not be in there yet, since this same thread would + // have added it previously to one of the tables that we traversed. + + // Code and algorithm adapted from http://preshing.com/20130605/the-worlds-simplest-lock-free-hash-table + +#if MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH + debug::DebugLock lock(implicitProdMutex); +#endif + + auto id = details::thread_id(); + auto hashedId = details::hash_thread_id(id); + + auto mainHash = implicitProducerHash.load(std::memory_order_acquire); + for (auto hash = mainHash; hash != nullptr; hash = hash->prev) { + // Look for the id in this hash + auto index = hashedId; + while (true) { // Not an infinite loop because at least one slot is free in the hash table + index &= hash->capacity - 1; + + auto probedKey = hash->entries[index].key.load(std::memory_order_relaxed); + if (probedKey == id) { + // Found it! If we had to search several hashes deep, though, we should lazily add it + // to the current main hash table to avoid the extended search next time. + // Note there's guaranteed to be room in the current hash table since every subsequent + // table implicitly reserves space for all previous tables (there's only one + // implicitProducerHashCount). 
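+ // The lazy re-insertion below uses the same linear probing as the lookup: starting from
+ // the hashed thread id, claim the first slot whose key is still marked empty (or
+ // reusable, when thread-local storage support lets slots from exited threads be recycled).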
+ auto value = hash->entries[index].value; + if (hash != mainHash) { + index = hashedId; + while (true) { + index &= mainHash->capacity - 1; + probedKey = mainHash->entries[index].key.load(std::memory_order_relaxed); + auto empty = details::invalid_thread_id; +#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED + auto reusable = details::invalid_thread_id2; + if ((probedKey == empty && mainHash->entries[index].key.compare_exchange_strong(empty, id, std::memory_order_relaxed, std::memory_order_relaxed)) || + (probedKey == reusable && mainHash->entries[index].key.compare_exchange_strong(reusable, id, std::memory_order_acquire, std::memory_order_acquire))) { +#else + if ((probedKey == empty && mainHash->entries[index].key.compare_exchange_strong(empty, id, std::memory_order_relaxed, std::memory_order_relaxed))) { +#endif + mainHash->entries[index].value = value; + break; + } + ++index; + } + } + + return value; + } + if (probedKey == details::invalid_thread_id) { + break; // Not in this hash table + } + ++index; + } + } + + // Insert! + auto newCount = 1 + implicitProducerHashCount.fetch_add(1, std::memory_order_relaxed); + while (true) { + if (newCount >= (mainHash->capacity >> 1) && !implicitProducerHashResizeInProgress.test_and_set(std::memory_order_acquire)) { + // We've acquired the resize lock, try to allocate a bigger hash table. + // Note the acquire fence synchronizes with the release fence at the end of this block, and hence when + // we reload implicitProducerHash it must be the most recent version (it only gets changed within this + // locked block). + mainHash = implicitProducerHash.load(std::memory_order_acquire); + if (newCount >= (mainHash->capacity >> 1)) { + auto newCapacity = mainHash->capacity << 1; + while (newCount >= (newCapacity >> 1)) { + newCapacity <<= 1; + } + auto raw = static_cast((Traits::malloc)(sizeof(ImplicitProducerHash) + std::alignment_of::value - 1 + sizeof(ImplicitProducerKVP) * newCapacity)); + if (raw == nullptr) { + // Allocation failed + implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed); + implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed); + return nullptr; + } + + auto newHash = new (raw) ImplicitProducerHash; + newHash->capacity = newCapacity; + newHash->entries = reinterpret_cast(details::align_for(raw + sizeof(ImplicitProducerHash))); + for (size_t i = 0; i != newCapacity; ++i) { + new (newHash->entries + i) ImplicitProducerKVP; + newHash->entries[i].key.store(details::invalid_thread_id, std::memory_order_relaxed); + } + newHash->prev = mainHash; + implicitProducerHash.store(newHash, std::memory_order_release); + implicitProducerHashResizeInProgress.clear(std::memory_order_release); + mainHash = newHash; + } + else { + implicitProducerHashResizeInProgress.clear(std::memory_order_release); + } + } + + // If it's < three-quarters full, add to the old one anyway so that we don't have to wait for the next table + // to finish being allocated by another thread (and if we just finished allocating above, the condition will + // always be true) + if (newCount < (mainHash->capacity >> 1) + (mainHash->capacity >> 2)) { + bool recycled; + auto producer = static_cast(recycle_or_create_producer(false, recycled)); + if (producer == nullptr) { + implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed); + return nullptr; + } + if (recycled) { + implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed); + } + +#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED + producer->threadExitListener.callback = 
&ConcurrentQueue::implicit_producer_thread_exited_callback; + producer->threadExitListener.userData = producer; + details::ThreadExitNotifier::subscribe(&producer->threadExitListener); +#endif + + auto index = hashedId; + while (true) { + index &= mainHash->capacity - 1; + auto probedKey = mainHash->entries[index].key.load(std::memory_order_relaxed); + + auto empty = details::invalid_thread_id; +#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED + auto reusable = details::invalid_thread_id2; + if ((probedKey == empty && mainHash->entries[index].key.compare_exchange_strong(empty, id, std::memory_order_relaxed, std::memory_order_relaxed)) || + (probedKey == reusable && mainHash->entries[index].key.compare_exchange_strong(reusable, id, std::memory_order_acquire, std::memory_order_acquire))) { +#else + if ((probedKey == empty && mainHash->entries[index].key.compare_exchange_strong(empty, id, std::memory_order_relaxed, std::memory_order_relaxed))) { +#endif + mainHash->entries[index].value = producer; + break; + } + ++index; + } + return producer; + } + + // Hmm, the old hash is quite full and somebody else is busy allocating a new one. + // We need to wait for the allocating thread to finish (if it succeeds, we add, if not, + // we try to allocate ourselves). + mainHash = implicitProducerHash.load(std::memory_order_acquire); + } + } + +#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED + void implicit_producer_thread_exited(ImplicitProducer* producer) + { + // Remove from thread exit listeners + details::ThreadExitNotifier::unsubscribe(&producer->threadExitListener); + + // Remove from hash +#if MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH + debug::DebugLock lock(implicitProdMutex); +#endif + auto hash = implicitProducerHash.load(std::memory_order_acquire); + assert(hash != nullptr); // The thread exit listener is only registered if we were added to a hash in the first place + auto id = details::thread_id(); + auto hashedId = details::hash_thread_id(id); + details::thread_id_t probedKey; + + // We need to traverse all the hashes just in case other threads aren't on the current one yet and are + // trying to add an entry thinking there's a free slot (because they reused a producer) + for (; hash != nullptr; hash = hash->prev) { + auto index = hashedId; + do { + index &= hash->capacity - 1; + probedKey = hash->entries[index].key.load(std::memory_order_relaxed); + if (probedKey == id) { + hash->entries[index].key.store(details::invalid_thread_id2, std::memory_order_release); + break; + } + ++index; + } while (probedKey != details::invalid_thread_id); // Can happen if the hash has changed but we weren't put back in it yet, or if we weren't added to this hash in the first place + } + + // Mark the queue as being recyclable + producer->inactive.store(true, std::memory_order_release); + } + + static void implicit_producer_thread_exited_callback(void* userData) + { + auto producer = static_cast(userData); + auto queue = producer->parent; + queue->implicit_producer_thread_exited(producer); + } +#endif + + ////////////////////////////////// + // Utility functions + ////////////////////////////////// + + template + static inline U* create_array(size_t count) + { + assert(count > 0); + auto p = static_cast((Traits::malloc)(sizeof(U) * count)); + if (p == nullptr) { + return nullptr; + } + + for (size_t i = 0; i != count; ++i) { + new (p + i) U(); + } + return p; + } + + template + static inline void destroy_array(U* p, size_t count) + { + if (p != nullptr) { + assert(count > 0); + for (size_t i = count; i != 0; ) { + (p + 
--i)->~U(); + } + (Traits::free)(p); + } + } + + template + static inline U* create() + { + auto p = (Traits::malloc)(sizeof(U)); + return p != nullptr ? new (p) U : nullptr; + } + + template + static inline U* create(A1&& a1) + { + auto p = (Traits::malloc)(sizeof(U)); + return p != nullptr ? new (p) U(std::forward(a1)) : nullptr; + } + + template + static inline void destroy(U* p) + { + if (p != nullptr) { + p->~U(); + } + (Traits::free)(p); + } + +private: + std::atomic producerListTail; + std::atomic producerCount; + + std::atomic initialBlockPoolIndex; + Block* initialBlockPool; + size_t initialBlockPoolSize; + +#if !MCDBGQ_USEDEBUGFREELIST + FreeList freeList; +#else + debug::DebugFreeList freeList; +#endif + + std::atomic implicitProducerHash; + std::atomic implicitProducerHashCount; // Number of slots logically used + ImplicitProducerHash initialImplicitProducerHash; + std::array initialImplicitProducerHashEntries; + std::atomic_flag implicitProducerHashResizeInProgress; + + std::atomic nextExplicitConsumerId; + std::atomic globalExplicitConsumerOffset; + +#if MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH + debug::DebugMutex implicitProdMutex; +#endif + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + std::atomic explicitProducers; + std::atomic implicitProducers; +#endif +}; + + +template +ProducerToken::ProducerToken(ConcurrentQueue& queue) + : producer(queue.recycle_or_create_producer(true)) +{ + if (producer != nullptr) { + producer->token = this; + } +} + +template +ProducerToken::ProducerToken(BlockingConcurrentQueue& queue) + : producer(reinterpret_cast*>(&queue)->recycle_or_create_producer(true)) +{ + if (producer != nullptr) { + producer->token = this; + } +} + +template +ConsumerToken::ConsumerToken(ConcurrentQueue& queue) + : itemsConsumedFromCurrent(0), currentProducer(nullptr), desiredProducer(nullptr) +{ + initialOffset = queue.nextExplicitConsumerId.fetch_add(1, std::memory_order_release); + lastKnownGlobalOffset = -1; +} + +template +ConsumerToken::ConsumerToken(BlockingConcurrentQueue& queue) + : itemsConsumedFromCurrent(0), currentProducer(nullptr), desiredProducer(nullptr) +{ + initialOffset = reinterpret_cast*>(&queue)->nextExplicitConsumerId.fetch_add(1, std::memory_order_release); + lastKnownGlobalOffset = -1; +} + +template +inline void swap(ConcurrentQueue& a, ConcurrentQueue& b) MOODYCAMEL_NOEXCEPT +{ + a.swap(b); +} + +inline void swap(ProducerToken& a, ProducerToken& b) MOODYCAMEL_NOEXCEPT +{ + a.swap(b); +} + +inline void swap(ConsumerToken& a, ConsumerToken& b) MOODYCAMEL_NOEXCEPT +{ + a.swap(b); +} + +template +inline void swap(typename ConcurrentQueue::ImplicitProducerKVP& a, typename ConcurrentQueue::ImplicitProducerKVP& b) MOODYCAMEL_NOEXCEPT +{ + a.swap(b); +} + +} + +#if defined(__GNUC__) +#pragma GCC diagnostic pop +#endif diff --git a/c_src/enlfq/enlfq.cc b/c_src/enlfq/enlfq.cc new file mode 100644 index 0000000..4ccc7a9 --- /dev/null +++ b/c_src/enlfq/enlfq.cc @@ -0,0 +1,84 @@ +#include "enlfq.h" +#include "enlfq_nif.h" + +#include "nif_utils.h" + +#include "concurrentqueue.h" + + +struct q_item { + ErlNifEnv *env; + ERL_NIF_TERM term; +}; + +struct squeue { + moodycamel::ConcurrentQueue *queue; +}; + + +void nif_enlfq_free(ErlNifEnv *, void *obj) { + squeue *inst = static_cast(obj); + + if (inst != nullptr) { + q_item item; + while (inst->queue->try_dequeue(item)) { + enif_free_env(item.env); + } + delete inst->queue; + } +} + +ERL_NIF_TERM nif_enlfq_new(ErlNifEnv *env, int, const ERL_NIF_TERM *) { + shared_data *data = static_cast(enif_priv_data(env)); + + + 
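+ // Allocate a NIF resource cell that wraps the lock-free queue; enif_release_resource
+ // below transfers ownership to the returned Erlang term, and nif_enlfq_free drains any
+ // remaining items (freeing their private envs) and deletes the queue once that term is
+ // garbage collected.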
squeue *qinst = static_cast(enif_alloc_resource(data->resQueueInstance, sizeof(squeue))); + qinst->queue = new moodycamel::ConcurrentQueue; + + if (qinst == NULL) + return make_error(env, "enif_alloc_resource failed"); + + ERL_NIF_TERM term = enif_make_resource(env, qinst); + enif_release_resource(qinst); + return enif_make_tuple2(env, ATOMS.atomOk, term); +} + +ERL_NIF_TERM nif_enlfq_push(ErlNifEnv *env, int, const ERL_NIF_TERM argv[]) { + shared_data *data = static_cast(enif_priv_data(env)); + + squeue *inst; + + if (!enif_get_resource(env, argv[0], data->resQueueInstance, (void **) &inst)) { + return enif_make_badarg(env); + } + + q_item item; + + item.env = enif_alloc_env(); + item.term = enif_make_copy(item.env, argv[1]); + + inst->queue->enqueue(item); + + return ATOMS.atomTrue; +} + +ERL_NIF_TERM nif_enlfq_pop(ErlNifEnv *env, int, const ERL_NIF_TERM argv[]) { + shared_data *data = static_cast(enif_priv_data(env)); + squeue *inst = NULL; + + if (!enif_get_resource(env, argv[0], data->resQueueInstance, (void **) &inst)) { + return enif_make_badarg(env); + } + + ERL_NIF_TERM term; + q_item item; + + if (inst->queue->try_dequeue(item)) { + term = enif_make_copy(env, item.term); + enif_free_env(item.env); + return enif_make_tuple2(env, ATOMS.atomOk, term); + } else { + return ATOMS.atomEmpty; + } + +} diff --git a/c_src/enlfq/enlfq.h b/c_src/enlfq/enlfq.h new file mode 100644 index 0000000..08b2ca4 --- /dev/null +++ b/c_src/enlfq/enlfq.h @@ -0,0 +1,10 @@ +#pragma once + +#include "erl_nif.h" + +extern "C" { +void nif_enlfq_free(ErlNifEnv *env, void *obj); +ERL_NIF_TERM nif_enlfq_new(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); +ERL_NIF_TERM nif_enlfq_push(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); +ERL_NIF_TERM nif_enlfq_pop(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); +} \ No newline at end of file diff --git a/c_src/enlfq/enlfq_nif.cc b/c_src/enlfq/enlfq_nif.cc new file mode 100644 index 0000000..cf5c4f2 --- /dev/null +++ b/c_src/enlfq/enlfq_nif.cc @@ -0,0 +1,57 @@ +#include +#include "enlfq_nif.h" +#include "enlfq.h" +#include "nif_utils.h" + +const char kAtomOk[] = "ok"; +const char kAtomError[] = "error"; +const char kAtomTrue[] = "true"; +//const char kAtomFalse[] = "false"; +//const char kAtomUndefined[] = "undefined"; +const char kAtomEmpty[] = "empty"; + +atoms ATOMS; + +void open_resources(ErlNifEnv *env, shared_data *data) { + ErlNifResourceFlags flags = static_cast(ERL_NIF_RT_CREATE | ERL_NIF_RT_TAKEOVER); + data->resQueueInstance = enif_open_resource_type(env, NULL, "enlfq_instance", nif_enlfq_free, flags, NULL); +} + +int on_nif_load(ErlNifEnv *env, void **priv_data, ERL_NIF_TERM) { + + ATOMS.atomOk = make_atom(env, kAtomOk); + ATOMS.atomError = make_atom(env, kAtomError); + ATOMS.atomTrue = make_atom(env, kAtomTrue); +// ATOMS.atomFalse = make_atom(env, kAtomFalse); +// ATOMS.atomUndefined = make_atom(env, kAtomUndefined); + ATOMS.atomEmpty = make_atom(env, kAtomEmpty); + + shared_data *data = static_cast(enif_alloc(sizeof(shared_data))); + open_resources(env, data); + + *priv_data = data; + return 0; +} + +void on_nif_unload(ErlNifEnv *, void *priv_data) { + shared_data *data = static_cast(priv_data); + enif_free(data); +} + +int on_nif_upgrade(ErlNifEnv *env, void **priv, void **, ERL_NIF_TERM) { + shared_data *data = static_cast(enif_alloc(sizeof(shared_data))); + open_resources(env, data); + + *priv = data; + return 0; +} + +static ErlNifFunc nif_funcs[] = + { + {"new", 0, nif_enlfq_new}, + {"push", 2, nif_enlfq_push}, + {"pop", 1, 
nif_enlfq_pop} + }; + +ERL_NIF_INIT(enlfq, nif_funcs, on_nif_load, NULL, on_nif_upgrade, on_nif_unload) + diff --git a/c_src/enlfq/enlfq_nif.h b/c_src/enlfq/enlfq_nif.h new file mode 100644 index 0000000..88f7da5 --- /dev/null +++ b/c_src/enlfq/enlfq_nif.h @@ -0,0 +1,19 @@ +#pragma once +#include "erl_nif.h" + +struct atoms +{ + ERL_NIF_TERM atomOk; + ERL_NIF_TERM atomError; + ERL_NIF_TERM atomTrue; +// ERL_NIF_TERM atomFalse; +// ERL_NIF_TERM atomUndefined; + ERL_NIF_TERM atomEmpty; +}; + +struct shared_data +{ + ErlNifResourceType* resQueueInstance; +}; + +extern atoms ATOMS; diff --git a/c_src/enlfq/nif_utils.cc b/c_src/enlfq/nif_utils.cc new file mode 100644 index 0000000..a32e17d --- /dev/null +++ b/c_src/enlfq/nif_utils.cc @@ -0,0 +1,27 @@ +#include "nif_utils.h" +#include "enlfq_nif.h" + +#include + +ERL_NIF_TERM make_atom(ErlNifEnv* env, const char* name) +{ + ERL_NIF_TERM ret; + + if(enif_make_existing_atom(env, name, &ret, ERL_NIF_LATIN1)) + return ret; + + return enif_make_atom(env, name); +} + +ERL_NIF_TERM make_binary(ErlNifEnv* env, const char* buff, size_t length) +{ + ERL_NIF_TERM term; + unsigned char *destination_buffer = enif_make_new_binary(env, length, &term); + memcpy(destination_buffer, buff, length); + return term; +} + +ERL_NIF_TERM make_error(ErlNifEnv* env, const char* error) +{ + return enif_make_tuple2(env, ATOMS.atomError, make_binary(env, error, strlen(error))); +} diff --git a/c_src/enlfq/nif_utils.h b/c_src/enlfq/nif_utils.h new file mode 100644 index 0000000..3b0a929 --- /dev/null +++ b/c_src/enlfq/nif_utils.h @@ -0,0 +1,6 @@ +#pragma once +#include "erl_nif.h" + +ERL_NIF_TERM make_atom(ErlNifEnv* env, const char* name); +ERL_NIF_TERM make_error(ErlNifEnv* env, const char* error); +ERL_NIF_TERM make_binary(ErlNifEnv* env, const char* buff, size_t length); \ No newline at end of file diff --git a/c_src/enlfq/rebar.config b/c_src/enlfq/rebar.config new file mode 100644 index 0000000..da73819 --- /dev/null +++ b/c_src/enlfq/rebar.config @@ -0,0 +1,7 @@ +{port_specs, [ + {"../../priv/enlfq.so", ["*.cc"]} +]}. + + + + diff --git a/c_src/etsq/etsq.cpp b/c_src/etsq/etsq.cpp new file mode 100644 index 0000000..15e3c67 --- /dev/null +++ b/c_src/etsq/etsq.cpp @@ -0,0 +1,172 @@ +#include "etsq.h" + +ErlNifRWLock *qinfo_map_rwlock; +QInfoMap qinfo_map; + +// Function finds the queue from map and returns QueueInfo +// Not thread safe. +QueueInfo* get_q_info(char* name) +{ + //std::cout<<"Info: "<< name<second; + } + return NULL; +} + +void new_q(char* name) +{ + //std::cout<<"Create: " << name<pmutex); + pqueue_info->queue.push(erl_term); + return true; + } + return false; +} + +// Returns new ErlTerm. 
Caller should delete it +ErlTerm* pop(char* name, bool read_only) +{ + QueueInfo *pqueue_info = NULL; + ReadLock read_lock(qinfo_map_rwlock); + if (NULL != (pqueue_info = get_q_info(name))) + { + Mutex mutex(pqueue_info->pmutex); + if (!pqueue_info->queue.empty()) + { + ErlTerm *erl_term = pqueue_info->queue.front(); + if(read_only) + { + return new ErlTerm(erl_term); + } + pqueue_info->queue.pop(); + return erl_term; + } + return new ErlTerm("empty"); + } + return NULL; +} + +static ERL_NIF_TERM new_queue(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + int size = 100; + char *name = new char(size); + enif_get_atom(env, argv[0], name, size, ERL_NIF_LATIN1); + { + QueueInfo *pqueue_info = NULL; + ReadLock read_lock(qinfo_map_rwlock); + if (NULL != (pqueue_info = get_q_info(name))) + { + return enif_make_error(env, "already_exists"); + } + } + new_q(name); + return enif_make_atom(env, "ok"); +} + +static ERL_NIF_TERM info(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + int size = 100; + char name[100]; + enif_get_atom(env, argv[0], name, size, ERL_NIF_LATIN1); + int queue_size = 0; + { + QueueInfo *pqueue_info = NULL; + ReadLock read_lock(qinfo_map_rwlock); + if (NULL == (pqueue_info = get_q_info(name))) + return enif_make_badarg(env); + queue_size = pqueue_info->queue.size(); + } + return enif_make_list2(env, + enif_make_tuple2(env, enif_make_atom(env, "name"), enif_make_atom(env, name)), + enif_make_tuple2(env, enif_make_atom(env, "size"), enif_make_int(env, queue_size))); +} + +static ERL_NIF_TERM push_back(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + int size = 100; + char name[100]; + enif_get_atom(env, argv[0], name, size, ERL_NIF_LATIN1); + ErlTerm *erl_term = new ErlTerm(argv[1]); + if (push(name, erl_term)) + return enif_make_atom(env, "ok"); + delete erl_term; + return enif_make_badarg(env); +} + +static ERL_NIF_TERM pop_front(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + int size = 100; + char name[100]; + enif_get_atom(env, argv[0], name, size, ERL_NIF_LATIN1); + ErlTerm *erl_term = NULL; + if (NULL == (erl_term = pop(name, false))) + return enif_make_badarg(env); + ERL_NIF_TERM return_term = enif_make_copy(env, erl_term->term); + delete erl_term; + return return_term; +} + +static ERL_NIF_TERM get_front(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + int size = 100; + char name[100]; + enif_get_atom(env, argv[0], name, size, ERL_NIF_LATIN1); + ErlTerm *erl_term = NULL; + if (NULL == (erl_term = pop(name, true))) + return enif_make_badarg(env); + ERL_NIF_TERM return_term = enif_make_copy(env, erl_term->term); + delete erl_term; + return return_term; +} + +static int is_ok_load_info(ErlNifEnv* env, ERL_NIF_TERM load_info) +{ + int i; + return enif_get_int(env, load_info, &i) && i == 1; +} + +static int load(ErlNifEnv* env, void** priv_data, ERL_NIF_TERM load_info) +{ + if (!is_ok_load_info(env, load_info)) + return -1; + qinfo_map_rwlock = enif_rwlock_create((char*)"qinfo"); + return 0; +} + +static int upgrade(ErlNifEnv* env, void** priv_data, void** old_priv_data, ERL_NIF_TERM load_info) +{ + if (!is_ok_load_info(env, load_info)) + return -1; + return 0; +} + +static void unload(ErlNifEnv* env, void* priv_data) +{ + enif_rwlock_destroy(qinfo_map_rwlock); +} + +static ErlNifFunc nif_funcs[] = { + {"new", 1, new_queue}, + {"info", 1, info}, + {"push_back", 2, push_back}, + {"pop_front", 1, pop_front}, + {"get_front", 1, get_front} +}; + +ERL_NIF_INIT(etsq, nif_funcs, load, NULL, upgrade, unload) diff --git a/c_src/etsq/etsq.h 
b/c_src/etsq/etsq.h new file mode 100644 index 0000000..ef0d346 --- /dev/null +++ b/c_src/etsq/etsq.h @@ -0,0 +1,130 @@ +/* + * etsq.h + * + * Created on: Mar 21, 2016 + * Author: Vinod + */ + +#ifndef ETSQ_H_ +#define ETSQ_H_ + +#include // std::cin, std::cout +#include // std::map +#include // std::queue +#include +#include "erl_nif.h" + +#define enif_make_error(env, error) enif_make_tuple2(env, \ + enif_make_atom(env, "error"), enif_make_atom(env, error)) + +struct cmp_str +{ + bool operator()(char *a, char *b) const + { + return strcmp(a, b) < 0; + } +}; + +class ErlTerm +{ +public: + ErlNifEnv *term_env; + ERL_NIF_TERM term; +public: + ErlTerm(ERL_NIF_TERM erl_nif_term) + { + term_env = enif_alloc_env(); + this->term = enif_make_copy(term_env, erl_nif_term); + } + ErlTerm(ErlTerm *erl_term) + { + term_env = enif_alloc_env(); + this->term = enif_make_copy(term_env, erl_term->term); + } + ErlTerm(int value) + { + term_env = enif_alloc_env(); + this->term = enif_make_int(term_env, value); + } + ErlTerm(const char *error) + { + term_env = enif_alloc_env(); + this->term = enif_make_error(term_env, error); + } + ~ErlTerm() + { + enif_free_env(term_env); + term_env = NULL; + } +}; + +typedef std::queue ErlQueue; + +class QueueInfo +{ +public: + ErlNifMutex* pmutex; + ErlQueue queue; +public: + QueueInfo(char* name) + { + pmutex = enif_mutex_create(name); + } + ~QueueInfo() + { + enif_mutex_destroy(pmutex); + } +}; + +typedef std::map QInfoMap; +typedef std::pair QInfoMapPair; + +// Class to handle Read lock +class ReadLock +{ + ErlNifRWLock *pread_lock; +public: + ReadLock(ErlNifRWLock *pread_lock) + { + this->pread_lock = pread_lock; + enif_rwlock_rlock(this->pread_lock); + }; + ~ReadLock() + { + enif_rwlock_runlock(pread_lock); + }; +}; + +// Class to handle Write lock +class WriteLock +{ + ErlNifRWLock *pwrite_lock; +public: + WriteLock(ErlNifRWLock *pwrite_lock) + { + this->pwrite_lock = pwrite_lock; + enif_rwlock_rwlock(this->pwrite_lock); + }; + ~WriteLock() + { + enif_rwlock_rwunlock(pwrite_lock); + }; +}; + +// Class to handle Mutex lock and unlock +class Mutex +{ + ErlNifMutex *pmtx; +public: + Mutex(ErlNifMutex *pmtx) + { + this->pmtx = pmtx; + enif_mutex_lock(this->pmtx); + }; + ~Mutex() + { + enif_mutex_unlock(pmtx); + }; +}; + +#endif /* ETSQ_H_ */ diff --git a/c_src/etsq/rebar.config b/c_src/etsq/rebar.config new file mode 100644 index 0000000..f6093dd --- /dev/null +++ b/c_src/etsq/rebar.config @@ -0,0 +1,7 @@ +{port_specs, [ + {"../../priv/etsq.so", ["*.cpp"]} +]}. 
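The two queue NIFs above answer the same push/pop contract in different ways: enlfq wraps a single lock-free moodycamel::ConcurrentQueue per resource, while etsq keeps a map of named queues, each protected by its own mutex behind a global rwlock. The following is a minimal standalone sketch (plain C++, outside the NIF environment, assuming only the concurrentqueue.h header added by this patch) of the two moodycamel calls that enlfq.cc relies on; the `item` struct here is a stand-in that holds a string where the real q_item holds an ErlNifEnv* plus the term copied into it. It illustrates why the NIF can return the atom 'true' for every push and either {ok, Term} or 'empty' for pop.

#include <iostream>
#include <string>
#include "concurrentqueue.h"   // header added under c_src/enlfq/ by this patch

// Stand-in for enlfq's q_item: the real struct carries an ErlNifEnv* and the
// term copied into that env; a plain string plays that role here.
struct item {
    std::string payload;
};

int main() {
    moodycamel::ConcurrentQueue<item> queue;

    // Mirrors nif_enlfq_push: enqueue() has no "full" condition (it can only
    // fail on allocation failure), so the NIF answers 'true' unconditionally.
    queue.enqueue(item{"hello"});

    // Mirrors nif_enlfq_pop: try_dequeue() either fills 'out' (-> {ok, Term})
    // or returns false (-> the atom 'empty').
    item out;
    if (queue.try_dequeue(out)) {
        std::cout << "ok: " << out.payload << "\n";
    } else {
        std::cout << "empty\n";
    }
    return 0;
}

etsq trades that lock-freedom for addressable queues: push_back and pop_front take the global rwlock's read side only to find the named queue, then hold that queue's mutex for the actual operation, so traffic on different named queues largely avoids contention.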
+ + + + diff --git a/c_src/gb_lru/binary.h b/c_src/gb_lru/binary.h new file mode 100644 index 0000000..dd21ae6 --- /dev/null +++ b/c_src/gb_lru/binary.h @@ -0,0 +1,103 @@ +#include +#include +#include + +class Binary { + public: + unsigned char *bin; + size_t size; + bool allocated; + + Binary() : bin(NULL), size(0), allocated(false) { } + Binary(const char *data) { + bin = (unsigned char *) data; + size = strlen(data); + allocated = false; + } + + Binary(const Binary &b) { + bin = b.bin; + size = b.size; + allocated = false; + } + + ~Binary() { + if (allocated) { + delete bin; + } + } + + operator std::string() { + return (const char *) bin; + } + + friend std::ostream & operator<<(std::ostream & str, Binary const &b) { + return str << b.bin; + } + + bool operator<(const Binary &b) { + if(size < b.size) { + return true; + } else if (size > b.size) { + return false; + } else { + return memcmp(bin,b.bin,size) < 0; + } + } + + bool operator<(Binary &b) { + if(size < b.size) { + return true; + } else if (size > b.size) { + return false; + } else { + return memcmp(bin,b.bin,size) < 0; + } + } + + bool operator>(const Binary &b) { + if(size > b.size) { + return true; + } else if (size < b.size) { + return false; + } else { + return memcmp(bin,b.bin,size) > 0; + } + } + + bool operator== (const Binary &b) { + if (size == b.size ) { + return memcmp(bin,b.bin, std::min(size, b.size)) == 0; + } else { + return false; + } + } + operator std::string() const { + return (const char*) bin; + } + + Binary& set_data(const char *data) { + bin = (unsigned char *) data; + size = strlen(data); + return *this; + } + + void copy(char *inbin, size_t insize) { + bin = (unsigned char *) operator new(insize); + allocated = true; + size = insize; + memcpy(bin, inbin, size); + } +}; + +inline bool operator < (const Binary &a, const Binary &b) { + + if(a.size < b.size) { + return true; + } else if (a.size > b.size) { + return false; + } else { + return memcmp(a.bin,b.bin, std::min(a.size, b.size)) < 0; + } +} + diff --git a/c_src/gb_lru/btree.h b/c_src/gb_lru/btree.h new file mode 100644 index 0000000..5035835 --- /dev/null +++ b/c_src/gb_lru/btree.h @@ -0,0 +1,2394 @@ +// Copyright 2013 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// A btree implementation of the STL set and map interfaces. A btree is both +// smaller and faster than STL set/map. The red-black tree implementation of +// STL set/map has an overhead of 3 pointers (left, right and parent) plus the +// node color information for each stored value. So a set consumes 20 +// bytes for each value stored. This btree implementation stores multiple +// values on fixed size nodes (usually 256 bytes) and doesn't store child +// pointers for leaf nodes. The result is that a btree_set may use much +// less memory per stored value. For the random insertion benchmark in +// btree_test.cc, a btree_set with node-size of 256 uses 4.9 bytes per +// stored value. 
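// Rough arithmetic behind the 20-byte figure above, presumably assuming a
// 32-bit build and an int32 element type (the template arguments in the
// surrounding comment appear to have been stripped during extraction):
// three node pointers at 4 bytes each, a 4-byte color word after padding,
// and the 4-byte value itself give 12 + 4 + 4 = 20 bytes per stored value.
// The 4.9 bytes/value figure quoted for the btree is a measured result from
// the btree_test.cc random-insertion benchmark rather than a derived one.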
+// +// The packing of multiple values on to each node of a btree has another effect +// besides better space utilization: better cache locality due to fewer cache +// lines being accessed. Better cache locality translates into faster +// operations. +// +// CAVEATS +// +// Insertions and deletions on a btree can cause splitting, merging or +// rebalancing of btree nodes. And even without these operations, insertions +// and deletions on a btree will move values around within a node. In both +// cases, the result is that insertions and deletions can invalidate iterators +// pointing to values other than the one being inserted/deleted. This is +// notably different from STL set/map which takes care to not invalidate +// iterators on insert/erase except, of course, for iterators pointing to the +// value being erased. A partial workaround when erasing is available: +// erase() returns an iterator pointing to the item just after the one that was +// erased (or end() if none exists). See also safe_btree. + +// PERFORMANCE +// +// btree_bench --benchmarks=. 2>&1 | ./benchmarks.awk +// +// Run on pmattis-warp.nyc (4 X 2200 MHz CPUs); 2010/03/04-15:23:06 +// Benchmark STL(ns) B-Tree(ns) @ +// -------------------------------------------------------- +// BM_set_int32_insert 1516 608 +59.89% <256> [40.0, 5.2] +// BM_set_int32_lookup 1160 414 +64.31% <256> [40.0, 5.2] +// BM_set_int32_fulllookup 960 410 +57.29% <256> [40.0, 4.4] +// BM_set_int32_delete 1741 528 +69.67% <256> [40.0, 5.2] +// BM_set_int32_queueaddrem 3078 1046 +66.02% <256> [40.0, 5.5] +// BM_set_int32_mixedaddrem 3600 1384 +61.56% <256> [40.0, 5.3] +// BM_set_int32_fifo 227 113 +50.22% <256> [40.0, 4.4] +// BM_set_int32_fwditer 158 26 +83.54% <256> [40.0, 5.2] +// BM_map_int32_insert 1551 636 +58.99% <256> [48.0, 10.5] +// BM_map_int32_lookup 1200 508 +57.67% <256> [48.0, 10.5] +// BM_map_int32_fulllookup 989 487 +50.76% <256> [48.0, 8.8] +// BM_map_int32_delete 1794 628 +64.99% <256> [48.0, 10.5] +// BM_map_int32_queueaddrem 3189 1266 +60.30% <256> [48.0, 11.6] +// BM_map_int32_mixedaddrem 3822 1623 +57.54% <256> [48.0, 10.9] +// BM_map_int32_fifo 151 134 +11.26% <256> [48.0, 8.8] +// BM_map_int32_fwditer 161 32 +80.12% <256> [48.0, 10.5] +// BM_set_int64_insert 1546 636 +58.86% <256> [40.0, 10.5] +// BM_set_int64_lookup 1200 512 +57.33% <256> [40.0, 10.5] +// BM_set_int64_fulllookup 971 487 +49.85% <256> [40.0, 8.8] +// BM_set_int64_delete 1745 616 +64.70% <256> [40.0, 10.5] +// BM_set_int64_queueaddrem 3163 1195 +62.22% <256> [40.0, 11.6] +// BM_set_int64_mixedaddrem 3760 1564 +58.40% <256> [40.0, 10.9] +// BM_set_int64_fifo 146 103 +29.45% <256> [40.0, 8.8] +// BM_set_int64_fwditer 162 31 +80.86% <256> [40.0, 10.5] +// BM_map_int64_insert 1551 720 +53.58% <256> [48.0, 20.7] +// BM_map_int64_lookup 1214 612 +49.59% <256> [48.0, 20.7] +// BM_map_int64_fulllookup 994 592 +40.44% <256> [48.0, 17.2] +// BM_map_int64_delete 1778 764 +57.03% <256> [48.0, 20.7] +// BM_map_int64_queueaddrem 3189 1547 +51.49% <256> [48.0, 20.9] +// BM_map_int64_mixedaddrem 3779 1887 +50.07% <256> [48.0, 21.6] +// BM_map_int64_fifo 147 145 +1.36% <256> [48.0, 17.2] +// BM_map_int64_fwditer 162 41 +74.69% <256> [48.0, 20.7] +// BM_set_string_insert 1989 1966 +1.16% <256> [64.0, 44.5] +// BM_set_string_lookup 1709 1600 +6.38% <256> [64.0, 44.5] +// BM_set_string_fulllookup 1573 1529 +2.80% <256> [64.0, 35.4] +// BM_set_string_delete 2520 1920 +23.81% <256> [64.0, 44.5] +// BM_set_string_queueaddrem 4706 4309 +8.44% <256> [64.0, 48.3] +// 
BM_set_string_mixedaddrem 5080 4654 +8.39% <256> [64.0, 46.7] +// BM_set_string_fifo 318 512 -61.01% <256> [64.0, 35.4] +// BM_set_string_fwditer 182 93 +48.90% <256> [64.0, 44.5] +// BM_map_string_insert 2600 2227 +14.35% <256> [72.0, 55.8] +// BM_map_string_lookup 2068 1730 +16.34% <256> [72.0, 55.8] +// BM_map_string_fulllookup 1859 1618 +12.96% <256> [72.0, 44.0] +// BM_map_string_delete 3168 2080 +34.34% <256> [72.0, 55.8] +// BM_map_string_queueaddrem 5840 4701 +19.50% <256> [72.0, 59.4] +// BM_map_string_mixedaddrem 6400 5200 +18.75% <256> [72.0, 57.8] +// BM_map_string_fifo 398 596 -49.75% <256> [72.0, 44.0] +// BM_map_string_fwditer 243 113 +53.50% <256> [72.0, 55.8] + +#ifndef UTIL_BTREE_BTREE_H__ +#define UTIL_BTREE_BTREE_H__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef NDEBUG +#define NDEBUG 1 +#endif + +namespace btree { + +// Inside a btree method, if we just call swap(), it will choose the +// btree::swap method, which we don't want. And we can't say ::swap +// because then MSVC won't pickup any std::swap() implementations. We +// can't just use std::swap() directly because then we don't get the +// specialization for types outside the std namespace. So the solution +// is to have a special swap helper function whose name doesn't +// collide with other swap functions defined by the btree classes. +template +inline void btree_swap_helper(T &a, T &b) { + using std::swap; + swap(a, b); +} + +// A template helper used to select A or B based on a condition. +template +struct if_{ + typedef A type; +}; + +template +struct if_ { + typedef B type; +}; + +// Types small_ and big_ are promise that sizeof(small_) < sizeof(big_) +typedef char small_; + +struct big_ { + char dummy[2]; +}; + +// A compile-time assertion. +template +struct CompileAssert { +}; + +#define COMPILE_ASSERT(expr, msg) \ + typedef CompileAssert<(bool(expr))> msg[bool(expr) ? 1 : 0] + +// A helper type used to indicate that a key-compare-to functor has been +// provided. A user can specify a key-compare-to functor by doing: +// +// struct MyStringComparer +// : public util::btree::btree_key_compare_to_tag { +// int operator()(const string &a, const string &b) const { +// return a.compare(b); +// } +// }; +// +// Note that the return type is an int and not a bool. There is a +// COMPILE_ASSERT which enforces this return type. +struct btree_key_compare_to_tag { +}; + +// A helper class that indicates if the Compare parameter is derived from +// btree_key_compare_to_tag. +template +struct btree_is_key_compare_to + : public std::is_convertible { +}; + +// A helper class to convert a boolean comparison into a three-way +// "compare-to" comparison that returns a negative value to indicate +// less-than, zero to indicate equality and a positive value to +// indicate greater-than. This helper class is specialized for +// less and greater. The btree_key_compare_to_adapter +// class is provided so that btree users automatically get the more +// efficient compare-to code when using common google string types +// with common comparison functors. 
+template +struct btree_key_compare_to_adapter : Compare { + btree_key_compare_to_adapter() { } + btree_key_compare_to_adapter(const Compare &c) : Compare(c) { } + btree_key_compare_to_adapter(const btree_key_compare_to_adapter &c) + : Compare(c) { + } +}; + +template <> +struct btree_key_compare_to_adapter > + : public btree_key_compare_to_tag { + btree_key_compare_to_adapter() {} + btree_key_compare_to_adapter(const std::less&) {} + btree_key_compare_to_adapter( + const btree_key_compare_to_adapter >&) {} + int operator()(const std::string &a, const std::string &b) const { + return a.compare(b); + } +}; + +template <> +struct btree_key_compare_to_adapter > + : public btree_key_compare_to_tag { + btree_key_compare_to_adapter() {} + btree_key_compare_to_adapter(const std::greater&) {} + btree_key_compare_to_adapter( + const btree_key_compare_to_adapter >&) {} + int operator()(const std::string &a, const std::string &b) const { + return b.compare(a); + } +}; + +// A helper class that allows a compare-to functor to behave like a plain +// compare functor. This specialization is used when we do not have a +// compare-to functor. +template +struct btree_key_comparer { + btree_key_comparer() {} + btree_key_comparer(Compare c) : comp(c) {} + static bool bool_compare(const Compare &comp, const Key &x, const Key &y) { + return comp(x, y); + } + bool operator()(const Key &x, const Key &y) const { + return bool_compare(comp, x, y); + } + Compare comp; +}; + +// A specialization of btree_key_comparer when a compare-to functor is +// present. We need a plain (boolean) comparison in some parts of the btree +// code, such as insert-with-hint. +template +struct btree_key_comparer { + btree_key_comparer() {} + btree_key_comparer(Compare c) : comp(c) {} + static bool bool_compare(const Compare &comp, const Key &x, const Key &y) { + return comp(x, y) < 0; + } + bool operator()(const Key &x, const Key &y) const { + return bool_compare(comp, x, y); + } + Compare comp; +}; + +// A helper function to compare to keys using the specified compare +// functor. This dispatches to the appropriate btree_key_comparer comparison, +// depending on whether we have a compare-to functor or not (which depends on +// whether Compare is derived from btree_key_compare_to_tag). +template +static bool btree_compare_keys( + const Compare &comp, const Key &x, const Key &y) { + typedef btree_key_comparer::value> key_comparer; + return key_comparer::bool_compare(comp, x, y); +} + +template +struct btree_common_params { + // If Compare is derived from btree_key_compare_to_tag then use it as the + // key_compare type. Otherwise, use btree_key_compare_to_adapter<> which will + // fall-back to Compare if we don't have an appropriate specialization. + typedef typename if_< + btree_is_key_compare_to::value, + Compare, btree_key_compare_to_adapter >::type key_compare; + // A type which indicates if we have a key-compare-to functor or a plain old + // key-compare functor. + typedef btree_is_key_compare_to is_key_compare_to; + + typedef Alloc allocator_type; + typedef Key key_type; + typedef size_t size_type; + typedef ptrdiff_t difference_type; + + enum { + kTargetNodeSize = TargetNodeSize, + + // Available space for values. This is largest for leaf nodes, + // which has overhead no fewer than two pointers. + kNodeValueSpace = TargetNodeSize - 2 * sizeof(void*), + }; + + // This is an integral type large enough to hold as many + // ValueSize-values as will fit a node of TargetNodeSize bytes. 
+ typedef typename if_< + (kNodeValueSpace / ValueSize) >= 256, + uint16_t, + uint8_t>::type node_count_type; +}; + +// A parameters structure for holding the type parameters for a btree_map. +template +struct btree_map_params + : public btree_common_params { + typedef Data data_type; + typedef Data mapped_type; + typedef std::pair value_type; + typedef std::pair mutable_value_type; + typedef value_type* pointer; + typedef const value_type* const_pointer; + typedef value_type& reference; + typedef const value_type& const_reference; + + enum { + kValueSize = sizeof(Key) + sizeof(data_type), + }; + + static const Key& key(const value_type &x) { return x.first; } + static const Key& key(const mutable_value_type &x) { return x.first; } + static void swap(mutable_value_type *a, mutable_value_type *b) { + btree_swap_helper(a->first, b->first); + btree_swap_helper(a->second, b->second); + } +}; + +// A parameters structure for holding the type parameters for a btree_set. +template +struct btree_set_params + : public btree_common_params { + typedef std::false_type data_type; + typedef std::false_type mapped_type; + typedef Key value_type; + typedef value_type mutable_value_type; + typedef value_type* pointer; + typedef const value_type* const_pointer; + typedef value_type& reference; + typedef const value_type& const_reference; + + enum { + kValueSize = sizeof(Key), + }; + + static const Key& key(const value_type &x) { return x; } + static void swap(mutable_value_type *a, mutable_value_type *b) { + btree_swap_helper(*a, *b); + } +}; + +// An adapter class that converts a lower-bound compare into an upper-bound +// compare. +template +struct btree_upper_bound_adapter : public Compare { + btree_upper_bound_adapter(Compare c) : Compare(c) {} + bool operator()(const Key &a, const Key &b) const { + return !static_cast(*this)(b, a); + } +}; + +template +struct btree_upper_bound_compare_to_adapter : public CompareTo { + btree_upper_bound_compare_to_adapter(CompareTo c) : CompareTo(c) {} + int operator()(const Key &a, const Key &b) const { + return static_cast(*this)(b, a); + } +}; + +// Dispatch helper class for using linear search with plain compare. +template +struct btree_linear_search_plain_compare { + static int lower_bound(const K &k, const N &n, Compare comp) { + return n.linear_search_plain_compare(k, 0, n.count(), comp); + } + static int upper_bound(const K &k, const N &n, Compare comp) { + typedef btree_upper_bound_adapter upper_compare; + return n.linear_search_plain_compare(k, 0, n.count(), upper_compare(comp)); + } +}; + +// Dispatch helper class for using linear search with compare-to +template +struct btree_linear_search_compare_to { + static int lower_bound(const K &k, const N &n, CompareTo comp) { + return n.linear_search_compare_to(k, 0, n.count(), comp); + } + static int upper_bound(const K &k, const N &n, CompareTo comp) { + typedef btree_upper_bound_adapter > upper_compare; + return n.linear_search_plain_compare(k, 0, n.count(), upper_compare(comp)); + } +}; + +// Dispatch helper class for using binary search with plain compare. +template +struct btree_binary_search_plain_compare { + static int lower_bound(const K &k, const N &n, Compare comp) { + return n.binary_search_plain_compare(k, 0, n.count(), comp); + } + static int upper_bound(const K &k, const N &n, Compare comp) { + typedef btree_upper_bound_adapter upper_compare; + return n.binary_search_plain_compare(k, 0, n.count(), upper_compare(comp)); + } +}; + +// Dispatch helper class for using binary search with compare-to. 
+template +struct btree_binary_search_compare_to { + static int lower_bound(const K &k, const N &n, CompareTo comp) { + return n.binary_search_compare_to(k, 0, n.count(), CompareTo()); + } + static int upper_bound(const K &k, const N &n, CompareTo comp) { + typedef btree_upper_bound_adapter > upper_compare; + return n.linear_search_plain_compare(k, 0, n.count(), upper_compare(comp)); + } +}; + +// A node in the btree holding. The same node type is used for both internal +// and leaf nodes in the btree, though the nodes are allocated in such a way +// that the children array is only valid in internal nodes. +template +class btree_node { + public: + typedef Params params_type; + typedef btree_node self_type; + typedef typename Params::key_type key_type; + typedef typename Params::data_type data_type; + typedef typename Params::value_type value_type; + typedef typename Params::mutable_value_type mutable_value_type; + typedef typename Params::pointer pointer; + typedef typename Params::const_pointer const_pointer; + typedef typename Params::reference reference; + typedef typename Params::const_reference const_reference; + typedef typename Params::key_compare key_compare; + typedef typename Params::size_type size_type; + typedef typename Params::difference_type difference_type; + // Typedefs for the various types of node searches. + typedef btree_linear_search_plain_compare< + key_type, self_type, key_compare> linear_search_plain_compare_type; + typedef btree_linear_search_compare_to< + key_type, self_type, key_compare> linear_search_compare_to_type; + typedef btree_binary_search_plain_compare< + key_type, self_type, key_compare> binary_search_plain_compare_type; + typedef btree_binary_search_compare_to< + key_type, self_type, key_compare> binary_search_compare_to_type; + // If we have a valid key-compare-to type, use linear_search_compare_to, + // otherwise use linear_search_plain_compare. + typedef typename if_< + Params::is_key_compare_to::value, + linear_search_compare_to_type, + linear_search_plain_compare_type>::type linear_search_type; + // If we have a valid key-compare-to type, use binary_search_compare_to, + // otherwise use binary_search_plain_compare. + typedef typename if_< + Params::is_key_compare_to::value, + binary_search_compare_to_type, + binary_search_plain_compare_type>::type binary_search_type; + // If the key is an integral or floating point type, use linear search which + // is faster than binary search for such types. Might be wise to also + // configure linear search based on node-size. + typedef typename if_< + std::is_integral::value || + std::is_floating_point::value, + linear_search_type, binary_search_type>::type search_type; + + struct base_fields { + typedef typename Params::node_count_type field_type; + + // A boolean indicating whether the node is a leaf or not. + bool leaf; + // The position of the node in the node's parent. + field_type position; + // The maximum number of values the node can hold. + field_type max_count; + // The count of the number of values in the node. + field_type count; + // A pointer to the node's parent. + btree_node *parent; + }; + + enum { + kValueSize = params_type::kValueSize, + kTargetNodeSize = params_type::kTargetNodeSize, + + // Compute how many values we can fit onto a leaf node. 
+ kNodeTargetValues = (kTargetNodeSize - sizeof(base_fields)) / kValueSize, + // We need a minimum of 3 values per internal node in order to perform + // splitting (1 value for the two nodes involved in the split and 1 value + // propagated to the parent as the delimiter for the split). + kNodeValues = kNodeTargetValues >= 3 ? kNodeTargetValues : 3, + + kExactMatch = 1 << 30, + kMatchMask = kExactMatch - 1, + }; + + struct leaf_fields : public base_fields { + // The array of values. Only the first count of these values have been + // constructed and are valid. + mutable_value_type values[kNodeValues]; + }; + + struct internal_fields : public leaf_fields { + // The array of child pointers. The keys in children_[i] are all less than + // key(i). The keys in children_[i + 1] are all greater than key(i). There + // are always count + 1 children. + btree_node *children[kNodeValues + 1]; + }; + + struct root_fields : public internal_fields { + btree_node *rightmost; + size_type size; + }; + + public: + // Getter/setter for whether this is a leaf node or not. This value doesn't + // change after the node is created. + bool leaf() const { return fields_.leaf; } + + // Getter for the position of this node in its parent. + int position() const { return fields_.position; } + void set_position(int v) { fields_.position = v; } + + // Getter/setter for the number of values stored in this node. + int count() const { return fields_.count; } + void set_count(int v) { fields_.count = v; } + int max_count() const { return fields_.max_count; } + + // Getter for the parent of this node. + btree_node* parent() const { return fields_.parent; } + // Getter for whether the node is the root of the tree. The parent of the + // root of the tree is the leftmost node in the tree which is guaranteed to + // be a leaf. + bool is_root() const { return parent()->leaf(); } + void make_root() { + assert(parent()->is_root()); + fields_.parent = fields_.parent->parent(); + } + + // Getter for the rightmost root node field. Only valid on the root node. + btree_node* rightmost() const { return fields_.rightmost; } + btree_node** mutable_rightmost() { return &fields_.rightmost; } + + // Getter for the size root node field. Only valid on the root node. + size_type size() const { return fields_.size; } + size_type* mutable_size() { return &fields_.size; } + + // Getters for the key/value at position i in the node. + const key_type& key(int i) const { + return params_type::key(fields_.values[i]); + } + reference value(int i) { + return reinterpret_cast(fields_.values[i]); + } + const_reference value(int i) const { + return reinterpret_cast(fields_.values[i]); + } + mutable_value_type* mutable_value(int i) { + return &fields_.values[i]; + } + + // Swap value i in this node with value j in node x. + void value_swap(int i, btree_node *x, int j) { + params_type::swap(mutable_value(i), x->mutable_value(j)); + } + + // Getters/setter for the child at position i in the node. + btree_node* child(int i) const { return fields_.children[i]; } + btree_node** mutable_child(int i) { return &fields_.children[i]; } + void set_child(int i, btree_node *c) { + *mutable_child(i) = c; + c->fields_.parent = this; + c->fields_.position = i; + } + + // Returns the position of the first value whose key is not less than k. + template + int lower_bound(const key_type &k, const Compare &comp) const { + return search_type::lower_bound(k, *this, comp); + } + // Returns the position of the first value whose key is greater than k. 
+ template + int upper_bound(const key_type &k, const Compare &comp) const { + return search_type::upper_bound(k, *this, comp); + } + + // Returns the position of the first value whose key is not less than k using + // linear search performed using plain compare. + template + int linear_search_plain_compare( + const key_type &k, int s, int e, const Compare &comp) const { + while (s < e) { + if (!btree_compare_keys(comp, key(s), k)) { + break; + } + ++s; + } + return s; + } + + // Returns the position of the first value whose key is not less than k using + // linear search performed using compare-to. + template + int linear_search_compare_to( + const key_type &k, int s, int e, const Compare &comp) const { + while (s < e) { + int c = comp(key(s), k); + if (c == 0) { + return s | kExactMatch; + } else if (c > 0) { + break; + } + ++s; + } + return s; + } + + // Returns the position of the first value whose key is not less than k using + // binary search performed using plain compare. + template + int binary_search_plain_compare( + const key_type &k, int s, int e, const Compare &comp) const { + while (s != e) { + int mid = (s + e) / 2; + if (btree_compare_keys(comp, key(mid), k)) { + s = mid + 1; + } else { + e = mid; + } + } + return s; + } + + // Returns the position of the first value whose key is not less than k using + // binary search performed using compare-to. + template + int binary_search_compare_to( + const key_type &k, int s, int e, const CompareTo &comp) const { + while (s != e) { + int mid = (s + e) / 2; + int c = comp(key(mid), k); + if (c < 0) { + s = mid + 1; + } else if (c > 0) { + e = mid; + } else { + // Need to return the first value whose key is not less than k, which + // requires continuing the binary search. Note that we are guaranteed + // that the result is an exact match because if "key(mid-1) < k" the + // call to binary_search_compare_to() will return "mid". + s = binary_search_compare_to(k, s, mid, comp); + return s | kExactMatch; + } + } + return s; + } + + // Inserts the value x at position i, shifting all existing values and + // children at positions >= i to the right by 1. + void insert_value(int i, const value_type &x); + + // Removes the value at position i, shifting all existing values and children + // at positions > i to the left by 1. + void remove_value(int i); + + // Rebalances a node with its right sibling. + void rebalance_right_to_left(btree_node *sibling, int to_move); + void rebalance_left_to_right(btree_node *sibling, int to_move); + + // Splits a node, moving a portion of the node's values to its right sibling. + void split(btree_node *sibling, int insert_position); + + // Merges a node with its right sibling, moving all of the values and the + // delimiting key in the parent node onto itself. + void merge(btree_node *sibling); + + // Swap the contents of "this" and "src". + void swap(btree_node *src); + + // Node allocation/deletion routines. 
+ static btree_node* init_leaf( + leaf_fields *f, btree_node *parent, int max_count) { + btree_node *n = reinterpret_cast(f); + f->leaf = 1; + f->position = 0; + f->max_count = max_count; + f->count = 0; + f->parent = parent; + if (!NDEBUG) { + memset(&f->values, 0, max_count * sizeof(value_type)); + } + return n; + } + static btree_node* init_internal(internal_fields *f, btree_node *parent) { + btree_node *n = init_leaf(f, parent, kNodeValues); + f->leaf = 0; + if (!NDEBUG) { + memset(f->children, 0, sizeof(f->children)); + } + return n; + } + static btree_node* init_root(root_fields *f, btree_node *parent) { + btree_node *n = init_internal(f, parent); + f->rightmost = parent; + f->size = parent->count(); + return n; + } + void destroy() { + for (int i = 0; i < count(); ++i) { + value_destroy(i); + } + } + + private: + void value_init(int i) { + new (&fields_.values[i]) mutable_value_type; + } + void value_init(int i, const value_type &x) { + new (&fields_.values[i]) mutable_value_type(x); + } + void value_destroy(int i) { + fields_.values[i].~mutable_value_type(); + } + + private: + root_fields fields_; + + private: + btree_node(const btree_node&); + void operator=(const btree_node&); +}; + +template +struct btree_iterator { + typedef typename Node::key_type key_type; + typedef typename Node::size_type size_type; + typedef typename Node::difference_type difference_type; + typedef typename Node::params_type params_type; + + typedef Node node_type; + typedef typename std::remove_const::type normal_node; + typedef const Node const_node; + typedef typename params_type::value_type value_type; + typedef typename params_type::pointer normal_pointer; + typedef typename params_type::reference normal_reference; + typedef typename params_type::const_pointer const_pointer; + typedef typename params_type::const_reference const_reference; + + typedef Pointer pointer; + typedef Reference reference; + typedef std::bidirectional_iterator_tag iterator_category; + + typedef btree_iterator< + normal_node, normal_reference, normal_pointer> iterator; + typedef btree_iterator< + const_node, const_reference, const_pointer> const_iterator; + typedef btree_iterator self_type; + + btree_iterator() + : node(NULL), + position(-1) { + } + btree_iterator(Node *n, int p) + : node(n), + position(p) { + } + btree_iterator(const iterator &x) + : node(x.node), + position(x.position) { + } + + // Increment/decrement the iterator. + void increment() { + if (node->leaf() && ++position < node->count()) { + return; + } + increment_slow(); + } + void increment_by(int count); + void increment_slow(); + + void decrement() { + if (node->leaf() && --position >= 0) { + return; + } + decrement_slow(); + } + void decrement_slow(); + + bool operator==(const const_iterator &x) const { + return node == x.node && position == x.position; + } + bool operator!=(const const_iterator &x) const { + return node != x.node || position != x.position; + } + + // Accessors for the key/value the iterator is pointing at. 
+ const key_type& key() const { + return node->key(position); + } + reference operator*() const { + return node->value(position); + } + pointer operator->() const { + return &node->value(position); + } + + self_type& operator++() { + increment(); + return *this; + } + self_type& operator--() { + decrement(); + return *this; + } + self_type operator++(int) { + self_type tmp = *this; + ++*this; + return tmp; + } + self_type operator--(int) { + self_type tmp = *this; + --*this; + return tmp; + } + + // The node in the tree the iterator is pointing at. + Node *node; + // The position within the node of the tree the iterator is pointing at. + int position; +}; + +// Dispatch helper class for using btree::internal_locate with plain compare. +struct btree_internal_locate_plain_compare { + template + static std::pair dispatch(const K &k, const T &t, Iter iter) { + return t.internal_locate_plain_compare(k, iter); + } +}; + +// Dispatch helper class for using btree::internal_locate with compare-to. +struct btree_internal_locate_compare_to { + template + static std::pair dispatch(const K &k, const T &t, Iter iter) { + return t.internal_locate_compare_to(k, iter); + } +}; + +template +class btree : public Params::key_compare { + typedef btree self_type; + typedef btree_node node_type; + typedef typename node_type::base_fields base_fields; + typedef typename node_type::leaf_fields leaf_fields; + typedef typename node_type::internal_fields internal_fields; + typedef typename node_type::root_fields root_fields; + typedef typename Params::is_key_compare_to is_key_compare_to; + + friend struct btree_internal_locate_plain_compare; + friend struct btree_internal_locate_compare_to; + typedef typename if_< + is_key_compare_to::value, + btree_internal_locate_compare_to, + btree_internal_locate_plain_compare>::type internal_locate_type; + + enum { + kNodeValues = node_type::kNodeValues, + kMinNodeValues = kNodeValues / 2, + kValueSize = node_type::kValueSize, + kExactMatch = node_type::kExactMatch, + kMatchMask = node_type::kMatchMask, + }; + + // A helper class to get the empty base class optimization for 0-size + // allocators. Base is internal_allocator_type. + // (e.g. empty_base_handle). If Base is + // 0-size, the compiler doesn't have to reserve any space for it and + // sizeof(empty_base_handle) will simply be sizeof(Data). Google [empty base + // class optimization] for more details. 
+ template + struct empty_base_handle : public Base { + empty_base_handle(const Base &b, const Data &d) + : Base(b), + data(d) { + } + Data data; + }; + + struct node_stats { + node_stats(size_t l, size_t i) + : leaf_nodes(l), + internal_nodes(i) { + } + + node_stats& operator+=(const node_stats &x) { + leaf_nodes += x.leaf_nodes; + internal_nodes += x.internal_nodes; + return *this; + } + + size_t leaf_nodes; + size_t internal_nodes; + }; + + public: + typedef Params params_type; + typedef typename Params::key_type key_type; + typedef typename Params::data_type data_type; + typedef typename Params::mapped_type mapped_type; + typedef typename Params::value_type value_type; + typedef typename Params::key_compare key_compare; + typedef typename Params::pointer pointer; + typedef typename Params::const_pointer const_pointer; + typedef typename Params::reference reference; + typedef typename Params::const_reference const_reference; + typedef typename Params::size_type size_type; + typedef typename Params::difference_type difference_type; + typedef btree_iterator iterator; + typedef typename iterator::const_iterator const_iterator; + typedef std::reverse_iterator const_reverse_iterator; + typedef std::reverse_iterator reverse_iterator; + + typedef typename Params::allocator_type allocator_type; + typedef typename allocator_type::template rebind::other + internal_allocator_type; + + public: + // Default constructor. + btree(const key_compare &comp, const allocator_type &alloc); + + // Copy constructor. + btree(const self_type &x); + + // Destructor. + ~btree() { + clear(); + } + + // Iterator routines. + iterator begin() { + return iterator(leftmost(), 0); + } + const_iterator begin() const { + return const_iterator(leftmost(), 0); + } + iterator end() { + return iterator(rightmost(), rightmost() ? rightmost()->count() : 0); + } + const_iterator end() const { + return const_iterator(rightmost(), rightmost() ? rightmost()->count() : 0); + } + reverse_iterator rbegin() { + return reverse_iterator(end()); + } + const_reverse_iterator rbegin() const { + return const_reverse_iterator(end()); + } + reverse_iterator rend() { + return reverse_iterator(begin()); + } + const_reverse_iterator rend() const { + return const_reverse_iterator(begin()); + } + + // Finds the first element whose key is not less than key. + iterator lower_bound(const key_type &key) { + return internal_end( + internal_lower_bound(key, iterator(root(), 0))); + } + const_iterator lower_bound(const key_type &key) const { + return internal_end( + internal_lower_bound(key, const_iterator(root(), 0))); + } + + // Finds the first element whose key is greater than key. + iterator upper_bound(const key_type &key) { + return internal_end( + internal_upper_bound(key, iterator(root(), 0))); + } + const_iterator upper_bound(const key_type &key) const { + return internal_end( + internal_upper_bound(key, const_iterator(root(), 0))); + } + + // Finds the range of values which compare equal to key. The first member of + // the returned pair is equal to lower_bound(key). The second member pair of + // the pair is equal to upper_bound(key). + std::pair equal_range(const key_type &key) { + return std::make_pair(lower_bound(key), upper_bound(key)); + } + std::pair equal_range(const key_type &key) const { + return std::make_pair(lower_bound(key), upper_bound(key)); + } + + // Inserts a value into the btree only if it does not already exist. The + // boolean return value indicates whether insertion succeeded or failed. 
The + // ValuePointer type is used to avoid instatiating the value unless the key + // is being inserted. Value is not dereferenced if the key already exists in + // the btree. See btree_map::operator[]. + template + std::pair insert_unique(const key_type &key, ValuePointer value); + + // Inserts a value into the btree only if it does not already exist. The + // boolean return value indicates whether insertion succeeded or failed. + std::pair insert_unique(const value_type &v) { + return insert_unique(params_type::key(v), &v); + } + + // Insert with hint. Check to see if the value should be placed immediately + // before position in the tree. If it does, then the insertion will take + // amortized constant time. If not, the insertion will take amortized + // logarithmic time as if a call to insert_unique(v) were made. + iterator insert_unique(iterator position, const value_type &v); + + // Insert a range of values into the btree. + template + void insert_unique(InputIterator b, InputIterator e); + + // Inserts a value into the btree. The ValuePointer type is used to avoid + // instatiating the value unless the key is being inserted. Value is not + // dereferenced if the key already exists in the btree. See + // btree_map::operator[]. + template + iterator insert_multi(const key_type &key, ValuePointer value); + + // Inserts a value into the btree. + iterator insert_multi(const value_type &v) { + return insert_multi(params_type::key(v), &v); + } + + // Insert with hint. Check to see if the value should be placed immediately + // before position in the tree. If it does, then the insertion will take + // amortized constant time. If not, the insertion will take amortized + // logarithmic time as if a call to insert_multi(v) were made. + iterator insert_multi(iterator position, const value_type &v); + + // Insert a range of values into the btree. + template + void insert_multi(InputIterator b, InputIterator e); + + void assign(const self_type &x); + + // Erase the specified iterator from the btree. The iterator must be valid + // (i.e. not equal to end()). Return an iterator pointing to the node after + // the one that was erased (or end() if none exists). + iterator erase(iterator iter); + + // Erases range. Returns the number of keys erased. + int erase(iterator begin, iterator end); + + // Erases the specified key from the btree. Returns 1 if an element was + // erased and 0 otherwise. + int erase_unique(const key_type &key); + + // Erases all of the entries matching the specified key from the + // btree. Returns the number of elements erased. + int erase_multi(const key_type &key); + + // Finds the iterator corresponding to a key or returns end() if the key is + // not present. + iterator find_unique(const key_type &key) { + return internal_end( + internal_find_unique(key, iterator(root(), 0))); + } + const_iterator find_unique(const key_type &key) const { + return internal_end( + internal_find_unique(key, const_iterator(root(), 0))); + } + iterator find_multi(const key_type &key) { + return internal_end( + internal_find_multi(key, iterator(root(), 0))); + } + const_iterator find_multi(const key_type &key) const { + return internal_end( + internal_find_multi(key, const_iterator(root(), 0))); + } + + // Returns a count of the number of times the key appears in the btree. + size_type count_unique(const key_type &key) const { + const_iterator begin = internal_find_unique( + key, const_iterator(root(), 0)); + if (!begin.node) { + // The key doesn't exist in the tree. 
+ return 0; + } + return 1; + } + // Returns a count of the number of times the key appears in the btree. + size_type count_multi(const key_type &key) const { + return distance(lower_bound(key), upper_bound(key)); + } + + // Clear the btree, deleting all of the values it contains. + void clear(); + + // Swap the contents of *this and x. + void swap(self_type &x); + + // Assign the contents of x to *this. + self_type& operator=(const self_type &x) { + if (&x == this) { + // Don't copy onto ourselves. + return *this; + } + assign(x); + return *this; + } + + key_compare* mutable_key_comp() { + return this; + } + const key_compare& key_comp() const { + return *this; + } + bool compare_keys(const key_type &x, const key_type &y) const { + return btree_compare_keys(key_comp(), x, y); + } + + // Dump the btree to the specified ostream. Requires that operator<< is + // defined for Key and Value. + void dump(std::ostream &os) const { + if (root() != NULL) { + internal_dump(os, root(), 0); + } + } + + // Verifies the structure of the btree. + void verify() const; + + // Size routines. Note that empty() is slightly faster than doing size()==0. + size_type size() const { + if (empty()) return 0; + if (root()->leaf()) return root()->count(); + return root()->size(); + } + size_type max_size() const { return std::numeric_limits::max(); } + bool empty() const { return root() == NULL; } + + // The height of the btree. An empty tree will have height 0. + size_type height() const { + size_type h = 0; + if (root()) { + // Count the length of the chain from the leftmost node up to the + // root. We actually count from the root back around to the level below + // the root, but the calculation is the same because of the circularity + // of that traversal. + const node_type *n = root(); + do { + ++h; + n = n->parent(); + } while (n != root()); + } + return h; + } + + // The number of internal, leaf and total nodes used by the btree. + size_type leaf_nodes() const { + return internal_stats(root()).leaf_nodes; + } + size_type internal_nodes() const { + return internal_stats(root()).internal_nodes; + } + size_type nodes() const { + node_stats stats = internal_stats(root()); + return stats.leaf_nodes + stats.internal_nodes; + } + + // The total number of bytes used by the btree. + size_type bytes_used() const { + node_stats stats = internal_stats(root()); + if (stats.leaf_nodes == 1 && stats.internal_nodes == 0) { + return sizeof(*this) + + sizeof(base_fields) + root()->max_count() * sizeof(value_type); + } else { + return sizeof(*this) + + sizeof(root_fields) - sizeof(internal_fields) + + stats.leaf_nodes * sizeof(leaf_fields) + + stats.internal_nodes * sizeof(internal_fields); + } + } + + // The average number of bytes used per value stored in the btree. + static double average_bytes_per_value() { + // Returns the number of bytes per value on a leaf node that is 75% + // full. Experimentally, this matches up nicely with the computed number of + // bytes per value in trees that had their values inserted in random order. + return sizeof(leaf_fields) / (kNodeValues * 0.75); + } + + // The fullness of the btree. Computed as the number of elements in the btree + // divided by the maximum number of elements a tree with the current number + // of nodes could hold. A value of 1 indicates perfect space + // utilization. Smaller values indicate space wastage. + double fullness() const { + return double(size()) / (nodes() * kNodeValues); + } + // The overhead of the btree structure in bytes per node. 
Computed as the + // total number of bytes used by the btree minus the number of bytes used for + // storing elements divided by the number of elements. + double overhead() const { + if (empty()) { + return 0.0; + } + return (bytes_used() - size() * kValueSize) / double(size()); + } + + private: + // Internal accessor routines. + node_type* root() { return root_.data; } + const node_type* root() const { return root_.data; } + node_type** mutable_root() { return &root_.data; } + + // The rightmost node is stored in the root node. + node_type* rightmost() { + return (!root() || root()->leaf()) ? root() : root()->rightmost(); + } + const node_type* rightmost() const { + return (!root() || root()->leaf()) ? root() : root()->rightmost(); + } + node_type** mutable_rightmost() { return root()->mutable_rightmost(); } + + // The leftmost node is stored as the parent of the root node. + node_type* leftmost() { return root() ? root()->parent() : NULL; } + const node_type* leftmost() const { return root() ? root()->parent() : NULL; } + + // The size of the tree is stored in the root node. + size_type* mutable_size() { return root()->mutable_size(); } + + // Allocator routines. + internal_allocator_type* mutable_internal_allocator() { + return static_cast(&root_); + } + const internal_allocator_type& internal_allocator() const { + return *static_cast(&root_); + } + + // Node creation/deletion routines. + node_type* new_internal_node(node_type *parent) { + internal_fields *p = reinterpret_cast( + mutable_internal_allocator()->allocate(sizeof(internal_fields))); + return node_type::init_internal(p, parent); + } + node_type* new_internal_root_node() { + root_fields *p = reinterpret_cast( + mutable_internal_allocator()->allocate(sizeof(root_fields))); + return node_type::init_root(p, root()->parent()); + } + node_type* new_leaf_node(node_type *parent) { + leaf_fields *p = reinterpret_cast( + mutable_internal_allocator()->allocate(sizeof(leaf_fields))); + return node_type::init_leaf(p, parent, kNodeValues); + } + node_type* new_leaf_root_node(int max_count) { + leaf_fields *p = reinterpret_cast( + mutable_internal_allocator()->allocate( + sizeof(base_fields) + max_count * sizeof(value_type))); + return node_type::init_leaf(p, reinterpret_cast(p), max_count); + } + void delete_internal_node(node_type *node) { + node->destroy(); + assert(node != root()); + mutable_internal_allocator()->deallocate( + reinterpret_cast(node), sizeof(internal_fields)); + } + void delete_internal_root_node() { + root()->destroy(); + mutable_internal_allocator()->deallocate( + reinterpret_cast(root()), sizeof(root_fields)); + } + void delete_leaf_node(node_type *node) { + node->destroy(); + mutable_internal_allocator()->deallocate( + reinterpret_cast(node), + sizeof(base_fields) + node->max_count() * sizeof(value_type)); + } + + // Rebalances or splits the node iter points to. + void rebalance_or_split(iterator *iter); + + // Merges the values of left, right and the delimiting key on their parent + // onto left, removing the delimiting key and deleting right. + void merge_nodes(node_type *left, node_type *right); + + // Tries to merge node with its left or right sibling, and failing that, + // rebalance with its left or right sibling. Returns true if a merge + // occurred, at which point it is no longer valid to access node. Returns + // false if no merging took place. + bool try_merge_or_rebalance(iterator *iter); + + // Tries to shrink the height of the tree by 1. 
+ void try_shrink(); + + iterator internal_end(iterator iter) { + return iter.node ? iter : end(); + } + const_iterator internal_end(const_iterator iter) const { + return iter.node ? iter : end(); + } + + // Inserts a value into the btree immediately before iter. Requires that + // key(v) <= iter.key() and (--iter).key() <= key(v). + iterator internal_insert(iterator iter, const value_type &v); + + // Returns an iterator pointing to the first value >= the value "iter" is + // pointing at. Note that "iter" might be pointing to an invalid location as + // iter.position == iter.node->count(). This routine simply moves iter up in + // the tree to a valid location. + template + static IterType internal_last(IterType iter); + + // Returns an iterator pointing to the leaf position at which key would + // reside in the tree. We provide 2 versions of internal_locate. The first + // version (internal_locate_plain_compare) always returns 0 for the second + // field of the pair. The second version (internal_locate_compare_to) is for + // the key-compare-to specialization and returns either kExactMatch (if the + // key was found in the tree) or -kExactMatch (if it wasn't) in the second + // field of the pair. The compare_to specialization allows the caller to + // avoid a subsequent comparison to determine if an exact match was made, + // speeding up string keys. + template + std::pair internal_locate( + const key_type &key, IterType iter) const; + template + std::pair internal_locate_plain_compare( + const key_type &key, IterType iter) const; + template + std::pair internal_locate_compare_to( + const key_type &key, IterType iter) const; + + // Internal routine which implements lower_bound(). + template + IterType internal_lower_bound( + const key_type &key, IterType iter) const; + + // Internal routine which implements upper_bound(). + template + IterType internal_upper_bound( + const key_type &key, IterType iter) const; + + // Internal routine which implements find_unique(). + template + IterType internal_find_unique( + const key_type &key, IterType iter) const; + + // Internal routine which implements find_multi(). + template + IterType internal_find_multi( + const key_type &key, IterType iter) const; + + // Deletes a node and all of its children. + void internal_clear(node_type *node); + + // Dumps a node and all of its children to the specified ostream. + void internal_dump(std::ostream &os, const node_type *node, int level) const; + + // Verifies the tree structure of node. + int internal_verify(const node_type *node, + const key_type *lo, const key_type *hi) const; + + node_stats internal_stats(const node_type *node) const { + if (!node) { + return node_stats(0, 0); + } + if (node->leaf()) { + return node_stats(1, 0); + } + node_stats res(0, 1); + for (int i = 0; i <= node->count(); ++i) { + res += internal_stats(node->child(i)); + } + return res; + } + + private: + empty_base_handle root_; + + private: + // A never instantiated helper function that returns big_ if we have a + // key-compare-to functor or if R is bool and small_ otherwise. + template + static typename if_< + if_, + std::is_same >::type::value, + big_, small_>::type key_compare_checker(R); + + // A never instantiated helper function that returns the key comparison + // functor. + static key_compare key_compare_helper(); + + // Verify that key_compare returns a bool. This is similar to the way + // is_convertible in base/type_traits.h works. Note that key_compare_checker + // is never actually invoked. 
The compiler will select which + // key_compare_checker() to instantiate and then figure out the size of the + // return type of key_compare_checker() at compile time which we then check + // against the sizeof of big_. + COMPILE_ASSERT( + sizeof(key_compare_checker(key_compare_helper()(key_type(), key_type()))) == + sizeof(big_), + key_comparison_function_must_return_bool); + + // Note: We insist on kTargetValues, which is computed from + // Params::kTargetNodeSize, must fit the base_fields::field_type. + COMPILE_ASSERT(kNodeValues < + (1 << (8 * sizeof(typename base_fields::field_type))), + target_node_size_too_large); + + // Test the assumption made in setting kNodeValueSpace. + COMPILE_ASSERT(sizeof(base_fields) >= 2 * sizeof(void*), + node_space_assumption_incorrect); +}; + +//// +// btree_node methods +template +inline void btree_node
<P>
::insert_value(int i, const value_type &x) { + assert(i <= count()); + value_init(count(), x); + for (int j = count(); j > i; --j) { + value_swap(j, this, j - 1); + } + set_count(count() + 1); + + if (!leaf()) { + ++i; + for (int j = count(); j > i; --j) { + *mutable_child(j) = child(j - 1); + child(j)->set_position(j); + } + *mutable_child(i) = NULL; + } +} + +template +inline void btree_node
<P>
::remove_value(int i) { + if (!leaf()) { + assert(child(i + 1)->count() == 0); + for (int j = i + 1; j < count(); ++j) { + *mutable_child(j) = child(j + 1); + child(j)->set_position(j); + } + *mutable_child(count()) = NULL; + } + + set_count(count() - 1); + for (; i < count(); ++i) { + value_swap(i, this, i + 1); + } + value_destroy(i); +} + +template +void btree_node
<P>
::rebalance_right_to_left(btree_node *src, int to_move) { + assert(parent() == src->parent()); + assert(position() + 1 == src->position()); + assert(src->count() >= count()); + assert(to_move >= 1); + assert(to_move <= src->count()); + + // Make room in the left node for the new values. + for (int i = 0; i < to_move; ++i) { + value_init(i + count()); + } + + // Move the delimiting value to the left node and the new delimiting value + // from the right node. + value_swap(count(), parent(), position()); + parent()->value_swap(position(), src, to_move - 1); + + // Move the values from the right to the left node. + for (int i = 1; i < to_move; ++i) { + value_swap(count() + i, src, i - 1); + } + // Shift the values in the right node to their correct position. + for (int i = to_move; i < src->count(); ++i) { + src->value_swap(i - to_move, src, i); + } + for (int i = 1; i <= to_move; ++i) { + src->value_destroy(src->count() - i); + } + + if (!leaf()) { + // Move the child pointers from the right to the left node. + for (int i = 0; i < to_move; ++i) { + set_child(1 + count() + i, src->child(i)); + } + for (int i = 0; i <= src->count() - to_move; ++i) { + assert(i + to_move <= src->max_count()); + src->set_child(i, src->child(i + to_move)); + *src->mutable_child(i + to_move) = NULL; + } + } + + // Fixup the counts on the src and dest nodes. + set_count(count() + to_move); + src->set_count(src->count() - to_move); +} + +template +void btree_node
<P>
::rebalance_left_to_right(btree_node *dest, int to_move) { + assert(parent() == dest->parent()); + assert(position() + 1 == dest->position()); + assert(count() >= dest->count()); + assert(to_move >= 1); + assert(to_move <= count()); + + // Make room in the right node for the new values. + for (int i = 0; i < to_move; ++i) { + dest->value_init(i + dest->count()); + } + for (int i = dest->count() - 1; i >= 0; --i) { + dest->value_swap(i, dest, i + to_move); + } + + // Move the delimiting value to the right node and the new delimiting value + // from the left node. + dest->value_swap(to_move - 1, parent(), position()); + parent()->value_swap(position(), this, count() - to_move); + value_destroy(count() - to_move); + + // Move the values from the left to the right node. + for (int i = 1; i < to_move; ++i) { + value_swap(count() - to_move + i, dest, i - 1); + value_destroy(count() - to_move + i); + } + + if (!leaf()) { + // Move the child pointers from the left to the right node. + for (int i = dest->count(); i >= 0; --i) { + dest->set_child(i + to_move, dest->child(i)); + *dest->mutable_child(i) = NULL; + } + for (int i = 1; i <= to_move; ++i) { + dest->set_child(i - 1, child(count() - to_move + i)); + *mutable_child(count() - to_move + i) = NULL; + } + } + + // Fixup the counts on the src and dest nodes. + set_count(count() - to_move); + dest->set_count(dest->count() + to_move); +} + +template +void btree_node
<P>
::split(btree_node *dest, int insert_position) { + assert(dest->count() == 0); + + // We bias the split based on the position being inserted. If we're + // inserting at the beginning of the left node then bias the split to put + // more values on the right node. If we're inserting at the end of the + // right node then bias the split to put more values on the left node. + if (insert_position == 0) { + dest->set_count(count() - 1); + } else if (insert_position == max_count()) { + dest->set_count(0); + } else { + dest->set_count(count() / 2); + } + set_count(count() - dest->count()); + assert(count() >= 1); + + // Move values from the left sibling to the right sibling. + for (int i = 0; i < dest->count(); ++i) { + dest->value_init(i); + value_swap(count() + i, dest, i); + value_destroy(count() + i); + } + + // The split key is the largest value in the left sibling. + set_count(count() - 1); + parent()->insert_value(position(), value_type()); + value_swap(count(), parent(), position()); + value_destroy(count()); + parent()->set_child(position() + 1, dest); + + if (!leaf()) { + for (int i = 0; i <= dest->count(); ++i) { + assert(child(count() + i + 1) != NULL); + dest->set_child(i, child(count() + i + 1)); + *mutable_child(count() + i + 1) = NULL; + } + } +} + +template +void btree_node
<P>
::merge(btree_node *src) { + assert(parent() == src->parent()); + assert(position() + 1 == src->position()); + + // Move the delimiting value to the left node. + value_init(count()); + value_swap(count(), parent(), position()); + + // Move the values from the right to the left node. + for (int i = 0; i < src->count(); ++i) { + value_init(1 + count() + i); + value_swap(1 + count() + i, src, i); + src->value_destroy(i); + } + + if (!leaf()) { + // Move the child pointers from the right to the left node. + for (int i = 0; i <= src->count(); ++i) { + set_child(1 + count() + i, src->child(i)); + *src->mutable_child(i) = NULL; + } + } + + // Fixup the counts on the src and dest nodes. + set_count(1 + count() + src->count()); + src->set_count(0); + + // Remove the value on the parent node. + parent()->remove_value(position()); +} + +template +void btree_node
<P>
::swap(btree_node *x) { + assert(leaf() == x->leaf()); + + // Swap the values. + for (int i = count(); i < x->count(); ++i) { + value_init(i); + } + for (int i = x->count(); i < count(); ++i) { + x->value_init(i); + } + int n = std::max(count(), x->count()); + for (int i = 0; i < n; ++i) { + value_swap(i, x, i); + } + for (int i = count(); i < x->count(); ++i) { + x->value_destroy(i); + } + for (int i = x->count(); i < count(); ++i) { + value_destroy(i); + } + + if (!leaf()) { + // Swap the child pointers. + for (int i = 0; i <= n; ++i) { + btree_swap_helper(*mutable_child(i), *x->mutable_child(i)); + } + for (int i = 0; i <= count(); ++i) { + x->child(i)->fields_.parent = x; + } + for (int i = 0; i <= x->count(); ++i) { + child(i)->fields_.parent = this; + } + } + + // Swap the counts. + btree_swap_helper(fields_.count, x->fields_.count); +} + +//// +// btree_iterator methods +template +void btree_iterator::increment_slow() { + if (node->leaf()) { + assert(position >= node->count()); + self_type save(*this); + while (position == node->count() && !node->is_root()) { + assert(node->parent()->child(node->position()) == node); + position = node->position(); + node = node->parent(); + } + if (position == node->count()) { + *this = save; + } + } else { + assert(position < node->count()); + node = node->child(position + 1); + while (!node->leaf()) { + node = node->child(0); + } + position = 0; + } +} + +template +void btree_iterator::increment_by(int count) { + while (count > 0) { + if (node->leaf()) { + int rest = node->count() - position; + position += std::min(rest, count); + count = count - rest; + if (position < node->count()) { + return; + } + } else { + --count; + } + increment_slow(); + } +} + +template +void btree_iterator::decrement_slow() { + if (node->leaf()) { + assert(position <= -1); + self_type save(*this); + while (position < 0 && !node->is_root()) { + assert(node->parent()->child(node->position()) == node); + position = node->position() - 1; + node = node->parent(); + } + if (position < 0) { + *this = save; + } + } else { + assert(position >= 0); + node = node->child(position); + while (!node->leaf()) { + node = node->child(node->count()); + } + position = node->count() - 1; + } +} + +//// +// btree methods +template +btree
<P>
::btree(const key_compare &comp, const allocator_type &alloc) + : key_compare(comp), + root_(alloc, NULL) { +} + +template +btree
<P>
::btree(const self_type &x) + : key_compare(x.key_comp()), + root_(x.internal_allocator(), NULL) { + assign(x); +} + +template template +std::pair::iterator, bool> +btree
<P>
::insert_unique(const key_type &key, ValuePointer value) { + if (empty()) { + *mutable_root() = new_leaf_root_node(1); + } + + std::pair res = internal_locate(key, iterator(root(), 0)); + iterator &iter = res.first; + if (res.second == kExactMatch) { + // The key already exists in the tree, do nothing. + return std::make_pair(internal_last(iter), false); + } else if (!res.second) { + iterator last = internal_last(iter); + if (last.node && !compare_keys(key, last.key())) { + // The key already exists in the tree, do nothing. + return std::make_pair(last, false); + } + } + + return std::make_pair(internal_insert(iter, *value), true); +} + +template +inline typename btree
<P>
::iterator +btree
<P>
::insert_unique(iterator position, const value_type &v) { + if (!empty()) { + const key_type &key = params_type::key(v); + if (position == end() || compare_keys(key, position.key())) { + iterator prev = position; + if (position == begin() || compare_keys((--prev).key(), key)) { + // prev.key() < key < position.key() + return internal_insert(position, v); + } + } else if (compare_keys(position.key(), key)) { + iterator next = position; + ++next; + if (next == end() || compare_keys(key, next.key())) { + // position.key() < key < next.key() + return internal_insert(next, v); + } + } else { + // position.key() == key + return position; + } + } + return insert_unique(v).first; +} + +template template +void btree
<P>
::insert_unique(InputIterator b, InputIterator e) { + for (; b != e; ++b) { + insert_unique(end(), *b); + } +} + +template template +typename btree
<P>
::iterator +btree
<P>
::insert_multi(const key_type &key, ValuePointer value) { + if (empty()) { + *mutable_root() = new_leaf_root_node(1); + } + + iterator iter = internal_upper_bound(key, iterator(root(), 0)); + if (!iter.node) { + iter = end(); + } + return internal_insert(iter, *value); +} + +template +typename btree
<P>
::iterator +btree
<P>
::insert_multi(iterator position, const value_type &v) { + if (!empty()) { + const key_type &key = params_type::key(v); + if (position == end() || !compare_keys(position.key(), key)) { + iterator prev = position; + if (position == begin() || !compare_keys(key, (--prev).key())) { + // prev.key() <= key <= position.key() + return internal_insert(position, v); + } + } else { + iterator next = position; + ++next; + if (next == end() || !compare_keys(next.key(), key)) { + // position.key() < key <= next.key() + return internal_insert(next, v); + } + } + } + return insert_multi(v); +} + +template template +void btree
<P>
::insert_multi(InputIterator b, InputIterator e) { + for (; b != e; ++b) { + insert_multi(end(), *b); + } +} + +template +void btree
<P>
::assign(const self_type &x) { + clear(); + + *mutable_key_comp() = x.key_comp(); + *mutable_internal_allocator() = x.internal_allocator(); + + // Assignment can avoid key comparisons because we know the order of the + // values is the same order we'll store them in. + for (const_iterator iter = x.begin(); iter != x.end(); ++iter) { + if (empty()) { + insert_multi(*iter); + } else { + // If the btree is not empty, we can just insert the new value at the end + // of the tree! + internal_insert(end(), *iter); + } + } +} + +template +typename btree
<P>
::iterator btree
<P>
::erase(iterator iter) { + bool internal_delete = false; + if (!iter.node->leaf()) { + // Deletion of a value on an internal node. Swap the key with the largest + // value of our left child. This is easy, we just decrement iter. + iterator tmp_iter(iter--); + assert(iter.node->leaf()); + assert(!compare_keys(tmp_iter.key(), iter.key())); + iter.node->value_swap(iter.position, tmp_iter.node, tmp_iter.position); + internal_delete = true; + --*mutable_size(); + } else if (!root()->leaf()) { + --*mutable_size(); + } + + // Delete the key from the leaf. + iter.node->remove_value(iter.position); + + // We want to return the next value after the one we just erased. If we + // erased from an internal node (internal_delete == true), then the next + // value is ++(++iter). If we erased from a leaf node (internal_delete == + // false) then the next value is ++iter. Note that ++iter may point to an + // internal node and the value in the internal node may move to a leaf node + // (iter.node) when rebalancing is performed at the leaf level. + + // Merge/rebalance as we walk back up the tree. + iterator res(iter); + for (;;) { + if (iter.node == root()) { + try_shrink(); + if (empty()) { + return end(); + } + break; + } + if (iter.node->count() >= kMinNodeValues) { + break; + } + bool merged = try_merge_or_rebalance(&iter); + if (iter.node->leaf()) { + res = iter; + } + if (!merged) { + break; + } + iter.node = iter.node->parent(); + } + + // Adjust our return value. If we're pointing at the end of a node, advance + // the iterator. + if (res.position == res.node->count()) { + res.position = res.node->count() - 1; + ++res; + } + // If we erased from an internal node, advance the iterator. + if (internal_delete) { + ++res; + } + return res; +} + +template +int btree
<P>
::erase(iterator begin, iterator end) { + int count = distance(begin, end); + for (int i = 0; i < count; i++) { + begin = erase(begin); + } + return count; +} + +template +int btree
<P>
::erase_unique(const key_type &key) { + iterator iter = internal_find_unique(key, iterator(root(), 0)); + if (!iter.node) { + // The key doesn't exist in the tree, return nothing done. + return 0; + } + erase(iter); + return 1; +} + +template +int btree
<P>
::erase_multi(const key_type &key) { + iterator begin = internal_lower_bound(key, iterator(root(), 0)); + if (!begin.node) { + // The key doesn't exist in the tree, return nothing done. + return 0; + } + // Delete all of the keys between begin and upper_bound(key). + iterator end = internal_end( + internal_upper_bound(key, iterator(root(), 0))); + return erase(begin, end); +} + +template +void btree
<P>
::clear() { + if (root() != NULL) { + internal_clear(root()); + } + *mutable_root() = NULL; +} + +template +void btree
<P>
::swap(self_type &x) { + std::swap(static_cast(*this), static_cast(x)); + std::swap(root_, x.root_); +} + +template +void btree
<P>
::verify() const { + if (root() != NULL) { + assert(size() == internal_verify(root(), NULL, NULL)); + assert(leftmost() == (++const_iterator(root(), -1)).node); + assert(rightmost() == (--const_iterator(root(), root()->count())).node); + assert(leftmost()->leaf()); + assert(rightmost()->leaf()); + } else { + assert(size() == 0); + assert(leftmost() == NULL); + assert(rightmost() == NULL); + } +} + +template +void btree
<P>
::rebalance_or_split(iterator *iter) { + node_type *&node = iter->node; + int &insert_position = iter->position; + assert(node->count() == node->max_count()); + + // First try to make room on the node by rebalancing. + node_type *parent = node->parent(); + if (node != root()) { + if (node->position() > 0) { + // Try rebalancing with our left sibling. + node_type *left = parent->child(node->position() - 1); + if (left->count() < left->max_count()) { + // We bias rebalancing based on the position being inserted. If we're + // inserting at the end of the right node then we bias rebalancing to + // fill up the left node. + int to_move = (left->max_count() - left->count()) / + (1 + (insert_position < left->max_count())); + to_move = std::max(1, to_move); + + if (((insert_position - to_move) >= 0) || + ((left->count() + to_move) < left->max_count())) { + left->rebalance_right_to_left(node, to_move); + + assert(node->max_count() - node->count() == to_move); + insert_position = insert_position - to_move; + if (insert_position < 0) { + insert_position = insert_position + left->count() + 1; + node = left; + } + + assert(node->count() < node->max_count()); + return; + } + } + } + + if (node->position() < parent->count()) { + // Try rebalancing with our right sibling. + node_type *right = parent->child(node->position() + 1); + if (right->count() < right->max_count()) { + // We bias rebalancing based on the position being inserted. If we're + // inserting at the beginning of the left node then we bias rebalancing + // to fill up the right node. + int to_move = (right->max_count() - right->count()) / + (1 + (insert_position > 0)); + to_move = std::max(1, to_move); + + if ((insert_position <= (node->count() - to_move)) || + ((right->count() + to_move) < right->max_count())) { + node->rebalance_left_to_right(right, to_move); + + if (insert_position > node->count()) { + insert_position = insert_position - node->count() - 1; + node = right; + } + + assert(node->count() < node->max_count()); + return; + } + } + } + + // Rebalancing failed, make sure there is room on the parent node for a new + // value. + if (parent->count() == parent->max_count()) { + iterator parent_iter(node->parent(), node->position()); + rebalance_or_split(&parent_iter); + } + } else { + // Rebalancing not possible because this is the root node. + if (root()->leaf()) { + // The root node is currently a leaf node: create a new root node and set + // the current root node as the child of the new root. + parent = new_internal_root_node(); + parent->set_child(0, root()); + *mutable_root() = parent; + assert(*mutable_rightmost() == parent->child(0)); + } else { + // The root node is an internal node. We do not want to create a new root + // node because the root node is special and holds the size of the tree + // and a pointer to the rightmost node. So we create a new internal node + // and move all of the items on the current root into the new node. + parent = new_internal_node(parent); + parent->set_child(0, parent); + parent->swap(root()); + node = parent; + } + } + + // Split the node. + node_type *split_node; + if (node->leaf()) { + split_node = new_leaf_node(parent); + node->split(split_node, insert_position); + if (rightmost() == node) { + *mutable_rightmost() = split_node; + } + } else { + split_node = new_internal_node(parent); + node->split(split_node, insert_position); + } + + if (insert_position > node->count()) { + insert_position = insert_position - node->count() - 1; + node = split_node; + } +} + +template +void btree
<P>
::merge_nodes(node_type *left, node_type *right) { + left->merge(right); + if (right->leaf()) { + if (rightmost() == right) { + *mutable_rightmost() = left; + } + delete_leaf_node(right); + } else { + delete_internal_node(right); + } +} + +template +bool btree
<P>
::try_merge_or_rebalance(iterator *iter) { + node_type *parent = iter->node->parent(); + if (iter->node->position() > 0) { + // Try merging with our left sibling. + node_type *left = parent->child(iter->node->position() - 1); + if ((1 + left->count() + iter->node->count()) <= left->max_count()) { + iter->position += 1 + left->count(); + merge_nodes(left, iter->node); + iter->node = left; + return true; + } + } + if (iter->node->position() < parent->count()) { + // Try merging with our right sibling. + node_type *right = parent->child(iter->node->position() + 1); + if ((1 + iter->node->count() + right->count()) <= right->max_count()) { + merge_nodes(iter->node, right); + return true; + } + // Try rebalancing with our right sibling. We don't perform rebalancing if + // we deleted the first element from iter->node and the node is not + // empty. This is a small optimization for the common pattern of deleting + // from the front of the tree. + if ((right->count() > kMinNodeValues) && + ((iter->node->count() == 0) || + (iter->position > 0))) { + int to_move = (right->count() - iter->node->count()) / 2; + to_move = std::min(to_move, right->count() - 1); + iter->node->rebalance_right_to_left(right, to_move); + return false; + } + } + if (iter->node->position() > 0) { + // Try rebalancing with our left sibling. We don't perform rebalancing if + // we deleted the last element from iter->node and the node is not + // empty. This is a small optimization for the common pattern of deleting + // from the back of the tree. + node_type *left = parent->child(iter->node->position() - 1); + if ((left->count() > kMinNodeValues) && + ((iter->node->count() == 0) || + (iter->position < iter->node->count()))) { + int to_move = (left->count() - iter->node->count()) / 2; + to_move = std::min(to_move, left->count() - 1); + left->rebalance_left_to_right(iter->node, to_move); + iter->position += to_move; + return false; + } + } + return false; +} + +template +void btree
<P>
::try_shrink() { + if (root()->count() > 0) { + return; + } + // Deleted the last item on the root node, shrink the height of the tree. + if (root()->leaf()) { + assert(size() == 0); + delete_leaf_node(root()); + *mutable_root() = NULL; + } else { + node_type *child = root()->child(0); + if (child->leaf()) { + // The child is a leaf node so simply make it the root node in the tree. + child->make_root(); + delete_internal_root_node(); + *mutable_root() = child; + } else { + // The child is an internal node. We want to keep the existing root node + // so we move all of the values from the child node into the existing + // (empty) root node. + child->swap(root()); + delete_internal_node(child); + } + } +} + +template template +inline IterType btree
<P>
::internal_last(IterType iter) { + while (iter.node && iter.position == iter.node->count()) { + iter.position = iter.node->position(); + iter.node = iter.node->parent(); + if (iter.node->leaf()) { + iter.node = NULL; + } + } + return iter; +} + +template +inline typename btree
<P>
::iterator +btree
<P>
::internal_insert(iterator iter, const value_type &v) { + if (!iter.node->leaf()) { + // We can't insert on an internal node. Instead, we'll insert after the + // previous value which is guaranteed to be on a leaf node. + --iter; + ++iter.position; + } + if (iter.node->count() == iter.node->max_count()) { + // Make room in the leaf for the new item. + if (iter.node->max_count() < kNodeValues) { + // Insertion into the root where the root is smaller that the full node + // size. Simply grow the size of the root node. + assert(iter.node == root()); + iter.node = new_leaf_root_node( + std::min(kNodeValues, 2 * iter.node->max_count())); + iter.node->swap(root()); + delete_leaf_node(root()); + *mutable_root() = iter.node; + } else { + rebalance_or_split(&iter); + ++*mutable_size(); + } + } else if (!root()->leaf()) { + ++*mutable_size(); + } + iter.node->insert_value(iter.position, v); + return iter; +} + +template template +inline std::pair btree
<P>
::internal_locate( + const key_type &key, IterType iter) const { + return internal_locate_type::dispatch(key, *this, iter); +} + +template template +inline std::pair btree
<P>
::internal_locate_plain_compare( + const key_type &key, IterType iter) const { + for (;;) { + iter.position = iter.node->lower_bound(key, key_comp()); + if (iter.node->leaf()) { + break; + } + iter.node = iter.node->child(iter.position); + } + return std::make_pair(iter, 0); +} + +template template +inline std::pair btree
<P>
::internal_locate_compare_to( + const key_type &key, IterType iter) const { + for (;;) { + int res = iter.node->lower_bound(key, key_comp()); + iter.position = res & kMatchMask; + if (res & kExactMatch) { + return std::make_pair(iter, static_cast(kExactMatch)); + } + if (iter.node->leaf()) { + break; + } + iter.node = iter.node->child(iter.position); + } + return std::make_pair(iter, -kExactMatch); +} + +template template +IterType btree
<P>
::internal_lower_bound( + const key_type &key, IterType iter) const { + if (iter.node) { + for (;;) { + iter.position = + iter.node->lower_bound(key, key_comp()) & kMatchMask; + if (iter.node->leaf()) { + break; + } + iter.node = iter.node->child(iter.position); + } + iter = internal_last(iter); + } + return iter; +} + +template template +IterType btree
<P>
::internal_upper_bound( + const key_type &key, IterType iter) const { + if (iter.node) { + for (;;) { + iter.position = iter.node->upper_bound(key, key_comp()); + if (iter.node->leaf()) { + break; + } + iter.node = iter.node->child(iter.position); + } + iter = internal_last(iter); + } + return iter; +} + +template template +IterType btree
<P>
::internal_find_unique( + const key_type &key, IterType iter) const { + if (iter.node) { + std::pair res = internal_locate(key, iter); + if (res.second == kExactMatch) { + return res.first; + } + if (!res.second) { + iter = internal_last(res.first); + if (iter.node && !compare_keys(key, iter.key())) { + return iter; + } + } + } + return IterType(NULL, 0); +} + +template template +IterType btree
<P>
::internal_find_multi( + const key_type &key, IterType iter) const { + if (iter.node) { + iter = internal_lower_bound(key, iter); + if (iter.node) { + iter = internal_last(iter); + if (iter.node && !compare_keys(key, iter.key())) { + return iter; + } + } + } + return IterType(NULL, 0); +} + +template +void btree
<P>
::internal_clear(node_type *node) { + if (!node->leaf()) { + for (int i = 0; i <= node->count(); ++i) { + internal_clear(node->child(i)); + } + if (node == root()) { + delete_internal_root_node(); + } else { + delete_internal_node(node); + } + } else { + delete_leaf_node(node); + } +} + +template +void btree
<P>
::internal_dump( + std::ostream &os, const node_type *node, int level) const { + for (int i = 0; i < node->count(); ++i) { + if (!node->leaf()) { + internal_dump(os, node->child(i), level + 1); + } + for (int j = 0; j < level; ++j) { + os << " "; + } + os << node->key(i) << " [" << level << "]\n"; + } + if (!node->leaf()) { + internal_dump(os, node->child(node->count()), level + 1); + } +} + +template +int btree
<P>
::internal_verify( + const node_type *node, const key_type *lo, const key_type *hi) const { + assert(node->count() > 0); + assert(node->count() <= node->max_count()); + if (lo) { + assert(!compare_keys(node->key(0), *lo)); + } + if (hi) { + assert(!compare_keys(*hi, node->key(node->count() - 1))); + } + for (int i = 1; i < node->count(); ++i) { + assert(!compare_keys(node->key(i), node->key(i - 1))); + } + int count = node->count(); + if (!node->leaf()) { + for (int i = 0; i <= node->count(); ++i) { + assert(node->child(i) != NULL); + assert(node->child(i)->parent() == node); + assert(node->child(i)->position() == i); + count += internal_verify( + node->child(i), + (i == 0) ? lo : &node->key(i - 1), + (i == node->count()) ? hi : &node->key(i)); + } + } + return count; +} + +} // namespace btree + +#endif // UTIL_BTREE_BTREE_H__ diff --git a/c_src/gb_lru/btree_container.h b/c_src/gb_lru/btree_container.h new file mode 100644 index 0000000..fb617ab --- /dev/null +++ b/c_src/gb_lru/btree_container.h @@ -0,0 +1,349 @@ +// Copyright 2013 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef UTIL_BTREE_BTREE_CONTAINER_H__ +#define UTIL_BTREE_BTREE_CONTAINER_H__ + +#include +#include + +#include "btree.h" + +namespace btree { + +// A common base class for btree_set, btree_map, btree_multiset and +// btree_multimap. +template +class btree_container { + typedef btree_container self_type; + + public: + typedef typename Tree::params_type params_type; + typedef typename Tree::key_type key_type; + typedef typename Tree::value_type value_type; + typedef typename Tree::key_compare key_compare; + typedef typename Tree::allocator_type allocator_type; + typedef typename Tree::pointer pointer; + typedef typename Tree::const_pointer const_pointer; + typedef typename Tree::reference reference; + typedef typename Tree::const_reference const_reference; + typedef typename Tree::size_type size_type; + typedef typename Tree::difference_type difference_type; + typedef typename Tree::iterator iterator; + typedef typename Tree::const_iterator const_iterator; + typedef typename Tree::reverse_iterator reverse_iterator; + typedef typename Tree::const_reverse_iterator const_reverse_iterator; + + public: + // Default constructor. + btree_container(const key_compare &comp, const allocator_type &alloc) + : tree_(comp, alloc) { + } + + // Copy constructor. + btree_container(const self_type &x) + : tree_(x.tree_) { + } + + // Iterator routines. + iterator begin() { return tree_.begin(); } + const_iterator begin() const { return tree_.begin(); } + iterator end() { return tree_.end(); } + const_iterator end() const { return tree_.end(); } + reverse_iterator rbegin() { return tree_.rbegin(); } + const_reverse_iterator rbegin() const { return tree_.rbegin(); } + reverse_iterator rend() { return tree_.rend(); } + const_reverse_iterator rend() const { return tree_.rend(); } + + // Lookup routines. 
+ iterator lower_bound(const key_type &key) { + return tree_.lower_bound(key); + } + const_iterator lower_bound(const key_type &key) const { + return tree_.lower_bound(key); + } + iterator upper_bound(const key_type &key) { + return tree_.upper_bound(key); + } + const_iterator upper_bound(const key_type &key) const { + return tree_.upper_bound(key); + } + std::pair equal_range(const key_type &key) { + return tree_.equal_range(key); + } + std::pair equal_range(const key_type &key) const { + return tree_.equal_range(key); + } + + // Utility routines. + void clear() { + tree_.clear(); + } + void swap(self_type &x) { + tree_.swap(x.tree_); + } + void dump(std::ostream &os) const { + tree_.dump(os); + } + void verify() const { + tree_.verify(); + } + + // Size routines. + size_type size() const { return tree_.size(); } + size_type max_size() const { return tree_.max_size(); } + bool empty() const { return tree_.empty(); } + size_type height() const { return tree_.height(); } + size_type internal_nodes() const { return tree_.internal_nodes(); } + size_type leaf_nodes() const { return tree_.leaf_nodes(); } + size_type nodes() const { return tree_.nodes(); } + size_type bytes_used() const { return tree_.bytes_used(); } + static double average_bytes_per_value() { + return Tree::average_bytes_per_value(); + } + double fullness() const { return tree_.fullness(); } + double overhead() const { return tree_.overhead(); } + + bool operator==(const self_type& x) const { + if (size() != x.size()) { + return false; + } + for (const_iterator i = begin(), xi = x.begin(); i != end(); ++i, ++xi) { + if (*i != *xi) { + return false; + } + } + return true; + } + + bool operator!=(const self_type& other) const { + return !operator==(other); + } + + + protected: + Tree tree_; +}; + +template +inline std::ostream& operator<<(std::ostream &os, const btree_container &b) { + b.dump(os); + return os; +} + +// A common base class for btree_set and safe_btree_set. +template +class btree_unique_container : public btree_container { + typedef btree_unique_container self_type; + typedef btree_container super_type; + + public: + typedef typename Tree::key_type key_type; + typedef typename Tree::value_type value_type; + typedef typename Tree::size_type size_type; + typedef typename Tree::key_compare key_compare; + typedef typename Tree::allocator_type allocator_type; + typedef typename Tree::iterator iterator; + typedef typename Tree::const_iterator const_iterator; + + public: + // Default constructor. + btree_unique_container(const key_compare &comp = key_compare(), + const allocator_type &alloc = allocator_type()) + : super_type(comp, alloc) { + } + + // Copy constructor. + btree_unique_container(const self_type &x) + : super_type(x) { + } + + // Range constructor. + template + btree_unique_container(InputIterator b, InputIterator e, + const key_compare &comp = key_compare(), + const allocator_type &alloc = allocator_type()) + : super_type(comp, alloc) { + insert(b, e); + } + + // Lookup routines. + iterator find(const key_type &key) { + return this->tree_.find_unique(key); + } + const_iterator find(const key_type &key) const { + return this->tree_.find_unique(key); + } + size_type count(const key_type &key) const { + return this->tree_.count_unique(key); + } + + // Insertion routines. 
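+  // Illustrative sketch (hypothetical instantiation, using the btree_map
+  // defined later in this patch): insert() follows std::map semantics, e.g.
+  //   btree::btree_map<int, std::string> m;
+  //   std::pair<btree::btree_map<int, std::string>::iterator, bool> r =
+  //       m.insert(std::make_pair(1, std::string("one")));
+  //   // r.second is true on first insertion, false if the key already exists.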
+ std::pair insert(const value_type &x) { + return this->tree_.insert_unique(x); + } + iterator insert(iterator position, const value_type &x) { + return this->tree_.insert_unique(position, x); + } + template + void insert(InputIterator b, InputIterator e) { + this->tree_.insert_unique(b, e); + } + + // Deletion routines. + int erase(const key_type &key) { + return this->tree_.erase_unique(key); + } + // Erase the specified iterator from the btree. The iterator must be valid + // (i.e. not equal to end()). Return an iterator pointing to the node after + // the one that was erased (or end() if none exists). + iterator erase(const iterator &iter) { + return this->tree_.erase(iter); + } + void erase(const iterator &first, const iterator &last) { + this->tree_.erase(first, last); + } +}; + +// A common base class for btree_map and safe_btree_map. +template +class btree_map_container : public btree_unique_container { + typedef btree_map_container self_type; + typedef btree_unique_container super_type; + + public: + typedef typename Tree::key_type key_type; + typedef typename Tree::data_type data_type; + typedef typename Tree::value_type value_type; + typedef typename Tree::mapped_type mapped_type; + typedef typename Tree::key_compare key_compare; + typedef typename Tree::allocator_type allocator_type; + + private: + // A pointer-like object which only generates its value when + // dereferenced. Used by operator[] to avoid constructing an empty data_type + // if the key already exists in the map. + struct generate_value { + generate_value(const key_type &k) + : key(k) { + } + value_type operator*() const { + return std::make_pair(key, data_type()); + } + const key_type &key; + }; + + public: + // Default constructor. + btree_map_container(const key_compare &comp = key_compare(), + const allocator_type &alloc = allocator_type()) + : super_type(comp, alloc) { + } + + // Copy constructor. + btree_map_container(const self_type &x) + : super_type(x) { + } + + // Range constructor. + template + btree_map_container(InputIterator b, InputIterator e, + const key_compare &comp = key_compare(), + const allocator_type &alloc = allocator_type()) + : super_type(b, e, comp, alloc) { + } + + // Insertion routines. + data_type& operator[](const key_type &key) { + return this->tree_.insert_unique(key, generate_value(key)).first->second; + } +}; + +// A common base class for btree_multiset and btree_multimap. +template +class btree_multi_container : public btree_container { + typedef btree_multi_container self_type; + typedef btree_container super_type; + + public: + typedef typename Tree::key_type key_type; + typedef typename Tree::value_type value_type; + typedef typename Tree::size_type size_type; + typedef typename Tree::key_compare key_compare; + typedef typename Tree::allocator_type allocator_type; + typedef typename Tree::iterator iterator; + typedef typename Tree::const_iterator const_iterator; + + public: + // Default constructor. + btree_multi_container(const key_compare &comp = key_compare(), + const allocator_type &alloc = allocator_type()) + : super_type(comp, alloc) { + } + + // Copy constructor. + btree_multi_container(const self_type &x) + : super_type(x) { + } + + // Range constructor. + template + btree_multi_container(InputIterator b, InputIterator e, + const key_compare &comp = key_compare(), + const allocator_type &alloc = allocator_type()) + : super_type(comp, alloc) { + insert(b, e); + } + + // Lookup routines. 
+ iterator find(const key_type &key) { + return this->tree_.find_multi(key); + } + const_iterator find(const key_type &key) const { + return this->tree_.find_multi(key); + } + size_type count(const key_type &key) const { + return this->tree_.count_multi(key); + } + + // Insertion routines. + iterator insert(const value_type &x) { + return this->tree_.insert_multi(x); + } + iterator insert(iterator position, const value_type &x) { + return this->tree_.insert_multi(position, x); + } + template + void insert(InputIterator b, InputIterator e) { + this->tree_.insert_multi(b, e); + } + + // Deletion routines. + int erase(const key_type &key) { + return this->tree_.erase_multi(key); + } + // Erase the specified iterator from the btree. The iterator must be valid + // (i.e. not equal to end()). Return an iterator pointing to the node after + // the one that was erased (or end() if none exists). + iterator erase(const iterator &iter) { + return this->tree_.erase(iter); + } + void erase(const iterator &first, const iterator &last) { + this->tree_.erase(first, last); + } +}; + +} // namespace btree + +#endif // UTIL_BTREE_BTREE_CONTAINER_H__ diff --git a/c_src/gb_lru/btree_map.h b/c_src/gb_lru/btree_map.h new file mode 100644 index 0000000..b83489f --- /dev/null +++ b/c_src/gb_lru/btree_map.h @@ -0,0 +1,130 @@ +// Copyright 2013 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// A btree_map<> implements the STL unique sorted associative container +// interface and the pair associative container interface (a.k.a map<>) using a +// btree. A btree_multimap<> implements the STL multiple sorted associative +// container interface and the pair associtive container interface (a.k.a +// multimap<>) using a btree. See btree.h for details of the btree +// implementation and caveats. + +#ifndef UTIL_BTREE_BTREE_MAP_H__ +#define UTIL_BTREE_BTREE_MAP_H__ + +#include +#include +#include +#include +#include + +#include "btree.h" +#include "btree_container.h" + +namespace btree { + +// The btree_map class is needed mainly for its constructors. +template , + typename Alloc = std::allocator >, + int TargetNodeSize = 256> +class btree_map : public btree_map_container< + btree > > { + + typedef btree_map self_type; + typedef btree_map_params< + Key, Value, Compare, Alloc, TargetNodeSize> params_type; + typedef btree btree_type; + typedef btree_map_container super_type; + + public: + typedef typename btree_type::key_compare key_compare; + typedef typename btree_type::allocator_type allocator_type; + + public: + // Default constructor. + btree_map(const key_compare &comp = key_compare(), + const allocator_type &alloc = allocator_type()) + : super_type(comp, alloc) { + } + + // Copy constructor. + btree_map(const self_type &x) + : super_type(x) { + } + + // Range constructor. 
+ template + btree_map(InputIterator b, InputIterator e, + const key_compare &comp = key_compare(), + const allocator_type &alloc = allocator_type()) + : super_type(b, e, comp, alloc) { + } +}; + +template +inline void swap(btree_map &x, + btree_map &y) { + x.swap(y); +} + +// The btree_multimap class is needed mainly for its constructors. +template , + typename Alloc = std::allocator >, + int TargetNodeSize = 256> +class btree_multimap : public btree_multi_container< + btree > > { + + typedef btree_multimap self_type; + typedef btree_map_params< + Key, Value, Compare, Alloc, TargetNodeSize> params_type; + typedef btree btree_type; + typedef btree_multi_container super_type; + + public: + typedef typename btree_type::key_compare key_compare; + typedef typename btree_type::allocator_type allocator_type; + typedef typename btree_type::data_type data_type; + typedef typename btree_type::mapped_type mapped_type; + + public: + // Default constructor. + btree_multimap(const key_compare &comp = key_compare(), + const allocator_type &alloc = allocator_type()) + : super_type(comp, alloc) { + } + + // Copy constructor. + btree_multimap(const self_type &x) + : super_type(x) { + } + + // Range constructor. + template + btree_multimap(InputIterator b, InputIterator e, + const key_compare &comp = key_compare(), + const allocator_type &alloc = allocator_type()) + : super_type(b, e, comp, alloc) { + } +}; + +template +inline void swap(btree_multimap &x, + btree_multimap &y) { + x.swap(y); +} + +} // namespace btree + +#endif // UTIL_BTREE_BTREE_MAP_H__ diff --git a/c_src/gb_lru/btreelru_nif.cpp b/c_src/gb_lru/btreelru_nif.cpp new file mode 100644 index 0000000..ce0712d --- /dev/null +++ b/c_src/gb_lru/btreelru_nif.cpp @@ -0,0 +1,619 @@ +#include +#include +#include +#include "erl_nif.h" +#include "erlterm.h" +#include "lru.h" + + +using namespace std; + +namespace { /* anonymous namespace starts */ + +typedef struct _obj_resource { + bool allocated; + void *object; + ErlNifMutex *emtx; +} object_resource; + +ErlNifResourceFlags resource_flags = (ErlNifResourceFlags)(ERL_NIF_RT_CREATE | ERL_NIF_RT_TAKEOVER); + +ErlNifResourceType* lruResource; +ErlNifResourceType* iteratorResource; + +/* atoms */ +ERL_NIF_TERM atom_ok; +ERL_NIF_TERM atom_key; +ERL_NIF_TERM atom_error; +ERL_NIF_TERM atom_invalid; +ERL_NIF_TERM atom_value; +ERL_NIF_TERM atom_max_size; +ERL_NIF_TERM atom_tab; +ERL_NIF_TERM atom_lru_old; + +void lru_dtor(ErlNifEnv* env, void *lru); +void iterator_dtor(ErlNifEnv* env, void *it); + +int load(ErlNifEnv* env, void** priv_data, ERL_NIF_TERM load_info){ + lruResource = enif_open_resource_type(env, + "btreelru_nif", + "lru", + lru_dtor, + resource_flags, + NULL); + + iteratorResource = enif_open_resource_type(env, + "btreelru_nif", + "iterator", + iterator_dtor, + resource_flags, + NULL); + + atom_ok = enif_make_atom(env, "ok"); + atom_key = enif_make_atom(env, "key"); + atom_error = enif_make_atom(env, "error"); + atom_invalid = enif_make_atom(env, "invalid"); + atom_value = enif_make_atom(env, "value"); + atom_max_size = enif_make_atom(env, "max_size"); + atom_tab = enif_make_atom(env, "tab"); + atom_lru_old = enif_make_atom(env, "lru_old"); + + return 0; +} + +int reload(ErlNifEnv* env, void** priv_data, ERL_NIF_TERM load_info){ + return 0; +} + +int upgrade(ErlNifEnv* env, void** priv_data, void** old_priv_data,ERL_NIF_TERM load_info){ + return 0; +} + +void lru_dtor(ErlNifEnv* env, void* _lru_btree) { + object_resource *lru_btree = (object_resource*) _lru_btree; + if (lru_btree->allocated) + 
delete (LRUBtree*) lru_btree->object; +} + +void iterator_dtor(ErlNifEnv* env, void* _lru_iterator) { + object_resource *lru_iterator = (object_resource*) _lru_iterator; + if (lru_iterator->allocated) + delete (LRUBtree::iterator*) lru_iterator->object; +} + +void node_free(LRUBtree *bt_lru, LRUNode *node) { + enif_free_env((ErlNifEnv*)node->kvenv); + return; +} + +void node_kickout(LRUBtree *bt_lru, LRUNode *node, void *currenv) { + ErlNifEnv *env = (ErlNifEnv *) currenv; + + if (bt_lru->pid_set) { + enif_send(env, &bt_lru->pid, NULL, enif_make_tuple3(env, atom_lru_old, node->key.t, node->data.t)); + } + + return; +} + +ERL_NIF_TERM next(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { + object_resource *lru; + LRUBtree *bt_lru; + LRUNode *node; + ErlTerm key; + ErlTerm value; + + if (argc != 2) { + return enif_make_badarg(env); + } + + if (!enif_get_resource(env, argv[0], lruResource, (void **) &lru)) { + return enif_make_badarg(env); + } + + bt_lru = (LRUBtree *) lru->object; + + key.t = argv[1]; + node = bt_lru->get(key); + + if (!node) + return enif_make_tuple2(env, atom_error, atom_invalid); + + node = node->next; + if (!node) + return enif_make_tuple2(env, atom_error, atom_invalid); + + key.t = enif_make_copy(env, node->key.t); + value.t = enif_make_copy(env, node->data.t); + + return enif_make_tuple2(env, key.t, value.t); +} + +ERL_NIF_TERM prev(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { + object_resource *lru; + LRUBtree *bt_lru; + LRUNode *node; + ErlTerm key; + ErlTerm value; + + if (argc != 2) { + return enif_make_badarg(env); + } + + if (!enif_get_resource(env, argv[0], lruResource, (void **) &lru)) { + return enif_make_badarg(env); + } + + bt_lru = (LRUBtree *) lru->object; + + key.t = argv[1]; + node = bt_lru->get(key); + + if (!node) + return enif_make_tuple2(env, atom_error, atom_invalid); + + node = node->prev; + if (!node) + return enif_make_tuple2(env, atom_error, atom_invalid); + + key.t = enif_make_copy(env, node->key.t); + value.t = enif_make_copy(env, node->data.t); + + return enif_make_tuple2(env, key.t, value.t); +} + + +ERL_NIF_TERM create(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { + unsigned long max_size; + object_resource *lru; + LRUBtree *bt_lru; + ERL_NIF_TERM lru_term; + + /* get max_size */ + if (enif_get_ulong(env, argv[0], &max_size) < 1){ + return enif_make_tuple2(env, atom_error, atom_max_size); + } + + if (!(bt_lru = new LRUBtree(max_size, node_free, node_kickout))) { + return enif_make_tuple2(env, atom_error, enif_make_atom(env, "alloction")); + } + + lru = (object_resource *) enif_alloc_resource(lruResource, sizeof(object_resource)); + lru->object = bt_lru; + lru->allocated = true; + + lru_term = enif_make_resource(env, lru); + enif_release_resource(lru); + + return enif_make_tuple2(env, atom_ok, lru_term); + +} + +ERL_NIF_TERM seek(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { + object_resource *lru; + object_resource *it; + LRUBtree *bt_lru; + LRUBtree::iterator *bt_it_; + LRUBtree::iterator bt_it; + ErlTerm key; + ERL_NIF_TERM it_term; + ERL_NIF_TERM kv; + + if (!enif_get_resource(env, argv[0], lruResource, (void **) &lru)) { + return enif_make_badarg(env); + } + + key.t = argv[1]; + + bt_lru = (LRUBtree *)lru->object; + + bt_it = bt_lru->bmap.lower_bound(key); + if ( bt_it == bt_lru->bmap.end() ) { + return enif_make_tuple2(env, atom_error, atom_invalid); + } + + + bt_it_ = new LRUBtree::iterator; + *bt_it_ = bt_it; + it = (object_resource *) enif_alloc_resource(iteratorResource, sizeof(object_resource)); + 
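+  /* Hand ownership of the heap-allocated iterator to the VM: after
+     enif_make_resource() the returned term keeps the resource alive, and
+     enif_release_resource() drops our reference so iterator_dtor() runs
+     once the Erlang garbage collector reclaims the term. */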
it->object = bt_it_; + it->allocated = true; + + it_term = enif_make_resource(env, it); + enif_release_resource(it); + kv = enif_make_tuple2(env, bt_it->second->key.t, bt_it->second->data.t); + return enif_make_tuple2(env, kv, it_term); + +} + +ERL_NIF_TERM iterate_next(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { + object_resource *lru; + object_resource *it; + LRUBtree::iterator *bt_it_; + LRUBtree *bt_lru; + ERL_NIF_TERM kv; + + if (!enif_get_resource(env, argv[0], lruResource, (void **) &lru)) { + return enif_make_badarg(env); + } + + if (!enif_get_resource(env, argv[1], iteratorResource, (void **) &it)) { + return enif_make_badarg(env); + } + + bt_lru = (LRUBtree *)lru->object; + bt_it_ = (LRUBtree::iterator *) it->object; + + if (bt_it_ == NULL) + return enif_make_tuple2(env, atom_error, atom_invalid); + + (*bt_it_)++; + + if ( *bt_it_ == bt_lru->bmap.end() ) { + it->allocated = false; + delete bt_it_; + it->object = NULL; + return enif_make_tuple2(env, atom_error, atom_invalid); + } + + kv = enif_make_tuple2(env, (*bt_it_)->second->key.t, (*bt_it_)->second->data.t); + return enif_make_tuple2(env, atom_ok, kv); +} + +ERL_NIF_TERM close(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { + object_resource *lru; + LRUBtree *bt_lru; + + if (argc != 1) { + return enif_make_badarg(env); + } + + if (!enif_get_resource(env, argv[0], lruResource, (void **) &lru)) { + return enif_make_badarg(env); + } + + bt_lru = (LRUBtree *)lru->object; + lru->allocated = false; + delete bt_lru; + + + return atom_ok; +} + +ERL_NIF_TERM read(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { + object_resource *lru; + LRUBtree *bt_lru; + LRUNode *node; + ErlTerm key; + ERL_NIF_TERM kv; + + if (argc != 2) { + return enif_make_badarg(env); + } + + if (!enif_get_resource(env, argv[0], lruResource, (void **) &lru)) { + return enif_make_badarg(env); + } + + bt_lru = (LRUBtree *) lru->object; + + key.t = argv[1]; + node = bt_lru->get(key); + + if (!node) + return enif_make_tuple2(env, atom_error, atom_invalid); + + kv = enif_make_tuple2(env, enif_make_copy(env, node->key.t), enif_make_copy(env, node->data.t)); + + return enif_make_tuple2(env, atom_ok, kv); +} + +ERL_NIF_TERM remove(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { + object_resource *lru; + LRUBtree *bt_lru; + ErlTerm key; + + if (argc != 2) { + return enif_make_badarg(env); + } + + if (!enif_get_resource(env, argv[0], lruResource, (void **) &lru)) { + return enif_make_badarg(env); + } + + bt_lru = (LRUBtree *) lru->object; + + key.t = argv[1]; + bt_lru->erase(key); + + return atom_ok; +} + +ERL_NIF_TERM oldest(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { + object_resource *lru; + LRUBtree *bt_lru; + LRUNode *node; + ERL_NIF_TERM key; + ERL_NIF_TERM value; + + if (argc != 1) { + return enif_make_badarg(env); + } + + if (!enif_get_resource(env, argv[0], lruResource, (void **) &lru)) { + return enif_make_badarg(env); + } + + bt_lru = (LRUBtree *) lru->object; + + node = bt_lru->getOldest(); + + if (!node) + return enif_make_tuple2(env, atom_error, atom_invalid); + + key = enif_make_copy(env, node->key.t); + value = enif_make_copy(env, node->data.t); + + return enif_make_tuple2(env, key, value); +} + +ERL_NIF_TERM latest(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { + object_resource *lru; + LRUBtree *bt_lru; + LRUNode *node; + ERL_NIF_TERM key; + ERL_NIF_TERM value; + + if (argc != 1) { + return enif_make_badarg(env); + } + + if (!enif_get_resource(env, argv[0], lruResource, (void **) &lru)) { + return 
enif_make_badarg(env); + } + + bt_lru = (LRUBtree *) lru->object; + + // last is "last in" in the lru + node = bt_lru->getLatest(); + + if (!node) + return enif_make_tuple2(env, atom_error, atom_invalid); + + key = enif_make_copy(env, node->key.t); + value = enif_make_copy(env, node->data.t); + + return enif_make_tuple2(env, key, value); +} + +ERL_NIF_TERM last(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { + object_resource *lru; + LRUBtree *bt_lru; + LRUNode *node; + ERL_NIF_TERM key; + ERL_NIF_TERM value; + + if (argc != 1) { + return enif_make_badarg(env); + } + + if (!enif_get_resource(env, argv[0], lruResource, (void **) &lru)) { + return enif_make_badarg(env); + } + + bt_lru = (LRUBtree *) lru->object; + + node = bt_lru->bmap.rbegin()->second; + + if (!node) + return enif_make_tuple2(env, atom_error, atom_invalid); + + key = enif_make_copy(env, node->key.t); + value = enif_make_copy(env, node->data.t); + + return enif_make_tuple2(env, key, value); +} + +ERL_NIF_TERM first(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { + object_resource *lru; + LRUBtree *bt_lru; + LRUNode *node; + ERL_NIF_TERM key; + ERL_NIF_TERM value; + + if (argc != 1) { + return enif_make_badarg(env); + } + + if (!enif_get_resource(env, argv[0], lruResource, (void **) &lru)) { + return enif_make_badarg(env); + } + + bt_lru = (LRUBtree *) lru->object; + + node = bt_lru->bmap.begin()->second; + + if (!node) + return enif_make_tuple2(env, atom_error, atom_invalid); + + key = enif_make_copy(env, node->key.t); + value = enif_make_copy(env, node->data.t); + + return enif_make_tuple2(env, key, value); +} + +ERL_NIF_TERM write(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { + object_resource *lru; + LRUBtree *bt_lru; + ErlTerm key; + ErlTerm value; + ErlNifEnv *kv_env; + size_t size; + + if (argc != 3) { + return enif_make_badarg(env); + } + + if (!enif_get_resource(env, argv[0], lruResource, (void **) &lru)) { + return enif_make_badarg(env); + } + + bt_lru = (LRUBtree *) lru->object; + + kv_env = enif_alloc_env(); + key.t = enif_make_copy(kv_env, argv[1]); + value.t = enif_make_copy(kv_env, argv[2]); + + /* do not use the size of term + size = enif_size_term(key.t); + size += enif_size_term(value.t); + */ + + /* size based on entries */ + size = 1; + + bt_lru->put(key, value, kv_env, env, size); + + return atom_ok; +} + +ERL_NIF_TERM register_pid(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { + object_resource *lru; + LRUBtree *bt_lru; + + if (argc != 2) { + return enif_make_badarg(env); + } + + if (!enif_get_resource(env, argv[0], lruResource, (void **) &lru)) { + return enif_make_badarg(env); + } + bt_lru = (LRUBtree *) lru->object; + + if (!enif_get_local_pid(env, argv[1], &(bt_lru->pid))) { + return enif_make_badarg(env); + } + bt_lru->pid_set = true; + + return atom_ok; +} + +ERL_NIF_TERM unregister_pid(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { + object_resource *lru; + LRUBtree *bt_lru; + + if (argc != 1) { + return enif_make_badarg(env); + } + + if (!enif_get_resource(env, argv[0], lruResource, (void **) &lru)) { + return enif_make_badarg(env); + } + bt_lru = (LRUBtree *) lru->object; + + bt_lru->pid_set = false; + + return atom_ok; +} + +ERL_NIF_TERM get_registered_pid(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { + object_resource *lru; + LRUBtree *bt_lru; + + if (argc != 1) { + return enif_make_badarg(env); + } + + if (!enif_get_resource(env, argv[0], lruResource, (void **) &lru)) { + return enif_make_badarg(env); + } + bt_lru = (LRUBtree *) lru->object; + + 
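+  /* pid_set is only flipped by register_pid/2 and unregister_pid/1; with no
+     registered pid we return {error, invalid} rather than an uninitialized pid. */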
if (!bt_lru->pid_set) { + return enif_make_tuple2(env, atom_error, atom_invalid); + } + + return enif_make_pid(env, &(bt_lru->pid)); +} + +ERL_NIF_TERM get_size(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { + object_resource *lru; + LRUBtree *bt_lru; + + if (argc != 1) { + return enif_make_badarg(env); + } + + if (!enif_get_resource(env, argv[0], lruResource, (void **) &lru)) { + return enif_make_badarg(env); + } + bt_lru = (LRUBtree *) lru->object; + + return enif_make_ulong(env, bt_lru->getSize()); +} + +ERL_NIF_TERM get_max_size(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { + object_resource *lru; + LRUBtree *bt_lru; + + if (argc != 1) { + return enif_make_badarg(env); + } + + if (!enif_get_resource(env, argv[0], lruResource, (void **) &lru)) { + return enif_make_badarg(env); + } + bt_lru = (LRUBtree *) lru->object; + + return enif_make_ulong(env, bt_lru->getMaxSize()); +} + +ERL_NIF_TERM set_max_size(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { + object_resource *lru; + unsigned long max_size; + LRUBtree *bt_lru; + + if (argc != 2) { + return enif_make_badarg(env); + } + + if (!enif_get_resource(env, argv[0], lruResource, (void **) &lru)) { + return enif_make_badarg(env); + } + /* get max_size */ + if (enif_get_ulong(env, argv[1], &max_size) < 1){ + return enif_make_tuple2(env, atom_error, atom_max_size); + } + + bt_lru = (LRUBtree *) lru->object; + + bt_lru->setMaxSize(max_size); + + return atom_ok; +} + +ErlNifFunc nif_funcs[] = { + {"create", 1, create}, + {"close", 1, close, ERL_NIF_DIRTY_JOB_IO_BOUND}, + {"register_pid", 2, register_pid}, + {"unregister_pid", 1, unregister_pid}, + {"get_registered_pid", 1, get_registered_pid}, + {"get_size", 1, get_size}, + {"get_max_size", 1, get_max_size}, + {"set_max_size", 2, set_max_size}, + {"oldest", 1, oldest}, + {"latest", 1, latest}, + {"last", 1, last}, + {"first", 1, first}, + {"read", 2, read}, + {"next", 2, next}, + {"prev", 2, prev}, + {"seek", 2, seek}, + {"iterate_next", 2, iterate_next}, + {"remove", 2, remove}, + {"write", 3, write} +}; +} /* anonymouse namespace ends */ + + +ERL_NIF_INIT(btree_lru, nif_funcs, load, reload, upgrade, NULL) diff --git a/c_src/gb_lru/erlterm.h b/c_src/gb_lru/erlterm.h new file mode 100644 index 0000000..42627c8 --- /dev/null +++ b/c_src/gb_lru/erlterm.h @@ -0,0 +1,71 @@ +#include "erl_nif.h" + +class ErlTerm { + public: + ERL_NIF_TERM t; + + static void *operator new(size_t size) { + return enif_alloc(size); + } + + static void operator delete(void *block) { + enif_free(block); + } + + bool operator< (const ErlTerm &term) { + if (enif_compare(t, term.t) < 0) + return true; + return false; + } + + bool operator< (ErlTerm &term) { + if (enif_compare(t, term.t) < 0) + return true; + return false; + } + + bool operator> (const ErlTerm &term) { + if (enif_compare(t, term.t) > 0) + return true; + return false; + } + + bool operator> (ErlTerm &term) { + if (enif_compare(t, term.t) > 0) + return true; + return false; + } + + bool operator== (const ErlTerm &term) { + if (enif_compare(t, term.t) == 0) + return true; + return false; + } + + bool operator== (ErlTerm &term) { + if (enif_compare(t, term.t) == 0) + return true; + return false; + } +}; + +inline bool operator < (const ErlTerm &a, const ErlTerm &b) { + if (enif_compare(a.t, b.t) < 0) + return true; + return false; +} + + +#if 0 +// extend std::hash to understand ErlTerm used by hashmap not btree +namespace std { + template <> + struct hash + { + size_t operator()(const ErlTerm& term) const + { + return (size_t) 
enif_hash_term(term.t); + } + }; +} +#endif diff --git a/c_src/gb_lru/lru.h b/c_src/gb_lru/lru.h new file mode 100644 index 0000000..47567e4 --- /dev/null +++ b/c_src/gb_lru/lru.h @@ -0,0 +1,266 @@ +#include "btree_map.h" +#include +#include +#include "murmurhash2.h" +#include "binary.h" +#include "erl_nif.h" + +// extend std::hash to understand Binary type +namespace std { + template <> + struct hash + { + size_t operator()(const Binary& b) const + { + return MurmurHash2(b.bin, b.size, 4242); + } + }; +} + +template +struct LRUNode +{ + K key; + V data; + void *kvenv; + LRUNode *prev; + LRUNode *next; + size_t size; + LRUNode(void *kvenv = NULL, size_t size=0) : kvenv(kvenv), prev(NULL), next(NULL), size(size) { } + +/* + static void *LRUNode::operator new(size_t size) { + return enif_alloc(size); + } + + static void operator delete(void *block) { + enif_free(block); + } +*/ + + void printChain() { + LRUNode* node; + int i=11; + std::cout << "("; + for(node = this; node && i; node = node->next, i--) { + std::cout << node->key << " -> "; + } + if (node) { + std::cout << " loop detection end "; + } else { + std::cout << " end "; + } + std::cout << ")" << std::endl; + } + + void printNextPrevKey() { + std::cout << "("; + printNextKey(); + printPrevKey(); + std::cout << ")"; + } + + void printNextKey() { + if (next) { + std::cout << "next key " << next->key << " "; + } + } + + void printPrevKey() { + if (prev) { + std::cout << "prev key " << prev->key << " "; + } + } +}; + +template +class LRUBtree { + private: + LRUNode *oldest; + LRUNode *latest; + unsigned long size; + unsigned long max_size; + void (*node_free)(LRUBtree *lru, LRUNode *node); + void (*node_kickout)(LRUBtree *lru, LRUNode *node, void *call_env); + typedef btree::btree_map*> LRUBtree_map; + + public: + LRUBtree_map bmap; + bool pid_set = false; + ErlNifPid pid; + typedef typename LRUBtree_map::iterator iterator; + typedef typename LRUBtree_map::reverse_iterator reverse_iterator; + + void printLatest() { + if (latest) { + std::cout << " latest " << latest->key; + } else { + std::cout << " no data in lru "; + } + } + + private: + LRUNode* erase(LRUNode *node) { + if (node->next) { + node->next->prev = node->prev; + } + if (node->prev) { + node->prev->next = node->next; + } + + if (node == oldest) { + oldest = node->prev; + } + + if (node == latest) { + latest = node->next; + } + + if (node_free) { + node_free(this, node); + } + + node->next = NULL; + node->prev = NULL; + return node; + } + + void printOldest() { + if(oldest) { + std::cout << " oldest " << oldest->key; + } else { + std::cout << " no data in lru "; + } + } + + void check_size(void *call_env) { + if (size > max_size) { + if (oldest) { // remove check if oldest exist and rely on max_size always being positive + if (node_kickout) + node_kickout(this, oldest, call_env); + erase(oldest->key); + } + } + } + +#define SIZE_100MB 100*1024*1024 + public: + LRUBtree(unsigned long max_size = SIZE_100MB, + void (*node_free)(LRUBtree *lru, LRUNode *node) = NULL, + void (*node_kickout)(LRUBtree *lru, LRUNode *node, void *call_env) = NULL) + : oldest(NULL), latest(NULL), size(0), max_size(max_size), node_free(node_free), + node_kickout(node_kickout) { } + + ~LRUBtree() { + LRUNode *node; + LRUNode *next; + node = latest; + while(node) { + if (node_free) { + node_free(this, node); + } + next = node->next; + delete node; + node = next; + } + } + + void printSize() { + std::cout << "size " << size << std::endl; + } + + unsigned long getSize() { + return size; + } + + unsigned long 
getMaxSize() { + return max_size; + } + + void setMaxSize(unsigned long max_size) { + this->max_size = max_size; + } + + void erase(K key) { + LRUNode *node; + if ((node = bmap[key])) { + erase(node); + bmap.erase(key); + size -= node->size; + delete node; + } + } + + inline void put(K key, V data, + void *kvenv = NULL, void *call_env = NULL, + size_t size = 1) { + LRUNode *node; + + this->size += size; + check_size(call_env); + + // overwrite already existing key + if ((node = bmap[key])) { + this->size -= node->size; + erase(node); + node->kvenv = kvenv; + node->next = latest; + node->size = size; + if (node->next) { + node->next->prev = node; + } + if (!oldest) { + oldest = node; + } + latest = node; + node->key = key; + node->data = data; + } + + else if (!oldest) { + node = new LRUNode; + node->key = key; + node->data = data; + node->kvenv = kvenv; + node->size = size; + oldest = node; + latest = node; + bmap[node->key] = node; + } + + else { + node = new LRUNode; + node->key = key; + node->data = data; + node->kvenv = kvenv; + node->size = size; + latest->prev = node; + node->next = latest; + latest = node; + bmap[node->key] = node; + } + } + + LRUNode* get(K key) { + return bmap[key]; + } + + LRUNode* getOldest() { + return oldest; + } + + LRUNode* getLatest() { + return latest; + } + + LRUNode* getNext(LRUNode *node) { + return node->next; + } + + LRUNode* getPrev(LRUNode *node) { + return node->prev; + + } +}; + + diff --git a/c_src/gb_lru/murmurhash2.h b/c_src/gb_lru/murmurhash2.h new file mode 100644 index 0000000..12542a5 --- /dev/null +++ b/c_src/gb_lru/murmurhash2.h @@ -0,0 +1,73 @@ +//----------------------------------------------------------------------------- +// MurmurHash2, by Austin Appleby + +// Note - This code makes a few assumptions about how your machine behaves - + +// 1. We can read a 4-byte value from any address without crashing +// 2. sizeof(int) == 4 + +// And it has a few limitations - + +// 1. It will not wo +// +// rk incrementally. +// 2. It will not produce the same results on little-endian and big-endian +// machines. + +unsigned int MurmurHash2 ( const void * key, int len, unsigned int seed ) +{ + // 'm' and 'r' are mixing constants generated offline. + // They're not really 'magic', they just happen to work well. + + const unsigned int m = 0x5bd1e995; + const int r = 24; + + // Initialize the hash to a 'random' value + + unsigned int h = seed ^ len; + + // Mix 4 bytes at a time into the hash + + const unsigned char * data = (const unsigned char *)key; + + while(len >= 4) + { + + unsigned int k = *(unsigned int *)data; + + + k *= m; + k ^= k >> r; + k *= m; + + h *= m; + h ^= k; + + data += 4; + len -= 4; + } + + // Handle the last few bytes of the input array + + switch(len) + { + case 3: h ^= data[2] << 16; + case 2: h ^= data[1] << 8; + case 1: h ^= data[0]; + h *= m; + }; + + // Do a few final mixes of t + // + // + // + // he hash to ensure the last few + // bytes are well-incorporated. + + h ^= h >> 13; + h *= m; + h ^= h >> 15; + + return h; +} + diff --git a/c_src/gb_lru/rebar.config b/c_src/gb_lru/rebar.config new file mode 100644 index 0000000..0ffcccf --- /dev/null +++ b/c_src/gb_lru/rebar.config @@ -0,0 +1,7 @@ +{port_specs, [ + {"../../priv/btreelru_nif.so", ["btreelru_nif.cpp"]} +]}. 
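Taken together, the handlers and the nif_funcs table above give btree_lru a small Erlang-facing API. The sketch below (a hypothetical lru_demo/0) shows the intended call pattern as far as it can be read from this diff; the argument and return shapes of create/1 and close/1 are not visible here, so {ok, Ref} and the capacity argument are assumptions.

    %% Hypothetical usage of the btree_lru NIF (create/1 and close/1 shapes assumed).
    lru_demo() ->
        {ok, Ref} = btree_lru:create(10000),            %% assumed: returns a resource handle
        ok = btree_lru:write(Ref, key1, <<"v1">>),      %% write/3 always returns ok
        ok = btree_lru:write(Ref, key2, <<"v2">>),
        {key2, <<"v2">>} = btree_lru:latest(Ref),       %% most recently written entry
        2 = btree_lru:get_size(Ref),                    %% write/3 counts one unit per entry
        ok = btree_lru:set_max_size(Ref, 5000),
        _ = btree_lru:close(Ref),
        ok.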
+ + + + diff --git a/c_src/native_array/native_array_nif.c b/c_src/native_array/native_array_nif.c new file mode 100644 index 0000000..6ef3b5b --- /dev/null +++ b/c_src/native_array/native_array_nif.c @@ -0,0 +1,90 @@ +#include "erl_nif.h" + +#define A_OK(env) enif_make_atom(env, "ok") +#define assert_badarg(S, Env) if (! S) { return enif_make_badarg(env); } + +static ErlNifResourceType* array_handle = NULL; + +static void array_handle_cleanup(ErlNifEnv* env, void* arg) {} + +static int load(ErlNifEnv* env, void** priv, ERL_NIF_TERM load_info) +{ + ErlNifResourceFlags flags = ERL_NIF_RT_CREATE | ERL_NIF_RT_TAKEOVER; + array_handle = enif_open_resource_type(env, "native_array_nif", "array_handle", + &array_handle_cleanup, flags, 0); + // 用于存储指针的数组, 最多1000个array + *priv = enif_alloc(1000 * sizeof(void*)); + return 0; +} + +static void unload(ErlNifEnv* env, void* priv) +{ + enif_free(priv); +} + +static ERL_NIF_TERM new_nif(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + // 取参数 + int refindex; + assert_badarg(enif_get_int(env, argv[0], &refindex), env); + // 取参数length + unsigned long length; + assert_badarg(enif_get_ulong(env, argv[1], &length), env); + // 分配内存 + // unsigned char* ref = enif_alloc_resource(array_handle, length); + unsigned char* ref = enif_alloc(length); + // 保存指针 + *((unsigned char**)enif_priv_data(env) + refindex) = ref; + return A_OK(env); +} + +static ERL_NIF_TERM get_nif(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + // 取参数ref + int refindex; + assert_badarg(enif_get_int(env, argv[0], &refindex), env); + unsigned char* ref = *((unsigned char**)enif_priv_data(env) + refindex); + assert_badarg(ref, env); + // 取参数offset + unsigned long offset; + assert_badarg(enif_get_ulong(env, argv[1], &offset), env); + return enif_make_int(env, (int)(*(ref + offset - 1))); +} + +static ERL_NIF_TERM put_nif(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + // 取参数ref + int refindex; + assert_badarg(enif_get_int(env, argv[0], &refindex), env); + unsigned char* ref = *((unsigned char**)enif_priv_data(env) + refindex); + // 取参数offset + unsigned long offset; + assert_badarg(enif_get_ulong(env, argv[1], &offset), env); + // 取参数newval + unsigned int newval; + assert_badarg(enif_get_uint(env, argv[2], &newval), env); + // 赋值 + *(ref + offset - 1) = (unsigned char)newval; + return A_OK(env); +} + +static ERL_NIF_TERM delete_nif(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + // 取参数ref + int refindex; + assert_badarg(enif_get_int(env, argv[0], &refindex), env); + unsigned char* ref = *((unsigned char**)enif_priv_data(env) + refindex); + //enif_release_resource(ref); + enif_free(ref); + return A_OK(env); +} + +static ErlNifFunc nif_funcs[] = { + {"new", 2, new_nif}, + {"get", 2, get_nif}, + {"put", 3, put_nif}, + {"delete", 1, delete_nif}, +}; + +ERL_NIF_INIT(native_array, nif_funcs, &load, NULL, NULL, &unload) + diff --git a/c_src/native_array/rebar.config b/c_src/native_array/rebar.config new file mode 100644 index 0000000..c6f912b --- /dev/null +++ b/c_src/native_array/rebar.config @@ -0,0 +1,7 @@ +{port_specs, [ + {"../../priv/native_array_nif.so", ["*.c"]} +]}. + + + + diff --git a/c_src/neural/NeuralTable.cpp b/c_src/neural/NeuralTable.cpp new file mode 100644 index 0000000..b8df3ce --- /dev/null +++ b/c_src/neural/NeuralTable.cpp @@ -0,0 +1,905 @@ +#include "NeuralTable.h" + +/* !!!! A NOTE ON KEYS !!!! + * Keys should be integer values passed from the erlang emulator, + * and should be generated by a hashing function. 
There is no easy + * way to hash an erlang term from a NIF, but ERTS is more than + * capable of doing so. + * + * Additionally, this workaround means that traditional collision + * handling mechanisms for hash tables will not work without + * special consideration. For instance, to compare keys as you + * would by storing linked lists, you must retrieve the stored + * tuple and call enif_compare or enif_is_identical on the key + * elements of each tuple. + */ + +table_set NeuralTable::tables; +atomic NeuralTable::running(true); +ErlNifMutex *NeuralTable::table_mutex; + +NeuralTable::NeuralTable(unsigned int kp) { + for (int i = 0; i < BUCKET_COUNT; ++i) { + ErlNifEnv *env = enif_alloc_env(); + env_buckets[i] = env; + locks[i] = enif_rwlock_create("neural_table"); + garbage_cans[i] = 0; + reclaimable[i] = enif_make_list(env, 0); + } + + start_gc(); + start_batch(); + + key_pos = kp; +} + +NeuralTable::~NeuralTable() { + stop_batch(); + stop_gc(); + for (int i = 0; i < BUCKET_COUNT; ++i) { + enif_rwlock_destroy(locks[i]); + enif_free_env(env_buckets[i]); + } +} + +/* ================================================================ + * MakeTable + * Allocates a new table, assuming a unique atom identifier. This + * table is stored in a static container. All interactions with + * the table must be performed through the static class API. + */ +ERL_NIF_TERM NeuralTable::MakeTable(ErlNifEnv *env, ERL_NIF_TERM name, ERL_NIF_TERM key_pos) { + char *atom; + string key; + unsigned int len = 0, + pos = 0; + ERL_NIF_TERM ret; + + // Allocate space for the name of the table + enif_get_atom_length(env, name, &len, ERL_NIF_LATIN1); + atom = (char*)enif_alloc(len + 1); + + // Fetch the value of the atom and store it in a string (because I can, that's why) + enif_get_atom(env, name, atom, len + 1, ERL_NIF_LATIN1); + key = atom; + + // Deallocate that space + enif_free(atom); + + // Get the key position value + enif_get_uint(env, key_pos, &pos); + + enif_mutex_lock(table_mutex); + if (NeuralTable::tables.find(key) != NeuralTable::tables.end()) { + // Table already exists? Bad monkey! + ret = enif_make_badarg(env); + } else { + // All good. Make the table + NeuralTable::tables[key] = new NeuralTable(pos); + ret = enif_make_atom(env, "ok"); + } + enif_mutex_unlock(table_mutex); + + return ret; +} + +/* ================================================================ + * GetTable + * Retrieves a handle to the table referenced by name, assuming + * such a table exists. If not, throw badarg. + */ +NeuralTable* NeuralTable::GetTable(ErlNifEnv *env, ERL_NIF_TERM name) { + char *atom = NULL; + string key; + unsigned len = 0; + NeuralTable *ret = NULL; + table_set::const_iterator it; + + // Allocate space for the table name + enif_get_atom_length(env, name, &len, ERL_NIF_LATIN1); + atom = (char*)enif_alloc(len + 1); + + // Copy the table name into a string + enif_get_atom(env, name, atom, len + 1, ERL_NIF_LATIN1); + key = atom; + + // Deallocate that space + enif_free(atom); + + // Look for the table and return its pointer if found + it = NeuralTable::tables.find(key); + if (it != NeuralTable::tables.end()) { + ret = it->second; + } + + return ret; +} + +/* ================================================================ + * Insert + * Inserts a tuple into the table with key. + */ +ERL_NIF_TERM NeuralTable::Insert(ErlNifEnv *env, ERL_NIF_TERM table, ERL_NIF_TERM key, ERL_NIF_TERM object) { + NeuralTable *tb; + ERL_NIF_TERM ret, old; + unsigned long int entry_key = 0; + + // Grab table or bail. 
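+ // (The key itself is a pre-hashed unsigned integer produced on the Erlang side; see the note at the top of this file.)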
+ tb = GetTable(env, table); + if (tb == NULL) { + return enif_make_badarg(env); + } + + // Get key value. + enif_get_ulong(env, key, &entry_key); + + // Lock the key. + tb->rwlock(entry_key); + + // Attempt to lookup the value. If nonempty, increment + // discarded term counter and return a copy of the + // old value + if (tb->find(entry_key, old)) { + tb->reclaim(entry_key, old); + ret = enif_make_tuple2(env, enif_make_atom(env, "ok"), enif_make_copy(env, old)); + } else { + ret = enif_make_atom(env, "ok"); + } + + // Write that shit out + tb->put(entry_key, object); + + // Oh, and unlock the key if you would. + tb->rwunlock(entry_key); + + return ret; +} + +/* ================================================================ + * InsertNew + * Inserts a tuple into the table with key, assuming there is not + * a value with key already. Returns true if there was no value + * for key, or false if there was. + */ +ERL_NIF_TERM NeuralTable::InsertNew(ErlNifEnv *env, ERL_NIF_TERM table, ERL_NIF_TERM key, ERL_NIF_TERM object) { + NeuralTable *tb; + ERL_NIF_TERM ret, old; + unsigned long int entry_key = 0; + + // Get the table or bail + tb = GetTable(env, table); + if (tb == NULL) { + return enif_make_badarg(env); + } + + // Get the key value + enif_get_ulong(env, key, &entry_key); + + // Get write lock for the key + tb->rwlock(entry_key); + + if (tb->find(entry_key, old)) { + // Key was found. Return false and do not insert + ret = enif_make_atom(env, "false"); + } else { + // Key was not found. Return true and insert + tb->put(entry_key, object); + ret = enif_make_atom(env, "true"); + } + + // Release write lock for the key + tb->rwunlock(entry_key); + + return ret; +} + +/* ================================================================ + * Increment + * Processes a list of update operations. Each operation specifies + * a position in the stored tuple to update and an integer to add + * to it. + */ +ERL_NIF_TERM NeuralTable::Increment(ErlNifEnv *env, ERL_NIF_TERM table, ERL_NIF_TERM key, ERL_NIF_TERM ops) { + NeuralTable *tb; + ERL_NIF_TERM ret, old; + ERL_NIF_TERM it; + unsigned long int entry_key = 0; + + // Get table handle or bail + tb = GetTable(env, table); + if (tb == NULL) { + return enif_make_badarg(env); + } + + // Get key value + enif_get_ulong(env, key, &entry_key); + + // Acquire read/write lock for key + tb->rwlock(entry_key); + + // Try to read the value as it is + if (tb->find(entry_key, old)) { + // Value exists + ERL_NIF_TERM op_cell; + const ERL_NIF_TERM *tb_tpl; + const ERL_NIF_TERM *op_tpl; + ERL_NIF_TERM *new_tpl; + ErlNifEnv *bucket_env = tb->get_env(entry_key); + unsigned long int pos = 0; + long int incr = 0; + unsigned int ops_length = 0; + int op_arity = 0, + tb_arity = 0; + + // Expand tuple to work on elements + enif_get_tuple(bucket_env, old, &tb_arity, &tb_tpl); + + // Allocate space for a copy the contents of the table + // tuple and copy it in. All changes are to be made to + // the copy of the tuple. + new_tpl = (ERL_NIF_TERM*)enif_alloc(sizeof(ERL_NIF_TERM) * tb_arity); + memcpy(new_tpl, tb_tpl, sizeof(ERL_NIF_TERM) * tb_arity); + + // Create empty list cell for return value. 
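+ // Each op below prepends its updated value, so the caller receives the results in reverse op order.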
+ ret = enif_make_list(env, 0); + + // Set iterator to first cell of ops + it = ops; + while(!enif_is_empty_list(env, it)) { + long int value = 0; + enif_get_list_cell(env, it, &op_cell, &it); // op_cell = hd(it), it = tl(it) + enif_get_tuple(env, op_cell, &op_arity, &op_tpl); // op_arity = tuple_size(op_cell), op_tpl = [TplPos1, TplPos2] + enif_get_ulong(env, op_tpl[0], &pos); // pos = (uint64)op_tpl[0] + enif_get_long(env, op_tpl[1], &incr); // incr = (uint64)op_tpl[1] + + // Is the operation trying to modify a nonexistant + // position? + if (pos <= 0 || pos > tb_arity) { + ret = enif_make_badarg(env); + goto bailout; + } + + // Is the operation trying to add to a value that's + // not a number? + if (!enif_is_number(bucket_env, new_tpl[pos - 1])) { + ret = enif_make_badarg(env); + goto bailout; + } + + // Update the value stored in the tuple. + enif_get_long(env, new_tpl[pos - 1], &value); + tb->reclaim(entry_key, new_tpl[pos - 1]); + new_tpl[pos - 1] = enif_make_long(bucket_env, value + incr); + + // Copy the new value to the head of the return list + ret = enif_make_list_cell(env, enif_make_copy(env, new_tpl[pos - 1]), ret); + } + + tb->put(entry_key, enif_make_tuple_from_array(bucket_env, new_tpl, tb_arity)); + + // Bailout allows cancelling the update opertion + // in case something goes wrong. It must always + // come after tb->put and before enif_free and + // rwunlock +bailout: + enif_free(new_tpl); + } else { + ret = enif_make_badarg(env); + } + // Release the rwlock for entry_key + tb->rwunlock(entry_key); + + return ret; +} + +/* ================================================================ + * Unshift + * Processes a list of update operations. Each update operation is + * a tuple specifying the position of a list in the stored value to + * update and a list of values to append. Elements are shifted from + * the input list to the stored list, so: + * + * unshift([a,b,c,d]) results in [d,c,b,a] + */ +ERL_NIF_TERM NeuralTable::Unshift(ErlNifEnv *env, ERL_NIF_TERM table, ERL_NIF_TERM key, ERL_NIF_TERM ops) { + NeuralTable *tb; + ERL_NIF_TERM ret, old, it; + unsigned long int entry_key; + ErlNifEnv *bucket_env; + + tb = GetTable(env, table); + if (tb == NULL) { + return enif_make_badarg(env); + } + + enif_get_ulong(env, key, &entry_key); + + tb->rwlock(entry_key); + bucket_env = tb->get_env(entry_key); + if (tb->find(entry_key, old)) { + const ERL_NIF_TERM *old_tpl, + *op_tpl; + ERL_NIF_TERM *new_tpl; + int tb_arity = 0, + op_arity = 0; + unsigned long pos = 0; + unsigned int new_length = 0; + ERL_NIF_TERM op, + unshift, + copy_it, + copy_val; + + enif_get_tuple(bucket_env, old, &tb_arity, &old_tpl); + new_tpl = (ERL_NIF_TERM*)enif_alloc(sizeof(ERL_NIF_TERM) * tb_arity); + memcpy(new_tpl, old_tpl, sizeof(ERL_NIF_TERM) * tb_arity); + + it = ops; + ret = enif_make_list(env, 0); + + while (!enif_is_empty_list(env, it)) { + // Examine the operation. + enif_get_list_cell(env, it, &op, &it); // op = hd(it), it = tl(it) + enif_get_tuple(env, op, &op_arity, &op_tpl); // op_arity = tuple_size(op), op_tpl = [TplPos1, TplPos2] + enif_get_ulong(env, op_tpl[0], &pos); // Tuple position to modify + unshift = op_tpl[1]; // Values to unshfit + + // Argument 1 of the operation tuple is position; + // make sure it's within the bounds of the tuple + // in the table. 
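+ // (Erlang positions are 1-based; the element itself is accessed as new_tpl[pos - 1].)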
+ if (pos <= 0 || pos > tb_arity) { + ret = enif_make_badarg(env); + goto bailout; + } + + // Make sure we were passed a list of things to push + // onto the posth element of the entry + if (!enif_is_list(env, unshift)) { + ret = enif_make_badarg(env); + } + + // Now iterate over unshift, moving its values to + // the head of new_tpl[pos - 1] one by one + copy_it = unshift; + while (!enif_is_empty_list(env, copy_it)) { + enif_get_list_cell(env, copy_it, ©_val, ©_it); + new_tpl[pos - 1] = enif_make_list_cell(bucket_env, enif_make_copy(bucket_env, copy_val), new_tpl[pos - 1]); + } + enif_get_list_length(bucket_env, new_tpl[pos - 1], &new_length); + ret = enif_make_list_cell(env, enif_make_uint(env, new_length), ret); + } + + tb->put(entry_key, enif_make_tuple_from_array(bucket_env, new_tpl, tb_arity)); + +bailout: + enif_free(new_tpl); + } else { + ret = enif_make_badarg(env); + } + tb->rwunlock(entry_key); + + return ret; +} + +ERL_NIF_TERM NeuralTable::Shift(ErlNifEnv *env, ERL_NIF_TERM table, ERL_NIF_TERM key, ERL_NIF_TERM ops) { + NeuralTable *tb; + ERL_NIF_TERM ret, old, it; + unsigned long int entry_key; + ErlNifEnv *bucket_env; + + tb = GetTable(env, table); + if (tb == NULL) { + return enif_make_badarg(env); + } + + enif_get_ulong(env, key, &entry_key); + + tb->rwlock(entry_key); + bucket_env = tb->get_env(entry_key); + if (tb->find(entry_key, old)) { + const ERL_NIF_TERM *old_tpl; + const ERL_NIF_TERM *op_tpl; + ERL_NIF_TERM *new_tpl; + int tb_arity = 0, + op_arity = 0; + unsigned long pos = 0, + count = 0; + ERL_NIF_TERM op, list, shifted, reclaim; + + enif_get_tuple(bucket_env, old, &tb_arity, &old_tpl); + new_tpl = (ERL_NIF_TERM*)enif_alloc(tb_arity * sizeof(ERL_NIF_TERM)); + memcpy(new_tpl, old_tpl, sizeof(ERL_NIF_TERM) * tb_arity); + + it = ops; + ret = enif_make_list(env, 0); + reclaim = enif_make_list(bucket_env, 0); + + while(!enif_is_empty_list(env, it)) { + enif_get_list_cell(env, it, &op, &it); + enif_get_tuple(env, op, &op_arity, &op_tpl); + enif_get_ulong(env, op_tpl[0], &pos); + enif_get_ulong(env, op_tpl[1], &count); + + if (pos <= 0 || pos > tb_arity) { + ret = enif_make_badarg(env); + goto bailout; + } + + if (!enif_is_list(env, new_tpl[pos -1])) { + ret = enif_make_badarg(env); + goto bailout; + } + + shifted = enif_make_list(env, 0); + if (count > 0) { + ERL_NIF_TERM copy_it = new_tpl[pos - 1], + val; + int i = 0; + while (i < count && !enif_is_empty_list(bucket_env, copy_it)) { + enif_get_list_cell(bucket_env, copy_it, &val, ©_it); + ++i; + shifted = enif_make_list_cell(env, enif_make_copy(env, val), shifted); + reclaim = enif_make_list_cell(env, val, reclaim); + } + new_tpl[pos - 1] = copy_it; + } else if (count < 0) { + ERL_NIF_TERM copy_it = new_tpl[pos - 1], + val; + while (!enif_is_empty_list(bucket_env, copy_it)) { + enif_get_list_cell(bucket_env, copy_it, &val, ©_it); + shifted = enif_make_list_cell(env, enif_make_copy(env, val), shifted); + reclaim = enif_make_list_cell(env, val, reclaim); + } + new_tpl[pos - 1] = copy_it; + } + ret = enif_make_list_cell(env, shifted, ret); + } + + tb->put(entry_key, enif_make_tuple_from_array(bucket_env, new_tpl, tb_arity)); + tb->reclaim(entry_key, reclaim); +bailout: + enif_free(new_tpl); + } else { + ret = enif_make_badarg(env); + } + tb->rwunlock(entry_key); + + return ret; +} + +ERL_NIF_TERM NeuralTable::Swap(ErlNifEnv *env, ERL_NIF_TERM table, ERL_NIF_TERM key, ERL_NIF_TERM ops) { + NeuralTable *tb; + ERL_NIF_TERM ret, old, it; + unsigned long int entry_key; + ErlNifEnv *bucket_env; + + tb = GetTable(env, table); + 
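+ // Swap returns the previous element at each requested position and stores a copy of the replacement term.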
if (tb == NULL) { + return enif_make_badarg(env); + } + + enif_get_ulong(env, key, &entry_key); + + tb->rwlock(entry_key); + bucket_env = tb->get_env(entry_key); + if (tb->find(entry_key, old)) { + const ERL_NIF_TERM *old_tpl; + const ERL_NIF_TERM *op_tpl; + ERL_NIF_TERM *new_tpl; + int tb_arity = 0, + op_arity = 0; + unsigned long pos = 0; + ERL_NIF_TERM op, list, shifted, reclaim; + + enif_get_tuple(bucket_env, old, &tb_arity, &old_tpl); + new_tpl = (ERL_NIF_TERM*)enif_alloc(tb_arity * sizeof(ERL_NIF_TERM)); + memcpy(new_tpl, old_tpl, sizeof(ERL_NIF_TERM) * tb_arity); + + it = ops; + ret = enif_make_list(env, 0); + reclaim = enif_make_list(bucket_env, 0); + + while (!enif_is_empty_list(env, it)) { + enif_get_list_cell(env, it, &op, &it); + enif_get_tuple(env, op, &op_arity, &op_tpl); + enif_get_ulong(env, op_tpl[0], &pos); + + if (pos <= 0 || pos > tb_arity) { + ret = enif_make_badarg(env); + goto bailout; + } + + reclaim = enif_make_list_cell(bucket_env, new_tpl[pos - 1], reclaim); + ret = enif_make_list_cell(env, enif_make_copy(env, new_tpl[pos -1]), ret); + new_tpl[pos - 1] = enif_make_copy(bucket_env, op_tpl[1]); + } + + tb->put(entry_key, enif_make_tuple_from_array(bucket_env, new_tpl, tb_arity)); + tb->reclaim(entry_key, reclaim); +bailout: + enif_free(new_tpl); + } else { + ret = enif_make_badarg(env); + } + tb->rwunlock(entry_key); + + return ret; +} + +ERL_NIF_TERM NeuralTable::Delete(ErlNifEnv *env, ERL_NIF_TERM table, ERL_NIF_TERM key) { + NeuralTable *tb; + ERL_NIF_TERM val, ret; + unsigned long int entry_key; + + tb = GetTable(env, table); + if (tb == NULL) { return enif_make_badarg(env); } + + enif_get_ulong(env, key, &entry_key); + + tb->rwlock(entry_key); + + if (tb->erase(entry_key, val)) { + tb->reclaim(entry_key, val); + ret = enif_make_copy(env, val); + } else { + ret = enif_make_atom(env, "undefined"); + } + + tb->rwunlock(entry_key); + + return ret; +} + +ERL_NIF_TERM NeuralTable::Empty(ErlNifEnv *env, ERL_NIF_TERM table) { + NeuralTable *tb; + int n = 0; + + tb = GetTable(env, table); + if (tb == NULL) { return enif_make_badarg(env); } + + // First, lock EVERY bucket. We want this to be an isolated operation. + for (n = 0; n < BUCKET_COUNT; ++n) { + enif_rwlock_rwlock(tb->locks[n]); + } + + // Now clear the table + for (n = 0; n < BUCKET_COUNT; ++n) { + tb->hash_buckets[n].clear(); + enif_clear_env(tb->env_buckets[n]); + tb->garbage_cans[n] = 0; + tb->reclaimable[n] = enif_make_list(tb->env_buckets[n], 0); + } + + // Now unlock every bucket. + for (n = 0; n < BUCKET_COUNT; ++n) { + enif_rwlock_rwunlock(tb->locks[n]); + } + + return enif_make_atom(env, "ok"); +} + +ERL_NIF_TERM NeuralTable::Get(ErlNifEnv *env, ERL_NIF_TERM table, ERL_NIF_TERM key) { + NeuralTable *tb; + ERL_NIF_TERM ret, val; + unsigned long int entry_key; + + // Acquire table handle, or quit if the table doesn't exist. 
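+ // Lookups only take the read side of the bucket lock, so concurrent readers do not block one another.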
+ tb = GetTable(env, table); + if (tb == NULL) { return enif_make_badarg(env); } + + // Get key value + enif_get_ulong(env, key, &entry_key); + + // Lock the key + tb->rlock(entry_key); + + // Read current value + if (!tb->find(entry_key, val)) { + ret = enif_make_atom(env, "undefined"); + } else { + ret = enif_make_copy(env, val); + } + + tb->runlock(entry_key); + + return ret; +} + +ERL_NIF_TERM NeuralTable::Dump(ErlNifEnv *env, ERL_NIF_TERM table) { + NeuralTable *tb = GetTable(env, table); + ErlNifPid self; + ERL_NIF_TERM ret; + + if (tb == NULL) { return enif_make_badarg(env); } + + enif_self(env, &self); + + tb->add_batch_job(self, &NeuralTable::batch_dump); + + return enif_make_atom(env, "$neural_batch_wait"); +} + +ERL_NIF_TERM NeuralTable::Drain(ErlNifEnv *env, ERL_NIF_TERM table) { + NeuralTable *tb = GetTable(env, table); + ErlNifPid self; + int ret; + + if (tb == NULL) { return enif_make_badarg(env); } + + enif_self(env, &self); + + tb->add_batch_job(self, &NeuralTable::batch_drain); + + return enif_make_atom(env, "$neural_batch_wait"); +} + +ERL_NIF_TERM NeuralTable::GetKeyPosition(ErlNifEnv *env, ERL_NIF_TERM table) { + NeuralTable *tb = GetTable(env, table); + + if (tb == NULL) { return enif_make_badarg(env); } + return enif_make_uint(env, tb->key_pos); +} + +ERL_NIF_TERM NeuralTable::GarbageCollect(ErlNifEnv *env, ERL_NIF_TERM table) { + NeuralTable *tb = GetTable(env, table); + if (tb == NULL) { return enif_make_badarg(env); } + + enif_cond_signal(tb->gc_cond); + + return enif_make_atom(env, "ok"); +} + +ERL_NIF_TERM NeuralTable::GarbageSize(ErlNifEnv *env, ERL_NIF_TERM table) { + NeuralTable *tb = GetTable(env, table); + unsigned long int size = 0; + + if (tb == NULL) { return enif_make_badarg(env); } + + size = tb->garbage_size(); + + return enif_make_ulong(env, size); +} + +void* NeuralTable::DoGarbageCollection(void *table) { + NeuralTable *tb = (NeuralTable*)table; + + enif_mutex_lock(tb->gc_mutex); + + while (running.load(memory_order_acquire)) { + while (running.load(memory_order_acquire) && tb->garbage_size() < RECLAIM_THRESHOLD) { + enif_cond_wait(tb->gc_cond, tb->gc_mutex); + } + tb->gc(); + } + + enif_mutex_unlock(tb->gc_mutex); + + return NULL; +} + +void* NeuralTable::DoReclamation(void *table) { + const int max_eat = 5; + NeuralTable *tb = (NeuralTable*)table; + int i = 0, c = 0, t = 0;; + ERL_NIF_TERM tl, hd; + ErlNifEnv *env; + + while (running.load(memory_order_acquire)) { + for (i = 0; i < BUCKET_COUNT; ++i) { + c = 0; + t = 0; + tb->rwlock(i); + env = tb->get_env(i); + tl = tb->reclaimable[i]; + while (c++ < max_eat && !enif_is_empty_list(env, tl)) { + enif_get_list_cell(env, tl, &hd, &tl); + tb->garbage_cans[i] += estimate_size(env, hd); + t += tb->garbage_cans[i]; + } + tb->rwunlock(i); + + if (t >= RECLAIM_THRESHOLD) { + enif_cond_signal(tb->gc_cond); + } + } +#ifdef _WIN32 + Sleep(50); +#else + usleep(50000); +#endif + + } + + return NULL; +} + +void* NeuralTable::DoBatchOperations(void *table) { + NeuralTable *tb = (NeuralTable*)table; + + enif_mutex_lock(tb->batch_mutex); + + while (running.load(memory_order_acquire)) { + while (running.load(memory_order_acquire) && tb->batch_jobs.empty()) { + enif_cond_wait(tb->batch_cond, tb->batch_mutex); + } + BatchJob job = tb->batch_jobs.front(); + (tb->*job.fun)(job.pid); + tb->batch_jobs.pop(); + } + + enif_mutex_unlock(tb->batch_mutex); + + return NULL; +} + +void NeuralTable::start_gc() { + int ret; + + gc_mutex = enif_mutex_create("neural_table_gc"); + gc_cond = enif_cond_create("neural_table_gc"); + + 
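+ // Spawn the collector before the reclaimer; DoReclamation signals gc_cond once enough garbage has accumulated.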
ret = enif_thread_create("neural_garbage_collector", &gc_tid, NeuralTable::DoGarbageCollection, (void*)this, NULL); + if (ret != 0) { + printf("[neural_gc] Can't create GC thread. Error Code: %d\r\n", ret); + } + + // Start the reclaimer after the garbage collector. + ret = enif_thread_create("neural_reclaimer", &rc_tid, NeuralTable::DoReclamation, (void*)this, NULL); + if (ret != 0) { + printf("[neural_gc] Can't create reclamation thread. Error Code: %d\r\n", ret); + } +} + +void NeuralTable::stop_gc() { + enif_cond_signal(gc_cond); + // Join the reclaimer before the garbage collector. + enif_thread_join(rc_tid, NULL); + enif_thread_join(gc_tid, NULL); +} + +void NeuralTable::start_batch() { + int ret; + + batch_mutex = enif_mutex_create("neural_table_batch"); + batch_cond = enif_cond_create("neural_table_batch"); + + ret = enif_thread_create("neural_batcher", &batch_tid, NeuralTable::DoBatchOperations, (void*)this, NULL); + if (ret != 0) { + printf("[neural_batch] Can't create batch thread. Error Code: %d\r\n", ret); + } +} + +void NeuralTable::stop_batch() { + enif_cond_signal(batch_cond); + enif_thread_join(batch_tid, NULL); +} + +void NeuralTable::put(unsigned long int key, ERL_NIF_TERM tuple) { + ErlNifEnv *env = get_env(key); + hash_buckets[GET_BUCKET(key)][key] = enif_make_copy(env, tuple); +} + +ErlNifEnv* NeuralTable::get_env(unsigned long int key) { + return env_buckets[GET_BUCKET(key)]; +} + +bool NeuralTable::find(unsigned long int key, ERL_NIF_TERM &ret) { + hash_table *bucket = &hash_buckets[GET_BUCKET(key)]; + hash_table::iterator it = bucket->find(key); + if (bucket->end() == it) { + return false; + } else { + ret = it->second; + return true; + } +} + +bool NeuralTable::erase(unsigned long int key, ERL_NIF_TERM &val) { + hash_table *bucket = &hash_buckets[GET_BUCKET(key)]; + hash_table::iterator it = bucket->find(key); + bool ret = false; + if (it != bucket->end()) { + ret = true; + val = it->second; + bucket->erase(it); + } + return ret; +} + +void NeuralTable::add_batch_job(ErlNifPid pid, BatchFunction fun) { + BatchJob job; + job.pid = pid; + job.fun = fun; + + enif_mutex_lock(batch_mutex); + batch_jobs.push(job); + enif_mutex_unlock(batch_mutex); + + enif_cond_signal(batch_cond); +} + +void NeuralTable::batch_drain(ErlNifPid pid) { + ErlNifEnv *env = enif_alloc_env(); + ERL_NIF_TERM msg, value; + + value = enif_make_list(env, 0); + for (int i = 0; i < BUCKET_COUNT; ++i) { + enif_rwlock_rwlock(locks[i]); + + for (hash_table::iterator it = hash_buckets[i].begin(); it != hash_buckets[i].end(); ++it) { + value = enif_make_list_cell(env, enif_make_copy(env, it->second), value); + } + enif_clear_env(env_buckets[i]); + hash_buckets[i].clear(); + garbage_cans[i] = 0; + reclaimable[i] = enif_make_list(env_buckets[i], 0); + + enif_rwlock_rwunlock(locks[i]); + } + + msg = enif_make_tuple2(env, enif_make_atom(env, "$neural_batch_response"), value); + + enif_send(NULL, &pid, env, msg); + + enif_free_env(env); +} + +void NeuralTable::batch_dump(ErlNifPid pid) { + ErlNifEnv *env = enif_alloc_env(); + ERL_NIF_TERM msg, value; + + value = enif_make_list(env, 0); + for (int i = 0; i < BUCKET_COUNT; ++i) { + enif_rwlock_rlock(locks[i]); + for (hash_table::iterator it = hash_buckets[i].begin(); it != hash_buckets[i].end(); ++it) { + value = enif_make_list_cell(env, enif_make_copy(env, it->second), value); + } + enif_rwlock_runlock(locks[i]); + } + + msg = enif_make_tuple2(env, enif_make_atom(env, "$neural_batch_response"), value); + + enif_send(NULL, &pid, env, msg); + + 
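+ // The temporary env was only needed to build and send the batch response; release it here.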
enif_free_env(env); +} + +void NeuralTable::reclaim(unsigned long int key, ERL_NIF_TERM term) { + int bucket = GET_BUCKET(key); + ErlNifEnv *env = get_env(key); + reclaimable[bucket] = enif_make_list_cell(env, term, reclaimable[bucket]); +} + +void NeuralTable::gc() { + ErlNifEnv *fresh = NULL, + *old = NULL; + hash_table *bucket = NULL; + hash_table::iterator it; + unsigned int gc_curr = 0; + + for (; gc_curr < BUCKET_COUNT; ++gc_curr) { + bucket = &hash_buckets[gc_curr]; + old = env_buckets[gc_curr]; + fresh = enif_alloc_env(); + + enif_rwlock_rwlock(locks[gc_curr]); + for (it = bucket->begin(); it != bucket->end(); ++it) { + it->second = enif_make_copy(fresh, it->second); + } + + garbage_cans[gc_curr] = 0; + env_buckets[gc_curr] = fresh; + reclaimable[gc_curr] = enif_make_list(fresh, 0); + enif_free_env(old); + enif_rwlock_rwunlock(locks[gc_curr]); + } +} + +unsigned long int NeuralTable::garbage_size() { + unsigned long int size = 0; + for (int i = 0; i < BUCKET_COUNT; ++i) { + enif_rwlock_rlock(locks[i]); + size += garbage_cans[i]; + enif_rwlock_runlock(locks[i]); + } + return size; +} diff --git a/c_src/neural/NeuralTable.h b/c_src/neural/NeuralTable.h new file mode 100644 index 0000000..527c820 --- /dev/null +++ b/c_src/neural/NeuralTable.h @@ -0,0 +1,121 @@ +#ifndef NEURALTABLE_H +#define NEURALTABLE_H + +#include "erl_nif.h" +#include "neural_utils.h" +#include +#include +#include +#include +#include +#include +#ifdef _WIN32 +#include +#include +#include +#else +#include +#endif + +#define BUCKET_COUNT 64 +#define BUCKET_MASK (BUCKET_COUNT - 1) +#define GET_BUCKET(key) key & BUCKET_MASK +#define GET_LOCK(key) key & BUCKET_MASK +#define RECLAIM_THRESHOLD 1048576 + +using namespace std; + +class NeuralTable; + +typedef unordered_map table_set; +typedef unordered_map hash_table; +typedef void (NeuralTable::*BatchFunction)(ErlNifPid pid); + +class NeuralTable { + public: + static ERL_NIF_TERM MakeTable(ErlNifEnv *env, ERL_NIF_TERM name, ERL_NIF_TERM keypos); + static ERL_NIF_TERM Insert(ErlNifEnv *env, ERL_NIF_TERM table, ERL_NIF_TERM key, ERL_NIF_TERM object); + static ERL_NIF_TERM InsertNew(ErlNifEnv *env, ERL_NIF_TERM table, ERL_NIF_TERM key, ERL_NIF_TERM object); + static ERL_NIF_TERM Delete(ErlNifEnv *env, ERL_NIF_TERM table, ERL_NIF_TERM key); + static ERL_NIF_TERM Empty(ErlNifEnv *env, ERL_NIF_TERM table); + static ERL_NIF_TERM Get(ErlNifEnv *env, ERL_NIF_TERM table, ERL_NIF_TERM key); + static ERL_NIF_TERM Increment(ErlNifEnv *env, ERL_NIF_TERM table, ERL_NIF_TERM key, ERL_NIF_TERM ops); + static ERL_NIF_TERM Shift(ErlNifEnv *env, ERL_NIF_TERM table, ERL_NIF_TERM key, ERL_NIF_TERM ops); + static ERL_NIF_TERM Unshift(ErlNifEnv *env, ERL_NIF_TERM table, ERL_NIF_TERM key, ERL_NIF_TERM ops); + static ERL_NIF_TERM Swap(ErlNifEnv *env, ERL_NIF_TERM table, ERL_NIF_TERM key, ERL_NIF_TERM ops); + static ERL_NIF_TERM Dump(ErlNifEnv *env, ERL_NIF_TERM table); + static ERL_NIF_TERM Drain(ErlNifEnv *env, ERL_NIF_TERM table); + static ERL_NIF_TERM GetKeyPosition(ErlNifEnv *env, ERL_NIF_TERM table); + static ERL_NIF_TERM GarbageCollect(ErlNifEnv *env, ERL_NIF_TERM table); + static ERL_NIF_TERM GarbageSize(ErlNifEnv *env, ERL_NIF_TERM table); + static NeuralTable* GetTable(ErlNifEnv *env, ERL_NIF_TERM name); + static void* DoGarbageCollection(void *table); + static void* DoBatchOperations(void *table); + static void* DoReclamation(void *table); + static void Initialize() { + table_mutex = enif_mutex_create("neural_table_maker"); + } + static void Shutdown() { + running = false; + 
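+ // With 'running' cleared, each table's destructor can join its GC, reclamation and batch threads.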
table_set::iterator it(tables.begin()); + + while (it != tables.end()) { + delete it->second; + tables.erase(it); + it = tables.begin(); + } + + enif_mutex_destroy(table_mutex); + } + + void rlock(unsigned long int key) { enif_rwlock_rlock(locks[GET_LOCK(key)]); } + void runlock(unsigned long int key) { enif_rwlock_runlock(locks[GET_LOCK(key)]); } + void rwlock(unsigned long int key) { enif_rwlock_rwlock(locks[GET_LOCK(key)]); } + void rwunlock(unsigned long int key) { enif_rwlock_rwunlock(locks[GET_LOCK(key)]); } + + ErlNifEnv *get_env(unsigned long int key); + bool erase(unsigned long int key, ERL_NIF_TERM &ret); + bool find(unsigned long int key, ERL_NIF_TERM &ret); + void put(unsigned long int key, ERL_NIF_TERM tuple); + void batch_dump(ErlNifPid pid); + void batch_drain(ErlNifPid pid); + void start_gc(); + void stop_gc(); + void start_batch(); + void stop_batch(); + void gc(); + void reclaim(unsigned long int key, ERL_NIF_TERM reclaim); + unsigned long int garbage_size(); + void add_batch_job(ErlNifPid pid, BatchFunction fun); + + protected: + static table_set tables; + static atomic running; + static ErlNifMutex *table_mutex; + + struct BatchJob { + ErlNifPid pid; + BatchFunction fun; + }; + + NeuralTable(unsigned int kp); + ~NeuralTable(); + + unsigned int garbage_cans[BUCKET_COUNT]; + hash_table hash_buckets[BUCKET_COUNT]; + ErlNifEnv *env_buckets[BUCKET_COUNT]; + ERL_NIF_TERM reclaimable[BUCKET_COUNT]; + ErlNifRWLock *locks[BUCKET_COUNT]; + ErlNifCond *gc_cond; + ErlNifMutex *gc_mutex; + ErlNifTid gc_tid; + ErlNifTid rc_tid; + ErlNifCond *batch_cond; + ErlNifMutex *batch_mutex; + queue batch_jobs; + ErlNifTid batch_tid; + + unsigned int key_pos; +}; + +#endif diff --git a/c_src/neural/neural.cpp b/c_src/neural/neural.cpp new file mode 100644 index 0000000..0273324 --- /dev/null +++ b/c_src/neural/neural.cpp @@ -0,0 +1,134 @@ +#include "erl_nif.h" +#include "NeuralTable.h" +#include + +// Prototypes +static ERL_NIF_TERM neural_new(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); +static ERL_NIF_TERM neural_put(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); +static ERL_NIF_TERM neural_put_new(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); +static ERL_NIF_TERM neural_increment(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); +static ERL_NIF_TERM neural_unshift(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); +static ERL_NIF_TERM neural_shift(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); +static ERL_NIF_TERM neural_swap(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); +static ERL_NIF_TERM neural_get(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); +static ERL_NIF_TERM neural_delete(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); +static ERL_NIF_TERM neural_garbage(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); +static ERL_NIF_TERM neural_garbage_size(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); +static ERL_NIF_TERM neural_empty(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); +static ERL_NIF_TERM neural_drain(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); +static ERL_NIF_TERM neural_dump(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); +static ERL_NIF_TERM neural_key_pos(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); + +static ErlNifFunc nif_funcs[] = +{ + {"make_table", 2, neural_new}, + {"do_fetch", 2, neural_get}, + {"do_delete", 2, neural_delete}, + {"do_dump", 1, neural_dump}, + {"do_drain", 1, neural_drain}, + {"empty", 1, neural_empty}, + {"insert", 3, neural_put}, + {"insert_new", 3, 
neural_put_new}, + {"do_increment", 3, neural_increment}, + {"do_unshift", 3, neural_unshift}, + {"do_shift", 3, neural_shift}, + {"do_swap", 3, neural_swap}, + {"garbage", 1, neural_garbage}, + {"garbage_size", 1, neural_garbage_size}, + {"key_pos", 1, neural_key_pos} +}; + +static ERL_NIF_TERM neural_key_pos(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + // This function is directly exposed, so no strict guards or patterns protecting us. + if (argc != 1 || !enif_is_atom(env, argv[0])) { return enif_make_badarg(env); } + + return NeuralTable::GetKeyPosition(env, argv[0]); +} + +static ERL_NIF_TERM neural_new(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + return NeuralTable::MakeTable(env, argv[0], argv[1]); +} + +static ERL_NIF_TERM neural_put(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + return NeuralTable::Insert(env, argv[0], argv[1], argv[2]); +} + +static ERL_NIF_TERM neural_put_new(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + return NeuralTable::InsertNew(env, argv[0], argv[1], argv[2]); +} + +static ERL_NIF_TERM neural_increment(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + if (!enif_is_atom(env, argv[0]) || !enif_is_number(env, argv[1]) || !enif_is_list(env, argv[2])) { + return enif_make_badarg(env); + } + + return NeuralTable::Increment(env, argv[0], argv[1], argv[2]); +} + +static ERL_NIF_TERM neural_shift(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + return NeuralTable::Shift(env, argv[0], argv[1], argv[2]); +} + +static ERL_NIF_TERM neural_unshift(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + return NeuralTable::Unshift(env, argv[0], argv[1], argv[2]); +} + +static ERL_NIF_TERM neural_swap(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]){ + return NeuralTable::Swap(env, argv[0], argv[1], argv[2]); +} + +static ERL_NIF_TERM neural_get(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + return NeuralTable::Get(env, argv[0], argv[1]); +} + +static ERL_NIF_TERM neural_delete(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + return NeuralTable::Delete(env, argv[0], argv[1]); +} + +static ERL_NIF_TERM neural_empty(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + if (!enif_is_atom(env, argv[0])) { return enif_make_badarg(env); } + + return NeuralTable::Empty(env, argv[0]); +} + +static ERL_NIF_TERM neural_dump(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + if (!enif_is_atom(env, argv[0])) { return enif_make_badarg(env); } + + return NeuralTable::Dump(env, argv[0]); +} + +static ERL_NIF_TERM neural_drain(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + if (!enif_is_atom(env, argv[0])) { return enif_make_badarg(env); } + + return NeuralTable::Drain(env, argv[0]); +} + +static ERL_NIF_TERM neural_garbage(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + if (!enif_is_atom(env, argv[0])) { return enif_make_badarg(env); } + + return NeuralTable::GarbageCollect(env, argv[0]); +} + +static ERL_NIF_TERM neural_garbage_size(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + if (!enif_is_atom(env, argv[0])) { return enif_make_badarg(env); } + + return NeuralTable::GarbageSize(env, argv[0]); +} + +static void neural_resource_cleanup(ErlNifEnv* env, void* arg) +{ + /* Delete any dynamically allocated memory stored in neural_handle */ + /* neural_handle* handle = (neural_handle*)arg; */ +} + +static int on_load(ErlNifEnv* env, void** priv_data, ERL_NIF_TERM load_info) +{ + NeuralTable::Initialize(); + return 0; +} + +static void on_unload(ErlNifEnv *env, void 
*priv_data) { + NeuralTable::Shutdown(); +} + +ERL_NIF_INIT(neural, nif_funcs, &on_load, NULL, NULL, &on_unload); diff --git a/c_src/neural/neural_utils.cpp b/c_src/neural/neural_utils.cpp new file mode 100644 index 0000000..a579ef5 --- /dev/null +++ b/c_src/neural/neural_utils.cpp @@ -0,0 +1,46 @@ +#include "neural_utils.h" + +unsigned long int estimate_size(ErlNifEnv *env, ERL_NIF_TERM term) { + if (enif_is_atom(env, term)) { + return WORD_SIZE; + } + + // Treating all numbers like longs. + if (enif_is_number(env, term)) { + return 2 * WORD_SIZE; + } + + if (enif_is_binary(env, term)) { + ErlNifBinary bin; + enif_inspect_binary(env, term, &bin); + return bin.size + (6 * WORD_SIZE); + } + + if (enif_is_list(env, term)) { + unsigned long int size = 0; + ERL_NIF_TERM it, curr; + it = term; + size += WORD_SIZE; + while (!enif_is_empty_list(env, it)) { + enif_get_list_cell(env, it, &curr, &it); + size += estimate_size(env, curr) + WORD_SIZE; + } + return size; + } + + if (enif_is_tuple(env, term)) { + unsigned long int size = 0; + const ERL_NIF_TERM *tpl; + int arity; + enif_get_tuple(env, term, &arity, &tpl); + for (int i = 0; i < arity; ++i) { + size += estimate_size(env, tpl[i]); + } + return size; + } + + // Return 1 word by default + return WORD_SIZE; +} + + diff --git a/c_src/neural/neural_utils.h b/c_src/neural/neural_utils.h new file mode 100644 index 0000000..6111d0b --- /dev/null +++ b/c_src/neural/neural_utils.h @@ -0,0 +1,9 @@ +#ifndef NEURAL_UTILS_H +#define NEURAL_UTILS_H + +#include "erl_nif.h" +#define WORD_SIZE sizeof(int) + +unsigned long int estimate_size(ErlNifEnv *env, ERL_NIF_TERM term); + +#endif diff --git a/c_src/neural/rebar.config b/c_src/neural/rebar.config new file mode 100644 index 0000000..8d8e6be --- /dev/null +++ b/c_src/neural/rebar.config @@ -0,0 +1,14 @@ +{port_specs, [ + {"../../priv/neural.so", ["*.cpp"]} +]}. + +{port_env, [ + {".*", "CXXFLAGS", "$CXXFLAGS -std=c++11 -O3"}, + {".*", "LDFLAGS", "$LDFLAGS -lstdc++ -shared"} +]}. + + + + + + diff --git a/src/dataType/utTermSize.erl b/src/dataType/utTermSize.erl index c0e3d1a..0077699 100644 --- a/src/dataType/utTermSize.erl +++ b/src/dataType/utTermSize.erl @@ -85,7 +85,7 @@ internal_test() -> 32 = byteSize(<<$a, $b, $c>>, 8), 8 = byteSize([], 8), 24 = byteSize([0|[]], 8), - 24 = byteSize([1|2], 8), % improper list + 24 = byteSize([1|2], 8), % itime_tmproper list 16 = byteSize({}, 8), 24 = byteSize({0}, 8), 8 = byteSize(0, 8), diff --git a/src/nifSrc/bitmap_filter/bitmap_filter.erl b/src/nifSrc/bitmap_filter/bitmap_filter.erl new file mode 100644 index 0000000..0eb5004 --- /dev/null +++ b/src/nifSrc/bitmap_filter/bitmap_filter.erl @@ -0,0 +1,20 @@ +-module(bitmap_filter). + +-export([init/0, filter/1]). +-on_load(init/0). + +init() -> + PrivDir = case code:priv_dir(?MODULE) of + {error, _} -> + EbinDir = filename:dirname(code:which(?MODULE)), + AppPath = filename:dirname(EbinDir), + filename:join(AppPath, "priv"); + Path -> + Path + end, + erlang:load_nif(filename:join(PrivDir, "bitmap_filter"), 0). + +% Hack - overriden by init, which is called in on_load. +% I couldn't find another way that the compiler or code load didn't complain about. +filter(DefaultArgs) -> + DefaultArgs. diff --git a/src/nifSrc/bsn/bsn.erl b/src/nifSrc/bsn/bsn.erl new file mode 100644 index 0000000..ee5b01f --- /dev/null +++ b/src/nifSrc/bsn/bsn.erl @@ -0,0 +1,77 @@ +-module(bsn). + +%% API +-export([hash/2, compare/2]). +-export([new/2, add/2, all/1, chains/1, in/2, count/1, clear/2]). + +-ifdef(TEST). 
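+%% eunit (and the commented-out triq header) are only compiled in for test builds.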
+-include_lib("eunit/include/eunit.hrl"). +%-include_lib("triq/include/triq.hrl"). +-endif. + + +%% Create new resource, `CellCount' is the size of the painters' store. +new('int_quadric', CellsCount) when CellsCount > 0 -> + {'bsn_int', bsn_int:new(-CellsCount)}; + +new('int_linear', CellsCount) when CellsCount > 0 -> + {'bsn_int', bsn_int:new(CellsCount)}; + +new('ext', CellsCount) when CellsCount > 0 -> + {'bsn_ext', bsn_ext:new(CellsCount)}. + + + + +%% Add new element. +%% If the result is a negative integer +%% then object was already added. +%% We found this object with (result) steps. +%% +%% If the result is a positive integer +%% then object was added after (result) elements. +add({Type, Res}, Bin) -> + Type:add(Res, Bin). + +all({Type, Res}) -> + Type:all(Res). + +chains({Type, Res}) -> + Type:chains(Res). + +%% Add new element. +%% If the result is a negative integer +%% then object was found with (-result) steps. +%% +%% If the result is a positive integer +%% then object was not found with (result) steps. +in({Type, Res}, Bin) -> + Type:in(Res, Bin). + +clear({Type, Res}, Bin) -> + Type:clear(Res, Bin). + +%% Return the count of elements stored in this resource. +count({Type, Res}) -> + Type:count(Res). + +%% Calculate the hash of the binary +hash(Bin, Max) -> + bsn_ext:hash(Bin, Max). + +compare(Bin1, Bin2) -> + bsn_ext:compare(Bin1, Bin2). + +-ifdef(TEST). +-ifdef(FORALL). +prop_compare_test_() -> + {"Binary compare testing.", + {timeout, 60, + fun() -> triq:check(prop_compare()) end}}. + +prop_compare() -> + ?FORALL({Xs},{binary()}, + compare(Xs, Xs)). +-endif. + +-endif. diff --git a/src/nifSrc/bsn/bsn_ext.erl b/src/nifSrc/bsn/bsn_ext.erl new file mode 100644 index 0000000..6a822da --- /dev/null +++ b/src/nifSrc/bsn/bsn_ext.erl @@ -0,0 +1,56 @@ +-module(bsn_ext). + +-on_load(init/0). +-export([init/0]). + +%% API +-export([hash/2, compare/2]). +-export([new/1, add/2, all/1, chains/1, in/2, count/1, clear/2]). + +-define(NIF_NOT_LOADED, erlang:nif_error(nif_not_loaded)). + +init() -> + erlang:load_nif(code:priv_dir('bsn')++"/bsn_ext", 0). + +%% Create new resource, `CellCount' is the size of the painters' store. +new(CellsCount) -> + ?NIF_NOT_LOADED. + +%% Add new element. +%% If the result is a negative integer +%% then object was already added. +%% We found this object with (result) steps. +%% +%% If the result is a positive integer +%% then object was added after (result) elements. +add(Res, Bin) -> + ?NIF_NOT_LOADED. + +all(Res) -> + ?NIF_NOT_LOADED. + +chains(Res) -> + ?NIF_NOT_LOADED. + +%% Add new element. +%% If the result is a negative integer +%% then object was found with (-result) steps. +%% +%% If the result is a positive integer +%% then object was not found with (result) steps. +in(Res, Bin) -> + ?NIF_NOT_LOADED. + +%% Return the count of elements stored in this resource. +count(Res) -> + ?NIF_NOT_LOADED. + +%% Calculate the hash of the binary +hash(Bin, Max) -> + ?NIF_NOT_LOADED. + +compare(Bin1, Bin2) -> + ?NIF_NOT_LOADED. + +clear(Res, Bin) -> + ?NIF_NOT_LOADED. diff --git a/src/nifSrc/bsn/bsn_int.erl b/src/nifSrc/bsn/bsn_int.erl new file mode 100644 index 0000000..ad1328d --- /dev/null +++ b/src/nifSrc/bsn/bsn_int.erl @@ -0,0 +1,45 @@ +-module(bsn_int). + +-on_load(init/0). +-export([init/0]). + +%% API +-export([new/1, add/2, all/1, in/2, count/1, clear/2]). + +-define(NIF_NOT_LOADED, erlang:nif_error(nif_not_loaded)). + +init() -> + erlang:load_nif(code:priv_dir('bsn')++"/bsn_int", 0). 
+ +%% Create new resource, `CellCount' is the size of the painters' store. +new(CellsCount) -> + ?NIF_NOT_LOADED. + +%% Add new element. +%% If the result is a negative integer +%% then object was already added. +%% We found this object with (result) steps. +%% +%% If the result is a positive integer +%% then object was added after (result) elements. +add(Res, Bin) -> + ?NIF_NOT_LOADED. + +all(Res) -> + ?NIF_NOT_LOADED. + +%% Add new element. +%% If the result is a negative integer +%% then object was found with (-result) steps. +%% +%% If the result is a positive integer +%% then object was not found with (result) steps. +in(Res, Bin) -> + ?NIF_NOT_LOADED. + +%% Return the count of elements stored in this resource. +count(Res) -> + ?NIF_NOT_LOADED. + +clear(Res, Bin) -> + ?NIF_NOT_LOADED. diff --git a/src/nifSrc/bsn/bsn_measure.erl b/src/nifSrc/bsn/bsn_measure.erl new file mode 100644 index 0000000..fe2c694 --- /dev/null +++ b/src/nifSrc/bsn/bsn_measure.erl @@ -0,0 +1,236 @@ +-module(bsn_measure). +-export([test/0, test2/0, test3/0, print/0]). +-export([gen/2, check_type/4]). +-export([check_type/3, get_type/3, test_type/2]). +-export([check_degrade/0, test_filled/1]). + +-ifndef(TEST). +-define(TEST, e). +-endif. +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). +%-include_lib("triq/include/triq.hrl"). +-endif. + + +% InOutK is (success / failure) checks. +% Return {TestCases, Elements}. +gen(ElemCount, InOutK) + when ElemCount>0 -> + Nums = lists:seq(0, erlang:round(ElemCount*100)), + filter(ElemCount, InOutK, Nums, [], []). + + +filter(EC, InOutK, [H|T], AllAcc, ElemAcc) + when EC>0 -> + case random:uniform() of + X when X + filter(EC-1, InOutK, + T, [H|AllAcc], [H|ElemAcc]); + _X -> + filter(EC, InOutK, + T, [H|AllAcc], ElemAcc) + end; +filter(_ElemCount, _InOutK, _Acc, AllAcc, ElemAcc) -> + {AllAcc, ElemAcc}. + + +check_type(Type, Size, InOutK) -> + check_type(fun average/1, Type, Size, InOutK). + +get_type(Type, Size, InOutK) -> + check_type(fun(X) -> X end, Type, Size, InOutK). + +check_type(OutF, Type, Size, InOutK) -> + % Build resourse + F = fun() -> bsn:new(Type, Size) end, + + [do_check(OutF, F, Size, InOutK, 0.1), + do_check(OutF, F, Size, InOutK, 0.25), + do_check(OutF, F, Size, InOutK, 0.5), + do_check(OutF, F, Size, InOutK, 0.75), + do_check(OutF, F, Size, InOutK, 0.9), + do_check(OutF, F, Size, InOutK, 1)]. + +do_check(OutF, F, Size, InOutK, CapacityK) -> + Res = F(), + ElemCount = Size * CapacityK, + {CaseList, ElemList} = gen(ElemCount, InOutK), + fill_values(Res, ElemList), + VaList = check_values(Res, CaseList, []), + {MissList, InNegList} = lists:partition(fun(X) -> X>0 end, VaList), + InList = lists:map(fun erlang:'-'/1, InNegList), + AllList = InList ++ MissList, + {CapacityK, + {size, Size}, + {real_count, bsn:count(Res)}, + {miss, OutF(MissList)}, + {in, OutF(InList)}, + {all, OutF(AllList)}}. + + +average([]) -> + false; +average([X|Tail]) -> + average1(Tail, X, 1). +% @private +average1([X|Tail], Sum, Count) -> + average1(Tail, Sum + X, Count + 1); +average1([], Sum, Count) -> + round4(Sum / Count). + + + + +round4(X) when is_number(X) -> + erlang:round(X * 1000) / 1000; +round4(X) -> + X. + +check_values(Res, [H|T], Acc) -> + X = bsn:in(Res, integer_to_binary(H)), + check_values(Res, T, [X|Acc]); +check_values(_Res, [], Acc) -> + Acc. + +fill_values(Res, [H|T]) -> + case bsn:add(Res, integer_to_binary(H)) of + no_more -> + Res; + X -> + fill_values(Res, T) + end; +fill_values(Res, []) -> + Res. 
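+%% The 3-arity variant below also collects the keys that were actually inserted.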
+ +fill_values(Res, [H|T], Acc) -> + case bsn:add(Res, integer_to_binary(H)) of + no_more -> + Acc; + X -> + fill_values(Res, T, [H|Acc]) + end; +fill_values(_Res, [], Acc) -> + Acc. + +integer_to_binary(X) -> + erlang:list_to_binary(erlang:integer_to_list(X)). + +test() -> + [{ext, check_type(ext, 100, 0.5)} + ,{int_linear, check_type(int_linear, 100, 0.5)} + ,{int_quadric, check_type(int_quadric, 100, 0.5)}]. + +%% All values. +test2() -> + [{ext, get_type(ext, 100, 0.5)} + ,{int_linear, get_type(int_linear, 100, 0.5)} + ,{int_quadric, get_type(int_quadric, 100, 0.5)}]. + +%% Counts of values. +test3() -> + F = fun anal_values/1, + [{ext, check_type(F, ext, 100, 0.5)} + ,{int_linear, check_type(F, int_linear, 100, 0.5)} + ,{int_quadric, check_type(F, int_quadric, 100, 0.5)}]. + +print() -> + do_print(test3()). + +do_print([{Type, Vals}|T]) -> + io:format("Type ~w~n", [Type]), + lists:map(fun({K, + {real_count,RC}, + {miss, M}, + {in, I}, + {all, A}}) -> + io:format("K=~w, RC=~w~n", [K, RC]), + io:format("count,miss,in,all\n"), + + print_mia(lists:seq(1, 100), M, I, A), + io:format("\n") + end, Vals), + do_print(T); +do_print([]) -> + ok. + +print_mia([H|T], [{H,0}|T1], [{H,0}|T2], [{H,0}|T3]) -> + print_mia(T, T1, T2, T3); +print_mia([H|T], [{H,C1}|T1], [{H,C2}|T2], [{H,C3}|T3]) -> + io:format("~w,~w,~w,~w\n", [H, C1, C2, C3]), + print_mia(T, T1, T2, T3); + +print_mia([H|_]=L, [{X,_}|_]=L1, L2, L3) + when X =/= H -> + print_mia(L, [{H,0}|L1], L2, L3); +print_mia([H|_]=L, [], L2, L3) -> + print_mia(L, [{H,0}], L2, L3); + +print_mia([H|_]=L, L1, [{X,_}|_]=L2, L3) + when X =/= H -> + print_mia(L, L1, [{H,0}|L2], L3); +print_mia([H|_]=L, L1, [], L3) -> + print_mia(L, L1, [{H,0}], L3); + +print_mia([H|_]=L, L1, L2, L3) -> + print_mia(L, L1, L2, [{H,0}|L3]); +print_mia([], _, _, _) -> + ok. + + + + + + + +anal_values(L) -> + do_anal(lists:sort(L), 1, []). + +do_anal([H,H|T], C, Acc) -> + do_anal([H|T], C+1, Acc); +do_anal([OldH|T], C, Acc) -> + do_anal(T, 1, [{OldH, C}|Acc]); +do_anal([], C, Acc) -> + lists:reverse(Acc). + +avg(L) -> do_avg(L, 0, 0). +do_avg([H|T], Cnt, Sum) -> + do_avg(T, Cnt+1, Sum+H); +do_avg([], Cnt, Sum) -> + Sum / Cnt. + +check_degrade() -> + [do_check_degrade(ext) + ,do_check_degrade(int_linear) + ,do_check_degrade(int_quadric) + ]. + +do_check_degrade(Type) -> + OutF = fun avg/1, + [Type, + lists:map(fun(Size) -> + F = fun() -> bsn:new(Type, Size) end, + do_check(OutF, F, Size, 0.5, 1) + end, [10, 100, 500, 1000, 5000, 10000])]. + +test_filled(ElemCount) -> + Res = bsn:new(ext, ElemCount), + {CaseList, ElemList} = gen(ElemCount, 1), + Vals = fill_values(Res, ElemList, []), + {bsn_ext, R} = Res, + R. + +-ifdef(TEST). + +do_test_() -> + [?_assert(test_type(bsn:new(ext, 100), 100)) + ,?_assert(test_type(bsn:new(int_linear, 100), 100)) + ,?_assert(test_type(bsn:new(int_quadric, 100), 100)) + ]. +-endif. + +test_type(Res, ElemCount) -> + {CaseList, ElemList} = gen(ElemCount, 1), + Vals = fill_values(Res, ElemList, []), + %Vals = ElemList, + lists:all(fun(X) -> bsn:in(Res, integer_to_binary(X)) < 0 end, Vals). diff --git a/src/nifSrc/couchdb_hqeue/hqueue.erl b/src/nifSrc/couchdb_hqeue/hqueue.erl new file mode 100644 index 0000000..eec8b98 --- /dev/null +++ b/src/nifSrc/couchdb_hqeue/hqueue.erl @@ -0,0 +1,160 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. 
You may obtain a copy of +% the License at +% +% http:%www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(hqueue). + + +-on_load(init/0). + + +-export([ + new/0, + new/1, + + extract_max/1, + insert/3, + + from_list/1, + from_list/2, + to_list/1, + + heap_size/1, + info/1, + is_empty/1, + max_elems/1, + size/1, + + resize_heap/2, + scale_by/2, + set_max_elems/2 +]). + + +-define(NOT_LOADED, not_loaded(?LINE)). + + +-type hqueue() :: term(). +-type hqueue_priority() :: float(). %% this should be non_neg_float() +-type hqueue_val() :: term(). +-type hqueue_elem() :: {hqueue_priority(), hqueue_val()}. +-type hqueue_option() :: {max_elems, pos_integer()} + | {heap_size, pos_integer()}. +-type hqueue_stat() :: {max_elems, pos_integer()} + | {heap_size, pos_integer()} + | {size, non_neg_integer()}. + +-export_type([hqueue/0]). + + +-spec new() -> {ok, hqueue()}. +new() -> + new([]). + + +-spec new([hqueue_option()]) -> {ok, hqueue()}. +new(_Options) -> + ?NOT_LOADED. + + +%% Extraction order is undefined for entries with duplicate priorities +-spec extract_max(hqueue()) -> hqueue_elem() | {error, empty}. +extract_max(_HQ) -> + ?NOT_LOADED. + + +-spec insert(hqueue(), hqueue_priority(), hqueue_val()) -> ok | {error, full}. +insert(_HQ, _Priority, _Val) -> + ?NOT_LOADED. + + +-spec size(hqueue()) -> integer(). +size(_HQ) -> + ?NOT_LOADED. + + +-spec max_elems(hqueue()) -> integer(). +max_elems(_HQ) -> + ?NOT_LOADED. + + +%% Returns old max elems or error if NewMaxElems < size(HQ) +-spec set_max_elems(hqueue(), pos_integer()) -> pos_integer() + | {error, too_small}. +set_max_elems(_HQ, _NewMaxElems) -> + ?NOT_LOADED. + + +-spec is_empty(hqueue()) -> boolean(). +is_empty(HQ) -> + hqueue:size(HQ) =:= 0. + + +-spec to_list(hqueue()) -> [hqueue_elem()]. +to_list(_HQ) -> + ?NOT_LOADED. + + +-spec from_list([hqueue_elem()]) -> {ok, hqueue()}. +from_list(Elems) -> + from_list(Elems, []). + + +-spec from_list([hqueue_elem()], [hqueue_option()]) -> {ok, hqueue()}. +from_list(Elems, Options) -> + {ok, HQ} = ?MODULE:new(Options), + lists:foreach(fun({Priority, Val}) -> + ?MODULE:insert(HQ, Priority, Val) + end, Elems), + {ok, HQ}. + + +-spec scale_by(hqueue(), float()) -> ok. +scale_by(_HQ, _Factor) -> + ?NOT_LOADED. + + +%% Returns old heap size or error if NewHeapSize < size(HQ) +-spec resize_heap(hqueue(), pos_integer()) -> pos_integer() + | {error, too_small}. +resize_heap(_HQ, _NewHeapSize) -> + ?NOT_LOADED. + + +-spec heap_size(hqueue()) -> pos_integer(). +heap_size(_HQ) -> + ?NOT_LOADED. + + +-spec info(hqueue()) -> [hqueue_stat()]. +info(HQ) -> + [ + {heap_size, hqueue:heap_size(HQ)}, + {max_elems, hqueue:max_elems(HQ)}, + {size, hqueue:size(HQ)} + ]. + + + +init() -> + PrivDir = case code:priv_dir(?MODULE) of + {error, _} -> + EbinDir = filename:dirname(code:which(?MODULE)), + AppPath = filename:dirname(EbinDir), + filename:join(AppPath, "priv"); + Path -> + Path + end, + erlang:load_nif(filename:join(PrivDir, "hqueue"), 0). + + +not_loaded(Line) -> + erlang:nif_error({not_loaded, [{module, ?MODULE}, {line, Line}]}). 
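+%% Usage sketch (hedged, based on the specs above; priorities are floats and
+%% extraction order for equal priorities is undefined, per extract_max/1):
+%%
+%%   {ok, HQ} = hqueue:new([{max_elems, 1024}]),
+%%   ok = hqueue:insert(HQ, 2.0, job_a),
+%%   ok = hqueue:insert(HQ, 5.0, job_b),
+%%   {5.0, job_b} = hqueue:extract_max(HQ),
+%%   1 = hqueue:size(HQ).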
diff --git a/src/nifSrc/cq/cq.erl b/src/nifSrc/cq/cq.erl deleted file mode 100644 index e69de29..0000000 diff --git a/src/nifSrc/enlfq/enlfq.erl b/src/nifSrc/enlfq/enlfq.erl new file mode 100644 index 0000000..9a24cc4 --- /dev/null +++ b/src/nifSrc/enlfq/enlfq.erl @@ -0,0 +1,51 @@ +-module(enlfq). + +-on_load(load_nif/0). + +-define(NOT_LOADED, not_loaded(?LINE)). + +%% API exports +-export([new/0, push/2, pop/1]). + +%%==================================================================== +%% API functions +%%==================================================================== + + +-spec(new() -> {ok, QueueRef :: reference()} | badarg | {error, Reason :: binary()}). +new() -> + ?NOT_LOADED. + +-spec(push(QueueRef :: reference(), Data :: any()) -> + true | {error, Reason :: binary()}). +push(_QueueRef, _Data) -> + ?NOT_LOADED. + +-spec(pop(QueueRef :: reference()) -> + {ok, Data :: any()} | empty | {error, Reason :: binary()}). +pop(_QueueRef) -> + ?NOT_LOADED. + +%%==================================================================== +%% Internal functions +%%==================================================================== + + +%% nif functions + +load_nif() -> + SoName = get_priv_path(?MODULE), + io:format(<<"Loading library: ~p ~n">>, [SoName]), + ok = erlang:load_nif(SoName, 0). + +get_priv_path(File) -> + case code:priv_dir(?MODULE) of + {error, bad_name} -> + Ebin = filename:dirname(code:which(?MODULE)), + filename:join([filename:dirname(Ebin), "priv", File]); + Dir -> + filename:join(Dir, File) + end. + +not_loaded(Line) -> + erlang:nif_error({not_loaded, [{module, ?MODULE}, {line, Line}]}). \ No newline at end of file diff --git a/src/nifSrc/enlfq/testing/benchmark.erl b/src/nifSrc/enlfq/testing/benchmark.erl new file mode 100644 index 0000000..05f9e92 --- /dev/null +++ b/src/nifSrc/enlfq/testing/benchmark.erl @@ -0,0 +1,71 @@ +-module(benchmark). +-author("silviu.caragea"). + +-export([ + benchmark_serial/2, + benchmark_concurrent/3 +]). + +benchmark_serial(Elements, MaxPriority) -> + rand:uniform(), %just to init the seed + {ok, Q} = enlfq:new(), + + {T0, ok} = timer:tc(fun() -> insert_none(Elements, MaxPriority) end), + {T1, ok} = timer:tc(fun() -> insert_item(Elements, Q, MaxPriority) end), + {T2, ok} = timer:tc(fun() -> remove_item(Q) end), + + T0Ms = T0/1000, + T1Ms = T1/1000, + T2Ms = T2/1000, + + io:format(<<"insert overhead: ~p ms insert time: ~p ms pop time: ~p ms ~n">>, [T0Ms, T1Ms, T2Ms]). + +benchmark_concurrent(Procs, Elements, MaxPriority) -> + {ok, Q} = enlfq:new(), + + ElsPerProcess = round(Elements/Procs), + + InsertNoneWorkFun = fun() -> + insert_none(ElsPerProcess, MaxPriority) + end, + + InsertWorkFun = fun() -> + insert_item(ElsPerProcess, Q, MaxPriority) + end, + + RemoveWorkFun = fun() -> + remove_item(Q) + end, + + {T0, _} = timer:tc(fun()-> multi_spawn:do_work(InsertNoneWorkFun, Procs) end), + {T1, _} = timer:tc(fun()-> multi_spawn:do_work(InsertWorkFun, Procs) end), + {T2, _} = timer:tc(fun()-> multi_spawn:do_work(RemoveWorkFun, Procs) end), + + T0Ms = T0/1000, + T1Ms = T1/1000, + T2Ms = T2/1000, + + io:format(<<"insert overhead: ~p ms insert time: ~p ms pop time: ~p ms ~n">>, [T0Ms, T1Ms, T2Ms]). + +insert_item(0, _Q, _Max) -> + ok; +insert_item(N, Q, Max) -> +%% El = rand:uniform(Max), + true = enlfq:push(Q,{}), + insert_item(N-1, Q, Max). + +remove_item(Q) -> + case enlfq:pop(Q) of + empty -> + ok; + {ok, _} -> + remove_item(Q) + end. + +insert_none(0, _Max) -> + ok; +insert_none(N, Max) -> +%% rand:uniform(Max), + insert_none(N-1, Max). 
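+%% Usage sketch: compare raw loop overhead against push/pop throughput.
+%% MaxPriority is effectively unused here (the rand:uniform/1 calls above are
+%% commented out), so any positive integer will do:
+%%
+%%   benchmark:benchmark_serial(1000000, 100).
+%%   benchmark:benchmark_concurrent(8, 1000000, 100).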
+ + diff --git a/src/nifSrc/enlfq/testing/multi_spawn.erl b/src/nifSrc/enlfq/testing/multi_spawn.erl new file mode 100644 index 0000000..3348e43 --- /dev/null +++ b/src/nifSrc/enlfq/testing/multi_spawn.erl @@ -0,0 +1,23 @@ +-module(multi_spawn). +-author("silviu.caragea"). + +-export([do_work/2]). + +do_work(Fun, Count) -> + process_flag(trap_exit, true), + spawn_childrens(Fun, Count), + wait_responses(Count). + +spawn_childrens(_Fun, 0) -> + ok; +spawn_childrens(Fun, Count) -> + spawn_link(Fun), + spawn_childrens(Fun, Count -1). + +wait_responses(0) -> + ok; +wait_responses(Count) -> + receive + {'EXIT',_FromPid, _Reason} -> + wait_responses(Count -1) + end. \ No newline at end of file diff --git a/src/nifSrc/enq/enq.erl b/src/nifSrc/enq/enq.erl new file mode 100644 index 0000000..9bb068d --- /dev/null +++ b/src/nifSrc/enq/enq.erl @@ -0,0 +1,159 @@ +%%%----------------------------------------------------------------------------- +%%% @author s@shuvatov.ru +%%% @copyright 2018 Sergei Shuvatov +%%% @doc +%%% Native implemented queue with TTL. +%%% By default queue type is FIFO and TTL is 0 (disabled), size unlimited. +%%% Usage: +%%% {ok, Q} = enq:new([fifo, +%%% {ttl, 10000}, % 10 seconds +%%% {max_size, 1000}]), % maximum 1000 elements +%%% ok = enq:push(Q, test), % push atom 'test' to the queue +%%% [test] = enq:pop(Q), % pop one element from the queue +%%% [] = enq:pop(Q), % pop returns empty list if the queue is empty +%%% % pushed item can be any term +%%% ok = enq:push(Q, fun() -> io:format("some important job~n") end), +%%% 1 = enq:size(Q), % you can take length of the queue as efficiently as O(1) +%%% @end +%%%----------------------------------------------------------------------------- +-module(enq). +-author("Sergei Shuvatov"). + +%% API +-export([new/0, + new/1, + push/2, + pop/1, + size/1]). + +-export_type([queue/0, option/0, error/0]). + +-type queue() :: reference(). +-type option() :: fifo | + lifo | + {ttl, Microseconds :: non_neg_integer()} | + {max_size, Count :: non_neg_integer()}. +-type error() :: max_size. + +%%============================================================================== +%% API +%%============================================================================== + +%% Same as enq:new([fifo, {ttl, 0}]). +-spec new() -> {ok, enq:queue()} | {error, enq:error()}. +new() -> + new([]). + +%% Returns a new queue or error in case of memory allocation error. +-spec new([option()]) -> {ok, enq:queue()} | {error, enq:error()}. +new(Options) -> + enq_nif:new(Options). + +%% Pushes Item on top (LIFO) or tail (FIFO) of Queue. +-spec push(Queue :: enq:queue(), Item :: any()) -> ok | {error, enq:error()}. +push(Queue, Item) -> + enq_nif:push(Queue, erlang:term_to_binary(Item)). + +%% Returns next item from the Queue. +-spec pop(Queue :: enq:queue()) -> [] | [any()]. +pop(Queue) -> + [ erlang:binary_to_term(I) || I <- enq_nif:pop(Queue) ]. + +%% Returns Queue length. Speed does not depend on number of elements. +-spec size(Queue :: enq:queue()) -> non_neg_integer(). +size(Queue) -> + enq_nif:size(Queue). + +%%============================================================================== +%% Tests +%%============================================================================== + +-ifdef(TEST). + +-include_lib("eunit/include/eunit.hrl"). + +-define(log(F, A), io:format(standard_error, "~p:line ~p: " F "~n", [?FILE, ?LINE | A])). +-define(log(F), ?log(F, [])). + +fifo_test() -> + fifo_test(1000000). 
+ +fifo_test(N) -> + {ok, Q} = enq:new(), + T1 = erlang:timestamp(), + % fill the queue with N elements + fill(Q, N), + Diff1 = timer:now_diff(erlang:timestamp(), T1), + ?log("FIFO fill time: ~p ms", [Diff1 / 1000]), + % ensure that size of queue matches N + N = enq:size(Q), + T2 = erlang:timestamp(), + % pop all elements + fifo_pop_all(Q, N), + Diff2 = timer:now_diff(erlang:timestamp(), T2), + ?log("FIFO pop time: ~p ms", [Diff2 / 1000]), + % size of the queue must be 0 + 0 = enq:size(Q). + +fill(_Q, 0) -> + ok; +fill(Q, N) -> + ok = enq:push(Q, N), + fill(Q, N - 1). + +fifo_pop_all(Q, 0) -> + [] = enq:pop(Q); +fifo_pop_all(Q, N) -> + [N] = enq:pop(Q), + fifo_pop_all(Q, N - 1). + +ttl_test() -> + {ok, Q} = enq:new([{ttl, 100}]), + enq:push(Q, test), + timer:sleep(95), + [test] = enq:pop(Q), + [] = enq:pop(Q), + enq:push(Q, test), + timer:sleep(105), + [] = enq:pop(Q). + +lifo_test() -> + lifo_test(1000000). + +lifo_test(N) -> + {ok, Q} = enq:new([lifo]), + T1 = erlang:timestamp(), + % fill the queue with N elements + fill(Q, N), + Diff1 = timer:now_diff(erlang:timestamp(), T1), + ?log("LIFO fill time: ~p ms", [Diff1 / 1000]), + % ensure that size of queue matches N + N = enq:size(Q), + T2 = erlang:timestamp(), + % pop all elements + lifo_pop_all(Q, N), + Diff2 = timer:now_diff(erlang:timestamp(), T2), + ?log("LIFO pop time: ~p ms", [Diff2 / 1000]), + % size of the queue must be 0 + 0 = enq:size(Q). + +lifo_pop_all(Q, N) -> + lifo_pop_all(Q, 1, N). + +lifo_pop_all(Q, I, N) when I > N -> + [] = enq:pop(Q); +lifo_pop_all(Q, I, N) -> + [I] = enq:pop(Q), + lifo_pop_all(Q, I + 1, N). + +max_size_test() -> + {ok, Q} = enq:new([{ttl, 100}, {max_size, 1}]), + ok = enq:push(Q, test), + timer:sleep(50), + {error, max_size} = enq:push(Q, 123), + timer:sleep(55), + ok = enq:push(Q, 321), + [321] = enq:pop(Q), + [] = enq:pop(Q). + +-endif. % TEST \ No newline at end of file diff --git a/src/nifSrc/enq/enq_nif.erl b/src/nifSrc/enq/enq_nif.erl new file mode 100644 index 0000000..d2cf618 --- /dev/null +++ b/src/nifSrc/enq/enq_nif.erl @@ -0,0 +1,63 @@ +%%%------------------------------------------------------------------- +%%% @author s@shuvatov.ru +%%% @copyright 2018 Sergei Shuvatov +%%%------------------------------------------------------------------- +-module(enq_nif). +-author("Sergei Shuvatov"). + +%% API +-export([new/1, + push/2, + pop/1, + size/1]). + +-on_load(load_nif/0). + +-define(app, enq). +-define(log(F, A), io:format(standard_error, "~p:~p: " F, [?MODULE, ?LINE | A])). +-define(not_loaded(), not_loaded(?LINE)). + +%%============================================================================== +%% API +%%============================================================================== + +new(_Options) -> + ?not_loaded(). + +push(_Queue, _Item) -> + ?not_loaded(). + +pop(_Queue) -> + ?not_loaded(). + +size(_Queue) -> + ?not_loaded(). + +%%============================================================================== +%% Internal functions +%%============================================================================== + +load_nif() -> + SoName = get_priv_path(?MODULE), + % ?log("Loading library: ~p ~n", [SoName]), + ok = erlang:load_nif(SoName, 0). + +get_priv_path(File) -> + case code:priv_dir(get_app()) of + {error, bad_name} -> + Ebin = filename:dirname(code:which(?MODULE)), + filename:join([filename:dirname(Ebin), "priv", File]); + Dir -> + filename:join(Dir, File) + end. + +get_app() -> + case application:get_application(?MODULE) of + {ok, App} -> + App; + _ -> + ?app + end. 
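+%% Loading sketch (hedged, assuming a conventional rebar layout): the NIF is
+%% looked up under the application's priv directory, falling back to the
+%% ../priv directory next to ebin when code:priv_dir/1 fails, e.g.
+%%
+%%   _build/default/lib/enq/ebin/enq_nif.beam
+%%   _build/default/lib/enq/priv/enq_nif.so   % SoName passed to erlang:load_nif/2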
+ +not_loaded(Line) -> + erlang:nif_error({not_loaded, [{module, ?MODULE}, {line, Line}]}). diff --git a/src/nifSrc/etsq/etsq.erl b/src/nifSrc/etsq/etsq.erl new file mode 100644 index 0000000..4f34279 --- /dev/null +++ b/src/nifSrc/etsq/etsq.erl @@ -0,0 +1,103 @@ +%% @author vinod +%% @doc @todo Add description to ets_queue. + + +-module(etsq). +-on_load(load_nif/0). + +-export([load_nif/0, + new/1, + info/1, + push/2, + pop/1, + front/1]). + +%% ==================================================================== +%% API functions +%% ==================================================================== + +-define(LIB_BASE_NAME, "etsq"). +-define(LIB_NIF_VSN, 1). +-define(LIB_APP_NAME, etsq). + +-spec new(atom()) -> ok | {error, already_exists}. +new(_Name) -> + erlang:nif_error({nif_not_loaded,module,?MODULE,line,?LINE}). + +-spec info(atom()) -> ok. +info(_Name) -> + erlang:nif_error({nif_not_loaded,module,?MODULE,line,?LINE}). + +-spec push(atom(), term()) -> ok. +push(Name, Term) -> + push_back(Name, term_to_binary(Term)). + +-spec pop(atom()) -> ok | {error, empty}. +pop(Name) -> + get_val(pop_front(Name)). + +-spec front(atom()) -> ok | {error, empty}. +front(Name) -> + get_val(get_front(Name)). + + +get_val(Value) when is_binary(Value) -> + binary_to_term(Value); +get_val(Value) -> + Value. + +push_back(_Name, _Bin) -> + erlang:nif_error({nif_not_loaded,module,?MODULE,line,?LINE}). +pop_front(_Name) -> + erlang:nif_error({nif_not_loaded,module,?MODULE,line,?LINE}). +get_front(_Name) -> + erlang:nif_error({nif_not_loaded,module,?MODULE,line,?LINE}). + +-spec load_nif() -> ok | {error, term()}. +load_nif() -> + LibBaseName = ?LIB_BASE_NAME, + PrivDir = code:priv_dir(etsq), + LibName = case erlang:system_info(build_type) of + opt -> + LibBaseName; + Type -> + LibTypeName = LibBaseName ++ "." ++ atom_to_list(Type), + case (filelib:wildcard( + filename:join( + [PrivDir, + "lib", + LibTypeName ++ "*"])) /= []) orelse + (filelib:wildcard( + filename:join( + [PrivDir, + "lib", + erlang:system_info(system_architecture), + LibTypeName ++ "*"])) /= []) of + true -> LibTypeName; + false -> LibBaseName + end + end, + Lib = filename:join([PrivDir, "lib", LibName]), + Status = case erlang:load_nif(Lib, ?LIB_NIF_VSN) of + ok -> ok; + {error, {load_failed, _}}=Error1 -> + ArchLibDir = + filename:join([PrivDir, "lib", + erlang:system_info(system_architecture)]), + Candidate = + filelib:wildcard(filename:join([ArchLibDir,LibName ++ "*" ])), + case Candidate of + [] -> Error1; + _ -> + ArchLib = filename:join([ArchLibDir, LibName]), + erlang:load_nif(ArchLib, ?LIB_NIF_VSN) + end; + Error1 -> Error1 + end, + case Status of + ok -> ok; + {error, {E, Str}} -> + error_logger:error_msg("Unable to load ~p nif library. " + "Failed with error:~n\"~p, ~s\"~n", [?LIB_APP_NAME, E, Str]), + Status + end. diff --git a/src/nifSrc/etsq/etsq_tests.erl b/src/nifSrc/etsq/etsq_tests.erl new file mode 100644 index 0000000..45a2108 --- /dev/null +++ b/src/nifSrc/etsq/etsq_tests.erl @@ -0,0 +1,65 @@ +%% @author vinod +%% @doc @todo Add description to etsq_tests. + + +-module(etsq_tests). +-compile(export_all). + +-export([init/0, + time/3, + stats/3]). + +-type microseconds() :: pos_integer(). +-type milliseconds() :: pos_integer(). + +%% ==================================================================== +%% API functions +%% ==================================================================== + +init() -> + etsq:new(queue), + ets:new(tab, [named_table, public]). 
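+%% Usage sketch: initialise both containers once, then time the same number
+%% of operations against ets and against the NIF-backed queue, e.g.
+%%
+%%   etsq_tests:init(),
+%%   EtsUs   = etsq_tests:time(4, run_ets,   100000),   % microseconds
+%%   QueueUs = etsq_tests:time(4, run_queue, 100000).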
+ +-spec time(run_ets | run_queue, pos_integer()) -> microseconds(). +time(Op, NumOp) -> + {Time, _} = timer:tc(?MODULE, Op, [NumOp]), + Time. + +-spec time(pos_integer(), run_ets | run_queue, pos_integer()) -> microseconds(). +time(NumProc, Op, NumOp) -> + {Time, _} = timer:tc(?MODULE, spawn, [NumProc, Op, NumOp]), + Time. + +-spec stats(run_ets | run_queue, pos_integer()) -> milliseconds(). +stats(Op, NumOp) -> + erlang:statistics(runtime), + ?MODULE:Op(NumOp), + {_, Time} = erlang:statistics(runtime), + Time. + +-spec stats(pos_integer(), run_ets | run_queue, pos_integer()) -> milliseconds(). +stats(NumProc, Op, NumOp) -> + erlang:statistics(runtime), + ?MODULE:spawn(NumProc, Op, NumOp), + {_, Time} = erlang:statistics(runtime), + Time. + +run_ets(Num) -> + Self = self(), + Data = lists:seq(1, 100), + L = lists:seq(1, Num), + [ets:insert(tab, {{Self, K}, Data}) || K <- L], + [ets:take(tab, {Self, K}) || K <- L]. + +run_queue(Num) -> + Self = self(), + Data = lists:seq(1, 100), + L = lists:seq(1, Num), + [etsq:push(queue, {{Self, K}, Data}) || K <- L], + [etsq:pop(queue) || _ <- L]. + +spawn(NumProc, Op, NumOp) -> + Pid = self(), + L = lists:seq(1, NumProc), + [spawn_link(fun() -> ?MODULE:Op(NumOp), Pid ! done end) || _ <- L], + [receive done -> ok end || _ <- L]. diff --git a/src/nifSrc/gb_lru/btree_lru.erl b/src/nifSrc/gb_lru/btree_lru.erl new file mode 100644 index 0000000..4238d9b --- /dev/null +++ b/src/nifSrc/gb_lru/btree_lru.erl @@ -0,0 +1,102 @@ +-module(btree_lru). + +-export([create/1, + close/1, + register_pid/2, + unregister_pid/1, + get_registered_pid/1, + set_max_size/2, + get_max_size/1, + get_size/1, + write/2, + write/3, + read/2, + next/2, + prev/2, + remove/2, + seek/2, + iterate_next/2, + oldest/1, + latest/1, + last/1, + first/1]). + + + +-on_load(init/0). + +init() -> + Dir = "../priv", + PrivDir = + case code:priv_dir(?MODULE) of + {error, _} -> + case code:which(?MODULE) of + Filename when is_list(Filename) -> + filename:join([filename:dirname(Filename), Dir]); + _ -> + Dir + end; + Path -> Path + end, + Lib = filename:join(PrivDir, "btreelru_nif"), + erlang:load_nif(Lib, 0). + +write(Tab, {Key, Value}) -> + write(Tab, Key, Value). + +create(_Maxsize) -> + erlang:nif_error(nif_library_not_loaded). + +register_pid(_Tab, _Pid) -> + erlang:nif_error(nif_library_not_loaded). + +unregister_pid(_Tab) -> + erlang:nif_error(nif_library_not_loaded). + +get_registered_pid(_Tab) -> + erlang:nif_error(nif_library_not_loaded). + +set_max_size(_Tab, _MaxSize) -> + erlang:nif_error(nif_library_not_loaded). + +get_max_size(_Tab) -> + erlang:nif_error(nif_library_not_loaded). + +get_size(_Tab) -> + erlang:nif_error(nif_library_not_loaded). + +write(_Tab, _Key, _Value) -> + erlang:nif_error(nif_library_not_loaded). + +read(_Tab, _Key) -> + erlang:nif_error(nif_library_not_loaded). + +next(_Tab, _Key) -> + erlang:nif_error(nif_library_not_loaded). + +prev(_Tab, _Key) -> + erlang:nif_error(nif_library_not_loaded). + +remove(_Tab, _Key) -> + erlang:nif_error(nif_library_not_loaded). + +seek(_Tab, _Key) -> + erlang:nif_error(nif_library_not_loaded). + +iterate_next(_Tab, _It) -> + erlang:nif_error(nif_library_not_loaded). + +oldest(_Tab) -> + erlang:nif_error(nif_library_not_loaded). + +latest(_Tab) -> + erlang:nif_error(nif_library_not_loaded). + +close(_Tab) -> + erlang:nif_error(nif_library_not_loaded). + +last(_Tab) -> + erlang:nif_error(nif_library_not_loaded). + +first(_Tab) -> + erlang:nif_error(nif_library_not_loaded). 
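+%% Usage sketch (return shapes as exercised by btree_lru_test.erl; close/1's
+%% return value is not asserted anywhere, so it is left unmatched here):
+%%
+%%   {ok, Tab} = btree_lru:create(64 * 1024 * 1024),
+%%   ok = btree_lru:write(Tab, {key1, value1}),   % write/2 wraps write/3
+%%   {key1, value1} = btree_lru:read(Tab, key1),
+%%   _ = btree_lru:close(Tab).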
diff --git a/src/nifSrc/gb_lru/btree_lru_test.erl b/src/nifSrc/gb_lru/btree_lru_test.erl new file mode 100644 index 0000000..8d5cd22 --- /dev/null +++ b/src/nifSrc/gb_lru/btree_lru_test.erl @@ -0,0 +1,59 @@ +-module(btree_lru_test). + +-compile(export_all). + +-export([create/0, + create/1]). + + + +create() -> + create(1024*1024*1024*1000). + +create(Size) -> + {ok, _Tab} = btree_lru:create(Size). + + +write(Tab) -> + Objs = [{X,X} || X <- lists:seq(1,10000000)], + write(Tab, Objs). + +write(Tab, [Obj | Objs]) -> + ok = btree_lru:write(Tab, Obj), + write(Tab, Objs); +write(_Tab, []) -> + ok. + +read(Tab, [{K,D} | Objs]) -> + {K,D} = btree_lru:read(Tab, K), + read(Tab, Objs); +read(_Tab, []) -> + ok. + +timing_write(Tab) -> + Objs = [{X,X} || X <- lists:seq(1,10000000)], + timer:tc(?MODULE, write, [Tab, Objs]). +timing_read(Tab) -> + Objs = [{X,X} || X <- lists:seq(1,10000000)], + timer:tc(?MODULE, read, [Tab, Objs]). + +timing_ets_write(Tab) -> + Objs = [{X,X} || X <- lists:seq(1,10000000)], + timer:tc(?MODULE, ets_write, [Tab, Objs]). + +timing_ets_read(Tab) -> + Objs = [{X,X} || X <- lists:seq(1,10000000)], + timer:tc(?MODULE, ets_read, [Tab, Objs]). + +ets_write(Tab, [Obj | Objs]) -> + true = ets:insert(Tab, Obj), + ets_write(Tab, Objs); +ets_write(_Tab, []) -> + ok. + +ets_read(Tab, [{K,D} | Objs]) -> + [{K,D}] = ets:lookup(Tab, K), + ets_read(Tab, Objs); +ets_read(_Tab, []) -> + ok. + diff --git a/src/nifSrc/gb_lru/gb_lru.app.src b/src/nifSrc/gb_lru/gb_lru.app.src new file mode 100644 index 0000000..efff174 --- /dev/null +++ b/src/nifSrc/gb_lru/gb_lru.app.src @@ -0,0 +1,6 @@ +{application, gb_lru, + [{description, "gb_lru"}, + {vsn, "0.1"}, + {registered, []}, + {applications, []} + ]}. diff --git a/src/nifSrc/native_array/native_array.erl b/src/nifSrc/native_array/native_array.erl new file mode 100644 index 0000000..09e7ab8 --- /dev/null +++ b/src/nifSrc/native_array/native_array.erl @@ -0,0 +1,19 @@ +-module(native_array). +-export([new/2, get/2, put/3, delete/1]). +-on_load(init/0). + +init() -> + ok = erlang:load_nif("./native_array_nif", 0). + +new(_Idx, _Length) -> + exit(nif_library_not_loaded). + +get(_Idx, _Offset) -> + exit(nif_library_not_loaded). + +put(_Idx, _Offset, _NewVal) -> + exit(nif_library_not_loaded). + +delete(_Idx) -> + exit(nif_library_not_loaded). +
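+%% Usage sketch (hedged: argument meanings inferred from the stubs above,
+%% return values are whatever native_array_nif provides). Note that init/0
+%% loads "./native_array_nif" relative to the current working directory, so
+%% the shared library must sit next to where the VM is started:
+%%
+%%   native_array:new(1, 1024),      % array #1 with 1024 slots
+%%   native_array:put(1, 10, 255),
+%%   Val = native_array:get(1, 10),
+%%   native_array:delete(1).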