@ -0,0 +1,15 @@ | |||
# Top-level driver: build every NIF sub-directory, ensuring the shared
# ../priv/ output directory exists first.
#
# FIX: directory discovery used `ls -l | grep ^d | awk '{print $9}'`,
# which breaks on filenames with spaces and non-standard `ls` layouts;
# $(wildcard */) is the portable Make idiom.  Recursion now uses
# $(MAKE) so -j / -n and the jobserver propagate, and command targets
# are declared .PHONY.
AllNifDirs := $(patsubst %/,%,$(wildcard */))
CurDir := $(CURDIR)
# Parse-time side effect: create the shared priv/ output directory.
MdPriv := $(shell mkdir -p $(CurDir)/../priv/)

.PHONY: all clean ECHO $(AllNifDirs)

all: $(AllNifDirs)

# Debug helper: print the discovered NIF directories.
ECHO:
	@echo $(AllNifDirs)

$(AllNifDirs): ECHO
	@$(MAKE) -C $@

clean:
	@for OneNifDir in $(AllNifDirs); do $(MAKE) -C $$OneNifDir clean; done
@ -0,0 +1,73 @@ | |||
# Based on c_src.mk from erlang.mk by Loic Hoguin <essen@ninenines.eu>
# Compiles every C/C++ source in this directory into a single NIF
# shared object at ../../priv/<dirname>.so.

CURDIR := $(shell pwd)
BASEDIR := $(abspath $(CURDIR))

PROJECT ?= $(notdir $(BASEDIR))
PROJECT := $(strip $(PROJECT))

# Ask the erl found in PATH where the ERTS and erl_interface
# headers/libraries live, so the build matches the runtime.
ERTS_INCLUDE_DIR ?= $(shell erl -noshell -s init stop -eval "io:format(\"~s/erts-~s/include/\", [code:root_dir(), erlang:system_info(version)]).")
ERL_INTERFACE_INCLUDE_DIR ?= $(shell erl -noshell -s init stop -eval "io:format(\"~s\", [code:lib_dir(erl_interface, include)]).")
ERL_INTERFACE_LIB_DIR ?= $(shell erl -noshell -s init stop -eval "io:format(\"~s\", [code:lib_dir(erl_interface, lib)]).")

C_SRC_DIR = $(CURDIR)
C_SRC_OUTPUT ?= $(CURDIR)/../../priv/$(PROJECT).so

# System type and C compiler/flags.
UNAME_SYS := $(shell uname -s)
ifeq ($(UNAME_SYS), Darwin)
  CC ?= cc
  CFLAGS ?= -O3 -std=c99 -arch x86_64 -finline-functions -Wall -Wmissing-prototypes
  CXXFLAGS ?= -O3 -arch x86_64 -finline-functions -Wall
  LDFLAGS ?= -arch x86_64 -flat_namespace -undefined suppress
else ifeq ($(UNAME_SYS), FreeBSD)
  CC ?= cc
  CFLAGS ?= -O3 -std=c99 -finline-functions -Wall -Wmissing-prototypes
  CXXFLAGS ?= -O3 -finline-functions -Wall
else ifeq ($(UNAME_SYS), Linux)
  CC ?= gcc
  CFLAGS ?= -O3 -std=c99 -finline-functions -Wall -Wmissing-prototypes
  CXXFLAGS ?= -O3 -finline-functions -Wall
endif

CFLAGS += -fPIC -I $(ERTS_INCLUDE_DIR) -I $(ERL_INTERFACE_INCLUDE_DIR)
CXXFLAGS += -fPIC -I $(ERTS_INCLUDE_DIR) -I $(ERL_INTERFACE_INCLUDE_DIR)

# NOTE(review): liberl_interface was removed in OTP 23; on modern OTP
# only -lei exists -- confirm the targeted OTP version before dropping it.
LDLIBS += -L $(ERL_INTERFACE_LIB_DIR) -lerl_interface -lei
LDFLAGS += -shared

# Verbosity: V=1 echoes full command lines, default prints a short tag.
c_verbose_0 = @echo " C " $(?F);
c_verbose = $(c_verbose_$(V))

cpp_verbose_0 = @echo " CPP " $(?F);
cpp_verbose = $(cpp_verbose_$(V))

link_verbose_0 = @echo " LD " $(@F);
link_verbose = $(link_verbose_$(V))

SOURCES := $(shell find $(C_SRC_DIR) -type f \( -name "*.c" -o -name "*.C" -o -name "*.cc" -o -name "*.cpp" \))
OBJECTS = $(addsuffix .o, $(basename $(SOURCES)))

COMPILE_C = $(c_verbose) $(CC) $(CFLAGS) $(CPPFLAGS) -c
COMPILE_CPP = $(cpp_verbose) $(CXX) $(CXXFLAGS) $(CPPFLAGS) -c

$(C_SRC_OUTPUT): $(OBJECTS)
	@mkdir -p $(dir $@)
	$(link_verbose) $(CC) $(OBJECTS) $(LDFLAGS) $(LDLIBS) -o $(C_SRC_OUTPUT)

%.o: %.c
	$(COMPILE_C) $(OUTPUT_OPTION) $<

%.o: %.cc
	$(COMPILE_CPP) $(OUTPUT_OPTION) $<

%.o: %.C
	$(COMPILE_CPP) $(OUTPUT_OPTION) $<

%.o: %.cpp
	$(COMPILE_CPP) $(OUTPUT_OPTION) $<

# clean is a command, not a file: declare it .PHONY so a file named
# "clean" cannot mask it.
.PHONY: clean
clean:
	@$(RM) $(C_SRC_OUTPUT) $(OBJECTS)
@ -0,0 +1,564 @@ | |||
#include <stdio.h> | |||
#include <unistd.h> | |||
#include "erl_nif.h" | |||
#include "cq_nif.h" | |||
/* #ifndef ERL_NIF_DIRTY_SCHEDULER_SUPPORT | |||
# error Requires dirty schedulers | |||
#endif */ | |||
ERL_NIF_TERM | |||
mk_atom(ErlNifEnv* env, const char* atom) | |||
{ | |||
ERL_NIF_TERM ret; | |||
if(!enif_make_existing_atom(env, atom, &ret, ERL_NIF_LATIN1)) | |||
return enif_make_atom(env, atom); | |||
return ret; | |||
} | |||
ERL_NIF_TERM | |||
mk_error(ErlNifEnv* env, const char* mesg) | |||
{ | |||
return enif_make_tuple2(env, mk_atom(env, "error"), mk_atom(env, mesg)); | |||
} | |||
static ERL_NIF_TERM | |||
queue_new(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) | |||
{ | |||
cq_t *q = enif_alloc_resource(CQ_RESOURCE, sizeof(cq_t)); | |||
if (q == NULL) | |||
return mk_error(env, "priv_alloc_error"); | |||
ERL_NIF_TERM ret = enif_make_resource(env, q); | |||
/* enif_release_resource(ret); */ | |||
uint32_t queue_id = 0; | |||
uint32_t queue_size = 0; | |||
uint32_t overflow_size = 0; | |||
if (!enif_get_uint(env, argv[0], &queue_id) || | |||
!enif_get_uint(env, argv[1], &queue_size) || | |||
!enif_get_uint(env, argv[2], &overflow_size)) | |||
return mk_error(env, "badarg"); | |||
if (queue_id > 8) | |||
return mk_error(env, "bad_queue_id"); | |||
/* TODO: Check that queue_size is power of 2 */ | |||
if (QUEUES[queue_id] != NULL) | |||
return mk_error(env, "queue_id_already_exists"); | |||
q->id = queue_id; | |||
q->queue_size = queue_size; | |||
q->overflow_size = overflow_size; | |||
q->tail = 0; | |||
q->head = 0; | |||
q->slots_states = calloc(q->queue_size, CACHE_LINE_SIZE); | |||
q->slots_terms = calloc(q->queue_size, CACHE_LINE_SIZE); | |||
q->slots_envs = calloc(q->queue_size, CACHE_LINE_SIZE); | |||
q->overflow_terms = calloc(q->overflow_size, CACHE_LINE_SIZE); | |||
q->overflow_envs = calloc(q->queue_size, CACHE_LINE_SIZE); | |||
q->push_queue = new_queue(); | |||
q->pop_queue = new_queue(); | |||
/* TODO: Check calloc return */ | |||
for (int i = 0; i < q->queue_size; i++) { | |||
ErlNifEnv *slot_env = enif_alloc_env(); | |||
q->slots_envs[i*CACHE_LINE_SIZE] = slot_env; | |||
//q->overflow_envs[i*CACHE_LINE_SIZE] = (ErlNifEnv *) enif_alloc_env(); | |||
} | |||
QUEUES[q->id] = q; | |||
return enif_make_tuple2(env, mk_atom(env, "ok"), ret); | |||
} | |||
static ERL_NIF_TERM | |||
queue_free(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) | |||
{ | |||
uint32_t queue_id = 0; | |||
if (!enif_get_uint(env, argv[0], &queue_id)) | |||
return mk_error(env, "badarg"); | |||
if (queue_id > 8) | |||
return mk_error(env, "badarg"); | |||
cq_t *q = QUEUES[queue_id]; | |||
if (q == NULL) | |||
return mk_error(env, "bad_queue_id"); | |||
/* TODO: Free all the things! */ | |||
QUEUES[queue_id] = NULL; | |||
return enif_make_atom(env, "ok"); | |||
} | |||
/* Push to the head of the queue. */ | |||
static ERL_NIF_TERM | |||
queue_push(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) | |||
{ | |||
uint32_t queue_id = 0; | |||
if (!enif_get_uint(env, argv[0], &queue_id)) | |||
return mk_error(env, "badarg"); | |||
if (queue_id > 8) | |||
return mk_error(env, "badarg"); | |||
/* Load the queue */ | |||
cq_t *q = QUEUES[queue_id]; | |||
if (q == NULL) | |||
return mk_error(env, "bad_queue_id"); | |||
if (q->id != queue_id) | |||
return mk_error(env, "not_identical_queue_id"); | |||
for (int i = 0; i < q->queue_size; i++) { | |||
fprintf(stderr, "queue slot %d, index %d, state %d\n", | |||
i, i*CACHE_LINE_SIZE, q->slots_states[i*CACHE_LINE_SIZE]); | |||
} | |||
/* If there's consumers waiting, the queue must be empty and we | |||
should directly pick a consumer to notify. */ | |||
ErlNifPid *waiting_consumer; | |||
int dequeue_ret = dequeue(q->pop_queue, &waiting_consumer); | |||
if (dequeue_ret) { | |||
ErlNifEnv *msg_env = enif_alloc_env(); | |||
ERL_NIF_TERM copy = enif_make_copy(msg_env, argv[1]); | |||
ERL_NIF_TERM tuple = enif_make_tuple2(msg_env, mk_atom(env, "pop"), copy); | |||
if (enif_send(env, waiting_consumer, msg_env, tuple)) { | |||
enif_free_env(msg_env); | |||
return mk_atom(env, "ok"); | |||
} else { | |||
return mk_error(env, "notify_failed"); | |||
} | |||
} | |||
/* Increment head and attempt to claim the slot by marking it as | |||
busy. This ensures no other thread will attempt to modify this | |||
slot. If we cannot lock it, another thread must have */ | |||
uint64_t head = __sync_add_and_fetch(&q->head, 1); | |||
size_t size = q->queue_size; | |||
while (1) { | |||
uint64_t index = SLOT_INDEX(head, size); | |||
uint64_t ret = __sync_val_compare_and_swap(&q->slots_states[index], | |||
STATE_EMPTY, | |||
STATE_WRITE); | |||
switch (ret) { | |||
case STATE_EMPTY: | |||
head = __sync_add_and_fetch(&q->head, 1); | |||
case STATE_WRITE: | |||
/* We acquired the write lock, go ahead with the write. */ | |||
break; | |||
case STATE_FULL: | |||
/* We have caught up with the tail and the buffer is | |||
full. Block the producer until a consumer reads the | |||
item. */ | |||
return mk_error(env, "full_not_implemented"); | |||
} | |||
} | |||
/* If head catches up with tail, the queue is full. Add to | |||
overflow instead */ | |||
/* Copy term to slot-specific temporary process env. */ | |||
ERL_NIF_TERM copy = enif_make_copy(q->slots_envs[SLOT_INDEX(head, size)], argv[1]); | |||
q->slots_terms[SLOT_INDEX(head, size)] = copy; | |||
__sync_synchronize(); /* Or compiler memory barrier? */ | |||
/* TODO: Do we need to collect garbage? */ | |||
/* Mark the slot ready to be consumed */ | |||
if (__sync_bool_compare_and_swap(&q->slots_states[SLOT_INDEX(head, size)], | |||
STATE_WRITE, | |||
STATE_FULL)) { | |||
return mk_atom(env, "ok"); | |||
} else { | |||
return mk_error(env, "could_not_update_slots_after_insert"); | |||
} | |||
} | |||
/* async_pop(QueueId) -> {ok, Term} | wait_for_msg | {error, Reason}
 *
 * Attempt to consume one term from the ring buffer.  If the queue is
 * empty, the calling process is enqueued on pop_queue and later woken
 * by a producer via enif_send (it should then block in receive). */
static ERL_NIF_TERM
queue_async_pop(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
{
    /* Load queue */
    uint32_t queue_id = 0;
    if (!enif_get_uint(env, argv[0], &queue_id))
        return mk_error(env, "badarg");
    if (queue_id > 8)
        return mk_error(env, "badarg");

    cq_t *q = QUEUES[queue_id];
    if (q == NULL)
        return mk_error(env, "bad_queue_id");
    if (q->id != queue_id)
        return mk_error(env, "not_identical_queue_id");

    uint64_t qsize = q->queue_size;
    uint64_t tail = q->tail;
    uint64_t num_busy = 0;  /* NOTE(review): never used. */

    /* Walk the buffer starting the tail position until we are either
       able to consume a term or find an empty slot. */
    while (1) {
        uint64_t index = SLOT_INDEX(tail, qsize);
        /* CAS FULL -> READ; returns the slot's previous state. */
        uint64_t ret = __sync_val_compare_and_swap(&q->slots_states[index],
                                                   STATE_FULL,
                                                   STATE_READ);
        /* NOTE(review): success is when ret == STATE_FULL (old value);
           this branch tests STATE_READ -- confirm intended semantics. */
        if (ret == STATE_READ) {
            /* We were able to mark the term as read in progress. We
               now have an exclusive lock. */
            break;
        } else if (ret == STATE_WRITE) {
            /* We found an item with a write in progress. If that
               thread progresses, it will eventually mark the slot as
               full. We can spin until that happens.

               This can take an arbitrary amount of time and multiple
               reading threads will compete for the same slot.

               Instead we add the caller to the queue of blocking
               consumers. When the next producer comes it will "help"
               this thread by calling enif_send on the current
               in-progress term *and* handle it's own terms. If
               there's no new push to the queue, this will block
               forever. */
            return mk_atom(env, "write_in_progress_not_implemented");
        } else if (ret == STATE_EMPTY) {
            /* We found an empty item. Queue must be empty. Add
               calling Erlang consumer process to queue of waiting
               processes. When the next producer comes along, it first
               checks the waiting consumers and calls enif_send
               instead of writing to the slots. */
            /* NOTE(review): pid is enif_alloc'd per call and ownership
               passes to the pop_queue; nothing visibly frees it. */
            ErlNifPid *pid = enif_alloc(sizeof(ErlNifPid));
            pid = enif_self(env, pid);
            enqueue(q->pop_queue, pid);
            return mk_atom(env, "wait_for_msg");
        } else {
            /* Slot busy (another reader); advance shared tail, retry. */
            tail = __sync_add_and_fetch(&q->tail, 1);
        }
    }

    /* Copy term into calling process env. The NIF env can now be
       gargbage collected. */
    ERL_NIF_TERM copy = enif_make_copy(env, q->slots_terms[SLOT_INDEX(tail, qsize)]);

    /* Mark the slot as free. Note: We don't increment the tail
       position here, as another thread also walking the buffer might
       have incremented it multiple times */
    q->slots_terms[SLOT_INDEX(tail, qsize)] = 0;

    if (__sync_bool_compare_and_swap(&q->slots_states[SLOT_INDEX(tail, qsize)],
                                     STATE_READ,
                                     STATE_EMPTY)) {
        return enif_make_tuple2(env, mk_atom(env, "ok"), copy);
    } else {
        return mk_error(env, "could_not_update_slots_after_pop");
    }
}
static ERL_NIF_TERM | |||
queue_debug(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) | |||
{ | |||
uint32_t queue_id = 0; | |||
if (!enif_get_uint(env, argv[0], &queue_id)) | |||
return mk_error(env, "badarg"); | |||
if (queue_id > 8) | |||
return mk_error(env, "badarg"); | |||
cq_t *q = QUEUES[queue_id]; | |||
if (q == NULL) | |||
return mk_error(env, "bad_queue_id"); | |||
ERL_NIF_TERM *slots_states = enif_alloc(sizeof(ERL_NIF_TERM) * q->queue_size); | |||
ERL_NIF_TERM *slots_terms = enif_alloc(sizeof(ERL_NIF_TERM) * q->queue_size); | |||
for (int i = 0; i < q->queue_size; i++) { | |||
slots_states[i] = enif_make_int(env, q->slots_states[i * CACHE_LINE_SIZE]); | |||
if (q->slots_terms[i * CACHE_LINE_SIZE] == 0) { | |||
slots_terms[i] = mk_atom(env, "null"); | |||
} else { | |||
slots_terms[i] = enif_make_copy(env, q->slots_terms[i * CACHE_LINE_SIZE]); | |||
} | |||
} | |||
return enif_make_tuple4(env, | |||
enif_make_uint64(env, q->tail), | |||
enif_make_uint64(env, q->head), | |||
enif_make_list_from_array(env, slots_states, q->queue_size), | |||
enif_make_list_from_array(env, slots_terms, q->queue_size)); | |||
} | |||
/* debug_poppers(QueueId) -> [pid() | null]
 *
 * Debug helper: walks pop_queue (the waiting consumers) and returns the
 * queued pids.  The list's first node is the dummy head (value == NULL)
 * and is skipped. */
static ERL_NIF_TERM
queue_debug_poppers(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
{
    uint32_t queue_id = 0;
    if (!enif_get_uint(env, argv[0], &queue_id))
        return mk_error(env, "badarg");
    if (queue_id > 8)
        return mk_error(env, "badarg");

    cq_t *q = QUEUES[queue_id];
    if (q == NULL)
        return mk_error(env, "bad_queue_id");

    /* First pass: count entries after the dummy node. */
    uint64_t pop_queue_size = 0;
    cq_node_t *node = q->pop_queue->head;
    /* NOTE(review): unlike the second pass, head is dereferenced here
       before stripping count bits with Q_PTR -- confirm the head
       pointer can never carry count bits. */
    if (node->value == NULL) {
        node = node->next;
        node = Q_PTR(node);
    }
    while (node != NULL) {
        pop_queue_size++;
        node = node->next;
        node = Q_PTR(node);
    }

    ERL_NIF_TERM *pop_queue_pids = enif_alloc(sizeof(ERL_NIF_TERM) * pop_queue_size);

    /* Second pass: collect the pids.  NOTE(review): if the queue grows
       between the passes, this writes past pop_queue_pids -- assumed
       debug-only / single-threaded use. */
    node = q->pop_queue->head;
    node = Q_PTR(node);
    if (node->value == NULL) {
        node = node->next;
        node = Q_PTR(node);
    }
    uint64_t i = 0;
    while (node != NULL) {
        if (node->value == 0) {
            pop_queue_pids[i] = mk_atom(env, "null");
        }
        else {
            pop_queue_pids[i] = enif_make_pid(env, node->value);
        }
        i++;
        node = node->next;
        node = Q_PTR(node);
    }

    ERL_NIF_TERM list = enif_make_list_from_array(env, pop_queue_pids, pop_queue_size);
    enif_free(pop_queue_pids);
    return list;
}
static ERL_NIF_TERM | |||
print_bits(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) | |||
{ | |||
uint64_t *p1 = malloc(8); | |||
*p1 = 0; | |||
for (int bit = 63; bit >= 0; bit--) { | |||
uint64_t power = 1 << bit; | |||
//uint64_t byte = *p1; | |||
uint64_t byte = p1; | |||
fprintf(stderr, "%d", (byte & power) >> bit); | |||
} | |||
fprintf(stderr, "\n"); | |||
//enif_free(p1); | |||
return mk_atom(env, "ok"); | |||
} | |||
/* Destructor for CQ_RESOURCE instances, invoked by the VM when a
 * queue resource's reference count drops to zero.  Currently only
 * logs; the cq_t's buffers and envs are not yet released here. */
void free_resource(ErlNifEnv* env, void* arg)
{
    //cq_t *cq = (cq_t *) arg;
    fprintf(stderr, "free_resource\n");
}
cq_queue_t * new_queue() | |||
{ | |||
cq_queue_t *queue = enif_alloc(sizeof(cq_queue_t)); | |||
cq_node_t *node = enif_alloc(sizeof(cq_node_t)); | |||
node->next = NULL; | |||
//node->env = NULL; | |||
node->value = NULL; | |||
queue->head = node; | |||
queue->tail = node; | |||
return queue; | |||
} | |||
/* Append `pid` to a lock-free queue (Michael–Scott enqueue; the low
 * bits of each pointer carry a modification counter, see the Q_*
 * macros).  Ownership of `pid` passes to the queue.  Debug traces go
 * to stderr. */
void enqueue(cq_queue_t *queue, ErlNifPid *pid)
{
    cq_node_t *node = enif_alloc(sizeof(cq_node_t));
    node->value = pid;
    node->next = NULL;
    fprintf(stderr, "node %p\n", (void *) node); /* was %lu: UB for pointers */

    cq_node_t *tail = NULL;
    uint64_t tail_count = 0;

    while (1) {
        tail = queue->tail;
        cq_node_t *tail_ptr = Q_PTR(tail);
        tail_count = Q_COUNT(tail);

        cq_node_t *next = tail->next;
        cq_node_t *next_ptr = Q_PTR(next);
        uint64_t next_count = Q_COUNT(next);

        if (tail == queue->tail) {          /* snapshot still consistent? */
            if (next_ptr == NULL) {
                /* Tail really is last: try to link the new node.
                   BUG FIX: the original `if` only guarded the fprintf,
                   so `break` ran even when the CAS failed and the node
                   was silently dropped under contention.  Only leave
                   the loop on a successful link. */
                if (__sync_bool_compare_and_swap(&tail_ptr->next,
                                                 next,
                                                 Q_SET_COUNT(node, next_count+1))) {
                    fprintf(stderr, "CAS(tail_ptr->next, next, (node, next_count+1)) -> true\n");
                    break;
                }
            } else {
                /* Tail is lagging: help swing it forward, then retry. */
                __sync_bool_compare_and_swap(&queue->tail,
                                             tail,
                                             Q_SET_COUNT(next_ptr, next_count+1));
                fprintf(stderr, "CAS(queue->tail, tail, (next_ptr, next_count+1))\n");
            }
        }
    }

    /* Swing tail to the freshly linked node.  This CAS may legitimately
       fail if another thread already helped. */
    cq_node_t *node_with_count = Q_SET_COUNT(node, tail_count+1);
    int ret = __sync_bool_compare_and_swap(&queue->tail,
                                           tail,
                                           node_with_count);
    fprintf(stderr, "CAS(queue->tail, tail, %p) -> %d\n", (void *) node_with_count, ret);
}
/* Lock-free dequeue (Michael–Scott).  On success stores the dequeued
 * pid through `pid` and returns 1; returns 0 when the queue is empty.
 * Counted pointers are packed into the low bits (Q_PTR/Q_COUNT). */
int dequeue(cq_queue_t *queue, ErlNifPid **pid)
{
    fprintf(stderr, "dequeue\n");
    cq_node_t *head, *head_ptr, *tail, *tail_ptr, *next, *next_ptr;
    while (1) {
        /* Take a consistent snapshot of head/tail/next. */
        head = queue->head;
        head_ptr = Q_PTR(head);
        tail = queue->tail;
        tail_ptr = Q_PTR(tail);
        next = head->next;
        next_ptr = Q_PTR(next);
        /* NOTE(review): %lu with pointer arguments is undefined
           behaviour; these debug traces should use %p. */
        fprintf(stderr, "head %lu, tail %lu, next %lu\n", head, tail, next);
        if (head == queue->head) {   /* snapshot still valid? */
            if (head_ptr == tail_ptr) {
                if (next_ptr == NULL) {
                    return 0; /* Queue is empty */
                }
                /* Tail is lagging behind: help swing it forward. */
                fprintf(stderr, "CAS(queue->tail, tail, (next_ptr, tail+1))\n");
                __sync_bool_compare_and_swap(&queue->tail,
                                             tail,
                                             Q_SET_COUNT(next_ptr, Q_COUNT(tail)+1));
            } else {
                /* Read the value before the CAS publishes the node as
                   consumed. */
                fprintf(stderr, "next->value %lu\n", next_ptr->value);
                *pid = next_ptr->value;
                fprintf(stderr, "CAS(queue->head, head, (next_ptr, head+1))\n");
                if (__sync_bool_compare_and_swap(&queue->head,
                                                 head,
                                                 Q_SET_COUNT(next_ptr, Q_COUNT(head)+1)))
                    break;
            }
        }
    }
    // free pid
    /* NOTE(review): the detached dummy node (Q_PTR(head)) is never
       freed -- leaks until node reclamation is implemented. */
    //enif_free(Q_PTR(head));
    return 1;
}
/* NIF load hook: allocate the global id -> queue table and register the
 * "cq" resource type.  Returns 0 on success, -1 to abort loading. */
int load(ErlNifEnv* env, void** priv_data, ERL_NIF_TERM load_info) {
    /* Initialize global array mapping id to cq_t ptr.
       BUG FIX: the guards throughout this module accept queue ids
       0..8 inclusive (`queue_id > 8` rejects), so the table needs 9
       slots; 8 allowed id 8 to index out of bounds.  Also size by the
       element type (*QUEUES == cq_t *), not cq_t **. */
    QUEUES = (cq_t **) calloc(9, sizeof(*QUEUES));
    if (QUEUES == NULL)
        return -1;

    ErlNifResourceFlags flags = (ErlNifResourceFlags)(ERL_NIF_RT_CREATE | ERL_NIF_RT_TAKEOVER);
    CQ_RESOURCE = enif_open_resource_type(env, "cq", "cq",
                                          &free_resource, flags, NULL);
    if (CQ_RESOURCE == NULL)
        return -1;

    return 0;
}
/* NIF dispatch table: {erlang_name, arity, c_implementation}. */
static ErlNifFunc nif_funcs[] = {
    {"new"          , 3, queue_new},
    {"free"         , 1, queue_free},
    {"push"         , 2, queue_push},
    {"async_pop"    , 1, queue_async_pop},
    {"debug"        , 1, queue_debug},
    {"debug_poppers", 1, queue_debug_poppers},
    {"print_bits"   , 0, print_bits}
};

/* Module `cq`: load hook only; no upgrade/unload callbacks. */
ERL_NIF_INIT(cq, nif_funcs, load, NULL, NULL, NULL);
@ -0,0 +1,71 @@ | |||
/* cq_nif.h -- shared declarations for the cq lock-free queue NIF.
   FIX: added the missing include guard and parenthesised all macro
   arguments so callers may safely pass expressions. */
#ifndef CQ_NIF_H
#define CQ_NIF_H

#include <stdint.h>
#include "erl_nif.h"

/* One cache line per slot, to limit false sharing between threads. */
#define CACHE_LINE_SIZE 64

/* Map a monotonically increasing position onto a ring slot.
   Requires __size to be a power of two. */
#define SLOT_INDEX(__index, __size) ((__index) & ((__size) - 1))

/* The two low bits of a node pointer carry a modification counter,
   used to mitigate ABA problems in the lock-free list. */
#define Q_MASK 3L
#define Q_PTR(__ptr) ((cq_node_t *) (((uint64_t) (__ptr)) & (~Q_MASK)))
#define Q_COUNT(__ptr) (((uint64_t) (__ptr)) & Q_MASK)
#define Q_SET_COUNT(__ptr, __val) ((cq_node_t *) (((uint64_t) (__ptr)) | ((__val) & Q_MASK)))

/* Slot lifecycle: EMPTY -> WRITE -> FULL -> READ -> EMPTY. */
#define STATE_EMPTY 0
#define STATE_WRITE 1
#define STATE_READ 2
#define STATE_FULL 3

/* NOTE(review): these globals are *defined* (not `extern`-declared) in
   the header, so it only links if exactly one .c file includes it. */
ErlNifResourceType* CQ_RESOURCE;

/* Node in the lock-free list of waiting producer/consumer pids. */
typedef struct cq_node cq_node_t;
struct cq_node {
    ErlNifEnv *env;
    //ERL_NIF_TERM term;
    ErlNifPid *value;
    cq_node_t *next;
};

/* Michael–Scott queue: head always points at a dummy node. */
typedef struct cq_queue {
    cq_node_t *head;
    cq_node_t *tail;
} cq_queue_t;

// TODO: Add padding between the fields
typedef struct cq {
    uint32_t id;              /* Index into QUEUES. */
    uint64_t queue_size;      /* Number of ring slots (power of two). */
    uint64_t overflow_size;   /* Capacity of the overflow area. */
    uint64_t head;            /* Producer position (monotonic). */
    uint64_t tail;            /* Consumer position (monotonic). */
    uint8_t *slots_states;    /* STATE_* per slot, cache-line spaced. */
    ERL_NIF_TERM *slots_terms;
    ErlNifEnv **slots_envs;   /* Per-slot env owning the copied term. */
    cq_queue_t *push_queue;   /* Blocked producers. */
    cq_queue_t *pop_queue;    /* Blocked consumers. */
    uint8_t *overflow_states;
    ERL_NIF_TERM *overflow_terms;
    ErlNifEnv **overflow_envs;
} cq_t;

cq_t **QUEUES = NULL; /* Initialized on nif load */

ERL_NIF_TERM mk_atom(ErlNifEnv* env, const char* atom);
ERL_NIF_TERM mk_error(ErlNifEnv* env, const char* msg);
int load(ErlNifEnv* env, void** priv_data, ERL_NIF_TERM load_info);
void free_resource(ErlNifEnv*, void*);
cq_queue_t* new_queue(void);
void enqueue(cq_queue_t *q, ErlNifPid *pid);

#endif /* CQ_NIF_H */
@ -0,0 +1,73 @@ | |||
# Based on c_src.mk from erlang.mk by Loic Hoguin <essen@ninenines.eu>
# Compiles every C/C++ source in this directory into a single NIF
# shared object at ../../priv/<dirname>.so.

CURDIR := $(shell pwd)
BASEDIR := $(abspath $(CURDIR))

PROJECT ?= $(notdir $(BASEDIR))
PROJECT := $(strip $(PROJECT))

# Ask the erl found in PATH where the ERTS and erl_interface
# headers/libraries live, so the build matches the runtime.
ERTS_INCLUDE_DIR ?= $(shell erl -noshell -s init stop -eval "io:format(\"~s/erts-~s/include/\", [code:root_dir(), erlang:system_info(version)]).")
ERL_INTERFACE_INCLUDE_DIR ?= $(shell erl -noshell -s init stop -eval "io:format(\"~s\", [code:lib_dir(erl_interface, include)]).")
ERL_INTERFACE_LIB_DIR ?= $(shell erl -noshell -s init stop -eval "io:format(\"~s\", [code:lib_dir(erl_interface, lib)]).")

C_SRC_DIR = $(CURDIR)
C_SRC_OUTPUT ?= $(CURDIR)/../../priv/$(PROJECT).so

# System type and C compiler/flags.
UNAME_SYS := $(shell uname -s)
ifeq ($(UNAME_SYS), Darwin)
  CC ?= cc
  CFLAGS ?= -O3 -std=c99 -arch x86_64 -finline-functions -Wall -Wmissing-prototypes
  CXXFLAGS ?= -O3 -arch x86_64 -finline-functions -Wall
  LDFLAGS ?= -arch x86_64 -flat_namespace -undefined suppress
else ifeq ($(UNAME_SYS), FreeBSD)
  CC ?= cc
  CFLAGS ?= -O3 -std=c99 -finline-functions -Wall -Wmissing-prototypes
  CXXFLAGS ?= -O3 -finline-functions -Wall
else ifeq ($(UNAME_SYS), Linux)
  CC ?= gcc
  CFLAGS ?= -O3 -std=c99 -finline-functions -Wall -Wmissing-prototypes
  CXXFLAGS ?= -O3 -finline-functions -Wall
endif

CFLAGS += -fPIC -I $(ERTS_INCLUDE_DIR) -I $(ERL_INTERFACE_INCLUDE_DIR)
CXXFLAGS += -fPIC -I $(ERTS_INCLUDE_DIR) -I $(ERL_INTERFACE_INCLUDE_DIR)

# NOTE(review): liberl_interface was removed in OTP 23; on modern OTP
# only -lei exists -- confirm the targeted OTP version before dropping it.
LDLIBS += -L $(ERL_INTERFACE_LIB_DIR) -lerl_interface -lei
LDFLAGS += -shared

# Verbosity: V=1 echoes full command lines, default prints a short tag.
c_verbose_0 = @echo " C " $(?F);
c_verbose = $(c_verbose_$(V))

cpp_verbose_0 = @echo " CPP " $(?F);
cpp_verbose = $(cpp_verbose_$(V))

link_verbose_0 = @echo " LD " $(@F);
link_verbose = $(link_verbose_$(V))

SOURCES := $(shell find $(C_SRC_DIR) -type f \( -name "*.c" -o -name "*.C" -o -name "*.cc" -o -name "*.cpp" \))
OBJECTS = $(addsuffix .o, $(basename $(SOURCES)))

COMPILE_C = $(c_verbose) $(CC) $(CFLAGS) $(CPPFLAGS) -c
COMPILE_CPP = $(cpp_verbose) $(CXX) $(CXXFLAGS) $(CPPFLAGS) -c

$(C_SRC_OUTPUT): $(OBJECTS)
	@mkdir -p $(dir $@)
	$(link_verbose) $(CC) $(OBJECTS) $(LDFLAGS) $(LDLIBS) -o $(C_SRC_OUTPUT)

%.o: %.c
	$(COMPILE_C) $(OUTPUT_OPTION) $<

%.o: %.cc
	$(COMPILE_CPP) $(OUTPUT_OPTION) $<

%.o: %.C
	$(COMPILE_CPP) $(OUTPUT_OPTION) $<

%.o: %.cpp
	$(COMPILE_CPP) $(OUTPUT_OPTION) $<

# clean is a command, not a file: declare it .PHONY so a file named
# "clean" cannot mask it.
.PHONY: clean
clean:
	@$(RM) $(C_SRC_OUTPUT) $(OBJECTS)
@ -0,0 +1,564 @@ | |||
#include <stdio.h> | |||
#include <unistd.h> | |||
#include "erl_nif.h" | |||
#include "cq_nif.h" | |||
/* #ifndef ERL_NIF_DIRTY_SCHEDULER_SUPPORT | |||
# error Requires dirty schedulers | |||
#endif */ | |||
ERL_NIF_TERM | |||
mk_atom(ErlNifEnv* env, const char* atom) | |||
{ | |||
ERL_NIF_TERM ret; | |||
if(!enif_make_existing_atom(env, atom, &ret, ERL_NIF_LATIN1)) | |||
return enif_make_atom(env, atom); | |||
return ret; | |||
} | |||
ERL_NIF_TERM | |||
mk_error(ErlNifEnv* env, const char* mesg) | |||
{ | |||
return enif_make_tuple2(env, mk_atom(env, "error"), mk_atom(env, mesg)); | |||
} | |||
static ERL_NIF_TERM | |||
queue_new(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) | |||
{ | |||
cq_t *q = enif_alloc_resource(CQ_RESOURCE, sizeof(cq_t)); | |||
if (q == NULL) | |||
return mk_error(env, "priv_alloc_error"); | |||
ERL_NIF_TERM ret = enif_make_resource(env, q); | |||
/* enif_release_resource(ret); */ | |||
uint32_t queue_id = 0; | |||
uint32_t queue_size = 0; | |||
uint32_t overflow_size = 0; | |||
if (!enif_get_uint(env, argv[0], &queue_id) || | |||
!enif_get_uint(env, argv[1], &queue_size) || | |||
!enif_get_uint(env, argv[2], &overflow_size)) | |||
return mk_error(env, "badarg"); | |||
if (queue_id > 8) | |||
return mk_error(env, "bad_queue_id"); | |||
/* TODO: Check that queue_size is power of 2 */ | |||
if (QUEUES[queue_id] != NULL) | |||
return mk_error(env, "queue_id_already_exists"); | |||
q->id = queue_id; | |||
q->queue_size = queue_size; | |||
q->overflow_size = overflow_size; | |||
q->tail = 0; | |||
q->head = 0; | |||
q->slots_states = calloc(q->queue_size, CACHE_LINE_SIZE); | |||
q->slots_terms = calloc(q->queue_size, CACHE_LINE_SIZE); | |||
q->slots_envs = calloc(q->queue_size, CACHE_LINE_SIZE); | |||
q->overflow_terms = calloc(q->overflow_size, CACHE_LINE_SIZE); | |||
q->overflow_envs = calloc(q->queue_size, CACHE_LINE_SIZE); | |||
q->push_queue = new_queue(); | |||
q->pop_queue = new_queue(); | |||
/* TODO: Check calloc return */ | |||
for (int i = 0; i < q->queue_size; i++) { | |||
ErlNifEnv *slot_env = enif_alloc_env(); | |||
q->slots_envs[i*CACHE_LINE_SIZE] = slot_env; | |||
//q->overflow_envs[i*CACHE_LINE_SIZE] = (ErlNifEnv *) enif_alloc_env(); | |||
} | |||
QUEUES[q->id] = q; | |||
return enif_make_tuple2(env, mk_atom(env, "ok"), ret); | |||
} | |||
static ERL_NIF_TERM | |||
queue_free(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) | |||
{ | |||
uint32_t queue_id = 0; | |||
if (!enif_get_uint(env, argv[0], &queue_id)) | |||
return mk_error(env, "badarg"); | |||
if (queue_id > 8) | |||
return mk_error(env, "badarg"); | |||
cq_t *q = QUEUES[queue_id]; | |||
if (q == NULL) | |||
return mk_error(env, "bad_queue_id"); | |||
/* TODO: Free all the things! */ | |||
QUEUES[queue_id] = NULL; | |||
return enif_make_atom(env, "ok"); | |||
} | |||
/* Push to the head of the queue. */ | |||
static ERL_NIF_TERM | |||
queue_push(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) | |||
{ | |||
uint32_t queue_id = 0; | |||
if (!enif_get_uint(env, argv[0], &queue_id)) | |||
return mk_error(env, "badarg"); | |||
if (queue_id > 8) | |||
return mk_error(env, "badarg"); | |||
/* Load the queue */ | |||
cq_t *q = QUEUES[queue_id]; | |||
if (q == NULL) | |||
return mk_error(env, "bad_queue_id"); | |||
if (q->id != queue_id) | |||
return mk_error(env, "not_identical_queue_id"); | |||
for (int i = 0; i < q->queue_size; i++) { | |||
fprintf(stderr, "queue slot %d, index %d, state %d\n", | |||
i, i*CACHE_LINE_SIZE, q->slots_states[i*CACHE_LINE_SIZE]); | |||
} | |||
/* If there's consumers waiting, the queue must be empty and we | |||
should directly pick a consumer to notify. */ | |||
ErlNifPid *waiting_consumer; | |||
int dequeue_ret = dequeue(q->pop_queue, &waiting_consumer); | |||
if (dequeue_ret) { | |||
ErlNifEnv *msg_env = enif_alloc_env(); | |||
ERL_NIF_TERM copy = enif_make_copy(msg_env, argv[1]); | |||
ERL_NIF_TERM tuple = enif_make_tuple2(msg_env, mk_atom(env, "pop"), copy); | |||
if (enif_send(env, waiting_consumer, msg_env, tuple)) { | |||
enif_free_env(msg_env); | |||
return mk_atom(env, "ok"); | |||
} else { | |||
return mk_error(env, "notify_failed"); | |||
} | |||
} | |||
/* Increment head and attempt to claim the slot by marking it as | |||
busy. This ensures no other thread will attempt to modify this | |||
slot. If we cannot lock it, another thread must have */ | |||
uint64_t head = __sync_add_and_fetch(&q->head, 1); | |||
size_t size = q->queue_size; | |||
while (1) { | |||
uint64_t index = SLOT_INDEX(head, size); | |||
uint64_t ret = __sync_val_compare_and_swap(&q->slots_states[index], | |||
STATE_EMPTY, | |||
STATE_WRITE); | |||
switch (ret) { | |||
case STATE_EMPTY: | |||
head = __sync_add_and_fetch(&q->head, 1); | |||
case STATE_WRITE: | |||
/* We acquired the write lock, go ahead with the write. */ | |||
break; | |||
case STATE_FULL: | |||
/* We have caught up with the tail and the buffer is | |||
full. Block the producer until a consumer reads the | |||
item. */ | |||
return mk_error(env, "full_not_implemented"); | |||
} | |||
} | |||
/* If head catches up with tail, the queue is full. Add to | |||
overflow instead */ | |||
/* Copy term to slot-specific temporary process env. */ | |||
ERL_NIF_TERM copy = enif_make_copy(q->slots_envs[SLOT_INDEX(head, size)], argv[1]); | |||
q->slots_terms[SLOT_INDEX(head, size)] = copy; | |||
__sync_synchronize(); /* Or compiler memory barrier? */ | |||
/* TODO: Do we need to collect garbage? */ | |||
/* Mark the slot ready to be consumed */ | |||
if (__sync_bool_compare_and_swap(&q->slots_states[SLOT_INDEX(head, size)], | |||
STATE_WRITE, | |||
STATE_FULL)) { | |||
return mk_atom(env, "ok"); | |||
} else { | |||
return mk_error(env, "could_not_update_slots_after_insert"); | |||
} | |||
} | |||
/* async_pop(QueueId) -> {ok, Term} | wait_for_msg | {error, Reason}
 *
 * Attempt to consume one term from the ring buffer.  If the queue is
 * empty, the calling process is enqueued on pop_queue and later woken
 * by a producer via enif_send (it should then block in receive). */
static ERL_NIF_TERM
queue_async_pop(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
{
    /* Load queue */
    uint32_t queue_id = 0;
    if (!enif_get_uint(env, argv[0], &queue_id))
        return mk_error(env, "badarg");
    if (queue_id > 8)
        return mk_error(env, "badarg");

    cq_t *q = QUEUES[queue_id];
    if (q == NULL)
        return mk_error(env, "bad_queue_id");
    if (q->id != queue_id)
        return mk_error(env, "not_identical_queue_id");

    uint64_t qsize = q->queue_size;
    uint64_t tail = q->tail;
    uint64_t num_busy = 0;  /* NOTE(review): never used. */

    /* Walk the buffer starting the tail position until we are either
       able to consume a term or find an empty slot. */
    while (1) {
        uint64_t index = SLOT_INDEX(tail, qsize);
        /* CAS FULL -> READ; returns the slot's previous state. */
        uint64_t ret = __sync_val_compare_and_swap(&q->slots_states[index],
                                                   STATE_FULL,
                                                   STATE_READ);
        /* NOTE(review): success is when ret == STATE_FULL (old value);
           this branch tests STATE_READ -- confirm intended semantics. */
        if (ret == STATE_READ) {
            /* We were able to mark the term as read in progress. We
               now have an exclusive lock. */
            break;
        } else if (ret == STATE_WRITE) {
            /* We found an item with a write in progress. If that
               thread progresses, it will eventually mark the slot as
               full. We can spin until that happens.

               This can take an arbitrary amount of time and multiple
               reading threads will compete for the same slot.

               Instead we add the caller to the queue of blocking
               consumers. When the next producer comes it will "help"
               this thread by calling enif_send on the current
               in-progress term *and* handle it's own terms. If
               there's no new push to the queue, this will block
               forever. */
            return mk_atom(env, "write_in_progress_not_implemented");
        } else if (ret == STATE_EMPTY) {
            /* We found an empty item. Queue must be empty. Add
               calling Erlang consumer process to queue of waiting
               processes. When the next producer comes along, it first
               checks the waiting consumers and calls enif_send
               instead of writing to the slots. */
            /* NOTE(review): pid is enif_alloc'd per call and ownership
               passes to the pop_queue; nothing visibly frees it. */
            ErlNifPid *pid = enif_alloc(sizeof(ErlNifPid));
            pid = enif_self(env, pid);
            enqueue(q->pop_queue, pid);
            return mk_atom(env, "wait_for_msg");
        } else {
            /* Slot busy (another reader); advance shared tail, retry. */
            tail = __sync_add_and_fetch(&q->tail, 1);
        }
    }

    /* Copy term into calling process env. The NIF env can now be
       gargbage collected. */
    ERL_NIF_TERM copy = enif_make_copy(env, q->slots_terms[SLOT_INDEX(tail, qsize)]);

    /* Mark the slot as free. Note: We don't increment the tail
       position here, as another thread also walking the buffer might
       have incremented it multiple times */
    q->slots_terms[SLOT_INDEX(tail, qsize)] = 0;

    if (__sync_bool_compare_and_swap(&q->slots_states[SLOT_INDEX(tail, qsize)],
                                     STATE_READ,
                                     STATE_EMPTY)) {
        return enif_make_tuple2(env, mk_atom(env, "ok"), copy);
    } else {
        return mk_error(env, "could_not_update_slots_after_pop");
    }
}
static ERL_NIF_TERM | |||
queue_debug(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) | |||
{ | |||
uint32_t queue_id = 0; | |||
if (!enif_get_uint(env, argv[0], &queue_id)) | |||
return mk_error(env, "badarg"); | |||
if (queue_id > 8) | |||
return mk_error(env, "badarg"); | |||
cq_t *q = QUEUES[queue_id]; | |||
if (q == NULL) | |||
return mk_error(env, "bad_queue_id"); | |||
ERL_NIF_TERM *slots_states = enif_alloc(sizeof(ERL_NIF_TERM) * q->queue_size); | |||
ERL_NIF_TERM *slots_terms = enif_alloc(sizeof(ERL_NIF_TERM) * q->queue_size); | |||
for (int i = 0; i < q->queue_size; i++) { | |||
slots_states[i] = enif_make_int(env, q->slots_states[i * CACHE_LINE_SIZE]); | |||
if (q->slots_terms[i * CACHE_LINE_SIZE] == 0) { | |||
slots_terms[i] = mk_atom(env, "null"); | |||
} else { | |||
slots_terms[i] = enif_make_copy(env, q->slots_terms[i * CACHE_LINE_SIZE]); | |||
} | |||
} | |||
return enif_make_tuple4(env, | |||
enif_make_uint64(env, q->tail), | |||
enif_make_uint64(env, q->head), | |||
enif_make_list_from_array(env, slots_states, q->queue_size), | |||
enif_make_list_from_array(env, slots_terms, q->queue_size)); | |||
} | |||
static ERL_NIF_TERM | |||
queue_debug_poppers(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) | |||
{ | |||
uint32_t queue_id = 0; | |||
if (!enif_get_uint(env, argv[0], &queue_id)) | |||
return mk_error(env, "badarg"); | |||
if (queue_id > 8) | |||
return mk_error(env, "badarg"); | |||
cq_t *q = QUEUES[queue_id]; | |||
if (q == NULL) | |||
return mk_error(env, "bad_queue_id"); | |||
uint64_t pop_queue_size = 0; | |||
cq_node_t *node = q->pop_queue->head; | |||
if (node->value == NULL) { | |||
node = node->next; | |||
node = Q_PTR(node); | |||
} | |||
while (node != NULL) { | |||
pop_queue_size++; | |||
node = node->next; | |||
node = Q_PTR(node); | |||
} | |||
ERL_NIF_TERM *pop_queue_pids = enif_alloc(sizeof(ERL_NIF_TERM) * pop_queue_size); | |||
node = q->pop_queue->head; | |||
node = Q_PTR(node); | |||
if (node->value == NULL) { | |||
node = node->next; | |||
node = Q_PTR(node); | |||
} | |||
uint64_t i = 0; | |||
while (node != NULL) { | |||
if (node->value == 0) { | |||
pop_queue_pids[i] = mk_atom(env, "null"); | |||
} | |||
else { | |||
pop_queue_pids[i] = enif_make_pid(env, node->value); | |||
} | |||
i++; | |||
node = node->next; | |||
node = Q_PTR(node); | |||
} | |||
ERL_NIF_TERM list = enif_make_list_from_array(env, pop_queue_pids, pop_queue_size); | |||
enif_free(pop_queue_pids); | |||
return list; | |||
} | |||
static ERL_NIF_TERM | |||
print_bits(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) | |||
{ | |||
uint64_t *p1 = malloc(8); | |||
*p1 = 0; | |||
for (int bit = 63; bit >= 0; bit--) { | |||
uint64_t power = 1 << bit; | |||
//uint64_t byte = *p1; | |||
uint64_t byte = p1; | |||
fprintf(stderr, "%d", (byte & power) >> bit); | |||
} | |||
fprintf(stderr, "\n"); | |||
//enif_free(p1); | |||
return mk_atom(env, "ok"); | |||
} | |||
/* Resource destructor registered with enif_open_resource_type.
   Currently only logs; the cq_t's internal allocations (slots, envs,
   pop/push queues) are NOT reclaimed here -- see the TODO in
   queue_free. */
void free_resource(ErlNifEnv* env, void* arg)
{
    //cq_t *cq = (cq_t *) arg;
    fprintf(stderr, "free_resource\n");
}
/* Allocate an empty waiting-pid queue in the standard Michael&Scott
   initial state: head and tail both point at a single dummy node
   whose NULL value marks it as a sentinel rather than real data.
   NOTE(review): enif_alloc results are not checked for NULL. */
cq_queue_t * new_queue()
{
    cq_queue_t *queue = enif_alloc(sizeof(cq_queue_t));
    cq_node_t *node = enif_alloc(sizeof(cq_node_t));
    node->next = NULL;
    //node->env = NULL;
    node->value = NULL;
    queue->head = node;
    queue->tail = node;
    return queue;
}
/* Enqueue `pid` on a Michael&Scott-style lock-free linked queue.
   The two low bits of each pointer carry a modification counter
   (Q_COUNT, ABA mitigation); strip with Q_PTR before dereferencing. */
void enqueue(cq_queue_t *queue, ErlNifPid *pid)
{
    cq_node_t *node = enif_alloc(sizeof(cq_node_t));
    node->value = pid;
    node->next = NULL;
    cq_node_t *tail = NULL;
    uint64_t tail_count = 0;
    while (1) {
        tail = queue->tail;
        cq_node_t *tail_ptr = Q_PTR(tail);
        tail_count = Q_COUNT(tail);
        /* Read next via the *untagged* pointer -- the original read
           tail->next through the tagged value. */
        cq_node_t *next = tail_ptr->next;
        cq_node_t *next_ptr = Q_PTR(next);
        uint64_t next_count = Q_COUNT(next);
        if (tail != queue->tail)
            continue;  /* tail moved under us; re-read */
        if (next_ptr == NULL) {
            /* Tail points at the last node: try to link our node
               after it. Only leave the loop when the CAS actually
               succeeded -- the original's `break` was unconditional,
               so losing the race silently dropped the node. */
            if (__sync_bool_compare_and_swap(&tail_ptr->next,
                                             next,
                                             Q_SET_COUNT(node, next_count+1)))
                break;
        } else {
            /* Tail is lagging: help swing it forward, then retry. */
            __sync_bool_compare_and_swap(&queue->tail,
                                         tail,
                                         Q_SET_COUNT(next_ptr, next_count+1));
        }
    }
    /* Swing tail to the freshly linked node. A failed CAS here is
       fine: another thread has already advanced the tail for us. */
    __sync_bool_compare_and_swap(&queue->tail,
                                 tail,
                                 Q_SET_COUNT(node, tail_count+1));
}
/* Michael&Scott lock-free dequeue of one waiting pid.
   Returns 1 and stores the pid through *pid on success, 0 when the
   queue is empty (head == tail and no next node). Q_PTR strips the
   low-bit modification counter before dereferencing. */
int dequeue(cq_queue_t *queue, ErlNifPid **pid)
{
    fprintf(stderr, "dequeue\n"); /* leftover debug output */
    cq_node_t *head, *head_ptr, *tail, *tail_ptr, *next, *next_ptr;
    while (1) {
        head = queue->head;
        head_ptr = Q_PTR(head);
        tail = queue->tail;
        tail_ptr = Q_PTR(tail);
        /* NOTE(review): reads through the tagged pointer; should this
           be head_ptr->next? Confirm Q_COUNT bits are never set on
           queue->head when this runs. */
        next = head->next;
        next_ptr = Q_PTR(next);
        /* NOTE(review): %lu with pointer arguments -- %p would be the
           correct conversion. */
        fprintf(stderr, "head %lu, tail %lu, next %lu\n", head, tail, next);
        if (head == queue->head) { /* snapshot still consistent? */
            if (head_ptr == tail_ptr) {
                if (next_ptr == NULL) {
                    return 0; /* Queue is empty */
                }
                /* Tail lags behind: help advance it, then retry. */
                fprintf(stderr, "CAS(queue->tail, tail, (next_ptr, tail+1))\n");
                __sync_bool_compare_and_swap(&queue->tail,
                                             tail,
                                             Q_SET_COUNT(next_ptr, Q_COUNT(tail)+1));
            } else {
                fprintf(stderr, "next->value %lu\n", next_ptr->value);
                /* Read the value before the CAS publishes the new
                   head, per the M&S algorithm. */
                *pid = next_ptr->value;
                fprintf(stderr, "CAS(queue->head, head, (next_ptr, head+1))\n");
                if (__sync_bool_compare_and_swap(&queue->head,
                                                 head,
                                                 Q_SET_COUNT(next_ptr, Q_COUNT(head)+1)))
                    break;
            }
        }
    }
    // free pid
    /* NOTE(review): the retired dummy node is never freed -- leaks
       one cq_node_t per successful dequeue. */
    //enif_free(Q_PTR(head));
    return 1;
}
/* NIF load hook: allocate the global id->queue table and register the
   "cq" resource type. Returns 0 on success, nonzero to abort load. */
int load(ErlNifEnv* env, void** priv_data, ERL_NIF_TERM load_info) {
    /* Element size is sizeof(cq_t *) -- the original's
       sizeof(cq_t **) happened to be the same size but named the
       wrong type. Table is fixed at 8 queues (ids 0..7). */
    QUEUES = (cq_t **) calloc(8, sizeof(cq_t *));
    if (QUEUES == NULL)
        return -1;
    ErlNifResourceFlags flags = (ErlNifResourceFlags)(ERL_NIF_RT_CREATE | ERL_NIF_RT_TAKEOVER);
    CQ_RESOURCE = enif_open_resource_type(env, "cq", "cq",
                                          &free_resource, flags, NULL);
    if (CQ_RESOURCE == NULL)
        return -1;
    return 0;
}
/* NIF dispatch table: {erlang_name, arity, c_function}. Arities must
   match the stub functions exported by the `cq` Erlang module. */
static ErlNifFunc nif_funcs[] = {
    {"new" , 3, queue_new},
    {"free" , 1, queue_free},
    {"push" , 2, queue_push},
    {"async_pop", 1, queue_async_pop},
    {"debug" , 1, queue_debug},
    {"debug_poppers", 1, queue_debug_poppers},
    {"print_bits", 0, print_bits}
};
/* Module init: load hook only; no reload/upgrade/unload handlers. */
ERL_NIF_INIT(cq, nif_funcs, load, NULL, NULL, NULL);
@ -0,0 +1,71 @@ | |||
#ifndef CQ_NIF_H
#define CQ_NIF_H

#include <stdint.h>
#include "erl_nif.h"

/* Slots are padded to one cache line to limit false sharing between
   cores touching neighbouring slots. */
#define CACHE_LINE_SIZE 64

/* Map a monotonically increasing position onto a ring index.
   Requires __size to be a power of two. Arguments parenthesised so
   e.g. SLOT_INDEX(head + 1, size) expands correctly. */
#define SLOT_INDEX(__index, __size) ((__index) & ((__size) - 1))

/* The two low bits of node pointers in cq_queue_t carry a
   modification counter (ABA mitigation). Q_PTR strips the tag,
   Q_COUNT extracts it, Q_SET_COUNT combines pointer and counter. */
#define Q_MASK 3L
#define Q_PTR(__ptr) ((cq_node_t *) (((uint64_t)(__ptr)) & (~Q_MASK)))
#define Q_COUNT(__ptr) ((uint64_t) (__ptr) & Q_MASK)
#define Q_SET_COUNT(__ptr, __val) ((cq_node_t *) ((uint64_t) (__ptr) | ((__val) & Q_MASK)))

/* Ring-slot states. Transitions:
   producer: EMPTY -> WRITE -> FULL; consumer: FULL -> READ -> EMPTY */
#define STATE_EMPTY 0
#define STATE_WRITE 1
#define STATE_READ 2
#define STATE_FULL 3

/* NOTE(review): these globals are *definitions*, not declarations --
   including this header from a second translation unit would violate
   the one-definition rule. They should become `extern` declarations
   (defined once in the .c) if the header gains more users. */
ErlNifResourceType* CQ_RESOURCE;

typedef struct cq_node cq_node_t;

/* One node of the waiting-consumer queue. */
struct cq_node {
    ErlNifEnv *env;
    //ERL_NIF_TERM term;
    ErlNifPid *value;   /* NULL marks the dummy/sentinel node */
    cq_node_t *next;
};

/* Lock-free FIFO (Michael&Scott queue) of waiting consumer pids. */
typedef struct cq_queue {
    cq_node_t *head;
    cq_node_t *tail;
} cq_queue_t;

// TODO: Add padding between the fields
/* One concurrent queue instance. */
typedef struct cq {
    uint32_t id;              /* index into QUEUES */
    uint64_t queue_size;      /* ring slot count; must be power of 2 */
    uint64_t overflow_size;
    uint64_t head;            /* producer position (monotonic) */
    uint64_t tail;            /* consumer position (monotonic) */
    uint8_t *slots_states;    /* STATE_* per slot */
    ERL_NIF_TERM *slots_terms;
    ErlNifEnv **slots_envs;   /* per-slot envs owning slots_terms */
    cq_queue_t *push_queue;   /* blocked producers (not used yet) */
    cq_queue_t *pop_queue;    /* blocked consumers */
    uint8_t *overflow_states;
    ERL_NIF_TERM *overflow_terms;
    ErlNifEnv **overflow_envs;
} cq_t;

cq_t **QUEUES = NULL; /* Initialized on nif load */

ERL_NIF_TERM mk_atom(ErlNifEnv* env, const char* atom);
ERL_NIF_TERM mk_error(ErlNifEnv* env, const char* msg);
int load(ErlNifEnv* env, void** priv_data, ERL_NIF_TERM load_info);
void free_resource(ErlNifEnv*, void*);
cq_queue_t* new_queue(void);
void enqueue(cq_queue_t *q, ErlNifPid *pid);

#endif /* CQ_NIF_H */
@ -0,0 +1,73 @@ | |||
# Based on c_src.mk from erlang.mk by Loic Hoguin <essen@ninenines.eu>

CURDIR := $(shell pwd)
BASEDIR := $(abspath $(CURDIR))

# Project name defaults to the directory name; it determines the name
# of the NIF shared object placed in priv/.
PROJECT ?= $(notdir $(BASEDIR))
PROJECT := $(strip $(PROJECT))

# Ask the erl on PATH where its ERTS and erl_interface headers/libs
# live, so the build follows whichever OTP is active.
ERTS_INCLUDE_DIR ?= $(shell erl -noshell -s init stop -eval "io:format(\"~s/erts-~s/include/\", [code:root_dir(), erlang:system_info(version)]).")
ERL_INTERFACE_INCLUDE_DIR ?= $(shell erl -noshell -s init stop -eval "io:format(\"~s\", [code:lib_dir(erl_interface, include)]).")
ERL_INTERFACE_LIB_DIR ?= $(shell erl -noshell -s init stop -eval "io:format(\"~s\", [code:lib_dir(erl_interface, lib)]).")

C_SRC_DIR = $(CURDIR)
C_SRC_OUTPUT ?= $(CURDIR)/../../priv/$(PROJECT).so

# System type and C compiler/flags.
UNAME_SYS := $(shell uname -s)
ifeq ($(UNAME_SYS), Darwin)
CC ?= cc
CFLAGS ?= -O3 -std=c99 -arch x86_64 -finline-functions -Wall -Wmissing-prototypes
CXXFLAGS ?= -O3 -arch x86_64 -finline-functions -Wall
LDFLAGS ?= -arch x86_64 -flat_namespace -undefined suppress
else ifeq ($(UNAME_SYS), FreeBSD)
CC ?= cc
CFLAGS ?= -O3 -std=c99 -finline-functions -Wall -Wmissing-prototypes
CXXFLAGS ?= -O3 -finline-functions -Wall
else ifeq ($(UNAME_SYS), Linux)
CC ?= gcc
CFLAGS ?= -O3 -std=c99 -finline-functions -Wall -Wmissing-prototypes
CXXFLAGS ?= -O3 -finline-functions -Wall
endif

# -fPIC is mandatory for code linked into a shared object.
CFLAGS += -fPIC -I $(ERTS_INCLUDE_DIR) -I $(ERL_INTERFACE_INCLUDE_DIR)
CXXFLAGS += -fPIC -I $(ERTS_INCLUDE_DIR) -I $(ERL_INTERFACE_INCLUDE_DIR)

# NOTE(review): -lerl_interface was removed in OTP 23; only -lei
# remains. Kept for older releases -- confirm against the targeted
# OTP version.
LDLIBS += -L $(ERL_INTERFACE_LIB_DIR) -lerl_interface -lei
LDFLAGS += -shared

# Verbosity: V unset/0 prints terse " C/CPP/LD" lines; V=1 echoes the
# full commands.
c_verbose_0 = @echo " C " $(?F);
c_verbose = $(c_verbose_$(V))

cpp_verbose_0 = @echo " CPP " $(?F);
cpp_verbose = $(cpp_verbose_$(V))

link_verbose_0 = @echo " LD " $(@F);
link_verbose = $(link_verbose_$(V))

SOURCES := $(shell find $(C_SRC_DIR) -type f \( -name "*.c" -o -name "*.C" -o -name "*.cc" -o -name "*.cpp" \))
OBJECTS = $(addsuffix .o, $(basename $(SOURCES)))

COMPILE_C = $(c_verbose) $(CC) $(CFLAGS) $(CPPFLAGS) -c
COMPILE_CPP = $(cpp_verbose) $(CXX) $(CXXFLAGS) $(CPPFLAGS) -c

$(C_SRC_OUTPUT): $(OBJECTS)
	@mkdir -p $(dir $(C_SRC_OUTPUT))
	$(link_verbose) $(CC) $(OBJECTS) $(LDFLAGS) $(LDLIBS) -o $(C_SRC_OUTPUT)

%.o: %.c
	$(COMPILE_C) $(OUTPUT_OPTION) $<

%.o: %.cc
	$(COMPILE_CPP) $(OUTPUT_OPTION) $<

%.o: %.C
	$(COMPILE_CPP) $(OUTPUT_OPTION) $<

%.o: %.cpp
	$(COMPILE_CPP) $(OUTPUT_OPTION) $<

# clean is a command, not a file: declare it phony so a stray file
# named "clean" cannot mask it.
.PHONY: clean

clean:
	@rm -f $(C_SRC_OUTPUT) $(OBJECTS)
@ -0,0 +1,564 @@ | |||
#include <stdio.h> | |||
#include <unistd.h> | |||
#include "erl_nif.h" | |||
#include "cq_nif.h" | |||
/* #ifndef ERL_NIF_DIRTY_SCHEDULER_SUPPORT | |||
# error Requires dirty schedulers | |||
#endif */ | |||
ERL_NIF_TERM | |||
mk_atom(ErlNifEnv* env, const char* atom) | |||
{ | |||
ERL_NIF_TERM ret; | |||
if(!enif_make_existing_atom(env, atom, &ret, ERL_NIF_LATIN1)) | |||
return enif_make_atom(env, atom); | |||
return ret; | |||
} | |||
ERL_NIF_TERM | |||
mk_error(ErlNifEnv* env, const char* mesg) | |||
{ | |||
return enif_make_tuple2(env, mk_atom(env, "error"), mk_atom(env, mesg)); | |||
} | |||
static ERL_NIF_TERM | |||
queue_new(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) | |||
{ | |||
cq_t *q = enif_alloc_resource(CQ_RESOURCE, sizeof(cq_t)); | |||
if (q == NULL) | |||
return mk_error(env, "priv_alloc_error"); | |||
ERL_NIF_TERM ret = enif_make_resource(env, q); | |||
/* enif_release_resource(ret); */ | |||
uint32_t queue_id = 0; | |||
uint32_t queue_size = 0; | |||
uint32_t overflow_size = 0; | |||
if (!enif_get_uint(env, argv[0], &queue_id) || | |||
!enif_get_uint(env, argv[1], &queue_size) || | |||
!enif_get_uint(env, argv[2], &overflow_size)) | |||
return mk_error(env, "badarg"); | |||
if (queue_id > 8) | |||
return mk_error(env, "bad_queue_id"); | |||
/* TODO: Check that queue_size is power of 2 */ | |||
if (QUEUES[queue_id] != NULL) | |||
return mk_error(env, "queue_id_already_exists"); | |||
q->id = queue_id; | |||
q->queue_size = queue_size; | |||
q->overflow_size = overflow_size; | |||
q->tail = 0; | |||
q->head = 0; | |||
q->slots_states = calloc(q->queue_size, CACHE_LINE_SIZE); | |||
q->slots_terms = calloc(q->queue_size, CACHE_LINE_SIZE); | |||
q->slots_envs = calloc(q->queue_size, CACHE_LINE_SIZE); | |||
q->overflow_terms = calloc(q->overflow_size, CACHE_LINE_SIZE); | |||
q->overflow_envs = calloc(q->queue_size, CACHE_LINE_SIZE); | |||
q->push_queue = new_queue(); | |||
q->pop_queue = new_queue(); | |||
/* TODO: Check calloc return */ | |||
for (int i = 0; i < q->queue_size; i++) { | |||
ErlNifEnv *slot_env = enif_alloc_env(); | |||
q->slots_envs[i*CACHE_LINE_SIZE] = slot_env; | |||
//q->overflow_envs[i*CACHE_LINE_SIZE] = (ErlNifEnv *) enif_alloc_env(); | |||
} | |||
QUEUES[q->id] = q; | |||
return enif_make_tuple2(env, mk_atom(env, "ok"), ret); | |||
} | |||
static ERL_NIF_TERM | |||
queue_free(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) | |||
{ | |||
uint32_t queue_id = 0; | |||
if (!enif_get_uint(env, argv[0], &queue_id)) | |||
return mk_error(env, "badarg"); | |||
if (queue_id > 8) | |||
return mk_error(env, "badarg"); | |||
cq_t *q = QUEUES[queue_id]; | |||
if (q == NULL) | |||
return mk_error(env, "bad_queue_id"); | |||
/* TODO: Free all the things! */ | |||
QUEUES[queue_id] = NULL; | |||
return enif_make_atom(env, "ok"); | |||
} | |||
/* Push to the head of the queue. */ | |||
static ERL_NIF_TERM | |||
queue_push(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) | |||
{ | |||
uint32_t queue_id = 0; | |||
if (!enif_get_uint(env, argv[0], &queue_id)) | |||
return mk_error(env, "badarg"); | |||
if (queue_id > 8) | |||
return mk_error(env, "badarg"); | |||
/* Load the queue */ | |||
cq_t *q = QUEUES[queue_id]; | |||
if (q == NULL) | |||
return mk_error(env, "bad_queue_id"); | |||
if (q->id != queue_id) | |||
return mk_error(env, "not_identical_queue_id"); | |||
for (int i = 0; i < q->queue_size; i++) { | |||
fprintf(stderr, "queue slot %d, index %d, state %d\n", | |||
i, i*CACHE_LINE_SIZE, q->slots_states[i*CACHE_LINE_SIZE]); | |||
} | |||
/* If there's consumers waiting, the queue must be empty and we | |||
should directly pick a consumer to notify. */ | |||
ErlNifPid *waiting_consumer; | |||
int dequeue_ret = dequeue(q->pop_queue, &waiting_consumer); | |||
if (dequeue_ret) { | |||
ErlNifEnv *msg_env = enif_alloc_env(); | |||
ERL_NIF_TERM copy = enif_make_copy(msg_env, argv[1]); | |||
ERL_NIF_TERM tuple = enif_make_tuple2(msg_env, mk_atom(env, "pop"), copy); | |||
if (enif_send(env, waiting_consumer, msg_env, tuple)) { | |||
enif_free_env(msg_env); | |||
return mk_atom(env, "ok"); | |||
} else { | |||
return mk_error(env, "notify_failed"); | |||
} | |||
} | |||
/* Increment head and attempt to claim the slot by marking it as | |||
busy. This ensures no other thread will attempt to modify this | |||
slot. If we cannot lock it, another thread must have */ | |||
uint64_t head = __sync_add_and_fetch(&q->head, 1); | |||
size_t size = q->queue_size; | |||
while (1) { | |||
uint64_t index = SLOT_INDEX(head, size); | |||
uint64_t ret = __sync_val_compare_and_swap(&q->slots_states[index], | |||
STATE_EMPTY, | |||
STATE_WRITE); | |||
switch (ret) { | |||
case STATE_EMPTY: | |||
head = __sync_add_and_fetch(&q->head, 1); | |||
case STATE_WRITE: | |||
/* We acquired the write lock, go ahead with the write. */ | |||
break; | |||
case STATE_FULL: | |||
/* We have caught up with the tail and the buffer is | |||
full. Block the producer until a consumer reads the | |||
item. */ | |||
return mk_error(env, "full_not_implemented"); | |||
} | |||
} | |||
/* If head catches up with tail, the queue is full. Add to | |||
overflow instead */ | |||
/* Copy term to slot-specific temporary process env. */ | |||
ERL_NIF_TERM copy = enif_make_copy(q->slots_envs[SLOT_INDEX(head, size)], argv[1]); | |||
q->slots_terms[SLOT_INDEX(head, size)] = copy; | |||
__sync_synchronize(); /* Or compiler memory barrier? */ | |||
/* TODO: Do we need to collect garbage? */ | |||
/* Mark the slot ready to be consumed */ | |||
if (__sync_bool_compare_and_swap(&q->slots_states[SLOT_INDEX(head, size)], | |||
STATE_WRITE, | |||
STATE_FULL)) { | |||
return mk_atom(env, "ok"); | |||
} else { | |||
return mk_error(env, "could_not_update_slots_after_insert"); | |||
} | |||
} | |||
static ERL_NIF_TERM | |||
queue_async_pop(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) | |||
{ | |||
/* Load queue */ | |||
uint32_t queue_id = 0; | |||
if (!enif_get_uint(env, argv[0], &queue_id)) | |||
return mk_error(env, "badarg"); | |||
if (queue_id > 8) | |||
return mk_error(env, "badarg"); | |||
cq_t *q = QUEUES[queue_id]; | |||
if (q == NULL) | |||
return mk_error(env, "bad_queue_id"); | |||
if (q->id != queue_id) | |||
return mk_error(env, "not_identical_queue_id"); | |||
uint64_t qsize = q->queue_size; | |||
uint64_t tail = q->tail; | |||
uint64_t num_busy = 0; | |||
/* Walk the buffer starting the tail position until we are either | |||
able to consume a term or find an empty slot. */ | |||
while (1) { | |||
uint64_t index = SLOT_INDEX(tail, qsize); | |||
uint64_t ret = __sync_val_compare_and_swap(&q->slots_states[index], | |||
STATE_FULL, | |||
STATE_READ); | |||
if (ret == STATE_READ) { | |||
/* We were able to mark the term as read in progress. We | |||
now have an exclusive lock. */ | |||
break; | |||
} else if (ret == STATE_WRITE) { | |||
/* We found an item with a write in progress. If that | |||
thread progresses, it will eventually mark the slot as | |||
full. We can spin until that happens. | |||
This can take an arbitrary amount of time and multiple | |||
reading threads will compete for the same slot. | |||
Instead we add the caller to the queue of blocking | |||
consumers. When the next producer comes it will "help" | |||
this thread by calling enif_send on the current | |||
in-progress term *and* handle it's own terms. If | |||
there's no new push to the queue, this will block | |||
forever. */ | |||
return mk_atom(env, "write_in_progress_not_implemented"); | |||
} else if (ret == STATE_EMPTY) { | |||
/* We found an empty item. Queue must be empty. Add | |||
calling Erlang consumer process to queue of waiting | |||
processes. When the next producer comes along, it first | |||
checks the waiting consumers and calls enif_send | |||
instead of writing to the slots. */ | |||
ErlNifPid *pid = enif_alloc(sizeof(ErlNifPid)); | |||
pid = enif_self(env, pid); | |||
enqueue(q->pop_queue, pid); | |||
return mk_atom(env, "wait_for_msg"); | |||
} else { | |||
tail = __sync_add_and_fetch(&q->tail, 1); | |||
} | |||
} | |||
/* Copy term into calling process env. The NIF env can now be | |||
gargbage collected. */ | |||
ERL_NIF_TERM copy = enif_make_copy(env, q->slots_terms[SLOT_INDEX(tail, qsize)]); | |||
/* Mark the slot as free. Note: We don't increment the tail | |||
position here, as another thread also walking the buffer might | |||
have incremented it multiple times */ | |||
q->slots_terms[SLOT_INDEX(tail, qsize)] = 0; | |||
if (__sync_bool_compare_and_swap(&q->slots_states[SLOT_INDEX(tail, qsize)], | |||
STATE_READ, | |||
STATE_EMPTY)) { | |||
return enif_make_tuple2(env, mk_atom(env, "ok"), copy); | |||
} else { | |||
return mk_error(env, "could_not_update_slots_after_pop"); | |||
} | |||
} | |||
static ERL_NIF_TERM | |||
queue_debug(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) | |||
{ | |||
uint32_t queue_id = 0; | |||
if (!enif_get_uint(env, argv[0], &queue_id)) | |||
return mk_error(env, "badarg"); | |||
if (queue_id > 8) | |||
return mk_error(env, "badarg"); | |||
cq_t *q = QUEUES[queue_id]; | |||
if (q == NULL) | |||
return mk_error(env, "bad_queue_id"); | |||
ERL_NIF_TERM *slots_states = enif_alloc(sizeof(ERL_NIF_TERM) * q->queue_size); | |||
ERL_NIF_TERM *slots_terms = enif_alloc(sizeof(ERL_NIF_TERM) * q->queue_size); | |||
for (int i = 0; i < q->queue_size; i++) { | |||
slots_states[i] = enif_make_int(env, q->slots_states[i * CACHE_LINE_SIZE]); | |||
if (q->slots_terms[i * CACHE_LINE_SIZE] == 0) { | |||
slots_terms[i] = mk_atom(env, "null"); | |||
} else { | |||
slots_terms[i] = enif_make_copy(env, q->slots_terms[i * CACHE_LINE_SIZE]); | |||
} | |||
} | |||
return enif_make_tuple4(env, | |||
enif_make_uint64(env, q->tail), | |||
enif_make_uint64(env, q->head), | |||
enif_make_list_from_array(env, slots_states, q->queue_size), | |||
enif_make_list_from_array(env, slots_terms, q->queue_size)); | |||
} | |||
static ERL_NIF_TERM | |||
queue_debug_poppers(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) | |||
{ | |||
uint32_t queue_id = 0; | |||
if (!enif_get_uint(env, argv[0], &queue_id)) | |||
return mk_error(env, "badarg"); | |||
if (queue_id > 8) | |||
return mk_error(env, "badarg"); | |||
cq_t *q = QUEUES[queue_id]; | |||
if (q == NULL) | |||
return mk_error(env, "bad_queue_id"); | |||
uint64_t pop_queue_size = 0; | |||
cq_node_t *node = q->pop_queue->head; | |||
if (node->value == NULL) { | |||
node = node->next; | |||
node = Q_PTR(node); | |||
} | |||
while (node != NULL) { | |||
pop_queue_size++; | |||
node = node->next; | |||
node = Q_PTR(node); | |||
} | |||
ERL_NIF_TERM *pop_queue_pids = enif_alloc(sizeof(ERL_NIF_TERM) * pop_queue_size); | |||
node = q->pop_queue->head; | |||
node = Q_PTR(node); | |||
if (node->value == NULL) { | |||
node = node->next; | |||
node = Q_PTR(node); | |||
} | |||
uint64_t i = 0; | |||
while (node != NULL) { | |||
if (node->value == 0) { | |||
pop_queue_pids[i] = mk_atom(env, "null"); | |||
} | |||
else { | |||
pop_queue_pids[i] = enif_make_pid(env, node->value); | |||
} | |||
i++; | |||
node = node->next; | |||
node = Q_PTR(node); | |||
} | |||
ERL_NIF_TERM list = enif_make_list_from_array(env, pop_queue_pids, pop_queue_size); | |||
enif_free(pop_queue_pids); | |||
return list; | |||
} | |||
static ERL_NIF_TERM | |||
print_bits(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) | |||
{ | |||
uint64_t *p1 = malloc(8); | |||
*p1 = 0; | |||
for (int bit = 63; bit >= 0; bit--) { | |||
uint64_t power = 1 << bit; | |||
//uint64_t byte = *p1; | |||
uint64_t byte = p1; | |||
fprintf(stderr, "%d", (byte & power) >> bit); | |||
} | |||
fprintf(stderr, "\n"); | |||
//enif_free(p1); | |||
return mk_atom(env, "ok"); | |||
} | |||
/* Resource destructor registered with enif_open_resource_type.
   Currently only logs; the cq_t's internal allocations (slots, envs,
   pop/push queues) are NOT reclaimed here -- see the TODO in
   queue_free. */
void free_resource(ErlNifEnv* env, void* arg)
{
    //cq_t *cq = (cq_t *) arg;
    fprintf(stderr, "free_resource\n");
}
/* Allocate an empty waiting-pid queue in the standard Michael&Scott
   initial state: head and tail both point at a single dummy node
   whose NULL value marks it as a sentinel rather than real data.
   NOTE(review): enif_alloc results are not checked for NULL. */
cq_queue_t * new_queue()
{
    cq_queue_t *queue = enif_alloc(sizeof(cq_queue_t));
    cq_node_t *node = enif_alloc(sizeof(cq_node_t));
    node->next = NULL;
    //node->env = NULL;
    node->value = NULL;
    queue->head = node;
    queue->tail = node;
    return queue;
}
/* Enqueue `pid` on a Michael&Scott-style lock-free linked queue.
   The two low bits of each pointer carry a modification counter
   (Q_COUNT, ABA mitigation); strip with Q_PTR before dereferencing. */
void enqueue(cq_queue_t *queue, ErlNifPid *pid)
{
    cq_node_t *node = enif_alloc(sizeof(cq_node_t));
    node->value = pid;
    node->next = NULL;
    cq_node_t *tail = NULL;
    uint64_t tail_count = 0;
    while (1) {
        tail = queue->tail;
        cq_node_t *tail_ptr = Q_PTR(tail);
        tail_count = Q_COUNT(tail);
        /* Read next via the *untagged* pointer -- the original read
           tail->next through the tagged value. */
        cq_node_t *next = tail_ptr->next;
        cq_node_t *next_ptr = Q_PTR(next);
        uint64_t next_count = Q_COUNT(next);
        if (tail != queue->tail)
            continue;  /* tail moved under us; re-read */
        if (next_ptr == NULL) {
            /* Tail points at the last node: try to link our node
               after it. Only leave the loop when the CAS actually
               succeeded -- the original's `break` was unconditional,
               so losing the race silently dropped the node. */
            if (__sync_bool_compare_and_swap(&tail_ptr->next,
                                             next,
                                             Q_SET_COUNT(node, next_count+1)))
                break;
        } else {
            /* Tail is lagging: help swing it forward, then retry. */
            __sync_bool_compare_and_swap(&queue->tail,
                                         tail,
                                         Q_SET_COUNT(next_ptr, next_count+1));
        }
    }
    /* Swing tail to the freshly linked node. A failed CAS here is
       fine: another thread has already advanced the tail for us. */
    __sync_bool_compare_and_swap(&queue->tail,
                                 tail,
                                 Q_SET_COUNT(node, tail_count+1));
}
/* Michael&Scott lock-free dequeue of one waiting pid.
   Returns 1 and stores the pid through *pid on success, 0 when the
   queue is empty (head == tail and no next node). Q_PTR strips the
   low-bit modification counter before dereferencing. */
int dequeue(cq_queue_t *queue, ErlNifPid **pid)
{
    fprintf(stderr, "dequeue\n"); /* leftover debug output */
    cq_node_t *head, *head_ptr, *tail, *tail_ptr, *next, *next_ptr;
    while (1) {
        head = queue->head;
        head_ptr = Q_PTR(head);
        tail = queue->tail;
        tail_ptr = Q_PTR(tail);
        /* NOTE(review): reads through the tagged pointer; should this
           be head_ptr->next? Confirm Q_COUNT bits are never set on
           queue->head when this runs. */
        next = head->next;
        next_ptr = Q_PTR(next);
        /* NOTE(review): %lu with pointer arguments -- %p would be the
           correct conversion. */
        fprintf(stderr, "head %lu, tail %lu, next %lu\n", head, tail, next);
        if (head == queue->head) { /* snapshot still consistent? */
            if (head_ptr == tail_ptr) {
                if (next_ptr == NULL) {
                    return 0; /* Queue is empty */
                }
                /* Tail lags behind: help advance it, then retry. */
                fprintf(stderr, "CAS(queue->tail, tail, (next_ptr, tail+1))\n");
                __sync_bool_compare_and_swap(&queue->tail,
                                             tail,
                                             Q_SET_COUNT(next_ptr, Q_COUNT(tail)+1));
            } else {
                fprintf(stderr, "next->value %lu\n", next_ptr->value);
                /* Read the value before the CAS publishes the new
                   head, per the M&S algorithm. */
                *pid = next_ptr->value;
                fprintf(stderr, "CAS(queue->head, head, (next_ptr, head+1))\n");
                if (__sync_bool_compare_and_swap(&queue->head,
                                                 head,
                                                 Q_SET_COUNT(next_ptr, Q_COUNT(head)+1)))
                    break;
            }
        }
    }
    // free pid
    /* NOTE(review): the retired dummy node is never freed -- leaks
       one cq_node_t per successful dequeue. */
    //enif_free(Q_PTR(head));
    return 1;
}
/* NIF load hook: allocate the global id->queue table and register the
   "cq" resource type. Returns 0 on success, nonzero to abort load. */
int load(ErlNifEnv* env, void** priv_data, ERL_NIF_TERM load_info) {
    /* Element size is sizeof(cq_t *) -- the original's
       sizeof(cq_t **) happened to be the same size but named the
       wrong type. Table is fixed at 8 queues (ids 0..7). */
    QUEUES = (cq_t **) calloc(8, sizeof(cq_t *));
    if (QUEUES == NULL)
        return -1;
    ErlNifResourceFlags flags = (ErlNifResourceFlags)(ERL_NIF_RT_CREATE | ERL_NIF_RT_TAKEOVER);
    CQ_RESOURCE = enif_open_resource_type(env, "cq", "cq",
                                          &free_resource, flags, NULL);
    if (CQ_RESOURCE == NULL)
        return -1;
    return 0;
}
/* NIF dispatch table: {erlang_name, arity, c_function}. Arities must
   match the stub functions exported by the `cq` Erlang module. */
static ErlNifFunc nif_funcs[] = {
    {"new" , 3, queue_new},
    {"free" , 1, queue_free},
    {"push" , 2, queue_push},
    {"async_pop", 1, queue_async_pop},
    {"debug" , 1, queue_debug},
    {"debug_poppers", 1, queue_debug_poppers},
    {"print_bits", 0, print_bits}
};
/* Module init: load hook only; no reload/upgrade/unload handlers. */
ERL_NIF_INIT(cq, nif_funcs, load, NULL, NULL, NULL);
@ -0,0 +1,71 @@ | |||
#ifndef CQ_NIF_H
#define CQ_NIF_H

#include <stdint.h>
#include "erl_nif.h"

/* Slots are padded to one cache line to limit false sharing between
   cores touching neighbouring slots. */
#define CACHE_LINE_SIZE 64

/* Map a monotonically increasing position onto a ring index.
   Requires __size to be a power of two. Arguments parenthesised so
   e.g. SLOT_INDEX(head + 1, size) expands correctly. */
#define SLOT_INDEX(__index, __size) ((__index) & ((__size) - 1))

/* The two low bits of node pointers in cq_queue_t carry a
   modification counter (ABA mitigation). Q_PTR strips the tag,
   Q_COUNT extracts it, Q_SET_COUNT combines pointer and counter. */
#define Q_MASK 3L
#define Q_PTR(__ptr) ((cq_node_t *) (((uint64_t)(__ptr)) & (~Q_MASK)))
#define Q_COUNT(__ptr) ((uint64_t) (__ptr) & Q_MASK)
#define Q_SET_COUNT(__ptr, __val) ((cq_node_t *) ((uint64_t) (__ptr) | ((__val) & Q_MASK)))

/* Ring-slot states. Transitions:
   producer: EMPTY -> WRITE -> FULL; consumer: FULL -> READ -> EMPTY */
#define STATE_EMPTY 0
#define STATE_WRITE 1
#define STATE_READ 2
#define STATE_FULL 3

/* NOTE(review): these globals are *definitions*, not declarations --
   including this header from a second translation unit would violate
   the one-definition rule. They should become `extern` declarations
   (defined once in the .c) if the header gains more users. */
ErlNifResourceType* CQ_RESOURCE;

typedef struct cq_node cq_node_t;

/* One node of the waiting-consumer queue. */
struct cq_node {
    ErlNifEnv *env;
    //ERL_NIF_TERM term;
    ErlNifPid *value;   /* NULL marks the dummy/sentinel node */
    cq_node_t *next;
};

/* Lock-free FIFO (Michael&Scott queue) of waiting consumer pids. */
typedef struct cq_queue {
    cq_node_t *head;
    cq_node_t *tail;
} cq_queue_t;

// TODO: Add padding between the fields
/* One concurrent queue instance. */
typedef struct cq {
    uint32_t id;              /* index into QUEUES */
    uint64_t queue_size;      /* ring slot count; must be power of 2 */
    uint64_t overflow_size;
    uint64_t head;            /* producer position (monotonic) */
    uint64_t tail;            /* consumer position (monotonic) */
    uint8_t *slots_states;    /* STATE_* per slot */
    ERL_NIF_TERM *slots_terms;
    ErlNifEnv **slots_envs;   /* per-slot envs owning slots_terms */
    cq_queue_t *push_queue;   /* blocked producers (not used yet) */
    cq_queue_t *pop_queue;    /* blocked consumers */
    uint8_t *overflow_states;
    ERL_NIF_TERM *overflow_terms;
    ErlNifEnv **overflow_envs;
} cq_t;

cq_t **QUEUES = NULL; /* Initialized on nif load */

ERL_NIF_TERM mk_atom(ErlNifEnv* env, const char* atom);
ERL_NIF_TERM mk_error(ErlNifEnv* env, const char* msg);
int load(ErlNifEnv* env, void** priv_data, ERL_NIF_TERM load_info);
void free_resource(ErlNifEnv*, void*);
cq_queue_t* new_queue(void);
void enqueue(cq_queue_t *q, ErlNifPid *pid);

#endif /* CQ_NIF_H */
@ -1,2 +1,8 @@ | |||
{erl_opts, [no_debug_info]}. | |||
{deps, []}. | |||
{erl_opts, [no_debug_info, {i, "include"}]}. | |||
{deps, []}. | |||
{pre_hooks, | |||
[{"linux", compile, "make -C c_src all"}]}. | |||
{post_hooks, | |||
[{"linux", clean, "make -C c_src clean"}]}. |
@ -0,0 +1,64 @@ | |||
# 描述 | |||
NIF库包含Erlang模块的某些功能的本机实现。像其他任何函数一样,调用本机实现的函数(NIF),与调用方没有任何区别。 | |||
NIF库被构建为动态链接的库文件,并通过调用erlang:load_nif / 2在运行时加载。 | |||
警告 | |||
谨慎使用此功能。 | |||
执行本机功能作为VM的本机代码的直接扩展。执行不是在安全的环境中进行的。VM 无法提供与执行Erlang代码时相同的服务, | |||
例如抢先式调度或内存保护。如果本机功能运行不正常,则整个VM都会出现异常。 | |||
崩溃的本机功能将使整个VM崩溃。 | |||
错误实现的本机功能可能会导致VM内部状态不一致,从而导致VM崩溃或在调用本机功能后的任何时候VM的其他异常行为。 | |||
在返回之前进行长时间工作的本机功能会降低VM的响应能力,并可能导致其他奇怪的行为。这种奇怪的行为包括但不限于极端的内存 | |||
使用情况以及调度程序之间的不良负载平衡。在Erlang / OTP发行版之间,由于冗长的工作而可能发生的奇怪行为也会有所不同。 | |||
# 简单示例 | |||
``` | |||
/* niftest.c */ | |||
#include <erl_nif.h> | |||
static ERL_NIF_TERM hello(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) | |||
{ | |||
return enif_make_string(env, "Hello world!", ERL_NIF_LATIN1); | |||
} | |||
static ErlNifFunc nif_funcs[] = | |||
{ | |||
{"hello", 0, hello} | |||
}; | |||
ERL_NIF_INIT(niftest,nif_funcs,NULL,NULL,NULL,NULL) | |||
``` | |||
``` | |||
-module(niftest). | |||
-export([init/0, hello/0]). | |||
-on_load(init/0). | |||
init() -> | |||
erlang:load_nif("./niftest", 0). | |||
hello() -> | |||
erlang:nif_error("NIF library not loaded"). | |||
``` | |||
在上面的示例中,使用了 on_load 指令,它使得模块被加载时自动调用指定的函数 init/0。
init函数初始化依次调用erlang:load_nif / 2 , | |||
该加载器会加载NIF库,并用C中的本机实现替换hello函数。加载后,NIF库将保持不变。在清除它所属的模块代码版本之前,不会将其卸载。 | |||
如果在成功加载NIF库之前调用了该函数,则每个NIF必须具有用Erlang调用的实现。典型的此类存根实现是调用erlang:nif_error, | |||
这将引发异常。如果NIF库缺少某些操作系统或硬件体系结构的实现,则Erlang函数也可以用作后备实现。 | |||
注意 | |||
NIF不必导出,它可以在模块本地。但是,编译器会优化未使用的本地存根函数,从而导致NIF库的加载失败。 | |||
# 功能性 | |||
NIF代码和Erlang运行时系统之间的所有交互都是通过调用NIF API函数来执行的。存在以下功能的功能: | |||
读写Erlang术语 | |||
任何Erlang术语都可以作为函数参数传递给NIF,并作为函数返回值返回。这些术语属于C类型的 ERL_NIF_TERM,只能使用API函数读取或写入。大部分用于读取术语内容的函数都以enif_get_为前缀,并且如果该术语属于预期类型(或非预期类型),则通常返回 true(或false)。编写术语的函数都带有enif_make_前缀 ,通常返回创建的ERL_NIF_TERM。还有一些查询术语的函数,例如enif_is_atom,enif_is_identical和enif_compare。 | |||
类型的所有方面ERL_NIF_TERM属于类型的环境ErlNifEnv。术语的生存期由其环境对象的生存期控制。读取或写入术语的所有API函数都将术语所属的环境作为第一个函数参数 | |||
增加和减少资源的引用计数的次数必须匹配,否则可能引发问题。 | |||
至此,持久资源的主要接口的实现就介绍完了,用户使用时,可以先通过enif_open_resource_type建立资源类型的描述符, | |||
然后利用此描述符,使用enif_alloc_resource分配资源所占用的内存空间,使用enif_make_resource将资源导出到erlang模块层, | |||
在进程间传递资源描述符,资源再传回NIF时,可以通过enif_get_resource取回资源描述符中的资源数据结构, | |||
同时可以通过enif_keep_resource来共享资源,通过enif_release_resource来放弃使用资源,gc系统也会正确回收引用计数为0的资源, | |||
开发者再也不用担心内存没有被正确释放了。 | |||
持久资源为NIF的开发带来了极大的便利,用户可以将一些大规模的数据结构一次传入内存,生成一个资源描述符, | |||
然后在进程间传递资源描述符而不是资源数据本身,减轻每次资源数据拷贝的开销,同时持久资源也是线程安全的, | |||
写erlang程序也可以像写c程序一样高效了。 | |||
@ -0,0 +1,27 @@ | |||
# window下编译nif dll 需要安装vs | |||
## 第一种 makefile配置(可参考jiffy的Makefile) 命令行下编译dll 需要设置vs相关环境变量 | |||
具体要设置的环境变量可参考下面几个 | |||
``` | |||
path 新增 | |||
D:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Tools\MSVC\14.24.28314\bin\Hostx64\x64 | |||
LIB | |||
D:\Windows Kits\10\Lib\10.0.18362.0\ucrt\x64 | |||
D:\Windows Kits\10\Lib\10.0.18362.0\ucrt\x64 | |||
D:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Tools\MSVC\14.24.28314\lib\x64 | |||
INCLUDE | |||
D:\Windows Kits\10\Include\10.0.18362.0\ucrt | |||
D:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Tools\MSVC\14.24.28314\include | |||
``` | |||
## 第二种 在vs单独编译 然后拷贝使用 | |||
VS编译 | |||
1 新建空项目或者从现有代码创建项目 | |||
2 先选择 编辑框上边的 解决方案配置 与 解决方案平台 | |||
3 右键项目属性 设置 配置与第2步 解决方案配置 一样 设置 平台与第二步设置的 解决方案平台 一样 | |||
4 右键项目属性 配置属性 -> 常规 -> 配置类型 ->动态库(.dll) | |||
5 右键项目属性 配置属性 -> VC++目录 -> 包含目录 新增 D:\Program Files\erl10.6\erts-10.6\include | |||
6 右键项目属性 生成 | |||
注意:编译时使用的 erlang include 目录要和实际运行所用的 erl 版本相对应