#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>

#include "erl_nif.h"
#include "cq_nif.h"

/*
#ifndef ERL_NIF_DIRTY_SCHEDULER_SUPPORT
# error Requires dirty schedulers
#endif
*/

ERL_NIF_TERM
mk_atom(ErlNifEnv* env, const char* atom)
{
    ERL_NIF_TERM ret;

    if (!enif_make_existing_atom(env, atom, &ret, ERL_NIF_LATIN1))
        return enif_make_atom(env, atom);

    return ret;
}

ERL_NIF_TERM
mk_error(ErlNifEnv* env, const char* mesg)
{
    return enif_make_tuple2(env, mk_atom(env, "error"), mk_atom(env, mesg));
}

static ERL_NIF_TERM
queue_new(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
{
    cq_t *q = enif_alloc_resource(CQ_RESOURCE, sizeof(cq_t));
    if (q == NULL)
        return mk_error(env, "priv_alloc_error");

    ERL_NIF_TERM ret = enif_make_resource(env, q);
    /* enif_release_resource(q); */

    uint32_t queue_id = 0;
    uint32_t queue_size = 0;
    uint32_t overflow_size = 0;

    if (!enif_get_uint(env, argv[0], &queue_id) ||
        !enif_get_uint(env, argv[1], &queue_size) ||
        !enif_get_uint(env, argv[2], &overflow_size))
        return mk_error(env, "badarg");

    if (queue_id >= 8)
        return mk_error(env, "bad_queue_id");

    /* TODO: Check that queue_size is a power of 2 */

    if (QUEUES[queue_id] != NULL)
        return mk_error(env, "queue_id_already_exists");

    q->id = queue_id;
    q->queue_size = queue_size;
    q->overflow_size = overflow_size;
    q->tail = 0;
    q->head = 0;
    q->slots_states = calloc(q->queue_size, CACHE_LINE_SIZE);
    q->slots_terms = calloc(q->queue_size, CACHE_LINE_SIZE);
    q->slots_envs = calloc(q->queue_size, CACHE_LINE_SIZE);
    q->overflow_terms = calloc(q->overflow_size, CACHE_LINE_SIZE);
    q->overflow_envs = calloc(q->overflow_size, CACHE_LINE_SIZE);
    q->push_queue = new_queue();
    q->pop_queue = new_queue();
    /* TODO: Check calloc return */

    for (int i = 0; i < q->queue_size; i++) {
        ErlNifEnv *slot_env = enif_alloc_env();
        q->slots_envs[i*CACHE_LINE_SIZE] = slot_env;
        //q->overflow_envs[i*CACHE_LINE_SIZE] = (ErlNifEnv *) enif_alloc_env();
    }

    QUEUES[q->id] = q;

    return enif_make_tuple2(env, mk_atom(env, "ok"), ret);
}

static ERL_NIF_TERM
queue_free(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
{
    uint32_t queue_id = 0;
    if (!enif_get_uint(env, argv[0], &queue_id))
        return mk_error(env, "badarg");
    if (queue_id >= 8)
        return mk_error(env, "badarg");

    cq_t *q = QUEUES[queue_id];
    if (q == NULL)
        return mk_error(env, "bad_queue_id");

    /* TODO: Free all the things! */
    QUEUES[queue_id] = NULL;

    return enif_make_atom(env, "ok");
}
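/* The slot state machine driven by queue_push/queue_async_pop below
 * (the STATE_* constants and the SLOT_INDEX macro are defined in
 * cq_nif.h):
 *
 *   producer: STATE_EMPTY --CAS--> STATE_WRITE --write term--> STATE_FULL
 *   consumer: STATE_FULL  --CAS--> STATE_READ  --copy term---> STATE_EMPTY
 *
 * Each slot's state is padded to its own cache line (hence the
 * i * CACHE_LINE_SIZE indexing throughout) to avoid false sharing
 * between cores. */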
/* Push to the head of the queue. */
static ERL_NIF_TERM
queue_push(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
{
    uint32_t queue_id = 0;
    if (!enif_get_uint(env, argv[0], &queue_id))
        return mk_error(env, "badarg");
    if (queue_id >= 8)
        return mk_error(env, "badarg");

    /* Load the queue */
    cq_t *q = QUEUES[queue_id];
    if (q == NULL)
        return mk_error(env, "bad_queue_id");
    if (q->id != queue_id)
        return mk_error(env, "not_identical_queue_id");

    for (int i = 0; i < q->queue_size; i++) {
        fprintf(stderr, "queue slot %d, index %d, state %d\n",
                i, i*CACHE_LINE_SIZE, (int) q->slots_states[i*CACHE_LINE_SIZE]);
    }

    /* If there are consumers waiting, the queue must be empty and we
       should directly pick a consumer to notify. */
    ErlNifPid *waiting_consumer;
    int dequeue_ret = dequeue(q->pop_queue, &waiting_consumer);
    if (dequeue_ret) {
        ErlNifEnv *msg_env = enif_alloc_env();
        ERL_NIF_TERM copy = enif_make_copy(msg_env, argv[1]);
        ERL_NIF_TERM tuple = enif_make_tuple2(msg_env, mk_atom(env, "pop"), copy);

        if (enif_send(env, waiting_consumer, msg_env, tuple)) {
            enif_free_env(msg_env);
            return mk_atom(env, "ok");
        } else {
            enif_free_env(msg_env);
            return mk_error(env, "notify_failed");
        }
    }

    /* Increment head and attempt to claim the slot by marking it as busy.
       This ensures no other thread will attempt to modify this slot. If we
       cannot claim it, another thread must have claimed it first and we
       retry on the next slot. */
    uint64_t head = __sync_add_and_fetch(&q->head, 1);
    size_t size = q->queue_size;

    while (1) {
        uint64_t index = SLOT_INDEX(head, size);
        uint64_t ret = __sync_val_compare_and_swap(&q->slots_states[index],
                                                   STATE_EMPTY,
                                                   STATE_WRITE);
        if (ret == STATE_EMPTY) {
            /* The CAS succeeded: we acquired the write lock, go ahead
               with the write. */
            break;
        } else if (ret == STATE_WRITE) {
            /* Another producer is writing to this slot. Move on and try
               to claim the next one. */
            head = __sync_add_and_fetch(&q->head, 1);
        } else {
            /* STATE_FULL: We have caught up with the tail and the buffer
               is full. Block the producer until a consumer reads the
               item. */
            return mk_error(env, "full_not_implemented");
        }
    }

    /* If head catches up with tail, the queue is full. Add to overflow
       instead. */

    /* Copy term to slot-specific temporary process env. */
    ERL_NIF_TERM copy = enif_make_copy(q->slots_envs[SLOT_INDEX(head, size)], argv[1]);
    q->slots_terms[SLOT_INDEX(head, size)] = copy;
    __sync_synchronize(); /* Or compiler memory barrier? */

    /* TODO: Do we need to collect garbage? */

    /* Mark the slot ready to be consumed */
    if (__sync_bool_compare_and_swap(&q->slots_states[SLOT_INDEX(head, size)],
                                     STATE_WRITE,
                                     STATE_FULL)) {
        return mk_atom(env, "ok");
    } else {
        return mk_error(env, "could_not_update_slots_after_insert");
    }
}
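/* Pop protocol, as implemented below together with queue_push above: a
 * consumer first tries to claim a full slot with a CAS. If the ring is
 * empty, it registers its pid on q->pop_queue and returns wait_for_msg;
 * the calling process is then expected to block in receive until a
 * producer hands it a {pop, Term} message directly via enif_send. */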
static ERL_NIF_TERM
queue_async_pop(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
{
    /* Load queue */
    uint32_t queue_id = 0;
    if (!enif_get_uint(env, argv[0], &queue_id))
        return mk_error(env, "badarg");
    if (queue_id >= 8)
        return mk_error(env, "badarg");

    cq_t *q = QUEUES[queue_id];
    if (q == NULL)
        return mk_error(env, "bad_queue_id");
    if (q->id != queue_id)
        return mk_error(env, "not_identical_queue_id");

    uint64_t qsize = q->queue_size;
    uint64_t tail = q->tail;
    uint64_t num_busy = 0;

    /* Walk the buffer starting at the tail position until we are either
       able to consume a term or find an empty slot. */
    while (1) {
        uint64_t index = SLOT_INDEX(tail, qsize);
        uint64_t ret = __sync_val_compare_and_swap(&q->slots_states[index],
                                                   STATE_FULL,
                                                   STATE_READ);
        if (ret == STATE_FULL) {
            /* The CAS succeeded and marked the slot as read in progress.
               We now have an exclusive lock. */
            break;
        } else if (ret == STATE_WRITE) {
            /* We found an item with a write in progress. If that thread
               progresses, it will eventually mark the slot as full. We
               could spin until that happens, but it can take an arbitrary
               amount of time and multiple reading threads would compete
               for the same slot.

               Instead we add the caller to the queue of blocking
               consumers. When the next producer comes along, it will
               "help" this thread by calling enif_send on the current
               in-progress term *and* handle its own term.

               If there's no new push to the queue, this will block
               forever. */
            return mk_atom(env, "write_in_progress_not_implemented");
        } else if (ret == STATE_EMPTY) {
            /* We found an empty slot, so the queue must be empty. Add the
               calling Erlang consumer process to the queue of waiting
               processes. When the next producer comes along, it first
               checks the waiting consumers and calls enif_send instead of
               writing to the slots. */
            ErlNifPid *pid = enif_alloc(sizeof(ErlNifPid));
            pid = enif_self(env, pid);
            enqueue(q->pop_queue, pid);
            return mk_atom(env, "wait_for_msg");
        } else {
            /* STATE_READ: another consumer claimed this slot first. Move
               on to the next one. */
            tail = __sync_add_and_fetch(&q->tail, 1);
        }
    }

    /* Copy the term into the calling process env. The NIF env can now be
       garbage collected. */
    ERL_NIF_TERM copy = enif_make_copy(env, q->slots_terms[SLOT_INDEX(tail, qsize)]);

    /* Mark the slot as free. Note: We don't increment the tail position
       here, as another thread also walking the buffer might have
       incremented it multiple times. */
    q->slots_terms[SLOT_INDEX(tail, qsize)] = 0;
    if (__sync_bool_compare_and_swap(&q->slots_states[SLOT_INDEX(tail, qsize)],
                                     STATE_READ,
                                     STATE_EMPTY)) {
        return enif_make_tuple2(env, mk_atom(env, "ok"), copy);
    } else {
        return mk_error(env, "could_not_update_slots_after_pop");
    }
}

static ERL_NIF_TERM
queue_debug(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
{
    uint32_t queue_id = 0;
    if (!enif_get_uint(env, argv[0], &queue_id))
        return mk_error(env, "badarg");
    if (queue_id >= 8)
        return mk_error(env, "badarg");

    cq_t *q = QUEUES[queue_id];
    if (q == NULL)
        return mk_error(env, "bad_queue_id");

    ERL_NIF_TERM *slots_states = enif_alloc(sizeof(ERL_NIF_TERM) * q->queue_size);
    ERL_NIF_TERM *slots_terms = enif_alloc(sizeof(ERL_NIF_TERM) * q->queue_size);

    for (int i = 0; i < q->queue_size; i++) {
        slots_states[i] = enif_make_int(env, q->slots_states[i * CACHE_LINE_SIZE]);

        if (q->slots_terms[i * CACHE_LINE_SIZE] == 0) {
            slots_terms[i] = mk_atom(env, "null");
        } else {
            slots_terms[i] = enif_make_copy(env, q->slots_terms[i * CACHE_LINE_SIZE]);
        }
    }

    return enif_make_tuple4(env,
                            enif_make_uint64(env, q->tail),
                            enif_make_uint64(env, q->head),
                            enif_make_list_from_array(env, slots_states, q->queue_size),
                            enif_make_list_from_array(env, slots_terms, q->queue_size));
}

static ERL_NIF_TERM
queue_debug_poppers(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
{
    uint32_t queue_id = 0;
    if (!enif_get_uint(env, argv[0], &queue_id))
        return mk_error(env, "badarg");
    if (queue_id >= 8)
        return mk_error(env, "badarg");

    cq_t *q = QUEUES[queue_id];
    if (q == NULL)
        return mk_error(env, "bad_queue_id");

    /* First pass: count the waiting consumers, skipping the value-less
       sentinel node at the head. */
    uint64_t pop_queue_size = 0;
    cq_node_t *node = Q_PTR(q->pop_queue->head);
    if (node->value == NULL) {
        node = Q_PTR(node->next);
    }

    while (node != NULL) {
        pop_queue_size++;
        node = Q_PTR(node->next);
    }

    /* Second pass: collect the pids. */
    ERL_NIF_TERM *pop_queue_pids = enif_alloc(sizeof(ERL_NIF_TERM) * pop_queue_size);

    node = Q_PTR(q->pop_queue->head);
    if (node->value == NULL) {
        node = Q_PTR(node->next);
    }

    uint64_t i = 0;
    while (node != NULL) {
        if (node->value == 0) {
            pop_queue_pids[i] = mk_atom(env, "null");
        } else {
            pop_queue_pids[i] = enif_make_pid(env, node->value);
        }
        i++;
        node = Q_PTR(node->next);
    }

    ERL_NIF_TERM list = enif_make_list_from_array(env, pop_queue_pids, pop_queue_size);
    enif_free(pop_queue_pids);
    return list;
}

static ERL_NIF_TERM
print_bits(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
{
    uint64_t *p1 = malloc(sizeof(uint64_t));
    *p1 = 0;

    for (int bit = 63; bit >= 0; bit--) {
        uint64_t power = 1ULL << bit;
        uint64_t byte = *p1;
        fprintf(stderr, "%d", (int) ((byte & power) >> bit));
    }
    fprintf(stderr, "\n");

    free(p1);
    return mk_atom(env, "ok");
}

void
free_resource(ErlNifEnv* env, void* arg)
{
    //cq_t *cq = (cq_t *) arg;
    fprintf(stderr, "free_resource\n");
}

cq_queue_t *
new_queue()
{
    cq_queue_t *queue = enif_alloc(sizeof(cq_queue_t));
    cq_node_t *node = enif_alloc(sizeof(cq_node_t));

    /* The queue starts with a single sentinel node carrying no value;
       head and tail both point to it. */
    node->next = NULL;
    //node->env = NULL;
    node->value = NULL;
    queue->head = node;
    queue->tail = node;

    return queue;
}
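/* enqueue/dequeue below follow the Michael-Scott lock-free queue
 * algorithm. Q_PTR, Q_COUNT and Q_SET_COUNT are assumed (in cq_nif.h) to
 * pack a modification counter into otherwise unused bits of a node
 * pointer, so every CAS compares the counter along with the pointer and
 * stale pointers (the ABA problem) are rejected. */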
void
enqueue(cq_queue_t *queue, ErlNifPid *pid)
{
    cq_node_t *node = enif_alloc(sizeof(cq_node_t));
    //node->env = enif_alloc_env();
    //node->term = enif_make_copy(node->env, term);
    node->value = pid;
    node->next = NULL;
    fprintf(stderr, "node %p\n", (void *) node);

    cq_node_t *tail = NULL;
    uint64_t tail_count = 0;

    while (1) {
        tail = queue->tail;
        cq_node_t *tail_ptr = Q_PTR(tail);
        tail_count = Q_COUNT(tail);

        cq_node_t *next = tail_ptr->next;
        cq_node_t *next_ptr = Q_PTR(next);
        uint64_t next_count = Q_COUNT(next);

        if (tail == queue->tail) {
            fprintf(stderr, "tail == queue->tail\n");
            if (next_ptr == NULL) {
                fprintf(stderr, "next_ptr == NULL\n");
                /* Tail was pointing at the last node; try to link in the
                   new node. Only leave the loop if the CAS succeeded. */
                if (__sync_bool_compare_and_swap(&tail_ptr->next,
                                                 next,
                                                 Q_SET_COUNT(node, next_count+1))) {
                    fprintf(stderr, "CAS(tail_ptr->next, next, (node, next_count+1)) -> true\n");
                    break;
                }
            } else {
                /* Tail was lagging behind; try to swing it forward. */
                __sync_bool_compare_and_swap(&queue->tail,
                                             tail,
                                             Q_SET_COUNT(next_ptr, next_count+1));
                fprintf(stderr, "CAS(queue->tail, tail, (next_ptr, next_count+1))\n");
            }
        }
    }

    /* Try to swing the tail to the newly inserted node. */
    cq_node_t *node_with_count = Q_SET_COUNT(node, tail_count+1);
    int ret = __sync_bool_compare_and_swap(&queue->tail, tail, node_with_count);
    fprintf(stderr, "CAS(queue->tail, tail, %p) -> %d\n", (void *) node_with_count, ret);
}

int
dequeue(cq_queue_t *queue, ErlNifPid **pid)
{
    fprintf(stderr, "dequeue\n");
    cq_node_t *head, *head_ptr, *tail, *tail_ptr, *next, *next_ptr;

    while (1) {
        head = queue->head;
        head_ptr = Q_PTR(head);
        tail = queue->tail;
        tail_ptr = Q_PTR(tail);
        next = head_ptr->next;
        next_ptr = Q_PTR(next);
        fprintf(stderr, "head %p, tail %p, next %p\n",
                (void *) head, (void *) tail, (void *) next);

        if (head == queue->head) {
            if (head_ptr == tail_ptr) {
                if (next_ptr == NULL) {
                    return 0; /* Queue is empty */
                }

                /* Tail is lagging behind; help it along. */
                fprintf(stderr, "CAS(queue->tail, tail, (next_ptr, tail+1))\n");
                __sync_bool_compare_and_swap(&queue->tail,
                                             tail,
                                             Q_SET_COUNT(next_ptr, Q_COUNT(tail)+1));
            } else {
                fprintf(stderr, "next->value %p\n", (void *) next_ptr->value);
                *pid = next_ptr->value;
                fprintf(stderr, "CAS(queue->head, head, (next_ptr, head+1))\n");
                if (__sync_bool_compare_and_swap(&queue->head,
                                                 head,
                                                 Q_SET_COUNT(next_ptr, Q_COUNT(head)+1)))
                    break;
            }
        }
    }

    // free pid
    //enif_free(Q_PTR(head));

    return 1;
}

int
load(ErlNifEnv* env, void** priv_data, ERL_NIF_TERM load_info)
{
    /* Initialize global array mapping id to cq_t ptr */
    QUEUES = (cq_t **) calloc(8, sizeof(cq_t *));
    if (QUEUES == NULL)
        return -1;

    ErlNifResourceFlags flags = (ErlNifResourceFlags)(ERL_NIF_RT_CREATE |
                                                      ERL_NIF_RT_TAKEOVER);
    CQ_RESOURCE = enif_open_resource_type(env, "cq", "cq",
                                          &free_resource, flags, NULL);
    if (CQ_RESOURCE == NULL)
        return -1;

    return 0;
}

static ErlNifFunc nif_funcs[] = {
    {"new"          , 3, queue_new},
    {"free"         , 1, queue_free},
    {"push"         , 2, queue_push},
    {"async_pop"    , 1, queue_async_pop},
    {"debug"        , 1, queue_debug},
    {"debug_poppers", 1, queue_debug_poppers},
    {"print_bits"   , 0, print_bits}
};

ERL_NIF_INIT(cq, nif_funcs, load, NULL, NULL, NULL);
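/* Example Erlang-side session, assuming a cq wrapper module whose exports
 * map one-to-one onto nif_funcs above (the wrapper is not part of this
 * file):
 *
 *   {ok, _Ref} = cq:new(1, 8, 8),   %% queue id 1, 8 slots, 8 overflow slots
 *   wait_for_msg = cq:async_pop(1), %% ring is empty: register as waiter
 *   ok = cq:push(1, hello),         %% producer notifies the waiter directly
 *   receive {pop, Term} -> Term end %% consumer receives hello
 */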