From f604d0b94ad2312701c955af1c9c33d0de8020d6 Mon Sep 17 00:00:00 2001 From: SisMaker <156736github> Date: Tue, 28 Dec 2021 22:35:24 +0800 Subject: [PATCH] =?UTF-8?q?ft:=20=E5=88=9D=E5=A7=8B=E5=8C=96=E6=8F=90?= =?UTF-8?q?=E4=BA=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 29 + LICENSE | 21 + README.md | 14 + c_src/eLfq/MPMC.md | 140 ++ c_src/eLfq/concurrentqueue.h | 3957 ++++++++++++++++++++++++++++++++++ c_src/eLfq/eLfq.cc | 3 + c_src/eLfq/rebar.config | 3 + c_src/eNpc | Bin 0 -> 25590 bytes c_src/eNpc.cmd | 4 + rebar.config | 8 + src/eLfq.app.src | 11 + src/eLfq.erl | 115 + 12 files changed, 4305 insertions(+) create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 README.md create mode 100644 c_src/eLfq/MPMC.md create mode 100644 c_src/eLfq/concurrentqueue.h create mode 100644 c_src/eLfq/eLfq.cc create mode 100644 c_src/eLfq/rebar.config create mode 100644 c_src/eNpc create mode 100644 c_src/eNpc.cmd create mode 100644 rebar.config create mode 100644 src/eLfq.app.src create mode 100644 src/eLfq.erl diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0ad44f1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,29 @@ +.eunit +*.o +*.beam +*.plt +erl_crash.dump +.concrete/DEV_MODE + +# rebar 2.x +.rebar +rel/example_project +ebin/* +deps + +# rebar 3 +.rebar3 +_build/ +_checkouts/ +rebar.lock + +# idea +.idea +*.iml +cmake-build* +CMakeLists.txt + +# nif compile temp file +*.pdb +*.d +compile_commands.json \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..5ff4cad --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2021 AICells + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..140a327
--- /dev/null
+++ b/README.md
@@ -0,0 +1,14 @@
+eLfq
+=====
+
+An OTP library
+
+Build
+-----
+
+    $ rebar3 compile
+
+=====
+
+A simple NIF lock-free Queue using the library: [moodycamel::concurrentqueue](https://github.com/cameron314/concurrentqueue)
+
diff --git a/c_src/eLfq/MPMC.md b/c_src/eLfq/MPMC.md
new file mode 100644
index 0000000..27e3342
--- /dev/null
+++ b/c_src/eLfq/MPMC.md
@@ -0,0 +1,140 @@
+# Detailed Design of a Lock-Free Queue
+
+Published November 6, 2014. This post gives a fairly detailed overview of my design for an efficient lock-free queue that supports multiple concurrent producers and consumers (an MPMC queue). My C++11 implementation of this design can be found on GitHub. A higher-level overview of the queue, including benchmark results, can be found in the earlier blog post introducing my implementation.
+
+A key insight is that the total order in which elements come out of the queue is irrelevant, as long as the elements enqueued on a given thread come out in the order they were enqueued on that thread. This means the queue can safely be implemented as a set of independent queues, one per producer; writing a single-producer, multi-consumer lock-free queue is much easier than writing a multi-producer, multi-consumer one, and it can be implemented more efficiently. The SPMC queues are generalized into an MPMC queue by having consumers pull from different SPMC queues as needed (this, too, can be done efficiently with a little cleverness). In the typical case, heuristics are used to speed up dequeueing by pairing consumers with producers wherever possible, which greatly reduces the overall contention in the system.
+
+Apart from the high-level design as a set of SPMC queues, the other key part of the queue is the core enqueueing algorithm itself, which is brand new (I designed it myself) and unlike any other I have heard of. It uses an atomic counter to track how many elements are available, and once one or more elements have been claimed (by incrementing the corresponding consumption counter and checking that the increment was valid), an atomic index can safely be incremented to obtain the actual IDs of the elements to reference. The problem then reduces to mapping an integer ID onto an individual element, without having to worry about other threads referencing the same object (each ID is handed out to only one thread). The details follow!
+
+## System overview
+
+The queue is made up of a collection of single-producer, multi-consumer (SPMC) queues. There is one SPMC queue per producer; consumers use a heuristic to decide which of these queues to consume from next. The queue is lock-free (though not quite wait-free). It is designed to be robust, extremely fast (especially on x86), and to allow bulk enqueueing and dequeueing with very little additional overhead compared to single items.
+
+Some thread-local data is required for each producer, and thread-local data can optionally be used to speed up consumers as well. This thread-local data can either be associated with user-allocated tokens or, to simplify the interface when the user does not provide a token for a producer, a lock-free hash table (keyed on the current thread ID) is used to look up a thread-local producer queue: one SPMC queue is created for each explicitly allocated producer token, plus another, implicit one for each thread that produces items without providing a token. Since tokens contain thread-specific data, they should never be used from multiple threads at the same time (although transferring ownership of a token to another thread is fine; in particular, this allows tokens to be used inside thread-pool tasks even if the thread running the task changes partway through).
+
+All the producer queues link themselves into a lock-free linked list. When an explicit producer no longer has elements being added to it (i.e. its token is destroyed), it is flagged as not being associated with any producer, but it is kept in the list and its memory is not freed; the next new producer reuses the old producer's memory (this is how the lock-free producer list stays add-only). Implicit producers are never destroyed (until the high-level queue itself is destroyed), since there is no way to know whether a given thread is done using the data structure. Note that the worst-case dequeue speed depends on how many producer queues there are, even if they are all empty.
+
+There is a fundamental difference between the lifetimes of explicit and implicit producer queues: the explicit queue's productive lifetime is tied to the lifetime of its token, whereas the implicit queue's lifetime is unbounded and lasts as long as the high-level queue itself. Hence, two slightly different SPMC algorithms are used in order to maximize both speed and memory usage. In general, the explicit producer queue is designed to be slightly faster and somewhat more memory-hungry, while the implicit producer queue is designed to be slightly slower but to recycle more memory back into the high-level queue's global pool. For best speed, always use an explicit token (unless you find it too inconvenient).
+
+Any memory that is allocated is only released when the high-level queue is destroyed (though there are several mechanisms for reuse). Memory allocation can be done up front, and operations can be made to fail if there is not enough memory (instead of allocating more). The various default size parameters (as well as the memory allocation functions used by the queue) can be overridden by the user if desired.
+
+## Full API (pseudocode)
+
+    # Allocates more memory if necessary
+    enqueue(item) : bool
+    enqueue(prod_token, item) : bool
+    enqueue_bulk(item_first, count) : bool
+    enqueue_bulk(prod_token, item_first, count) : bool
+
+    # Fails if not enough memory to enqueue
+    try_enqueue(item) : bool
+    try_enqueue(prod_token, item) : bool
+    try_enqueue_bulk(item_first, count) : bool
+    try_enqueue_bulk(prod_token, item_first, count) : bool
+
+    # Attempts to dequeue from the queue (never allocates)
+    try_dequeue(item&) : bool
+    try_dequeue(cons_token, item&) : bool
+    try_dequeue_bulk(item_first, max) : size_t
+    try_dequeue_bulk(cons_token, item_first, max) : size_t
+
+    # If you happen to know which producer you want to dequeue from
+    try_dequeue_from_producer(prod_token, item&) : bool
+    try_dequeue_bulk_from_producer(prod_token, item_first, max) : size_t
+
+    # A not-necessarily-accurate count of the total number of elements
+    size_approx() : size_t
+
+## Producer queue (SPMC) design
+
+The following design is shared between the implicit and explicit versions. A producer queue is made up of blocks (the same block objects are used by both the explicit and implicit producer queues, for better memory sharing). Initially, it starts out with no blocks. Each block can hold a fixed number of elements (all blocks have the same capacity, which is a power of 2). In addition, a block contains a per-slot flag indicating whether a filled slot has been completely consumed (used by the explicit version to determine when a block is empty), as well as an atomic counter of the number of completely dequeued elements (used by the implicit version to determine when a block is empty).
+
+For the purposes of the lock-free operations, a producer queue can be thought of as an abstract infinite array. A tail index indicates the next available slot for the producer to fill; it doubles as a count of the number of elements ever enqueued (the enqueue count). The tail index is written to only by the producer and always increases (except when it overflows and wraps around, which still counts as "increasing" for our purposes). Since only one thread updates the variables involved, producing an item is trivial. A head index indicates which element can be consumed next. The head index is incremented atomically by consumers, possibly concurrently. To prevent the head index from reaching or passing the perceived tail index, an additional atomic counter is used: the dequeue count.
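+To make the interplay between these counters concrete before going into the details, here is a minimal, illustrative C++11 sketch (not the actual implementation: the names, the fixed-size `slots` array, and the lack of any block management are simplifications, and it assumes the producer never laps the consumers). The role of the dequeue count and of the overcommit counter shown here is explained in the next paragraph.
+
+    #include <atomic>
+    #include <cstdint>
+    #include <cstddef>
+    #include <utility>
+
+    template <typename T, std::size_t Capacity>     // Capacity assumed to be a power of 2
+    struct SpmcCountersSketch {
+        std::atomic<std::uint64_t> tailIndex;              // enqueue count; written only by the single producer
+        std::atomic<std::uint64_t> headIndex;              // next element index handed to a consumer
+        std::atomic<std::uint64_t> dequeueOptimisticCount; // speculative claims made by consumers
+        std::atomic<std::uint64_t> dequeueOvercommit;      // failed claims, logically subtracted from the above
+        T slots[Capacity];                                 // stand-in for the real block structure
+
+        SpmcCountersSketch() : tailIndex(0), headIndex(0), dequeueOptimisticCount(0), dequeueOvercommit(0) {}
+
+        // Producer side: only one thread ever writes tailIndex, so this is trivially wait-free.
+        void enqueue(T value) {
+            std::uint64_t t = tailIndex.load(std::memory_order_relaxed);
+            slots[t & (Capacity - 1)] = std::move(value);
+            tailIndex.store(t + 1, std::memory_order_release);   // publish the element
+        }
+
+        // Consumer side: optimistically claim an element, then verify the claim against the tail.
+        bool try_dequeue(T& out) {
+            std::uint64_t overcommit = dequeueOvercommit.load(std::memory_order_relaxed);
+            std::uint64_t myClaim = dequeueOptimisticCount.fetch_add(1, std::memory_order_relaxed);
+            std::uint64_t tail = tailIndex.load(std::memory_order_acquire);
+            if (circular_less_than(myClaim - overcommit, tail)) {
+                // At least one element is guaranteed to exist, so headIndex cannot overshoot tailIndex.
+                std::uint64_t index = headIndex.fetch_add(1, std::memory_order_acq_rel);
+                out = std::move(slots[index & (Capacity - 1)]);
+                return true;
+            }
+            // Logically undo the claim by bumping the overcommit counter instead of decrementing
+            // the optimistic count, so that every counter involved stays monotonically increasing.
+            dequeueOvercommit.fetch_add(1, std::memory_order_release);
+            return false;
+        }
+
+        // "a comes before b" on the wrap-around circle of 64-bit indices.
+        static bool circular_less_than(std::uint64_t a, std::uint64_t b) {
+            return a - b > (std::uint64_t(1) << 63);
+        }
+    };
+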
+The dequeue count is optimistic: it is incremented by a consumer when it speculatively believes there is something to dequeue. If the value of the dequeue count after the increment is less than the enqueue count (the tail), then there is guaranteed to be at least one element to dequeue (even taking concurrency into account), and it is safe to increment the head index, knowing that it will then be less than the tail index. If, on the other hand, the dequeue count exceeds (or equals) the tail after being incremented, the dequeue attempt fails and the dequeue count is logically decremented (to keep it eventually consistent with the enqueue count): instead of decrementing the dequeue count directly, a dequeue overcommit counter is incremented (this increases parallelism and keeps all the variables involved monotonically increasing). To obtain the logical value of the dequeue count, the dequeue overcommit value is subtracted from the dequeue count variable.
+
+When consuming, once a valid index has been determined, it still needs to be mapped to a block and an offset within that block; some sort of index data structure is used for this purpose (which one depends on whether it is an implicit or an explicit queue). Finally, the element can be moved out, and some state is updated so that it can eventually be known when the block is completely spent. A full description of these mechanisms is given in the sections below covering the implicit- and explicit-specific details.
+
+As mentioned, the tail and head indices/counts eventually overflow. This is expected and accounted for. The indices/counts are therefore thought of as existing on a circle the size of the maximum integer value (akin to a 360-degree circle, where 359 comes before 1). To check whether one index/count `a` comes before another one `b` (i.e. is logically less than it), we determine whether `a` is closer to `b` along the clockwise arc of the circle. The following circular less-than algorithm is used (32-bit version): `a < b` becomes `a - b > (1U << 31U)`, and `a <= b` becomes `a - b - 1 > (1U << 31U)`. Note that circular subtraction "just works" with ordinary unsigned integers (assuming two's complement). Care is taken to make sure the tail index never overtakes the head index (which would corrupt the queue). Note that there is nevertheless, technically, still a race condition in which the index value seen by a consumer (or producer, for that matter) is so stale that it is almost a full circle's worth (or more!) behind its current value, causing the internal state of the queue to be corrupted. In practice, however, this is not an issue, because it takes a while to go through 2^31 values (for a 32-bit index type), by which time the other cores will have seen something more up to date. In fact, many lock-free algorithms are based on the related tagged-pointer idiom, where the first 16 bits are used for a repeatedly incremented tag and the other 16 bits for a pointer value; that relies on the similar assumption that one core cannot increment the tag more than 2^15 times without the other cores knowing about it. Nevertheless, the queue's default index type is 64 bits wide (which should prevent any potential races even in theory, even where 16 bits would seem to be enough).
+
+Memory allocation failure is also handled properly and never corrupts the queue (it is simply reported as a failure). The elements themselves, however, are assumed never to throw exceptions while being manipulated by the queue.
+
+## Block pools
+
+Two different block pools are used. First, there is the initial array of pre-allocated blocks. Once consumed, this pool stays empty forever. This simplifies its wait-free implementation to a single fetch-and-add atomic instruction (to get the next index of a free block) plus a check (to make sure that index is in range). Second, there is a lock-free (though not wait-free) global free list ("global" meaning global to the high-level queue) of spent blocks that are ready to be reused, implemented as a lock-free singly linked list: a head pointer initially points to nothing (null). To add a block to the free list, the block's next pointer is set to the head pointer, and the head pointer is then updated to point to the block using a compare-and-swap (CAS), on the condition that the head has not changed in the meantime; if it has, the process is repeated (this is the classic lock-free CAS-loop design pattern). To remove a block from the free list, a similar algorithm is used: the head block's next pointer is read, and the head is then set to that next pointer (using a CAS), conditional on the head not having changed in the meantime. To avoid the ABA problem, each block has a reference count, which is incremented before doing the CAS to remove the block and decremented afterwards; if an attempt is made to re-add a block to the free list while its reference count is greater than 0, a flag is set indicating that the block should be on the free list, and the next thread that finishes holding the last reference checks this flag and adds the block to the list at that point (this works because we do not care about ordering). I have described the exact design and implementation of this lock-free free list in greater detail in another blog post. When a producer queue needs a new block, it first checks the initial block pool, then the global free list, and only allocates a new block on the heap if it cannot find a free one there (or fails, if memory allocation is not allowed).
+
+## Explicit producer queue
+
+The explicit producer queue is implemented as a circular singly linked list of blocks. It is wait-free on the fast path, and only lock-free when it needs to acquire a block from the block pool (or allocate a new one); this only happens when its internal cache of blocks is completely full (or there are none, which is the case at the start).
+
+Once a block has been added to an explicit producer queue's circular linked list, it is never removed. A tail block pointer is maintained by the producer, pointing to the block that elements are currently being inserted into; when the tail block is full, the next block is checked to determine whether it is empty. If it is, the tail block pointer is updated to point to that block; if it is not, a new block is requested and inserted into the linked list immediately after the current tail block, and the tail block pointer is then updated to point to this new block.
+
+When an element has been completely dequeued from a block, a per-element flag is set to indicate that the slot is empty. (In fact, all the flags start out set, and are only turned off when a slot is filled.) The producer checks whether a block is empty by checking all of these flags. If the block size is small, this is fast enough; otherwise, for larger blocks, instead of the flag system, a block-level atomic count is incremented each time a consumer finishes with an element. When this count equals the size of the block, or when all the flags are set, the block is empty and can safely be reused.
+
+In order to index blocks in constant time (i.e. to quickly find the block an element resides in, given the element's global index from the dequeue algorithm), a circular buffer (contiguous array) is used. This index is maintained by the producer; consumers read from it but never write to it. The front of the array is the most recently written-to block (the tail block); the rear of the array is the last block that may still contain elements. It is helpful to think of this index (from a high-level perspective) as a long ribbon of the history of blocks that have been used. The front is incremented whenever the producer starts on another block (whether newly allocated or re-used from its circular list of blocks). The rear is incremented whenever a block already in the circular list is re-used (since blocks are only re-used once they are empty, it is always safe to increment the rear in that case). Instead of storing the rear explicitly, a count of the used slots is kept (this avoids the need for a spare element in the circular buffer and simplifies the implementation). If there is not enough room in the index to add a new item, a new index array is allocated, twice the size of the previous one (obviously, this is only allowed if memory allocation is permitted; if it is not, the entire enqueue operation fails gracefully). Since consumers may still be using the old index, it is not freed, but simply linked to the new one (forming a chain of index blocks that can be properly freed when the high-level queue is destroyed). When the producer queue's enqueue count is incremented, that releases all of its writes to the index; when a consumer performs an acquire (which it already needs for the dequeue algorithm), any index it sees from then on will contain a reference to the block the consumer is interested in. Since the blocks are all the same size, and a power of 2, we can use shifts and masks to determine the offset of our target block from any other block in the index (as well as the offset within the target block), provided we know the base index of a given block in the index. The index therefore contains not just the block pointers, but also each block's corresponding base index. The block chosen as the reference point (for computing offsets) must not be overwritten by the producer while it is in use; using the (perceived) front of the index as the reference point guarantees this, because (knowing that the block index is at least as up to date as the enqueue count preceding the dequeue index we are looking up) the front of the index must be at or ahead of the target block, and the target block is never overwritten in the index until it (and all blocks before it) is empty, which cannot happen before the dequeue operation itself completes. The index size is a power of 2, which allows faster wrapping of the front/rear variables.
+
+The explicit producer queue requires a user-allocated "producer token" to be passed when enqueueing. This token merely contains a pointer to the producer queue object. When a token is created, the corresponding producer queue is created along with it; when the token is destroyed, the producer queue may still contain unconsumed elements, so the queue itself outlives the token. In fact, once allocated, a producer queue is never destroyed (until the high-level queue is), but it is re-used the next time a producer token is created (instead of heap-allocating a new producer queue).
+
+## Implicit producer queue
+
+The implicit producer queue is implemented as a set of unlinked blocks. It is lock-free, but not wait-free, because the main free list of free blocks is itself only lock-free and blocks are continually acquired from and re-inserted into that pool (resizing the block index is also not constant time and requires memory allocation). The actual enqueue and dequeue operations are still wait-free within a single block.
+
+A current block pointer is maintained; this is the block currently being enqueued into. When a block fills up, a new one is requested and the old one is forgotten (from the producer's perspective). Before elements are added to a block, the block is inserted into the block index (this allows consumers to find blocks the producer has already forgotten about). When the last element in a block has been consumed, the block is logically removed from the block index.
+
+The implicit producer queue is never re-used; once created, it lives for the entire lifetime of the high-level queue. So, to reduce memory consumption, instead of hanging on to every block it has ever used (as the explicit producer does), it returns spent blocks to the global free list. To this end, an atomic dequeue count in each block is incremented whenever a consumer finishes dequeueing an item; when the counter reaches the block's size, the consumer that sees this knows it has just dequeued the last item, and puts the block onto the global free list.
+
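+The global free list mentioned here is the same lock-free list described under "Block pools" above. For reference, the following minimal C++11 sketch shows that classic CAS-loop pattern (type and member names are illustrative only; the real implementation additionally carries the per-block reference count and "should be on free list" flag that guard against the ABA problem, both omitted here):
+
+    #include <atomic>
+
+    struct BlockStub {                               // stand-in for the real block type
+        std::atomic<BlockStub*> freeListNext;
+        BlockStub() : freeListNext(nullptr) {}
+    };
+
+    class FreeListSketch {
+        std::atomic<BlockStub*> freeListHead;
+    public:
+        FreeListSketch() : freeListHead(nullptr) {}
+
+        // Classic CAS loop: point the block at the current head, then try to become the new head.
+        void add(BlockStub* block) {
+            BlockStub* head = freeListHead.load(std::memory_order_relaxed);
+            do {
+                block->freeListNext.store(head, std::memory_order_relaxed);
+            } while (!freeListHead.compare_exchange_weak(head, block,
+                         std::memory_order_release, std::memory_order_relaxed));
+        }
+
+        // Pop the head block, conditional on the head not having changed in the meantime.
+        // NOTE: without the reference count described above, this pop is exposed to ABA;
+        // it is shown only to illustrate the basic loop structure.
+        BlockStub* try_get() {
+            BlockStub* head = freeListHead.load(std::memory_order_acquire);
+            while (head != nullptr) {
+                BlockStub* next = head->freeListNext.load(std::memory_order_relaxed);
+                if (freeListHead.compare_exchange_weak(head, next,
+                        std::memory_order_acquire, std::memory_order_relaxed)) {
+                    return head;
+                }
+                // compare_exchange_weak reloaded `head` on failure; simply retry.
+            }
+            return nullptr;
+        }
+    };
+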
+The implicit producer queue uses a circular buffer to implement its block index, which allows constant-time lookup of a block given its base index. Each index entry is composed of a key-value pair: the base index of a block, and a pointer to the corresponding block itself. Since blocks are always inserted in order, the base index of each block in the index is guaranteed to increase by exactly one block's worth between adjacent entries. This means that any block known to be in the index can easily be found by looking at the last inserted base index, computing the offset to the desired base index, and looking up the index entry at that offset.
+
+When a block is spent, it is removed from the index (to make room for future block insertions); since another consumer may still be using that entry in the index (to compute an offset), the index entry is not removed outright. Instead, its block pointer is set to null, indicating to the producer that the slot can be re-used, while the block base index is left untouched for any consumers still using it to compute offsets. Since the producer only re-uses a slot once all the preceding slots are also free, since there must be at least one non-free slot in the index (the one corresponding to the block being looked up) whenever a consumer looks up a block, and since the block index entry the consumer uses for its lookup is at least as recent as when that block was enqueued, the consumer is guaranteed to find the block it is looking for before its slot can be re-used.
+
+When the producer wishes to enqueue an item and there is no room left in the block index, it allocates (if allowed) another block index (linked to the old one so that its memory can eventually be freed when the queue is destroyed), which becomes the main index from then on. The new index is a copy of the old one, except twice as large; copying over all the index entries allows consumers to only ever have to look in a single index to find the block they are after (keeping dequeues within a block constant time). Because a consumer could be marking index entries as free (by setting their block pointers to null) while the new index is being constructed, the index entries themselves are not copied, but rather pointers to them. This ensures that any changes a consumer makes to the old index also correctly affect the current index.
+
+## Hash of implicit producer queues
+
+A lock-free hash table is used to map thread IDs to implicit producers; it is used when no explicit producer token is provided to the various enqueue methods. It is based on Jeff Preshing's lock-free hash algorithm, with a few adjustments: the keys are the same size as the platform-dependent numeric thread ID type; the values are pointers; and when the hash table becomes too small (the number of elements is tracked with an atomic counter), a new one is allocated and linked to the old one, and elements in the old one are lazily transferred over as they are read. Since an atomic count of the number of elements is available, and elements are never removed, a thread that wishes to insert an element into a hash table that is too small must either attempt the resize itself or wait for another thread to finish the resize (resizing is protected with a lock to prevent spurious allocations). The resize path is also designed to speed up resizing under contention, i.e. to minimize the amount of spin-waiting that threads do while waiting for another thread to finish allocating.
+
+## Linked list of producers
+
+As mentioned earlier, a singly linked (LIFO) list of all the producers is maintained. This list is implemented using a tail pointer and an intrusive "next" (really, "prev") pointer in each producer. The tail initially points to nothing (null); when a new producer is created, it adds itself to the list by first reading the tail, then setting its next pointer to that tail, and then setting the tail to the new producer using a CAS operation (conditional on the tail not having changed), looping as necessary. Producers are never removed from the list, but they can be marked as inactive.
+
+When a consumer wants to dequeue an item, it simply walks the list of producers looking for an SPMC queue with an item in it (since the number of producers is unbounded, this is partly what makes the high-level queue merely lock-free rather than wait-free).
+
+## Dequeue heuristics
+
+Consumers can pass a token to the various dequeue methods. The purpose of this token is to speed up the selection of an appropriate internal producer queue to attempt to dequeue from. A simple scheme is used whereby every explicit consumer is assigned an auto-incrementing offset representing the index of the producer queue it should dequeue from. In this way, consumers are distributed across the producers as fairly as possible; however, not all producers have the same number of elements available, and some consumers may consume faster than others. To address this, the first consumer to consume 256 items in a row from the same internal producer queue increments a global offset, causing all consumers to rotate on their next dequeue operation and start consuming from the next producer. (Note that this means the rate of rotation is determined by the fastest consumer.) If there are no elements available in a consumer's designated queue, it moves on to the next queue that has elements available. This simple heuristic is efficient and is able to pair consumers with producers with near-perfect scaling, leading to impressive dequeue speedups.
+
+## A note on linearizability
+
+A data structure is linearizable if all of its operations appear to execute in some sequential (linear) order, even under concurrency (this paper has a good definition). While this is a useful property, since it makes concurrent algorithms obviously correct and easier to reason about, it is a very strong consistency model. The queue I present here is not linearizable, since making it so would lead to much poorer performance; however, I believe it is still very useful. My queue has the following consistency model: enqueue operations on any given thread are (obviously) linearizable on that thread, but not with respect to other threads (this should not matter, since even with a fully linearizable queue the final order of elements is non-deterministic, because it depends on races between threads). Note that even though enqueue operations are not linearizable across threads, they are still atomic: only elements that have been fully enqueued can ever be dequeued. A dequeue operation is allowed to fail if all the producer queues appeared empty at the time they were checked. This means dequeue operations are also not linearizable, because during a failed dequeue operation the queue as a whole was not necessarily empty at any single point in time. (Even the emptiness check on an individual producer queue is technically non-linearizable, since an enqueue operation may have completed whose memory effects have not yet propagated to the dequeueing thread; again, this does not really matter, since either way it comes down to a non-deterministic race condition.)
+
+What this non-linearizability means in practice is that a dequeue operation may fail before the queue is completely emptied if other producers are still enqueueing (regardless of whether other threads are dequeueing). Note that this race condition exists even with a fully linearizable queue. If the queue has stabilized (i.e. all enqueue operations have completed and their memory effects are visible to any potential consumer threads), then dequeue operations never fail as long as the queue is not empty. Similarly, if a given set of elements is visible to all dequeueing threads, dequeue operations on those threads never fail until at least that set of elements has been consumed (but they may fail afterwards, even if the queue is not completely empty).
+
+## Conclusion
+
+So there you have it: more than you ever wanted to know about my general-purpose lock-free queue design. I have implemented this design in C++11, but I am sure it could be ported to other languages. If anybody does implement this design in another language, I would love to hear about it!
\ No newline at end of file
diff --git a/c_src/eLfq/concurrentqueue.h b/c_src/eLfq/concurrentqueue.h
new file mode 100644
index 0000000..96b6978
--- /dev/null
+++ b/c_src/eLfq/concurrentqueue.h
@@ -0,0 +1,3957 @@
+// Provides a C++11 implementation of a multi-producer, multi-consumer lock-free queue.
+// An overview, including benchmark results, is provided here:
+// http://moodycamel.com/blog/2014/a-fast-general-purpose-lock-free-queue-for-c++
+// The full design is also described in excruciating detail at:
+// http://moodycamel.com/blog/2014/detailed-design-of-a-lock-free-queue
+
+// Simplified BSD license:
+// Copyright (c) 2013-2020, Cameron Desrochers.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice, this list of
+// conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice, this list of +// conditions and the following disclaimer in the documentation and/or other materials +// provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL +// THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +// OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR +// TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Also dual-licensed under the Boost Software License (see LICENSE.md) + +#pragma once + +#if defined(__GNUC__) && !defined(__INTEL_COMPILER) + // Disable -Wconversion warnings (spuriously triggered when Traits::size_t and +// Traits::index_t are set to < 32 bits, causing integer promotion, causing warnings +// upon assigning any computed values) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wconversion" + +#ifdef MCDBGQ_USE_RELACY +#pragma GCC diagnostic ignored "-Wint-to-pointer-cast" +#endif +#endif + +#if defined(_MSC_VER) && (!defined(_HAS_CXX17) || !_HAS_CXX17) + // VS2019 with /W4 warns about constant conditional expressions but unless /std=c++17 or higher +// does not support `if constexpr`, so we have no choice but to simply disable the warning +#pragma warning(push) +#pragma warning(disable: 4127) // conditional expression is constant +#endif + +#if defined(__APPLE__) +#include "TargetConditionals.h" +#endif + +#ifdef MCDBGQ_USE_RELACY + #include "relacy/relacy_std.hpp" +#include "relacy_shims.h" +// We only use malloc/free anyway, and the delete macro messes up `= delete` method declarations. +// We'll override the default trait malloc ourselves without a macro. +#undef new +#undef delete +#undef malloc +#undef free +#else + +#include // Requires C++11. Sorry VS2010. 
+#include + +#endif + +#include // for max_align_t +#include +#include +#include +#include +#include +#include +#include // for CHAR_BIT +#include +#include // partly for __WINPTHREADS_VERSION if on MinGW-w64 w/ POSIX threading + +// Platform-specific definitions of a numeric thread ID type and an invalid value +namespace moodycamel { + namespace details { + template + struct thread_id_converter { + typedef thread_id_t thread_id_numeric_size_t; + typedef thread_id_t thread_id_hash_t; + + static thread_id_hash_t prehash(thread_id_t const &x) { return x; } + }; + } +} +#if defined(MCDBGQ_USE_RELACY) + namespace moodycamel { namespace details { + typedef std::uint32_t thread_id_t; + static const thread_id_t invalid_thread_id = 0xFFFFFFFFU; + static const thread_id_t invalid_thread_id2 = 0xFFFFFFFEU; + static inline thread_id_t thread_id() { return rl::thread_index(); } +} } +#elif defined(_WIN32) || defined(__WINDOWS__) || defined(__WIN32__) + // No sense pulling in windows.h in a header, we'll manually declare the function +// we use and rely on backwards-compatibility for this not to break +extern "C" __declspec(dllimport) unsigned long __stdcall GetCurrentThreadId(void); +namespace moodycamel { namespace details { + static_assert(sizeof(unsigned long) == sizeof(std::uint32_t), "Expected size of unsigned long to be 32 bits on Windows"); + typedef std::uint32_t thread_id_t; + static const thread_id_t invalid_thread_id = 0; // See http://blogs.msdn.com/b/oldnewthing/archive/2004/02/23/78395.aspx + static const thread_id_t invalid_thread_id2 = 0xFFFFFFFFU; // Not technically guaranteed to be invalid, but is never used in practice. Note that all Win32 thread IDs are presently multiples of 4. + static inline thread_id_t thread_id() { return static_cast(::GetCurrentThreadId()); } +} } +#elif defined(__arm__) || defined(_M_ARM) || defined(__aarch64__) || (defined(__APPLE__) && TARGET_OS_IPHONE) || defined(MOODYCAMEL_NO_THREAD_LOCAL) + namespace moodycamel { namespace details { + static_assert(sizeof(std::thread::id) == 4 || sizeof(std::thread::id) == 8, "std::thread::id is expected to be either 4 or 8 bytes"); + + typedef std::thread::id thread_id_t; + static const thread_id_t invalid_thread_id; // Default ctor creates invalid ID + + // Note we don't define a invalid_thread_id2 since std::thread::id doesn't have one; it's + // only used if MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED is defined anyway, which it won't + // be. 
+ static inline thread_id_t thread_id() { return std::this_thread::get_id(); } + + template struct thread_id_size { }; + template<> struct thread_id_size<4> { typedef std::uint32_t numeric_t; }; + template<> struct thread_id_size<8> { typedef std::uint64_t numeric_t; }; + + template<> struct thread_id_converter { + typedef thread_id_size::numeric_t thread_id_numeric_size_t; +#ifndef __APPLE__ + typedef std::size_t thread_id_hash_t; +#else + typedef thread_id_numeric_size_t thread_id_hash_t; +#endif + + static thread_id_hash_t prehash(thread_id_t const& x) + { +#ifndef __APPLE__ + return std::hash()(x); +#else + return *reinterpret_cast(&x); +#endif + } + }; +} } +#else +// Use a nice trick from this answer: http://stackoverflow.com/a/8438730/21475 +// In order to get a numeric thread ID in a platform-independent way, we use a thread-local +// static variable's address as a thread identifier :-) +#if defined(__GNUC__) || defined(__INTEL_COMPILER) +#define MOODYCAMEL_THREADLOCAL __thread +#elif defined(_MSC_VER) +#define MOODYCAMEL_THREADLOCAL __declspec(thread) +#else +// Assume C++11 compliant compiler +#define MOODYCAMEL_THREADLOCAL thread_local +#endif +namespace moodycamel { + namespace details { + typedef std::uintptr_t thread_id_t; + static const thread_id_t invalid_thread_id = 0; // Address can't be nullptr + static const thread_id_t invalid_thread_id2 = 1; // Member accesses off a null pointer are also generally invalid. Plus it's not aligned. + inline thread_id_t thread_id() { + static MOODYCAMEL_THREADLOCAL int x; + return reinterpret_cast(&x); + } + } +} +#endif + +// Constexpr if +#ifndef MOODYCAMEL_CONSTEXPR_IF +#if (defined(_MSC_VER) && defined(_HAS_CXX17) && _HAS_CXX17) || __cplusplus > 201402L + #define MOODYCAMEL_CONSTEXPR_IF if constexpr +#define MOODYCAMEL_MAYBE_UNUSED [[maybe_unused]] +#else +#define MOODYCAMEL_CONSTEXPR_IF if +#define MOODYCAMEL_MAYBE_UNUSED +#endif +#endif + +// Exceptions +#ifndef MOODYCAMEL_EXCEPTIONS_ENABLED +#if (defined(_MSC_VER) && defined(_CPPUNWIND)) || (defined(__GNUC__) && defined(__EXCEPTIONS)) || (!defined(_MSC_VER) && !defined(__GNUC__)) +#define MOODYCAMEL_EXCEPTIONS_ENABLED +#endif +#endif +#ifdef MOODYCAMEL_EXCEPTIONS_ENABLED +#define MOODYCAMEL_TRY try +#define MOODYCAMEL_CATCH(...) catch(__VA_ARGS__) +#define MOODYCAMEL_RETHROW throw +#define MOODYCAMEL_THROW(expr) throw (expr) +#else + #define MOODYCAMEL_TRY MOODYCAMEL_CONSTEXPR_IF (true) +#define MOODYCAMEL_CATCH(...) else MOODYCAMEL_CONSTEXPR_IF (false) +#define MOODYCAMEL_RETHROW +#define MOODYCAMEL_THROW(expr) +#endif + +#ifndef MOODYCAMEL_NOEXCEPT +#if !defined(MOODYCAMEL_EXCEPTIONS_ENABLED) + #define MOODYCAMEL_NOEXCEPT +#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) true +#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) true +#elif defined(_MSC_VER) && defined(_NOEXCEPT) && _MSC_VER < 1800 + // VS2012's std::is_nothrow_[move_]constructible is broken and returns true when it shouldn't :-( +// We have to assume *all* non-trivial constructors may throw on VS2012! +#define MOODYCAMEL_NOEXCEPT _NOEXCEPT +#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) (std::is_rvalue_reference::value && std::is_move_constructible::value ? std::is_trivially_move_constructible::value : std::is_trivially_copy_constructible::value) +#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) ((std::is_rvalue_reference::value && std::is_move_assignable::value ? 
std::is_trivially_move_assignable::value || std::is_nothrow_move_assignable::value : std::is_trivially_copy_assignable::value || std::is_nothrow_copy_assignable::value) && MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr)) +#elif defined(_MSC_VER) && defined(_NOEXCEPT) && _MSC_VER < 1900 + #define MOODYCAMEL_NOEXCEPT _NOEXCEPT +#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) (std::is_rvalue_reference::value && std::is_move_constructible::value ? std::is_trivially_move_constructible::value || std::is_nothrow_move_constructible::value : std::is_trivially_copy_constructible::value || std::is_nothrow_copy_constructible::value) +#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) ((std::is_rvalue_reference::value && std::is_move_assignable::value ? std::is_trivially_move_assignable::value || std::is_nothrow_move_assignable::value : std::is_trivially_copy_assignable::value || std::is_nothrow_copy_assignable::value) && MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr)) +#else +#define MOODYCAMEL_NOEXCEPT noexcept +#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) noexcept(expr) +#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) noexcept(expr) +#endif +#endif + +#ifndef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED +#ifdef MCDBGQ_USE_RELACY +#define MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED +#else +// VS2013 doesn't support `thread_local`, and MinGW-w64 w/ POSIX threading has a crippling bug: http://sourceforge.net/p/mingw-w64/bugs/445 +// g++ <=4.7 doesn't support thread_local either. +// Finally, iOS/ARM doesn't have support for it either, and g++/ARM allows it to compile but it's unconfirmed to actually work +#if (!defined(_MSC_VER) || _MSC_VER >= 1900) && (!defined(__MINGW32__) && !defined(__MINGW64__) || !defined(__WINPTHREADS_VERSION)) && (!defined(__GNUC__) || __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)) && (!defined(__APPLE__) || !TARGET_OS_IPHONE) && !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) +// Assume `thread_local` is fully supported in all other C++11 compilers/platforms +//#define MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED // always disabled for now since several users report having problems with it on +#endif +#endif +#endif + +// VS2012 doesn't support deleted functions. +// In this case, we declare the function normally but don't define it. A link error will be generated if the function is called. 
+#ifndef MOODYCAMEL_DELETE_FUNCTION +#if defined(_MSC_VER) && _MSC_VER < 1800 +#define MOODYCAMEL_DELETE_FUNCTION +#else +#define MOODYCAMEL_DELETE_FUNCTION = delete +#endif +#endif + +namespace moodycamel { + namespace details { +#ifndef MOODYCAMEL_ALIGNAS +// VS2013 doesn't support alignas or alignof, and align() requires a constant literal +#if defined(_MSC_VER) && _MSC_VER <= 1800 + #define MOODYCAMEL_ALIGNAS(alignment) __declspec(align(alignment)) +#define MOODYCAMEL_ALIGNOF(obj) __alignof(obj) +#define MOODYCAMEL_ALIGNED_TYPE_LIKE(T, obj) typename details::Vs2013Aligned::value, T>::type + template struct Vs2013Aligned { }; // default, unsupported alignment + template struct Vs2013Aligned<1, T> { typedef __declspec(align(1)) T type; }; + template struct Vs2013Aligned<2, T> { typedef __declspec(align(2)) T type; }; + template struct Vs2013Aligned<4, T> { typedef __declspec(align(4)) T type; }; + template struct Vs2013Aligned<8, T> { typedef __declspec(align(8)) T type; }; + template struct Vs2013Aligned<16, T> { typedef __declspec(align(16)) T type; }; + template struct Vs2013Aligned<32, T> { typedef __declspec(align(32)) T type; }; + template struct Vs2013Aligned<64, T> { typedef __declspec(align(64)) T type; }; + template struct Vs2013Aligned<128, T> { typedef __declspec(align(128)) T type; }; + template struct Vs2013Aligned<256, T> { typedef __declspec(align(256)) T type; }; +#else + template + struct identity { + typedef T type; + }; +#define MOODYCAMEL_ALIGNAS(alignment) alignas(alignment) +#define MOODYCAMEL_ALIGNOF(obj) alignof(obj) +#define MOODYCAMEL_ALIGNED_TYPE_LIKE(T, obj) alignas(alignof(obj)) typename details::identity::type +#endif +#endif + } +} + + +// TSAN can false report races in lock-free code. To enable TSAN to be used from projects that use this one, +// we can apply per-function compile-time suppression. +// See https://clang.llvm.org/docs/ThreadSanitizer.html#has-feature-thread-sanitizer +#define MOODYCAMEL_NO_TSAN +#if defined(__has_feature) + #if __has_feature(thread_sanitizer) + #undef MOODYCAMEL_NO_TSAN + #define MOODYCAMEL_NO_TSAN __attribute__((no_sanitize("thread"))) + #endif // TSAN +#endif // TSAN + +// Compiler-specific likely/unlikely hints +namespace moodycamel { + namespace details { +#if defined(__GNUC__) + static inline bool (likely)(bool x) { return __builtin_expect((x), true); } + static inline bool (unlikely)(bool x) { return __builtin_expect((x), false); } +#else + + static inline bool (likely)(bool x) { return x; } + + static inline bool (unlikely)(bool x) { return x; } + +#endif + } +} + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG +#include "internal/concurrentqueue_internal_debug.h" +#endif + +namespace moodycamel { + namespace details { + template + struct const_numeric_max { + static_assert(std::is_integral::value, "const_numeric_max can only be used with integers"); + static const T value = std::numeric_limits::is_signed + ? (static_cast(1) << (sizeof(T) * CHAR_BIT - 1)) - static_cast(1) + : static_cast(-1); + }; + +#if defined(__GLIBCXX__) + typedef ::max_align_t std_max_align_t; // libstdc++ forgot to add it to std:: for a while +#else + typedef std::max_align_t std_max_align_t; // Others (e.g. MSVC) insist it can *only* be accessed via std:: +#endif + + // Some platforms have incorrectly set max_align_t to a type with <8 bytes alignment even while supporting + // 8-byte aligned scalar values (*cough* 32-bit iOS). Work around this with our own union. See issue #64. 
+ typedef union { + std_max_align_t x; + long long y; + void *z; + } max_align_t; + } + +// Default traits for the ConcurrentQueue. To change some of the +// traits without re-implementing all of them, inherit from this +// struct and shadow the declarations you wish to be different; +// since the traits are used as a template type parameter, the +// shadowed declarations will be used where defined, and the defaults +// otherwise. + struct ConcurrentQueueDefaultTraits { + // General-purpose size type. std::size_t is strongly recommended. + typedef std::size_t size_t; + + // The type used for the enqueue and dequeue indices. Must be at least as + // large as size_t. Should be significantly larger than the number of elements + // you expect to hold at once, especially if you have a high turnover rate; + // for example, on 32-bit x86, if you expect to have over a hundred million + // elements or pump several million elements through your queue in a very + // short space of time, using a 32-bit type *may* trigger a race condition. + // A 64-bit int type is recommended in that case, and in practice will + // prevent a race condition no matter the usage of the queue. Note that + // whether the queue is lock-free with a 64-int type depends on the whether + // std::atomic is lock-free, which is platform-specific. + typedef std::size_t index_t; + + // Internally, all elements are enqueued and dequeued from multi-element + // blocks; this is the smallest controllable unit. If you expect few elements + // but many producers, a smaller block size should be favoured. For few producers + // and/or many elements, a larger block size is preferred. A sane default + // is provided. Must be a power of 2. + static const size_t BLOCK_SIZE = 32; + + // For explicit producers (i.e. when using a producer token), the block is + // checked for being empty by iterating through a list of flags, one per element. + // For large block sizes, this is too inefficient, and switching to an atomic + // counter-based approach is faster. The switch is made for block sizes strictly + // larger than this threshold. + static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = 32; + + // How many full blocks can be expected for a single explicit producer? This should + // reflect that number's maximum for optimal performance. Must be a power of 2. + static const size_t EXPLICIT_INITIAL_INDEX_SIZE = 32; + + // How many full blocks can be expected for a single implicit producer? This should + // reflect that number's maximum for optimal performance. Must be a power of 2. + static const size_t IMPLICIT_INITIAL_INDEX_SIZE = 32; + + // The initial size of the hash table mapping thread IDs to implicit producers. + // Note that the hash is resized every time it becomes half full. + // Must be a power of two, and either 0 or at least 1. If 0, implicit production + // (using the enqueue methods without an explicit producer token) is disabled. + static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = 32; + + // Controls the number of items that an explicit consumer (i.e. one with a token) + // must consume before it causes all consumers to rotate and move on to the next + // internal queue. + static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE = 256; + + // The maximum number of elements (inclusive) that can be enqueued to a sub-queue. + // Enqueue operations that would cause this limit to be surpassed will fail. Note + // that this limit is enforced at the block level (for performance reasons), i.e. 
+ // it's rounded up to the nearest block size. + static const size_t MAX_SUBQUEUE_SIZE = details::const_numeric_max::value; + + // The number of times to spin before sleeping when waiting on a semaphore. + // Recommended values are on the order of 1000-10000 unless the number of + // consumer threads exceeds the number of idle cores (in which case try 0-100). + // Only affects instances of the BlockingConcurrentQueue. + static const int MAX_SEMA_SPINS = 10000; + + +#ifndef MCDBGQ_USE_RELACY + // Memory allocation can be customized if needed. + // malloc should return nullptr on failure, and handle alignment like std::malloc. +#if defined(malloc) || defined(free) + // Gah, this is 2015, stop defining macros that break standard code already! + // Work around malloc/free being special macros: + static inline void* WORKAROUND_malloc(size_t size) { return malloc(size); } + static inline void WORKAROUND_free(void* ptr) { return free(ptr); } + static inline void* (malloc)(size_t size) { return WORKAROUND_malloc(size); } + static inline void (free)(void* ptr) { return WORKAROUND_free(ptr); } +#else + + static inline void *malloc(size_t size) { return std::malloc(size); } + + static inline void free(void *ptr) { return std::free(ptr); } + +#endif +#else + // Debug versions when running under the Relacy race detector (ignore + // these in user code) + static inline void* malloc(size_t size) { return rl::rl_malloc(size, $); } + static inline void free(void* ptr) { return rl::rl_free(ptr, $); } +#endif + }; + + +// When producing or consuming many elements, the most efficient way is to: +// 1) Use one of the bulk-operation methods of the queue with a token +// 2) Failing that, use the bulk-operation methods without a token +// 3) Failing that, create a token and use that with the single-item methods +// 4) Failing that, use the single-parameter methods of the queue +// Having said that, don't create tokens willy-nilly -- ideally there should be +// a maximum of one token per thread (of each kind). 
+ struct ProducerToken; + struct ConsumerToken; + + template + class ConcurrentQueue; + + template + class BlockingConcurrentQueue; + + class ConcurrentQueueTests; + + + namespace details { + struct ConcurrentQueueProducerTypelessBase { + ConcurrentQueueProducerTypelessBase *next; + std::atomic inactive; + ProducerToken *token; + + ConcurrentQueueProducerTypelessBase() + : next(nullptr), inactive(false), token(nullptr) { + } + }; + + template + struct _hash_32_or_64 { + static inline std::uint32_t hash(std::uint32_t h) { + // MurmurHash3 finalizer -- see https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp + // Since the thread ID is already unique, all we really want to do is propagate that + // uniqueness evenly across all the bits, so that we can use a subset of the bits while + // reducing collisions significantly + h ^= h >> 16; + h *= 0x85ebca6b; + h ^= h >> 13; + h *= 0xc2b2ae35; + return h ^ (h >> 16); + } + }; + + template<> + struct _hash_32_or_64<1> { + static inline std::uint64_t hash(std::uint64_t h) { + h ^= h >> 33; + h *= 0xff51afd7ed558ccd; + h ^= h >> 33; + h *= 0xc4ceb9fe1a85ec53; + return h ^ (h >> 33); + } + }; + + template + struct hash_32_or_64 : public _hash_32_or_64<(size > 4)> { + }; + + static inline size_t hash_thread_id(thread_id_t id) { + static_assert(sizeof(thread_id_t) <= 8, "Expected a platform where thread IDs are at most 64-bit values"); + return static_cast(hash_32_or_64::thread_id_hash_t)>::hash( + thread_id_converter::prehash(id))); + } + + template + static inline bool circular_less_than(T a, T b) { +#ifdef _MSC_VER + #pragma warning(push) +#pragma warning(disable: 4554) +#endif + static_assert(std::is_integral::value && !std::numeric_limits::is_signed, + "circular_less_than is intended to be used only with unsigned integer types"); + return static_cast(a - b) > + static_cast(static_cast(1) << static_cast(sizeof(T) * CHAR_BIT - 1)); +#ifdef _MSC_VER +#pragma warning(pop) +#endif + } + + template + static inline char *align_for(char *ptr) { + const std::size_t alignment = std::alignment_of::value; + return ptr + (alignment - (reinterpret_cast(ptr) % alignment)) % alignment; + } + + template + static inline T ceil_to_pow_2(T x) { + static_assert(std::is_integral::value && !std::numeric_limits::is_signed, + "ceil_to_pow_2 is intended to be used only with unsigned integer types"); + + // Adapted from http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 + --x; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + for (std::size_t i = 1; i < sizeof(T); i <<= 1) { + x |= x >> (i << 3); + } + ++x; + return x; + } + + template + static inline void swap_relaxed(std::atomic &left, std::atomic &right) { + T temp = std::move(left.load(std::memory_order_relaxed)); + left.store(std::move(right.load(std::memory_order_relaxed)), std::memory_order_relaxed); + right.store(std::move(temp), std::memory_order_relaxed); + } + + template + static inline T const &nomove(T const &x) { + return x; + } + + template + struct nomove_if { + template + static inline T const &eval(T const &x) { + return x; + } + }; + + template<> + struct nomove_if { + template + static inline auto eval(U &&x) + -> + + decltype (std::forward(x)) { + return std::forward(x); + } + }; + + template + static inline auto deref_noexcept(It &it) + + MOODYCAMEL_NOEXCEPT -> + decltype(*it) + { + return *it; + } + +#if defined(__clang__) || !defined(__GNUC__) || __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8) + template + struct is_trivially_destructible : 
std::is_trivially_destructible { + }; +#else + template struct is_trivially_destructible : std::has_trivial_destructor { }; +#endif + +#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED + #ifdef MCDBGQ_USE_RELACY + typedef RelacyThreadExitListener ThreadExitListener; + typedef RelacyThreadExitNotifier ThreadExitNotifier; +#else + struct ThreadExitListener + { + typedef void (*callback_t)(void*); + callback_t callback; + void* userData; + + ThreadExitListener* next; // reserved for use by the ThreadExitNotifier + }; + + + class ThreadExitNotifier + { + public: + static void subscribe(ThreadExitListener* listener) + { + auto& tlsInst = instance(); + listener->next = tlsInst.tail; + tlsInst.tail = listener; + } + + static void unsubscribe(ThreadExitListener* listener) + { + auto& tlsInst = instance(); + ThreadExitListener** prev = &tlsInst.tail; + for (auto ptr = tlsInst.tail; ptr != nullptr; ptr = ptr->next) { + if (ptr == listener) { + *prev = ptr->next; + break; + } + prev = &ptr->next; + } + } + + private: + ThreadExitNotifier() : tail(nullptr) { } + ThreadExitNotifier(ThreadExitNotifier const&) MOODYCAMEL_DELETE_FUNCTION; + ThreadExitNotifier& operator=(ThreadExitNotifier const&) MOODYCAMEL_DELETE_FUNCTION; + + ~ThreadExitNotifier() + { + // This thread is about to exit, let everyone know! + assert(this == &instance() && "If this assert fails, you likely have a buggy compiler! Change the preprocessor conditions such that MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED is no longer defined."); + for (auto ptr = tail; ptr != nullptr; ptr = ptr->next) { + ptr->callback(ptr->userData); + } + } + + // Thread-local + static inline ThreadExitNotifier& instance() + { + static thread_local ThreadExitNotifier notifier; + return notifier; + } + + private: + ThreadExitListener* tail; + }; +#endif +#endif + + template + struct static_is_lock_free_num { + enum { + value = 0 + }; + }; + template<> + struct static_is_lock_free_num { + enum { + value = ATOMIC_CHAR_LOCK_FREE + }; + }; + template<> + struct static_is_lock_free_num { + enum { + value = ATOMIC_SHORT_LOCK_FREE + }; + }; + template<> + struct static_is_lock_free_num { + enum { + value = ATOMIC_INT_LOCK_FREE + }; + }; + template<> + struct static_is_lock_free_num { + enum { + value = ATOMIC_LONG_LOCK_FREE + }; + }; + template<> + struct static_is_lock_free_num { + enum { + value = ATOMIC_LLONG_LOCK_FREE + }; + }; + template + struct static_is_lock_free : static_is_lock_free_num::type> { + }; + template<> + struct static_is_lock_free { + enum { + value = ATOMIC_BOOL_LOCK_FREE + }; + }; + template + struct static_is_lock_free { + enum { + value = ATOMIC_POINTER_LOCK_FREE + }; + }; + } + + + struct ProducerToken { + template + explicit ProducerToken(ConcurrentQueue &queue); + + template + explicit ProducerToken(BlockingConcurrentQueue &queue); + + ProducerToken(ProducerToken &&other) + + MOODYCAMEL_NOEXCEPT + : producer(other.producer) + { + other.producer = nullptr; + if (producer != nullptr) { + producer->token = this; + } + } + + inline ProducerToken &operator=(ProducerToken &&other) + + MOODYCAMEL_NOEXCEPT + { + swap(other); + return *this; + } + + void swap(ProducerToken &other) + + MOODYCAMEL_NOEXCEPT + { + std::swap(producer, other.producer); + if (producer != nullptr) { + producer->token = this; + } + if (other.producer != nullptr) { + other.producer->token = &other; + } + } + + // A token is always valid unless: + // 1) Memory allocation failed during construction + // 2) It was moved via the move constructor + // (Note: assignment does a swap, 
leaving both potentially valid) + // 3) The associated queue was destroyed + // Note that if valid() returns true, that only indicates + // that the token is valid for use with a specific queue, + // but not which one; that's up to the user to track. + inline bool valid() const { return producer != nullptr; } + + ~ProducerToken() { + if (producer != nullptr) { + producer->token = nullptr; + producer->inactive.store(true, std::memory_order_release); + } + } + + // Disable copying and assignment + ProducerToken(ProducerToken const &) MOODYCAMEL_DELETE_FUNCTION; + + ProducerToken &operator=(ProducerToken const &) MOODYCAMEL_DELETE_FUNCTION; + + private: + template friend + class ConcurrentQueue; + + friend class ConcurrentQueueTests; + + protected: + details::ConcurrentQueueProducerTypelessBase *producer; + }; + + + struct ConsumerToken { + template + explicit ConsumerToken(ConcurrentQueue &q); + + template + explicit ConsumerToken(BlockingConcurrentQueue &q); + + ConsumerToken(ConsumerToken &&other) + + MOODYCAMEL_NOEXCEPT + : initialOffset(other.initialOffset), lastKnownGlobalOffset(other + .lastKnownGlobalOffset), + itemsConsumedFromCurrent(other + .itemsConsumedFromCurrent), + currentProducer(other + .currentProducer), + desiredProducer(other + .desiredProducer) + { + } + + inline ConsumerToken &operator=(ConsumerToken &&other) + + MOODYCAMEL_NOEXCEPT + { + swap(other); + return *this; + } + + void swap(ConsumerToken &other) + + MOODYCAMEL_NOEXCEPT + { + std::swap(initialOffset, other.initialOffset); + std::swap(lastKnownGlobalOffset, other.lastKnownGlobalOffset); + std::swap(itemsConsumedFromCurrent, other.itemsConsumedFromCurrent); + std::swap(currentProducer, other.currentProducer); + std::swap(desiredProducer, other.desiredProducer); + } + + // Disable copying and assignment + ConsumerToken(ConsumerToken const &) MOODYCAMEL_DELETE_FUNCTION; + + ConsumerToken &operator=(ConsumerToken const &) MOODYCAMEL_DELETE_FUNCTION; + + private: + template friend + class ConcurrentQueue; + + friend class ConcurrentQueueTests; + + private: // but shared with ConcurrentQueue + std::uint32_t initialOffset; + std::uint32_t lastKnownGlobalOffset; + std::uint32_t itemsConsumedFromCurrent; + details::ConcurrentQueueProducerTypelessBase *currentProducer; + details::ConcurrentQueueProducerTypelessBase *desiredProducer; + }; + +// Need to forward-declare this swap because it's in a namespace. 
+// See http://stackoverflow.com/questions/4492062/why-does-a-c-friend-class-need-a-forward-declaration-only-in-other-namespaces + template + inline void swap(typename ConcurrentQueue::ImplicitProducerKVP &a, + typename ConcurrentQueue::ImplicitProducerKVP &b) + + MOODYCAMEL_NOEXCEPT; + + + template + class ConcurrentQueue { + public: + typedef ::moodycamel::ProducerToken producer_token_t; + typedef ::moodycamel::ConsumerToken consumer_token_t; + + typedef typename Traits::index_t index_t; + typedef typename Traits::size_t size_t; + + static const size_t BLOCK_SIZE = static_cast(Traits::BLOCK_SIZE); + static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = static_cast(Traits::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD); + static const size_t EXPLICIT_INITIAL_INDEX_SIZE = static_cast(Traits::EXPLICIT_INITIAL_INDEX_SIZE); + static const size_t IMPLICIT_INITIAL_INDEX_SIZE = static_cast(Traits::IMPLICIT_INITIAL_INDEX_SIZE); + static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = static_cast(Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE); + static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE = static_cast(Traits::EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE); +#ifdef _MSC_VER + #pragma warning(push) +#pragma warning(disable: 4307) // + integral constant overflow (that's what the ternary expression is for!) +#pragma warning(disable: 4309) // static_cast: Truncation of constant value +#endif + static const size_t MAX_SUBQUEUE_SIZE = (details::const_numeric_max::value - + static_cast(Traits::MAX_SUBQUEUE_SIZE) < BLOCK_SIZE) + ? details::const_numeric_max::value : ( + (static_cast(Traits::MAX_SUBQUEUE_SIZE) + + (BLOCK_SIZE - 1)) / BLOCK_SIZE * BLOCK_SIZE); +#ifdef _MSC_VER +#pragma warning(pop) +#endif + + static_assert(! + std::numeric_limits::is_signed &&std::is_integral::value, + "Traits::size_t must be an unsigned integral type"); + static_assert(! 
+ std::numeric_limits::is_signed &&std::is_integral::value, + "Traits::index_t must be an unsigned integral type"); + static_assert(sizeof(index_t) >= sizeof(size_t), "Traits::index_t must be at least as wide as Traits::size_t"); + static_assert((BLOCK_SIZE + > 1) && !( + BLOCK_SIZE &(BLOCK_SIZE + - 1)), "Traits::BLOCK_SIZE must be a power of 2 (and at least 2)"); + static_assert((EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD + > 1) && !( + EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD &(EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD + - 1)), "Traits::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD must be a power of 2 (and greater than 1)"); + static_assert((EXPLICIT_INITIAL_INDEX_SIZE + > 1) && !( + EXPLICIT_INITIAL_INDEX_SIZE &(EXPLICIT_INITIAL_INDEX_SIZE + - 1)), "Traits::EXPLICIT_INITIAL_INDEX_SIZE must be a power of 2 (and greater than 1)"); + static_assert((IMPLICIT_INITIAL_INDEX_SIZE + > 1) && !( + IMPLICIT_INITIAL_INDEX_SIZE &(IMPLICIT_INITIAL_INDEX_SIZE + - 1)), "Traits::IMPLICIT_INITIAL_INDEX_SIZE must be a power of 2 (and greater than 1)"); + static_assert((INITIAL_IMPLICIT_PRODUCER_HASH_SIZE + == 0) || !( + INITIAL_IMPLICIT_PRODUCER_HASH_SIZE &(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE + - 1)), "Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE must be a power of 2"); + static_assert(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE + == 0 || INITIAL_IMPLICIT_PRODUCER_HASH_SIZE >= 1, "Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE must be at least 1 (or 0 to disable implicit enqueueing)"); + + public: + // Creates a queue with at least `capacity` element slots; note that the + // actual number of elements that can be inserted without additional memory + // allocation depends on the number of producers and the block size (e.g. if + // the block size is equal to `capacity`, only a single block will be allocated + // up-front, which means only a single producer will be able to enqueue elements + // without an extra allocation -- blocks aren't shared between producers). + // This method is not thread safe -- it is up to the user to ensure that the + // queue is fully constructed before it starts being used by other threads (this + // includes making the memory effects of construction visible, possibly with a + // memory barrier). + explicit ConcurrentQueue(size_t capacity = 6 * BLOCK_SIZE) + : producerListTail(nullptr), + producerCount(0), + initialBlockPoolIndex(0), + nextExplicitConsumerId(0), + globalExplicitConsumerOffset(0) { + implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed); + populate_initial_implicit_producer_hash(); + populate_initial_block_list(capacity / BLOCK_SIZE + ((capacity & (BLOCK_SIZE - 1)) == 0 ? 0 : 1)); + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + // Track all the producers using a fully-resolved typed list for + // each kind; this makes it possible to debug them starting from + // the root queue object (otherwise wacky casts are needed that + // don't compile in the debugger's expression evaluator). + explicitProducers.store(nullptr, std::memory_order_relaxed); + implicitProducers.store(nullptr, std::memory_order_relaxed); +#endif + } + + // Computes the correct amount of pre-allocated blocks for you based + // on the minimum number of elements you want available at any given + // time, and the maximum concurrent number of each type of producer. 
+ ConcurrentQueue(size_t minCapacity, size_t maxExplicitProducers, size_t maxImplicitProducers) + : producerListTail(nullptr), + producerCount(0), + initialBlockPoolIndex(0), + nextExplicitConsumerId(0), + globalExplicitConsumerOffset(0) { + implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed); + populate_initial_implicit_producer_hash(); + size_t blocks = (((minCapacity + BLOCK_SIZE - 1) / BLOCK_SIZE) - 1) * (maxExplicitProducers + 1) + + 2 * (maxExplicitProducers + maxImplicitProducers); + populate_initial_block_list(blocks); + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + explicitProducers.store(nullptr, std::memory_order_relaxed); + implicitProducers.store(nullptr, std::memory_order_relaxed); +#endif + } + + // Note: The queue should not be accessed concurrently while it's + // being deleted. It's up to the user to synchronize this. + // This method is not thread safe. + ~ConcurrentQueue() { + // Destroy producers + auto ptr = producerListTail.load(std::memory_order_relaxed); + while (ptr != nullptr) { + auto next = ptr->next_prod(); + if (ptr->token != nullptr) { + ptr->token->producer = nullptr; + } + destroy(ptr); + ptr = next; + } + + // Destroy implicit producer hash tables + MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE != 0) { + auto hash = implicitProducerHash.load(std::memory_order_relaxed); + while (hash != nullptr) { + auto prev = hash->prev; + if (prev != + nullptr) { // The last hash is part of this object and was not allocated dynamically + for (size_t i = 0; i != hash->capacity; ++i) { + hash->entries[i].~ImplicitProducerKVP(); + } + hash->~ImplicitProducerHash(); + (Traits::free)(hash); + } + hash = prev; + } + } + + // Destroy global free list + auto block = freeList.head_unsafe(); + while (block != nullptr) { + auto next = block->freeListNext.load(std::memory_order_relaxed); + if (block->dynamicallyAllocated) { + destroy(block); + } + block = next; + } + + // Destroy initial free list + destroy_array(initialBlockPool, initialBlockPoolSize); + } + + // Disable copying and copy assignment + ConcurrentQueue(ConcurrentQueue const &) MOODYCAMEL_DELETE_FUNCTION; + + ConcurrentQueue &operator=(ConcurrentQueue const &) MOODYCAMEL_DELETE_FUNCTION; + + // Moving is supported, but note that it is *not* a thread-safe operation. + // Nobody can use the queue while it's being moved, and the memory effects + // of that move must be propagated to other threads before they can use it. + // Note: When a queue is moved, its tokens are still valid but can only be + // used with the destination queue (i.e. semantically they are moved along + // with the queue itself). + ConcurrentQueue(ConcurrentQueue &&other) + + MOODYCAMEL_NOEXCEPT + : producerListTail(other.producerListTail.load(std::memory_order_relaxed)), + producerCount(other + .producerCount. + load(std::memory_order_relaxed) + ), + initialBlockPoolIndex(other + .initialBlockPoolIndex. + load(std::memory_order_relaxed) + ), + initialBlockPool(other + .initialBlockPool), + initialBlockPoolSize(other + .initialBlockPoolSize), + + freeList (std::move(other + + .freeList)), + nextExplicitConsumerId(other + .nextExplicitConsumerId. + load(std::memory_order_relaxed) + ), + globalExplicitConsumerOffset(other + .globalExplicitConsumerOffset. 
+ load(std::memory_order_relaxed) + ) + { + // Move the other one into this, and leave the other one as an empty queue + implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed); + populate_initial_implicit_producer_hash(); + swap_implicit_producer_hashes(other); + + other.producerListTail.store(nullptr, std::memory_order_relaxed); + other.producerCount.store(0, std::memory_order_relaxed); + other.nextExplicitConsumerId.store(0, std::memory_order_relaxed); + other.globalExplicitConsumerOffset.store(0, std::memory_order_relaxed); + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + explicitProducers.store(other.explicitProducers.load(std::memory_order_relaxed), std::memory_order_relaxed); + other.explicitProducers.store(nullptr, std::memory_order_relaxed); + implicitProducers.store(other.implicitProducers.load(std::memory_order_relaxed), std::memory_order_relaxed); + other.implicitProducers.store(nullptr, std::memory_order_relaxed); +#endif + + other.initialBlockPoolIndex.store(0, std::memory_order_relaxed); + other.initialBlockPoolSize = 0; + other.initialBlockPool = nullptr; + + reown_producers(); + } + + inline ConcurrentQueue &operator=(ConcurrentQueue &&other) + + MOODYCAMEL_NOEXCEPT + { + return swap_internal(other); + } + + // Swaps this queue's state with the other's. Not thread-safe. + // Swapping two queues does not invalidate their tokens, however + // the tokens that were created for one queue must be used with + // only the swapped queue (i.e. the tokens are tied to the + // queue's movable state, not the object itself). + inline void swap(ConcurrentQueue &other) + + MOODYCAMEL_NOEXCEPT + { + swap_internal(other); + } + + private: + ConcurrentQueue &swap_internal(ConcurrentQueue &other) { + if (this == &other) { + return *this; + } + + details::swap_relaxed(producerListTail, other.producerListTail); + details::swap_relaxed(producerCount, other.producerCount); + details::swap_relaxed(initialBlockPoolIndex, other.initialBlockPoolIndex); + std::swap(initialBlockPool, other.initialBlockPool); + std::swap(initialBlockPoolSize, other.initialBlockPoolSize); + freeList.swap(other.freeList); + details::swap_relaxed(nextExplicitConsumerId, other.nextExplicitConsumerId); + details::swap_relaxed(globalExplicitConsumerOffset, other.globalExplicitConsumerOffset); + + swap_implicit_producer_hashes(other); + + reown_producers(); + other.reown_producers(); + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + details::swap_relaxed(explicitProducers, other.explicitProducers); + details::swap_relaxed(implicitProducers, other.implicitProducers); +#endif + + return *this; + } + + public: + // Enqueues a single item (by copying it). + // Allocates memory if required. Only fails if memory allocation fails (or implicit + // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0, + // or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Thread-safe. + inline bool enqueue(T const &item) { + MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; + else return inner_enqueue(item); + } + + // Enqueues a single item (by moving it, if possible). + // Allocates memory if required. Only fails if memory allocation fails (or implicit + // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0, + // or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Thread-safe. 
+ inline bool enqueue(T &&item) { + MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; + else return inner_enqueue(std::move(item)); + } + + // Enqueues a single item (by copying it) using an explicit producer token. + // Allocates memory if required. Only fails if memory allocation fails (or + // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Thread-safe. + inline bool enqueue(producer_token_t const &token, T const &item) { + return inner_enqueue(token, item); + } + + // Enqueues a single item (by moving it, if possible) using an explicit producer token. + // Allocates memory if required. Only fails if memory allocation fails (or + // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Thread-safe. + inline bool enqueue(producer_token_t const &token, T &&item) { + return inner_enqueue(token, std::move(item)); + } + + // Enqueues several items. + // Allocates memory if required. Only fails if memory allocation fails (or + // implicit production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE + // is 0, or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Note: Use std::make_move_iterator if the elements should be moved instead of copied. + // Thread-safe. + template + bool enqueue_bulk(It itemFirst, size_t count) { + MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; + else return inner_enqueue_bulk(itemFirst, count); + } + + // Enqueues several items using an explicit producer token. + // Allocates memory if required. Only fails if memory allocation fails + // (or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Note: Use std::make_move_iterator if the elements should be moved + // instead of copied. + // Thread-safe. + template + bool enqueue_bulk(producer_token_t const &token, It itemFirst, size_t count) { + return inner_enqueue_bulk(token, itemFirst, count); + } + + // Enqueues a single item (by copying it). + // Does not allocate memory. Fails if not enough room to enqueue (or implicit + // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE + // is 0). + // Thread-safe. + inline bool try_enqueue(T const &item) { + MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; + else return inner_enqueue(item); + } + + // Enqueues a single item (by moving it, if possible). + // Does not allocate memory (except for one-time implicit producer). + // Fails if not enough room to enqueue (or implicit production is + // disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0). + // Thread-safe. + inline bool try_enqueue(T &&item) { + MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; + else return inner_enqueue(std::move(item)); + } + + // Enqueues a single item (by copying it) using an explicit producer token. + // Does not allocate memory. Fails if not enough room to enqueue. + // Thread-safe. + inline bool try_enqueue(producer_token_t const &token, T const &item) { + return inner_enqueue(token, item); + } + + // Enqueues a single item (by moving it, if possible) using an explicit producer token. + // Does not allocate memory. Fails if not enough room to enqueue. + // Thread-safe. + inline bool try_enqueue(producer_token_t const &token, T &&item) { + return inner_enqueue(token, std::move(item)); + } + + // Enqueues several items. + // Does not allocate memory (except for one-time implicit producer). 
+ // Fails if not enough room to enqueue (or implicit production is + // disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0). + // Note: Use std::make_move_iterator if the elements should be moved + // instead of copied. + // Thread-safe. + template + bool try_enqueue_bulk(It itemFirst, size_t count) { + MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; + else return inner_enqueue_bulk(itemFirst, count); + } + + // Enqueues several items using an explicit producer token. + // Does not allocate memory. Fails if not enough room to enqueue. + // Note: Use std::make_move_iterator if the elements should be moved + // instead of copied. + // Thread-safe. + template + bool try_enqueue_bulk(producer_token_t const &token, It itemFirst, size_t count) { + return inner_enqueue_bulk(token, itemFirst, count); + } + + + // Attempts to dequeue from the queue. + // Returns false if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. + template + bool try_dequeue(U &item) { + // Instead of simply trying each producer in turn (which could cause needless contention on the first + // producer), we score them heuristically. + size_t nonEmptyCount = 0; + ProducerBase *best = nullptr; + size_t bestSize = 0; + for (auto ptr = producerListTail.load(std::memory_order_acquire); + nonEmptyCount < 3 && ptr != nullptr; ptr = ptr->next_prod()) { + auto size = ptr->size_approx(); + if (size > 0) { + if (size > bestSize) { + bestSize = size; + best = ptr; + } + ++nonEmptyCount; + } + } + + // If there was at least one non-empty queue but it appears empty at the time + // we try to dequeue from it, we need to make sure every queue's been tried + if (nonEmptyCount > 0) { + if ((details::likely)(best->dequeue(item))) { + return true; + } + for (auto ptr = producerListTail.load(std::memory_order_acquire); + ptr != nullptr; ptr = ptr->next_prod()) { + if (ptr != best && ptr->dequeue(item)) { + return true; + } + } + } + return false; + } + + // Attempts to dequeue from the queue. + // Returns false if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // This differs from the try_dequeue(item) method in that this one does + // not attempt to reduce contention by interleaving the order that producer + // streams are dequeued from. So, using this method can reduce overall throughput + // under contention, but will give more predictable results in single-threaded + // consumer scenarios. This is mostly only useful for internal unit tests. + // Never allocates. Thread-safe. + template + bool try_dequeue_non_interleaved(U &item) { + for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) { + if (ptr->dequeue(item)) { + return true; + } + } + return false; + } + + // Attempts to dequeue from the queue using an explicit consumer token. + // Returns false if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. 
+ template + bool try_dequeue(consumer_token_t &token, U &item) { + // The idea is roughly as follows: + // Every 256 items from one producer, make everyone rotate (increase the global offset) -> this means the highest efficiency consumer dictates the rotation speed of everyone else, more or less + // If you see that the global offset has changed, you must reset your consumption counter and move to your designated place + // If there's no items where you're supposed to be, keep moving until you find a producer with some items + // If the global offset has not changed but you've run out of items to consume, move over from your current position until you find an producer with something in it + + if (token.desiredProducer == nullptr || + token.lastKnownGlobalOffset != globalExplicitConsumerOffset.load(std::memory_order_relaxed)) { + if (!update_current_producer_after_rotation(token)) { + return false; + } + } + + // If there was at least one non-empty queue but it appears empty at the time + // we try to dequeue from it, we need to make sure every queue's been tried + if (static_cast(token.currentProducer)->dequeue(item)) { + if (++token.itemsConsumedFromCurrent == EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE) { + globalExplicitConsumerOffset.fetch_add(1, std::memory_order_relaxed); + } + return true; + } + + auto tail = producerListTail.load(std::memory_order_acquire); + auto ptr = static_cast(token.currentProducer)->next_prod(); + if (ptr == nullptr) { + ptr = tail; + } + while (ptr != static_cast(token.currentProducer)) { + if (ptr->dequeue(item)) { + token.currentProducer = ptr; + token.itemsConsumedFromCurrent = 1; + return true; + } + ptr = ptr->next_prod(); + if (ptr == nullptr) { + ptr = tail; + } + } + return false; + } + + // Attempts to dequeue several elements from the queue. + // Returns the number of items actually dequeued. + // Returns 0 if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. + template + size_t try_dequeue_bulk(It itemFirst, size_t max) { + size_t count = 0; + for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) { + count += ptr->dequeue_bulk(itemFirst, max - count); + if (count == max) { + break; + } + } + return count; + } + + // Attempts to dequeue several elements from the queue using an explicit consumer token. + // Returns the number of items actually dequeued. + // Returns 0 if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. 
+ template + size_t try_dequeue_bulk(consumer_token_t &token, It itemFirst, size_t max) { + if (token.desiredProducer == nullptr || + token.lastKnownGlobalOffset != globalExplicitConsumerOffset.load(std::memory_order_relaxed)) { + if (!update_current_producer_after_rotation(token)) { + return 0; + } + } + + size_t count = static_cast(token.currentProducer)->dequeue_bulk(itemFirst, max); + if (count == max) { + if ((token.itemsConsumedFromCurrent += static_cast(max)) >= + EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE) { + globalExplicitConsumerOffset.fetch_add(1, std::memory_order_relaxed); + } + return max; + } + token.itemsConsumedFromCurrent += static_cast(count); + max -= count; + + auto tail = producerListTail.load(std::memory_order_acquire); + auto ptr = static_cast(token.currentProducer)->next_prod(); + if (ptr == nullptr) { + ptr = tail; + } + while (ptr != static_cast(token.currentProducer)) { + auto dequeued = ptr->dequeue_bulk(itemFirst, max); + count += dequeued; + if (dequeued != 0) { + token.currentProducer = ptr; + token.itemsConsumedFromCurrent = static_cast(dequeued); + } + if (dequeued == max) { + break; + } + max -= dequeued; + ptr = ptr->next_prod(); + if (ptr == nullptr) { + ptr = tail; + } + } + return count; + } + + + // Attempts to dequeue from a specific producer's inner queue. + // If you happen to know which producer you want to dequeue from, this + // is significantly faster than using the general-case try_dequeue methods. + // Returns false if the producer's queue appeared empty at the time it + // was checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. + template + inline bool try_dequeue_from_producer(producer_token_t const &producer, U &item) { + return static_cast(producer.producer)->dequeue(item); + } + + // Attempts to dequeue several elements from a specific producer's inner queue. + // Returns the number of items actually dequeued. + // If you happen to know which producer you want to dequeue from, this + // is significantly faster than using the general-case try_dequeue methods. + // Returns 0 if the producer's queue appeared empty at the time it + // was checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. + template + inline size_t try_dequeue_bulk_from_producer(producer_token_t const &producer, It itemFirst, size_t max) { + return static_cast(producer.producer)->dequeue_bulk(itemFirst, max); + } + + + // Returns an estimate of the total number of elements currently in the queue. This + // estimate is only accurate if the queue has completely stabilized before it is called + // (i.e. all enqueue and dequeue operations have completed and their memory effects are + // visible on the calling thread, and no further operations start while this method is + // being called). + // Thread-safe. + size_t size_approx() const { + size_t size = 0; + for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) { + size += ptr->size_approx(); + } + return size; + } + + + // Returns true if the underlying atomic variables used by + // the queue are lock-free (they should be on most platforms). + // Thread-safe. 
+    static constexpr bool is_lock_free() {
+        return
+            details::static_is_lock_free<bool>::value == 2 &&
+            details::static_is_lock_free<size_t>::value == 2 &&
+            details::static_is_lock_free<std::uint32_t>::value == 2 &&
+            details::static_is_lock_free<index_t>::value == 2 &&
+            details::static_is_lock_free<void *>::value == 2 &&
+            details::static_is_lock_free<typename details::thread_id_converter<details::thread_id_t>::thread_id_numeric_size_t>::value ==
+            2;
+    }
+
+
+private:
+    friend struct ProducerToken;
+    friend struct ConsumerToken;
+    struct ExplicitProducer;
+    friend struct ExplicitProducer;
+    struct ImplicitProducer;
+    friend struct ImplicitProducer;
+
+    friend class ConcurrentQueueTests;
+
+    enum AllocationMode {
+        CanAlloc, CannotAlloc
+    };
+
+
+    ///////////////////////////////
+    // Queue methods
+    ///////////////////////////////
+
+    template<AllocationMode canAlloc, typename U>
+    inline bool inner_enqueue(producer_token_t const &token, U &&element) {
+        return static_cast<ExplicitProducer *>(token.producer)->ConcurrentQueue::ExplicitProducer::template enqueue<canAlloc>(
+                std::forward<U>(element));
+    }
+
+    template<AllocationMode canAlloc, typename U>
+    inline bool inner_enqueue(U &&element) {
+        auto producer = get_or_add_implicit_producer();
+        return producer == nullptr ? false
+                                   : producer->ConcurrentQueue::ImplicitProducer::template enqueue<canAlloc>(
+                        std::forward<U>(element));
+    }
+
+    template<AllocationMode canAlloc, typename It>
+    inline bool inner_enqueue_bulk(producer_token_t const &token, It itemFirst, size_t count) {
+        return static_cast<ExplicitProducer *>(token.producer)->ConcurrentQueue::ExplicitProducer::template enqueue_bulk<canAlloc>(
+                itemFirst, count);
+    }
+
+    template<AllocationMode canAlloc, typename It>
+    inline bool inner_enqueue_bulk(It itemFirst, size_t count) {
+        auto producer = get_or_add_implicit_producer();
+        return producer == nullptr ? false
+                                   : producer->ConcurrentQueue::ImplicitProducer::template enqueue_bulk<canAlloc>(
+                        itemFirst, count);
+    }
+
+    inline bool update_current_producer_after_rotation(consumer_token_t &token) {
+        // Ah, there's been a rotation, figure out where we should be!
+        auto tail = producerListTail.load(std::memory_order_acquire);
+        if (token.desiredProducer == nullptr && tail == nullptr) {
+            return false;
+        }
+        auto prodCount = producerCount.load(std::memory_order_relaxed);
+        auto globalOffset = globalExplicitConsumerOffset.load(std::memory_order_relaxed);
+        if ((details::unlikely)(token.desiredProducer == nullptr)) {
+            // Aha, first time we're dequeueing anything.
+            // Figure out our local position
+            // Note: offset is from start, not end, but we're traversing from end -- subtract from count first
+            std::uint32_t offset = prodCount - 1 - (token.initialOffset % prodCount);
+            token.desiredProducer = tail;
+            for (std::uint32_t i = 0; i != offset; ++i) {
+                token.desiredProducer = static_cast<ProducerBase *>(token.desiredProducer)->next_prod();
+                if (token.desiredProducer == nullptr) {
+                    token.desiredProducer = tail;
+                }
+            }
+        }
+
+        std::uint32_t delta = globalOffset - token.lastKnownGlobalOffset;
+        if (delta >= prodCount) {
+            delta = delta % prodCount;
+        }
+        for (std::uint32_t i = 0; i != delta; ++i) {
+            token.desiredProducer = static_cast<ProducerBase *>(token.desiredProducer)->next_prod();
+            if (token.desiredProducer == nullptr) {
+                token.desiredProducer = tail;
+            }
+        }
+
+        token.lastKnownGlobalOffset = globalOffset;
+        token.currentProducer = token.desiredProducer;
+        token.itemsConsumedFromCurrent = 0;
+        return true;
+    }
+
+
+    ///////////////////////////
+    // Free list
+    ///////////////////////////
+
+    template<typename N>
+    struct FreeListNode {
+        FreeListNode() : freeListRefs(0), freeListNext(nullptr) {}
+
+        std::atomic<std::uint32_t> freeListRefs;
+        std::atomic<N *> freeListNext;
+    };
+
+    // A simple CAS-based lock-free free list.
Not the fastest thing in the world under heavy contention, but + // simple and correct (assuming nodes are never freed until after the free list is destroyed), and fairly + // speedy under low contention. + template // N must inherit FreeListNode or have the same fields (and initialization of them) + struct FreeList { + FreeList() : freeListHead(nullptr) {} + + FreeList(FreeList &&other) : freeListHead(other.freeListHead.load(std::memory_order_relaxed)) { + other.freeListHead.store(nullptr, std::memory_order_relaxed); + } + + void swap(FreeList &other) { details::swap_relaxed(freeListHead, other.freeListHead); } + + FreeList(FreeList const &) MOODYCAMEL_DELETE_FUNCTION; + + FreeList &operator=(FreeList const &) MOODYCAMEL_DELETE_FUNCTION; + + inline void add(N *node) { +#ifdef MCDBGQ_NOLOCKFREE_FREELIST + debug::DebugLock lock(mutex); +#endif + // We know that the should-be-on-freelist bit is 0 at this point, so it's safe to + // set it using a fetch_add + if (node->freeListRefs.fetch_add(SHOULD_BE_ON_FREELIST, std::memory_order_acq_rel) == 0) { + // Oh look! We were the last ones referencing this node, and we know + // we want to add it to the free list, so let's do it! + add_knowing_refcount_is_zero(node); + } + } + + inline N *try_get() { +#ifdef MCDBGQ_NOLOCKFREE_FREELIST + debug::DebugLock lock(mutex); +#endif + auto head = freeListHead.load(std::memory_order_acquire); + while (head != nullptr) { + auto prevHead = head; + auto refs = head->freeListRefs.load(std::memory_order_relaxed); + if ((refs & REFS_MASK) == 0 || + !head->freeListRefs.compare_exchange_strong(refs, refs + 1, std::memory_order_acquire, + std::memory_order_relaxed)) { + head = freeListHead.load(std::memory_order_acquire); + continue; + } + + // Good, reference count has been incremented (it wasn't at zero), which means we can read the + // next and not worry about it changing between now and the time we do the CAS + auto next = head->freeListNext.load(std::memory_order_relaxed); + if (freeListHead.compare_exchange_strong(head, next, std::memory_order_acquire, + std::memory_order_relaxed)) { + // Yay, got the node. This means it was on the list, which means shouldBeOnFreeList must be false no + // matter the refcount (because nobody else knows it's been taken off yet, it can't have been put back on). + assert((head->freeListRefs.load(std::memory_order_relaxed) & SHOULD_BE_ON_FREELIST) == 0); + + // Decrease refcount twice, once for our ref, and once for the list's ref + head->freeListRefs.fetch_sub(2, std::memory_order_release); + return head; + } + + // OK, the head must have changed on us, but we still need to decrease the refcount we increased. + // Note that we don't need to release any memory effects, but we do need to ensure that the reference + // count decrement happens-after the CAS on the head. + refs = prevHead->freeListRefs.fetch_sub(1, std::memory_order_acq_rel); + if (refs == SHOULD_BE_ON_FREELIST + 1) { + add_knowing_refcount_is_zero(prevHead); + } + } + + return nullptr; + } + + // Useful for traversing the list when there's no contention (e.g. to destroy remaining nodes) + N *head_unsafe() const { return freeListHead.load(std::memory_order_relaxed); } + + private: + inline void add_knowing_refcount_is_zero(N *node) { + // Since the refcount is zero, and nobody can increase it once it's zero (except us, and we run + // only one copy of this method per node at a time, i.e. 
the single thread case), then we know + // we can safely change the next pointer of the node; however, once the refcount is back above + // zero, then other threads could increase it (happens under heavy contention, when the refcount + // goes to zero in between a load and a refcount increment of a node in try_get, then back up to + // something non-zero, then the refcount increment is done by the other thread) -- so, if the CAS + // to add the node to the actual list fails, decrease the refcount and leave the add operation to + // the next thread who puts the refcount back at zero (which could be us, hence the loop). + auto head = freeListHead.load(std::memory_order_relaxed); + while (true) { + node->freeListNext.store(head, std::memory_order_relaxed); + node->freeListRefs.store(1, std::memory_order_release); + if (!freeListHead.compare_exchange_strong(head, node, std::memory_order_release, + std::memory_order_relaxed)) { + // Hmm, the add failed, but we can only try again when the refcount goes back to zero + if (node->freeListRefs.fetch_add(SHOULD_BE_ON_FREELIST - 1, std::memory_order_release) == 1) { + continue; + } + } + return; + } + } + + private: + // Implemented like a stack, but where node order doesn't matter (nodes are inserted out of order under contention) + std::atomic freeListHead; + + static const std::uint32_t REFS_MASK = 0x7FFFFFFF; + static const std::uint32_t SHOULD_BE_ON_FREELIST = 0x80000000; + +#ifdef MCDBGQ_NOLOCKFREE_FREELIST + debug::DebugMutex mutex; +#endif + }; + + + /////////////////////////// + // Block + /////////////////////////// + + enum InnerQueueContext { + implicit_context = 0, explicit_context = 1 + }; + + struct Block { + Block() + : next(nullptr), elementsCompletelyDequeued(0), freeListRefs(0), freeListNext(nullptr), + shouldBeOnFreeList(false), dynamicallyAllocated(true) { +#ifdef MCDBGQ_TRACKMEM + owner = nullptr; +#endif + } + + template + inline bool is_empty() const { + MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && + BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { + // Check flags + for (size_t i = 0; i < BLOCK_SIZE; ++i) { + if (!emptyFlags[i].load(std::memory_order_relaxed)) { + return false; + } + } + + // Aha, empty; make sure we have all other memory effects that happened before the empty flags were set + std::atomic_thread_fence(std::memory_order_acquire); + return true; + } else { + // Check counter + if (elementsCompletelyDequeued.load(std::memory_order_relaxed) == BLOCK_SIZE) { + std::atomic_thread_fence(std::memory_order_acquire); + return true; + } + assert(elementsCompletelyDequeued.load(std::memory_order_relaxed) <= BLOCK_SIZE); + return false; + } + } + + // Returns true if the block is now empty (does not apply in explicit context) + template + inline bool set_empty(MOODYCAMEL_MAYBE_UNUSED index_t i) { + MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && + BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { + // Set flag + assert(!emptyFlags[BLOCK_SIZE - 1 - + static_cast(i & static_cast(BLOCK_SIZE - 1))].load( + std::memory_order_relaxed)); + emptyFlags[BLOCK_SIZE - 1 - static_cast(i & static_cast(BLOCK_SIZE - 1))].store( + true, std::memory_order_release); + return false; + } else { + // Increment counter + auto prevVal = elementsCompletelyDequeued.fetch_add(1, std::memory_order_release); + assert(prevVal < BLOCK_SIZE); + return prevVal == BLOCK_SIZE - 1; + } + } + + // Sets multiple contiguous item statuses to 'empty' (assumes no wrapping and count > 0). 
+ // Returns true if the block is now empty (does not apply in explicit context). + template + inline bool set_many_empty(MOODYCAMEL_MAYBE_UNUSED index_t i, size_t count) { + MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && + BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { + // Set flags + std::atomic_thread_fence(std::memory_order_release); + i = BLOCK_SIZE - 1 - static_cast(i & static_cast(BLOCK_SIZE - 1)) - count + 1; + for (size_t j = 0; j != count; ++j) { + assert(!emptyFlags[i + j].load(std::memory_order_relaxed)); + emptyFlags[i + j].store(true, std::memory_order_relaxed); + } + return false; + } else { + // Increment counter + auto prevVal = elementsCompletelyDequeued.fetch_add(count, std::memory_order_release); + assert(prevVal + count <= BLOCK_SIZE); + return prevVal + count == BLOCK_SIZE; + } + } + + template + inline void set_all_empty() { + MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && + BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { + // Set all flags + for (size_t i = 0; i != BLOCK_SIZE; ++i) { + emptyFlags[i].store(true, std::memory_order_relaxed); + } + } else { + // Reset counter + elementsCompletelyDequeued.store(BLOCK_SIZE, std::memory_order_relaxed); + } + } + + template + inline void reset_empty() { + MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && + BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { + // Reset flags + for (size_t i = 0; i != BLOCK_SIZE; ++i) { + emptyFlags[i].store(false, std::memory_order_relaxed); + } + } else { + // Reset counter + elementsCompletelyDequeued.store(0, std::memory_order_relaxed); + } + } + + inline T *operator[](index_t idx) + + MOODYCAMEL_NOEXCEPT { + return static_cast(static_cast(elements)) + + static_cast(idx & static_cast(BLOCK_SIZE - 1)); + } + + inline T const *operator[](index_t idx) const + + MOODYCAMEL_NOEXCEPT { + return static_cast(static_cast(elements)) + + static_cast(idx & static_cast(BLOCK_SIZE - 1)); + } + + private: + static_assert(std::alignment_of::value + <= sizeof(T), "The queue does not support types with an alignment greater than their size at this time"); + + MOODYCAMEL_ALIGNED_TYPE_LIKE(char[sizeof(T) * BLOCK_SIZE], T) elements; + public: + Block *next; + std::atomic elementsCompletelyDequeued; + std::atomic emptyFlags[BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD ? 
BLOCK_SIZE : 1];
+    public:
+        std::atomic<std::uint32_t> freeListRefs;
+        std::atomic<Block *> freeListNext;
+        std::atomic<bool> shouldBeOnFreeList;
+        bool dynamicallyAllocated; // Perhaps a better name for this would be 'isNotPartOfInitialBlockPool'
+
+#ifdef MCDBGQ_TRACKMEM
+        void* owner;
+#endif
+    };
+
+    static_assert(std::alignment_of<Block>::value >= std::alignment_of<T>::value,
+                  "Internal error: Blocks must be at least as aligned as the type they are wrapping");
+
+
+#ifdef MCDBGQ_TRACKMEM
+    public:
+    struct MemStats;
+private:
+#endif
+
+    ///////////////////////////
+    // Producer base
+    ///////////////////////////
+
+    struct ProducerBase : public details::ConcurrentQueueProducerTypelessBase {
+        ProducerBase(ConcurrentQueue *parent_, bool isExplicit_) :
+                tailIndex(0),
+                headIndex(0),
+                dequeueOptimisticCount(0),
+                dequeueOvercommit(0),
+                tailBlock(nullptr),
+                isExplicit(isExplicit_),
+                parent(parent_) {
+        }
+
+        virtual ~ProducerBase() {}
+
+        template<typename U>
+        inline bool dequeue(U &element) {
+            if (isExplicit) {
+                return static_cast<ExplicitProducer *>(this)->dequeue(element);
+            } else {
+                return static_cast<ImplicitProducer *>(this)->dequeue(element);
+            }
+        }
+
+        template<typename It>
+        inline size_t dequeue_bulk(It &itemFirst, size_t max) {
+            if (isExplicit) {
+                return static_cast<ExplicitProducer *>(this)->dequeue_bulk(itemFirst, max);
+            } else {
+                return static_cast<ImplicitProducer *>(this)->dequeue_bulk(itemFirst, max);
+            }
+        }
+
+        inline ProducerBase *next_prod() const { return static_cast<ProducerBase *>(next); }
+
+        inline size_t size_approx() const {
+            auto tail = tailIndex.load(std::memory_order_relaxed);
+            auto head = headIndex.load(std::memory_order_relaxed);
+            return details::circular_less_than(head, tail) ? static_cast<size_t>(tail - head) : 0;
+        }
+
+        inline index_t getTail() const { return tailIndex.load(std::memory_order_relaxed); }
+
+    protected:
+        std::atomic<index_t> tailIndex; // Where to enqueue to next
+        std::atomic<index_t> headIndex; // Where to dequeue from next
+
+        std::atomic<index_t> dequeueOptimisticCount;
+        std::atomic<index_t> dequeueOvercommit;
+
+        Block *tailBlock;
+
+    public:
+        bool isExplicit;
+        ConcurrentQueue *parent;
+
+    protected:
+#ifdef MCDBGQ_TRACKMEM
+        friend struct MemStats;
+#endif
+    };
+
+
+    ///////////////////////////
+    // Explicit queue
+    ///////////////////////////
+
+    struct ExplicitProducer : public ProducerBase {
+        explicit ExplicitProducer(ConcurrentQueue *parent_) :
+                ProducerBase(parent_, true),
+                blockIndex(nullptr),
+                pr_blockIndexSlotsUsed(0),
+                pr_blockIndexSize(EXPLICIT_INITIAL_INDEX_SIZE >> 1),
+                pr_blockIndexFront(0),
+                pr_blockIndexEntries(nullptr),
+                pr_blockIndexRaw(nullptr) {
+            size_t poolBasedIndexSize = details::ceil_to_pow_2(parent_->initialBlockPoolSize) >> 1;
+            if (poolBasedIndexSize > pr_blockIndexSize) {
+                pr_blockIndexSize = poolBasedIndexSize;
+            }
+
+            new_block_index(0); // This creates an index with double the number of current entries, i.e. EXPLICIT_INITIAL_INDEX_SIZE
+        }
+
+        ~ExplicitProducer() {
+            // Destruct any elements not yet dequeued.
+            // Since we're in the destructor, we can assume all elements
+            // are either completely dequeued or completely not (no halfways).
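+            // Clean-up below happens in three phases: first the destructor is run for every element
+            // that was enqueued but never dequeued, then every block in the circular list is either
+            // handed back to the parent's block free list or freed outright (depending on whether it
+            // was dynamically allocated), and finally the chain of block index allocations is released
+            // by walking the 'prev' pointers.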
+ if (this->tailBlock != nullptr) { // Note this means there must be a block index too + // First find the block that's partially dequeued, if any + Block *halfDequeuedBlock = nullptr; + if ((this->headIndex.load(std::memory_order_relaxed) & static_cast(BLOCK_SIZE - 1)) != 0) { + // The head's not on a block boundary, meaning a block somewhere is partially dequeued + // (or the head block is the tail block and was fully dequeued, but the head/tail are still not on a boundary) + size_t i = (pr_blockIndexFront - pr_blockIndexSlotsUsed) & (pr_blockIndexSize - 1); + while (details::circular_less_than(pr_blockIndexEntries[i].base + BLOCK_SIZE, + this->headIndex.load(std::memory_order_relaxed))) { + i = (i + 1) & (pr_blockIndexSize - 1); + } + assert(details::circular_less_than(pr_blockIndexEntries[i].base, + this->headIndex.load(std::memory_order_relaxed))); + halfDequeuedBlock = pr_blockIndexEntries[i].block; + } + + // Start at the head block (note the first line in the loop gives us the head from the tail on the first iteration) + auto block = this->tailBlock; + do { + block = block->next; + if (block->ConcurrentQueue::Block::template is_empty()) { + continue; + } + + size_t i = 0; // Offset into block + if (block == halfDequeuedBlock) { + i = static_cast(this->headIndex.load(std::memory_order_relaxed) & + static_cast(BLOCK_SIZE - 1)); + } + + // Walk through all the items in the block; if this is the tail block, we need to stop when we reach the tail index + auto lastValidIndex = (this->tailIndex.load(std::memory_order_relaxed) & + static_cast(BLOCK_SIZE - 1)) == 0 ? BLOCK_SIZE + : static_cast( + this->tailIndex.load(std::memory_order_relaxed) & + static_cast(BLOCK_SIZE - 1)); + while (i != BLOCK_SIZE && (block != this->tailBlock || i != lastValidIndex)) { + (*block)[i++]->~T(); + } + } while (block != this->tailBlock); + } + + // Destroy all blocks that we own + if (this->tailBlock != nullptr) { + auto block = this->tailBlock; + do { + auto nextBlock = block->next; + if (block->dynamicallyAllocated) { + destroy(block); + } else { + this->parent->add_block_to_free_list(block); + } + block = nextBlock; + } while (block != this->tailBlock); + } + + // Destroy the block indices + auto header = static_cast(pr_blockIndexRaw); + while (header != nullptr) { + auto prev = static_cast(header->prev); + header->~BlockIndexHeader(); + (Traits::free)(header); + header = prev; + } + } + + template + inline bool enqueue(U &&element) { + index_t currentTailIndex = this->tailIndex.load(std::memory_order_relaxed); + index_t newTailIndex = 1 + currentTailIndex; + if ((currentTailIndex & static_cast(BLOCK_SIZE - 1)) == 0) { + // We reached the end of a block, start a new one + auto startBlock = this->tailBlock; + auto originalBlockIndexSlotsUsed = pr_blockIndexSlotsUsed; + if (this->tailBlock != nullptr && + this->tailBlock->next->ConcurrentQueue::Block::template is_empty()) { + // We can re-use the block ahead of us, it's empty! + this->tailBlock = this->tailBlock->next; + this->tailBlock->ConcurrentQueue::Block::template reset_empty(); + + // We'll put the block on the block index (guaranteed to be room since we're conceptually removing the + // last block from it first -- except instead of removing then adding, we can just overwrite). + // Note that there must be a valid block index here, since even if allocation failed in the ctor, + // it would have been re-attempted when adding the first block to the queue; since there is such + // a block, a block index must have been successfully allocated. 
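+                    // (If the next block is not free -- or there is no next block yet -- control falls
+                    // through to the else branch below, which checks the size limits and requisitions
+                    // a brand new block instead.)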
+ } else { + // Whatever head value we see here is >= the last value we saw here (relatively), + // and <= its current value. Since we have the most recent tail, the head must be + // <= to it. + auto head = this->headIndex.load(std::memory_order_relaxed); + assert(!details::circular_less_than(currentTailIndex, head)); + if (!details::circular_less_than(head, currentTailIndex + BLOCK_SIZE) + || (MAX_SUBQUEUE_SIZE != details::const_numeric_max::value && + (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head))) { + // We can't enqueue in another block because there's not enough leeway -- the + // tail could surpass the head by the time the block fills up! (Or we'll exceed + // the size limit, if the second part of the condition was true.) + return false; + } + // We're going to need a new block; check that the block index has room + if (pr_blockIndexRaw == nullptr || pr_blockIndexSlotsUsed == pr_blockIndexSize) { + // Hmm, the circular block index is already full -- we'll need + // to allocate a new index. Note pr_blockIndexRaw can only be nullptr if + // the initial allocation failed in the constructor. + + MOODYCAMEL_CONSTEXPR_IF (allocMode == CannotAlloc) { + return false; + } else if (!new_block_index(pr_blockIndexSlotsUsed)) { + return false; + } + } + + // Insert a new block in the circular linked list + auto newBlock = this->parent->ConcurrentQueue::template requisition_block(); + if (newBlock == nullptr) { + return false; + } +#ifdef MCDBGQ_TRACKMEM + newBlock->owner = this; +#endif + newBlock->ConcurrentQueue::Block::template reset_empty(); + if (this->tailBlock == nullptr) { + newBlock->next = newBlock; + } else { + newBlock->next = this->tailBlock->next; + this->tailBlock->next = newBlock; + } + this->tailBlock = newBlock; + ++pr_blockIndexSlotsUsed; + } + + MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new(static_cast(nullptr)) T( + std::forward(element)))) { + // The constructor may throw. We want the element not to appear in the queue in + // that case (without corrupting the queue): + MOODYCAMEL_TRY { + new((*this->tailBlock)[currentTailIndex]) T(std::forward(element)); + } + MOODYCAMEL_CATCH (...) { + // Revert change to the current block, but leave the new block available + // for next time + pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; + this->tailBlock = startBlock == nullptr ? 
this->tailBlock : startBlock; + MOODYCAMEL_RETHROW; + } + } else { + (void) startBlock; + (void) originalBlockIndexSlotsUsed; + } + + // Add block to block index + auto &entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront]; + entry.base = currentTailIndex; + entry.block = this->tailBlock; + blockIndex.load(std::memory_order_relaxed)->front.store(pr_blockIndexFront, + std::memory_order_release); + pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1); + + MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new(static_cast(nullptr)) T( + std::forward(element)))) { + this->tailIndex.store(newTailIndex, std::memory_order_release); + return true; + } + } + + // Enqueue + new((*this->tailBlock)[currentTailIndex]) T(std::forward(element)); + + this->tailIndex.store(newTailIndex, std::memory_order_release); + return true; + } + + template + bool dequeue(U &element) { + auto tail = this->tailIndex.load(std::memory_order_relaxed); + auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed); + if (details::circular_less_than( + this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit, tail)) { + // Might be something to dequeue, let's give it a try + + // Note that this if is purely for performance purposes in the common case when the queue is + // empty and the values are eventually consistent -- we may enter here spuriously. + + // Note that whatever the values of overcommit and tail are, they are not going to change (unless we + // change them) and must be the same value at this point (inside the if) as when the if condition was + // evaluated. + + // We insert an acquire fence here to synchronize-with the release upon incrementing dequeueOvercommit below. + // This ensures that whatever the value we got loaded into overcommit, the load of dequeueOptisticCount in + // the fetch_add below will result in a value at least as recent as that (and therefore at least as large). + // Note that I believe a compiler (signal) fence here would be sufficient due to the nature of fetch_add (all + // read-modify-write operations are guaranteed to work on the latest value in the modification order), but + // unfortunately that can't be shown to be correct using only the C++11 standard. + // See http://stackoverflow.com/questions/18223161/what-are-the-c11-memory-ordering-guarantees-in-this-corner-case + std::atomic_thread_fence(std::memory_order_acquire); + + // Increment optimistic counter, then check if it went over the boundary + auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(1, std::memory_order_relaxed); + + // Note that since dequeueOvercommit must be <= dequeueOptimisticCount (because dequeueOvercommit is only ever + // incremented after dequeueOptimisticCount -- this is enforced in the `else` block below), and since we now + // have a version of dequeueOptimisticCount that is at least as recent as overcommit (due to the release upon + // incrementing dequeueOvercommit and the acquire above that synchronizes with it), overcommit <= myDequeueCount. + // However, we can't assert this since both dequeueOptimisticCount and dequeueOvercommit may (independently) + // overflow; in such a case, though, the logic still holds since the difference between the two is maintained. + + // Note that we reload tail here in case it changed; it will be the same value as before or greater, since + // this load is sequenced after (happens after) the earlier load above. 
This is supported by read-read + // coherency (as defined in the standard), explained here: http://en.cppreference.com/w/cpp/atomic/memory_order + tail = this->tailIndex.load(std::memory_order_acquire); + if ((details::likely)(details::circular_less_than(myDequeueCount - overcommit, tail))) { + // Guaranteed to be at least one element to dequeue! + + // Get the index. Note that since there's guaranteed to be at least one element, this + // will never exceed tail. We need to do an acquire-release fence here since it's possible + // that whatever condition got us to this point was for an earlier enqueued element (that + // we already see the memory effects for), but that by the time we increment somebody else + // has incremented it, and we need to see the memory effects for *that* element, which is + // in such a case is necessarily visible on the thread that incremented it in the first + // place with the more current condition (they must have acquired a tail that is at least + // as recent). + auto index = this->headIndex.fetch_add(1, std::memory_order_acq_rel); + + + // Determine which block the element is in + + auto localBlockIndex = blockIndex.load(std::memory_order_acquire); + auto localBlockIndexHead = localBlockIndex->front.load(std::memory_order_acquire); + + // We need to be careful here about subtracting and dividing because of index wrap-around. + // When an index wraps, we need to preserve the sign of the offset when dividing it by the + // block size (in order to get a correct signed block count offset in all cases): + auto headBase = localBlockIndex->entries[localBlockIndexHead].base; + auto blockBaseIndex = index & ~static_cast(BLOCK_SIZE - 1); + auto offset = static_cast( + static_cast::type>(blockBaseIndex - headBase) / + BLOCK_SIZE); + auto block = localBlockIndex->entries[(localBlockIndexHead + offset) & + (localBlockIndex->size - 1)].block; + + // Dequeue + auto &el = *((*block)[index]); + if (!MOODYCAMEL_NOEXCEPT_ASSIGN(T, T &&, element = std::move(el))) { + // Make sure the element is still fully dequeued and destroyed even if the assignment + // throws + struct Guard { + Block *block; + index_t index; + + ~Guard() { + (*block)[index]->~T(); + block->ConcurrentQueue::Block::template set_empty(index); + } + } guard = {block, index}; + + element = std::move(el); // NOLINT + } else { + element = std::move(el); // NOLINT + el.~T(); // NOLINT + block->ConcurrentQueue::Block::template set_empty(index); + } + + return true; + } else { + // Wasn't anything to dequeue after all; make the effective dequeue count eventually consistent + this->dequeueOvercommit.fetch_add(1, + std::memory_order_release); // Release so that the fetch_add on dequeueOptimisticCount is guaranteed to happen before this write + } + } + + return false; + } + + template + bool MOODYCAMEL_NO_TSAN enqueue_bulk(It itemFirst, size_t count) { + // First, we need to make sure we have enough room to enqueue all of the elements; + // this means pre-allocating blocks and putting them in the block index (but only if + // all the allocations succeeded). 
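+            // The bulk path runs in two passes: the first pass re-uses any empty blocks already linked
+            // ahead of tailBlock and requisitions whatever extra blocks are still required, undoing its
+            // bookkeeping and returning false if an allocation fails; only once every block is secured
+            // are the elements actually constructed, one block at a time, and the new block index front
+            // published for consumers.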
+ index_t startTailIndex = this->tailIndex.load(std::memory_order_relaxed); + auto startBlock = this->tailBlock; + auto originalBlockIndexFront = pr_blockIndexFront; + auto originalBlockIndexSlotsUsed = pr_blockIndexSlotsUsed; + + Block *firstAllocatedBlock = nullptr; + + // Figure out how many blocks we'll need to allocate, and do so + size_t blockBaseDiff = ((startTailIndex + count - 1) & ~static_cast(BLOCK_SIZE - 1)) - + ((startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1)); + index_t currentTailIndex = (startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1); + if (blockBaseDiff > 0) { + // Allocate as many blocks as possible from ahead + while (blockBaseDiff > 0 && this->tailBlock != nullptr && + this->tailBlock->next != firstAllocatedBlock && + this->tailBlock->next->ConcurrentQueue::Block::template is_empty()) { + blockBaseDiff -= static_cast(BLOCK_SIZE); + currentTailIndex += static_cast(BLOCK_SIZE); + + this->tailBlock = this->tailBlock->next; + firstAllocatedBlock = firstAllocatedBlock == nullptr ? this->tailBlock : firstAllocatedBlock; + + auto &entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront]; + entry.base = currentTailIndex; + entry.block = this->tailBlock; + pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1); + } + + // Now allocate as many blocks as necessary from the block pool + while (blockBaseDiff > 0) { + blockBaseDiff -= static_cast(BLOCK_SIZE); + currentTailIndex += static_cast(BLOCK_SIZE); + + auto head = this->headIndex.load(std::memory_order_relaxed); + assert(!details::circular_less_than(currentTailIndex, head)); + bool full = !details::circular_less_than(head, currentTailIndex + BLOCK_SIZE) || + (MAX_SUBQUEUE_SIZE != details::const_numeric_max::value && + (MAX_SUBQUEUE_SIZE == 0 || + MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head)); + if (pr_blockIndexRaw == nullptr || pr_blockIndexSlotsUsed == pr_blockIndexSize || full) { + MOODYCAMEL_CONSTEXPR_IF (allocMode == CannotAlloc) { + // Failed to allocate, undo changes (but keep injected blocks) + pr_blockIndexFront = originalBlockIndexFront; + pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; + this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock; + return false; + } else if (full || !new_block_index(originalBlockIndexSlotsUsed)) { + // Failed to allocate, undo changes (but keep injected blocks) + pr_blockIndexFront = originalBlockIndexFront; + pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; + this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock; + return false; + } + + // pr_blockIndexFront is updated inside new_block_index, so we need to + // update our fallback value too (since we keep the new index even if we + // later fail) + originalBlockIndexFront = originalBlockIndexSlotsUsed; + } + + // Insert a new block in the circular linked list + auto newBlock = this->parent->ConcurrentQueue::template requisition_block(); + if (newBlock == nullptr) { + pr_blockIndexFront = originalBlockIndexFront; + pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; + this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock; + return false; + } + +#ifdef MCDBGQ_TRACKMEM + newBlock->owner = this; +#endif + newBlock->ConcurrentQueue::Block::template set_all_empty(); + if (this->tailBlock == nullptr) { + newBlock->next = newBlock; + } else { + newBlock->next = this->tailBlock->next; + this->tailBlock->next = newBlock; + } + this->tailBlock = newBlock; + firstAllocatedBlock = firstAllocatedBlock == nullptr ? 
this->tailBlock : firstAllocatedBlock; + + ++pr_blockIndexSlotsUsed; + + auto &entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront]; + entry.base = currentTailIndex; + entry.block = this->tailBlock; + pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1); + } + + // Excellent, all allocations succeeded. Reset each block's emptiness before we fill them up, and + // publish the new block index front + auto block = firstAllocatedBlock; + while (true) { + block->ConcurrentQueue::Block::template reset_empty(); + if (block == this->tailBlock) { + break; + } + block = block->next; + } + + MOODYCAMEL_CONSTEXPR_IF (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), + new(static_cast(nullptr)) T( + details::deref_noexcept(itemFirst)))) { + blockIndex.load(std::memory_order_relaxed)->front.store( + (pr_blockIndexFront - 1) & (pr_blockIndexSize - 1), std::memory_order_release); + } + } + + // Enqueue, one block at a time + index_t newTailIndex = startTailIndex + static_cast(count); + currentTailIndex = startTailIndex; + auto endBlock = this->tailBlock; + this->tailBlock = startBlock; + assert((startTailIndex & static_cast(BLOCK_SIZE - 1)) != 0 || firstAllocatedBlock != nullptr || + count == 0); + if ((startTailIndex & static_cast(BLOCK_SIZE - 1)) == 0 && firstAllocatedBlock != nullptr) { + this->tailBlock = firstAllocatedBlock; + } + while (true) { + index_t stopIndex = (currentTailIndex & ~static_cast(BLOCK_SIZE - 1)) + + static_cast(BLOCK_SIZE); + if (details::circular_less_than(newTailIndex, stopIndex)) { + stopIndex = newTailIndex; + } + MOODYCAMEL_CONSTEXPR_IF (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), + new(static_cast(nullptr)) T( + details::deref_noexcept(itemFirst)))) { + while (currentTailIndex != stopIndex) { + new((*this->tailBlock)[currentTailIndex++]) T(*itemFirst++); + } + } else { + MOODYCAMEL_TRY { + while (currentTailIndex != stopIndex) { + // Must use copy constructor even if move constructor is available + // because we may have to revert if there's an exception. + // Sorry about the horrible templated next line, but it was the only way + // to disable moving *at compile time*, which is important because a type + // may only define a (noexcept) move constructor, and so calls to the + // cctor will not compile, even if they are in an if branch that will never + // be executed + new((*this->tailBlock)[currentTailIndex]) T( + details::nomove_if(nullptr)) T( + details::deref_noexcept( + itemFirst)))>::eval( + *itemFirst)); + ++currentTailIndex; + ++itemFirst; + } + } + MOODYCAMEL_CATCH (...) { + // Oh dear, an exception's been thrown -- destroy the elements that + // were enqueued so far and revert the entire bulk operation (we'll keep + // any allocated blocks in our linked list for later, though). + auto constructedStopIndex = currentTailIndex; + auto lastBlockEnqueued = this->tailBlock; + + pr_blockIndexFront = originalBlockIndexFront; + pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; + this->tailBlock = startBlock == nullptr ? 
firstAllocatedBlock : startBlock; + + if (!details::is_trivially_destructible::value) { + auto block = startBlock; + if ((startTailIndex & static_cast(BLOCK_SIZE - 1)) == 0) { + block = firstAllocatedBlock; + } + currentTailIndex = startTailIndex; + while (true) { + stopIndex = (currentTailIndex & ~static_cast(BLOCK_SIZE - 1)) + + static_cast(BLOCK_SIZE); + if (details::circular_less_than(constructedStopIndex, stopIndex)) { + stopIndex = constructedStopIndex; + } + while (currentTailIndex != stopIndex) { + (*block)[currentTailIndex++]->~T(); + } + if (block == lastBlockEnqueued) { + break; + } + block = block->next; + } + } + MOODYCAMEL_RETHROW; + } + } + + if (this->tailBlock == endBlock) { + assert(currentTailIndex == newTailIndex); + break; + } + this->tailBlock = this->tailBlock->next; + } + + MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), + new(static_cast(nullptr)) T( + details::deref_noexcept(itemFirst)))) { + if (firstAllocatedBlock != nullptr) + blockIndex.load(std::memory_order_relaxed)->front.store( + (pr_blockIndexFront - 1) & (pr_blockIndexSize - 1), std::memory_order_release); + } + + this->tailIndex.store(newTailIndex, std::memory_order_release); + return true; + } + + template + size_t dequeue_bulk(It &itemFirst, size_t max) { + auto tail = this->tailIndex.load(std::memory_order_relaxed); + auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed); + auto desiredCount = static_cast(tail - + (this->dequeueOptimisticCount.load(std::memory_order_relaxed) - + overcommit)); + if (details::circular_less_than(0, desiredCount)) { + desiredCount = desiredCount < max ? desiredCount : max; + std::atomic_thread_fence(std::memory_order_acquire); + + auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(desiredCount, + std::memory_order_relaxed); + + tail = this->tailIndex.load(std::memory_order_acquire); + auto actualCount = static_cast(tail - (myDequeueCount - overcommit)); + if (details::circular_less_than(0, actualCount)) { + actualCount = desiredCount < actualCount ? desiredCount : actualCount; + if (actualCount < desiredCount) { + this->dequeueOvercommit.fetch_add(desiredCount - actualCount, std::memory_order_release); + } + + // Get the first index. Note that since there's guaranteed to be at least actualCount elements, this + // will never exceed tail. + auto firstIndex = this->headIndex.fetch_add(actualCount, std::memory_order_acq_rel); + + // Determine which block the first element is in + auto localBlockIndex = blockIndex.load(std::memory_order_acquire); + auto localBlockIndexHead = localBlockIndex->front.load(std::memory_order_acquire); + + auto headBase = localBlockIndex->entries[localBlockIndexHead].base; + auto firstBlockBaseIndex = firstIndex & ~static_cast(BLOCK_SIZE - 1); + auto offset = static_cast( + static_cast::type>(firstBlockBaseIndex - headBase) / + BLOCK_SIZE); + auto indexIndex = (localBlockIndexHead + offset) & (localBlockIndex->size - 1); + + // Iterate the blocks and dequeue + auto index = firstIndex; + do { + auto firstIndexInBlock = index; + index_t endIndex = + (index & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + endIndex = details::circular_less_than( + firstIndex + static_cast(actualCount), endIndex) ? 
firstIndex + + static_cast(actualCount) + : endIndex; + auto block = localBlockIndex->entries[indexIndex].block; + if (MOODYCAMEL_NOEXCEPT_ASSIGN(T, T &&, details::deref_noexcept(itemFirst) = std::move( + (*(*block)[index])))) { + while (index != endIndex) { + auto &el = *((*block)[index]); + *itemFirst++ = std::move(el); + el.~T(); + ++index; + } + } else { + MOODYCAMEL_TRY { + while (index != endIndex) { + auto &el = *((*block)[index]); + *itemFirst = std::move(el); + ++itemFirst; + el.~T(); + ++index; + } + } + MOODYCAMEL_CATCH (...) { + // It's too late to revert the dequeue, but we can make sure that all + // the dequeued objects are properly destroyed and the block index + // (and empty count) are properly updated before we propagate the exception + do { + block = localBlockIndex->entries[indexIndex].block; + while (index != endIndex) { + (*block)[index++]->~T(); + } + block->ConcurrentQueue::Block::template set_many_empty( + firstIndexInBlock, static_cast(endIndex - firstIndexInBlock)); + indexIndex = (indexIndex + 1) & (localBlockIndex->size - 1); + + firstIndexInBlock = index; + endIndex = (index & ~static_cast(BLOCK_SIZE - 1)) + + static_cast(BLOCK_SIZE); + endIndex = details::circular_less_than( + firstIndex + static_cast(actualCount), endIndex) ? firstIndex + + static_cast(actualCount) + : endIndex; + } while (index != firstIndex + actualCount); + + MOODYCAMEL_RETHROW; + } + } + block->ConcurrentQueue::Block::template set_many_empty(firstIndexInBlock, + static_cast( + endIndex - + firstIndexInBlock)); + indexIndex = (indexIndex + 1) & (localBlockIndex->size - 1); + } while (index != firstIndex + actualCount); + + return actualCount; + } else { + // Wasn't anything to dequeue after all; make the effective dequeue count eventually consistent + this->dequeueOvercommit.fetch_add(desiredCount, std::memory_order_release); + } + } + + return 0; + } + + private: + struct BlockIndexEntry { + index_t base; + Block *block; + }; + + struct BlockIndexHeader { + size_t size; + std::atomic front; // Current slot (not next, like pr_blockIndexFront) + BlockIndexEntry *entries; + void *prev; + }; + + + bool new_block_index(size_t numberOfFilledSlotsToExpose) { + auto prevBlockSizeMask = pr_blockIndexSize - 1; + + // Create the new block + pr_blockIndexSize <<= 1; + auto newRawPtr = static_cast((Traits::malloc)( + sizeof(BlockIndexHeader) + std::alignment_of::value - 1 + + sizeof(BlockIndexEntry) * pr_blockIndexSize)); + if (newRawPtr == nullptr) { + pr_blockIndexSize >>= 1; // Reset to allow graceful retry + return false; + } + + auto newBlockIndexEntries = reinterpret_cast(details::align_for( + newRawPtr + sizeof(BlockIndexHeader))); + + // Copy in all the old indices, if any + size_t j = 0; + if (pr_blockIndexSlotsUsed != 0) { + auto i = (pr_blockIndexFront - pr_blockIndexSlotsUsed) & prevBlockSizeMask; + do { + newBlockIndexEntries[j++] = pr_blockIndexEntries[i]; + i = (i + 1) & prevBlockSizeMask; + } while (i != pr_blockIndexFront); + } + + // Update everything + auto header = new(newRawPtr) BlockIndexHeader; + header->size = pr_blockIndexSize; + header->front.store(numberOfFilledSlotsToExpose - 1, std::memory_order_relaxed); + header->entries = newBlockIndexEntries; + header->prev = pr_blockIndexRaw; // we link the new block to the old one so we can free it later + + pr_blockIndexFront = j; + pr_blockIndexEntries = newBlockIndexEntries; + pr_blockIndexRaw = newRawPtr; + blockIndex.store(header, std::memory_order_release); + + return true; + } + + private: + std::atomic blockIndex; + + // To 
be used by producer only -- consumer must use the ones in referenced by blockIndex + size_t pr_blockIndexSlotsUsed; + size_t pr_blockIndexSize; + size_t pr_blockIndexFront; // Next slot (not current) + BlockIndexEntry *pr_blockIndexEntries; + void *pr_blockIndexRaw; + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + public: + ExplicitProducer* nextExplicitProducer; + private: +#endif + +#ifdef MCDBGQ_TRACKMEM + friend struct MemStats; +#endif + }; + + + ////////////////////////////////// + // Implicit queue + ////////////////////////////////// + + struct ImplicitProducer : public ProducerBase { + ImplicitProducer(ConcurrentQueue *parent_) : + ProducerBase(parent_, false), + nextBlockIndexCapacity(IMPLICIT_INITIAL_INDEX_SIZE), + blockIndex(nullptr) { + new_block_index(); + } + + ~ImplicitProducer() { + // Note that since we're in the destructor we can assume that all enqueue/dequeue operations + // completed already; this means that all undequeued elements are placed contiguously across + // contiguous blocks, and that only the first and last remaining blocks can be only partially + // empty (all other remaining blocks must be completely full). + +#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED + // Unregister ourselves for thread termination notification + if (!this->inactive.load(std::memory_order_relaxed)) { + details::ThreadExitNotifier::unsubscribe(&threadExitListener); + } +#endif + + // Destroy all remaining elements! + auto tail = this->tailIndex.load(std::memory_order_relaxed); + auto index = this->headIndex.load(std::memory_order_relaxed); + Block *block = nullptr; + assert(index == tail || details::circular_less_than(index, tail)); + bool forceFreeLastBlock = + index != tail; // If we enter the loop, then the last (tail) block will not be freed + while (index != tail) { + if ((index & static_cast(BLOCK_SIZE - 1)) == 0 || block == nullptr) { + if (block != nullptr) { + // Free the old block + this->parent->add_block_to_free_list(block); + } + + block = get_block_index_entry_for_index(index)->value.load(std::memory_order_relaxed); + } + + ((*block)[index])->~T(); + ++index; + } + // Even if the queue is empty, there's still one block that's not on the free list + // (unless the head index reached the end of it, in which case the tail will be poised + // to create a new block). 
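+            // What remains is to hand the still-live tail block back to the free list (when the loop
+            // above did not already do so) and then to tear down the block index: every entry is
+            // destructed and each index header in the resize chain is freed via its 'prev' pointer.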
+ if (this->tailBlock != nullptr && + (forceFreeLastBlock || (tail & static_cast(BLOCK_SIZE - 1)) != 0)) { + this->parent->add_block_to_free_list(this->tailBlock); + } + + // Destroy block index + auto localBlockIndex = blockIndex.load(std::memory_order_relaxed); + if (localBlockIndex != nullptr) { + for (size_t i = 0; i != localBlockIndex->capacity; ++i) { + localBlockIndex->index[i]->~BlockIndexEntry(); + } + do { + auto prev = localBlockIndex->prev; + localBlockIndex->~BlockIndexHeader(); + (Traits::free)(localBlockIndex); + localBlockIndex = prev; + } while (localBlockIndex != nullptr); + } + } + + template + inline bool enqueue(U &&element) { + index_t currentTailIndex = this->tailIndex.load(std::memory_order_relaxed); + index_t newTailIndex = 1 + currentTailIndex; + if ((currentTailIndex & static_cast(BLOCK_SIZE - 1)) == 0) { + // We reached the end of a block, start a new one + auto head = this->headIndex.load(std::memory_order_relaxed); + assert(!details::circular_less_than(currentTailIndex, head)); + if (!details::circular_less_than(head, currentTailIndex + BLOCK_SIZE) || + (MAX_SUBQUEUE_SIZE != details::const_numeric_max::value && + (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head))) { + return false; + } +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + debug::DebugLock lock(mutex); +#endif + // Find out where we'll be inserting this block in the block index + BlockIndexEntry *idxEntry; + if (!insert_block_index_entry(idxEntry, currentTailIndex)) { + return false; + } + + // Get ahold of a new block + auto newBlock = this->parent->ConcurrentQueue::template requisition_block(); + if (newBlock == nullptr) { + rewind_block_index_tail(); + idxEntry->value.store(nullptr, std::memory_order_relaxed); + return false; + } +#ifdef MCDBGQ_TRACKMEM + newBlock->owner = this; +#endif + newBlock->ConcurrentQueue::Block::template reset_empty(); + + MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new(static_cast(nullptr)) T( + std::forward(element)))) { + // May throw, try to insert now before we publish the fact that we have this new block + MOODYCAMEL_TRY { + new((*newBlock)[currentTailIndex]) T(std::forward(element)); + } + MOODYCAMEL_CATCH (...) 
{ + rewind_block_index_tail(); + idxEntry->value.store(nullptr, std::memory_order_relaxed); + this->parent->add_block_to_free_list(newBlock); + MOODYCAMEL_RETHROW; + } + } + + // Insert the new block into the index + idxEntry->value.store(newBlock, std::memory_order_relaxed); + + this->tailBlock = newBlock; + + MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new(static_cast(nullptr)) T( + std::forward(element)))) { + this->tailIndex.store(newTailIndex, std::memory_order_release); + return true; + } + } + + // Enqueue + new((*this->tailBlock)[currentTailIndex]) T(std::forward(element)); + + this->tailIndex.store(newTailIndex, std::memory_order_release); + return true; + } + + template + bool dequeue(U &element) { + // See ExplicitProducer::dequeue for rationale and explanation + index_t tail = this->tailIndex.load(std::memory_order_relaxed); + index_t overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed); + if (details::circular_less_than( + this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit, tail)) { + std::atomic_thread_fence(std::memory_order_acquire); + + index_t myDequeueCount = this->dequeueOptimisticCount.fetch_add(1, std::memory_order_relaxed); + tail = this->tailIndex.load(std::memory_order_acquire); + if ((details::likely)(details::circular_less_than(myDequeueCount - overcommit, tail))) { + index_t index = this->headIndex.fetch_add(1, std::memory_order_acq_rel); + + // Determine which block the element is in + auto entry = get_block_index_entry_for_index(index); + + // Dequeue + auto block = entry->value.load(std::memory_order_relaxed); + auto &el = *((*block)[index]); + + if (!MOODYCAMEL_NOEXCEPT_ASSIGN(T, T &&, element = std::move(el))) { +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + // Note: Acquiring the mutex with every dequeue instead of only when a block + // is released is very sub-optimal, but it is, after all, purely debug code. + debug::DebugLock lock(producer->mutex); +#endif + struct Guard { + Block *block; + index_t index; + BlockIndexEntry *entry; + ConcurrentQueue *parent; + + ~Guard() { + (*block)[index]->~T(); + if (block->ConcurrentQueue::Block::template set_empty(index)) { + entry->value.store(nullptr, std::memory_order_relaxed); + parent->add_block_to_free_list(block); + } + } + } guard = {block, index, entry, this->parent}; + + element = std::move(el); // NOLINT + } else { + element = std::move(el); // NOLINT + el.~T(); // NOLINT + + if (block->ConcurrentQueue::Block::template set_empty(index)) { + { +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + debug::DebugLock lock(mutex); +#endif + // Add the block back into the global free pool (and remove from block index) + entry->value.store(nullptr, std::memory_order_relaxed); + } + this->parent->add_block_to_free_list(block); // releases the above store + } + } + + return true; + } else { + this->dequeueOvercommit.fetch_add(1, std::memory_order_release); + } + } + + return false; + } + +#ifdef _MSC_VER + #pragma warning(push) +#pragma warning(disable: 4706) // assignment within conditional expression +#endif + + template + bool enqueue_bulk(It itemFirst, size_t count) { + // First, we need to make sure we have enough room to enqueue all of the elements; + // this means pre-allocating blocks and putting them in the block index (but only if + // all the allocations succeeded). 
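+            // Unlike the explicit producer, which keeps a circular list of blocks it owns, the implicit
+            // producer requisitions each block from the parent and registers it in its block index as it
+            // goes; if an index insertion or block allocation fails part-way, the entries added so far
+            // are rewound and the blocks are returned to the parent before the call fails.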
+ + // Note that the tailBlock we start off with may not be owned by us any more; + // this happens if it was filled up exactly to the top (setting tailIndex to + // the first index of the next block which is not yet allocated), then dequeued + // completely (putting it on the free list) before we enqueue again. + + index_t startTailIndex = this->tailIndex.load(std::memory_order_relaxed); + auto startBlock = this->tailBlock; + Block *firstAllocatedBlock = nullptr; + auto endBlock = this->tailBlock; + + // Figure out how many blocks we'll need to allocate, and do so + size_t blockBaseDiff = ((startTailIndex + count - 1) & ~static_cast(BLOCK_SIZE - 1)) - + ((startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1)); + index_t currentTailIndex = (startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1); + if (blockBaseDiff > 0) { +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + debug::DebugLock lock(mutex); +#endif + do { + blockBaseDiff -= static_cast(BLOCK_SIZE); + currentTailIndex += static_cast(BLOCK_SIZE); + + // Find out where we'll be inserting this block in the block index + BlockIndexEntry *idxEntry = nullptr; // initialization here unnecessary but compiler can't always tell + Block *newBlock; + bool indexInserted = false; + auto head = this->headIndex.load(std::memory_order_relaxed); + assert(!details::circular_less_than(currentTailIndex, head)); + bool full = !details::circular_less_than(head, currentTailIndex + BLOCK_SIZE) || + (MAX_SUBQUEUE_SIZE != details::const_numeric_max::value && + (MAX_SUBQUEUE_SIZE == 0 || + MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head)); + + if (full || + !(indexInserted = insert_block_index_entry(idxEntry, currentTailIndex)) || + (newBlock = this->parent->ConcurrentQueue::template requisition_block()) == + nullptr) { + // Index allocation or block allocation failed; revert any other allocations + // and index insertions done so far for this operation + if (indexInserted) { + rewind_block_index_tail(); + idxEntry->value.store(nullptr, std::memory_order_relaxed); + } + currentTailIndex = (startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1); + for (auto block = firstAllocatedBlock; block != nullptr; block = block->next) { + currentTailIndex += static_cast(BLOCK_SIZE); + idxEntry = get_block_index_entry_for_index(currentTailIndex); + idxEntry->value.store(nullptr, std::memory_order_relaxed); + rewind_block_index_tail(); + } + this->parent->add_blocks_to_free_list(firstAllocatedBlock); + this->tailBlock = startBlock; + + return false; + } + +#ifdef MCDBGQ_TRACKMEM + newBlock->owner = this; +#endif + newBlock->ConcurrentQueue::Block::template reset_empty(); + newBlock->next = nullptr; + + // Insert the new block into the index + idxEntry->value.store(newBlock, std::memory_order_relaxed); + + // Store the chain of blocks so that we can undo if later allocations fail, + // and so that we can find the blocks when we do the actual enqueueing + if ((startTailIndex & static_cast(BLOCK_SIZE - 1)) != 0 || + firstAllocatedBlock != nullptr) { + assert(this->tailBlock != nullptr); + this->tailBlock->next = newBlock; + } + this->tailBlock = newBlock; + endBlock = newBlock; + firstAllocatedBlock = firstAllocatedBlock == nullptr ? 
newBlock : firstAllocatedBlock; + } while (blockBaseDiff > 0); + } + + // Enqueue, one block at a time + index_t newTailIndex = startTailIndex + static_cast(count); + currentTailIndex = startTailIndex; + this->tailBlock = startBlock; + assert((startTailIndex & static_cast(BLOCK_SIZE - 1)) != 0 || firstAllocatedBlock != nullptr || + count == 0); + if ((startTailIndex & static_cast(BLOCK_SIZE - 1)) == 0 && firstAllocatedBlock != nullptr) { + this->tailBlock = firstAllocatedBlock; + } + while (true) { + index_t stopIndex = (currentTailIndex & ~static_cast(BLOCK_SIZE - 1)) + + static_cast(BLOCK_SIZE); + if (details::circular_less_than(newTailIndex, stopIndex)) { + stopIndex = newTailIndex; + } + MOODYCAMEL_CONSTEXPR_IF (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), + new(static_cast(nullptr)) T( + details::deref_noexcept(itemFirst)))) { + while (currentTailIndex != stopIndex) { + new((*this->tailBlock)[currentTailIndex++]) T(*itemFirst++); + } + } else { + MOODYCAMEL_TRY { + while (currentTailIndex != stopIndex) { + new((*this->tailBlock)[currentTailIndex]) T( + details::nomove_if(nullptr)) T( + details::deref_noexcept( + itemFirst)))>::eval( + *itemFirst)); + ++currentTailIndex; + ++itemFirst; + } + } + MOODYCAMEL_CATCH (...) { + auto constructedStopIndex = currentTailIndex; + auto lastBlockEnqueued = this->tailBlock; + + if (!details::is_trivially_destructible::value) { + auto block = startBlock; + if ((startTailIndex & static_cast(BLOCK_SIZE - 1)) == 0) { + block = firstAllocatedBlock; + } + currentTailIndex = startTailIndex; + while (true) { + stopIndex = (currentTailIndex & ~static_cast(BLOCK_SIZE - 1)) + + static_cast(BLOCK_SIZE); + if (details::circular_less_than(constructedStopIndex, stopIndex)) { + stopIndex = constructedStopIndex; + } + while (currentTailIndex != stopIndex) { + (*block)[currentTailIndex++]->~T(); + } + if (block == lastBlockEnqueued) { + break; + } + block = block->next; + } + } + + currentTailIndex = (startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1); + for (auto block = firstAllocatedBlock; block != nullptr; block = block->next) { + currentTailIndex += static_cast(BLOCK_SIZE); + auto idxEntry = get_block_index_entry_for_index(currentTailIndex); + idxEntry->value.store(nullptr, std::memory_order_relaxed); + rewind_block_index_tail(); + } + this->parent->add_blocks_to_free_list(firstAllocatedBlock); + this->tailBlock = startBlock; + MOODYCAMEL_RETHROW; + } + } + + if (this->tailBlock == endBlock) { + assert(currentTailIndex == newTailIndex); + break; + } + this->tailBlock = this->tailBlock->next; + } + this->tailIndex.store(newTailIndex, std::memory_order_release); + return true; + } + +#ifdef _MSC_VER +#pragma warning(pop) +#endif + + template + size_t dequeue_bulk(It &itemFirst, size_t max) { + auto tail = this->tailIndex.load(std::memory_order_relaxed); + auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed); + auto desiredCount = static_cast(tail - + (this->dequeueOptimisticCount.load(std::memory_order_relaxed) - + overcommit)); + if (details::circular_less_than(0, desiredCount)) { + desiredCount = desiredCount < max ? desiredCount : max; + std::atomic_thread_fence(std::memory_order_acquire); + + auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(desiredCount, + std::memory_order_relaxed); + + tail = this->tailIndex.load(std::memory_order_acquire); + auto actualCount = static_cast(tail - (myDequeueCount - overcommit)); + if (details::circular_less_than(0, actualCount)) { + actualCount = desiredCount < actualCount ? 
desiredCount : actualCount; + if (actualCount < desiredCount) { + this->dequeueOvercommit.fetch_add(desiredCount - actualCount, std::memory_order_release); + } + + // Get the first index. Note that since there's guaranteed to be at least actualCount elements, this + // will never exceed tail. + auto firstIndex = this->headIndex.fetch_add(actualCount, std::memory_order_acq_rel); + + // Iterate the blocks and dequeue + auto index = firstIndex; + BlockIndexHeader *localBlockIndex; + auto indexIndex = get_block_index_index_for_index(index, localBlockIndex); + do { + auto blockStartIndex = index; + index_t endIndex = + (index & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + endIndex = details::circular_less_than( + firstIndex + static_cast(actualCount), endIndex) ? firstIndex + + static_cast(actualCount) + : endIndex; + + auto entry = localBlockIndex->index[indexIndex]; + auto block = entry->value.load(std::memory_order_relaxed); + if (MOODYCAMEL_NOEXCEPT_ASSIGN(T, T &&, details::deref_noexcept(itemFirst) = std::move( + (*(*block)[index])))) { + while (index != endIndex) { + auto &el = *((*block)[index]); + *itemFirst++ = std::move(el); + el.~T(); + ++index; + } + } else { + MOODYCAMEL_TRY { + while (index != endIndex) { + auto &el = *((*block)[index]); + *itemFirst = std::move(el); + ++itemFirst; + el.~T(); + ++index; + } + } + MOODYCAMEL_CATCH (...) { + do { + entry = localBlockIndex->index[indexIndex]; + block = entry->value.load(std::memory_order_relaxed); + while (index != endIndex) { + (*block)[index++]->~T(); + } + + if (block->ConcurrentQueue::Block::template set_many_empty( + blockStartIndex, static_cast(endIndex - blockStartIndex))) { +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + debug::DebugLock lock(mutex); +#endif + entry->value.store(nullptr, std::memory_order_relaxed); + this->parent->add_block_to_free_list(block); + } + indexIndex = (indexIndex + 1) & (localBlockIndex->capacity - 1); + + blockStartIndex = index; + endIndex = (index & ~static_cast(BLOCK_SIZE - 1)) + + static_cast(BLOCK_SIZE); + endIndex = details::circular_less_than( + firstIndex + static_cast(actualCount), endIndex) ? firstIndex + + static_cast(actualCount) + : endIndex; + } while (index != firstIndex + actualCount); + + MOODYCAMEL_RETHROW; + } + } + if (block->ConcurrentQueue::Block::template set_many_empty( + blockStartIndex, static_cast(endIndex - blockStartIndex))) { + { +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + debug::DebugLock lock(mutex); +#endif + // Note that the set_many_empty above did a release, meaning that anybody who acquires the block + // we're about to free can use it safely since our writes (and reads!) will have happened-before then. 
+ entry->value.store(nullptr, std::memory_order_relaxed); + } + this->parent->add_block_to_free_list(block); // releases the above store + } + indexIndex = (indexIndex + 1) & (localBlockIndex->capacity - 1); + } while (index != firstIndex + actualCount); + + return actualCount; + } else { + this->dequeueOvercommit.fetch_add(desiredCount, std::memory_order_release); + } + } + + return 0; + } + + private: + // The block size must be > 1, so any number with the low bit set is an invalid block base index + static const index_t INVALID_BLOCK_BASE = 1; + + struct BlockIndexEntry { + std::atomic key; + std::atomic value; + }; + + struct BlockIndexHeader { + size_t capacity; + std::atomic tail; + BlockIndexEntry *entries; + BlockIndexEntry **index; + BlockIndexHeader *prev; + }; + + template + inline bool insert_block_index_entry(BlockIndexEntry *&idxEntry, index_t blockStartIndex) { + auto localBlockIndex = blockIndex.load( + std::memory_order_relaxed); // We're the only writer thread, relaxed is OK + if (localBlockIndex == nullptr) { + return false; // this can happen if new_block_index failed in the constructor + } + size_t newTail = + (localBlockIndex->tail.load(std::memory_order_relaxed) + 1) & (localBlockIndex->capacity - 1); + idxEntry = localBlockIndex->index[newTail]; + if (idxEntry->key.load(std::memory_order_relaxed) == INVALID_BLOCK_BASE || + idxEntry->value.load(std::memory_order_relaxed) == nullptr) { + + idxEntry->key.store(blockStartIndex, std::memory_order_relaxed); + localBlockIndex->tail.store(newTail, std::memory_order_release); + return true; + } + + // No room in the old block index, try to allocate another one! + MOODYCAMEL_CONSTEXPR_IF (allocMode == CannotAlloc) { + return false; + } else if (!new_block_index()) { + return false; + } else { + localBlockIndex = blockIndex.load(std::memory_order_relaxed); + newTail = (localBlockIndex->tail.load(std::memory_order_relaxed) + 1) & + (localBlockIndex->capacity - 1); + idxEntry = localBlockIndex->index[newTail]; + assert(idxEntry->key.load(std::memory_order_relaxed) == INVALID_BLOCK_BASE); + idxEntry->key.store(blockStartIndex, std::memory_order_relaxed); + localBlockIndex->tail.store(newTail, std::memory_order_release); + return true; + } + } + + inline void rewind_block_index_tail() { + auto localBlockIndex = blockIndex.load(std::memory_order_relaxed); + localBlockIndex->tail.store( + (localBlockIndex->tail.load(std::memory_order_relaxed) - 1) & (localBlockIndex->capacity - 1), + std::memory_order_relaxed); + } + + inline BlockIndexEntry *get_block_index_entry_for_index(index_t index) const { + BlockIndexHeader *localBlockIndex; + auto idx = get_block_index_index_for_index(index, localBlockIndex); + return localBlockIndex->index[idx]; + } + + inline size_t get_block_index_index_for_index(index_t index, BlockIndexHeader *&localBlockIndex) const { +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + debug::DebugLock lock(mutex); +#endif + index &= ~static_cast(BLOCK_SIZE - 1); + localBlockIndex = blockIndex.load(std::memory_order_acquire); + auto tail = localBlockIndex->tail.load(std::memory_order_acquire); + auto tailBase = localBlockIndex->index[tail]->key.load(std::memory_order_relaxed); + assert(tailBase != INVALID_BLOCK_BASE); + // Note: Must use division instead of shift because the index may wrap around, causing a negative + // offset, whose negativity we want to preserve + auto offset = static_cast( + static_cast::type>(index - tailBase) / BLOCK_SIZE); + size_t idx = (tail + offset) & (localBlockIndex->capacity - 1); + 
assert(localBlockIndex->index[idx]->key.load(std::memory_order_relaxed) == index && + localBlockIndex->index[idx]->value.load(std::memory_order_relaxed) != nullptr); + return idx; + } + + bool new_block_index() { + auto prev = blockIndex.load(std::memory_order_relaxed); + size_t prevCapacity = prev == nullptr ? 0 : prev->capacity; + auto entryCount = prev == nullptr ? nextBlockIndexCapacity : prevCapacity; + auto raw = static_cast((Traits::malloc)( + sizeof(BlockIndexHeader) + + std::alignment_of::value - 1 + sizeof(BlockIndexEntry) * entryCount + + std::alignment_of::value - 1 + + sizeof(BlockIndexEntry *) * nextBlockIndexCapacity)); + if (raw == nullptr) { + return false; + } + + auto header = new(raw) BlockIndexHeader; + auto entries = reinterpret_cast(details::align_for( + raw + sizeof(BlockIndexHeader))); + auto index = reinterpret_cast(details::align_for( + reinterpret_cast(entries) + sizeof(BlockIndexEntry) * entryCount)); + if (prev != nullptr) { + auto prevTail = prev->tail.load(std::memory_order_relaxed); + auto prevPos = prevTail; + size_t i = 0; + do { + prevPos = (prevPos + 1) & (prev->capacity - 1); + index[i++] = prev->index[prevPos]; + } while (prevPos != prevTail); + assert(i == prevCapacity); + } + for (size_t i = 0; i != entryCount; ++i) { + new(entries + i) BlockIndexEntry; + entries[i].key.store(INVALID_BLOCK_BASE, std::memory_order_relaxed); + index[prevCapacity + i] = entries + i; + } + header->prev = prev; + header->entries = entries; + header->index = index; + header->capacity = nextBlockIndexCapacity; + header->tail.store((prevCapacity - 1) & (nextBlockIndexCapacity - 1), std::memory_order_relaxed); + + blockIndex.store(header, std::memory_order_release); + + nextBlockIndexCapacity <<= 1; + + return true; + } + + private: + size_t nextBlockIndexCapacity; + std::atomic blockIndex; + +#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED + public: + details::ThreadExitListener threadExitListener; + private: +#endif + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + public: + ImplicitProducer* nextImplicitProducer; + private: +#endif + +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + mutable debug::DebugMutex mutex; +#endif +#ifdef MCDBGQ_TRACKMEM + friend struct MemStats; +#endif + }; + + + ////////////////////////////////// + // Block pool manipulation + ////////////////////////////////// + + void populate_initial_block_list(size_t blockCount) { + initialBlockPoolSize = blockCount; + if (initialBlockPoolSize == 0) { + initialBlockPool = nullptr; + return; + } + + initialBlockPool = create_array(blockCount); + if (initialBlockPool == nullptr) { + initialBlockPoolSize = 0; + } + for (size_t i = 0; i < initialBlockPoolSize; ++i) { + initialBlockPool[i].dynamicallyAllocated = false; + } + } + + inline Block *try_get_block_from_initial_pool() { + if (initialBlockPoolIndex.load(std::memory_order_relaxed) >= initialBlockPoolSize) { + return nullptr; + } + + auto index = initialBlockPoolIndex.fetch_add(1, std::memory_order_relaxed); + + return index < initialBlockPoolSize ? 
(initialBlockPool + index) : nullptr; + } + + inline void add_block_to_free_list(Block *block) { +#ifdef MCDBGQ_TRACKMEM + block->owner = nullptr; +#endif + freeList.add(block); + } + + inline void add_blocks_to_free_list(Block *block) { + while (block != nullptr) { + auto next = block->next; + add_block_to_free_list(block); + block = next; + } + } + + inline Block *try_get_block_from_free_list() { + return freeList.try_get(); + } + + // Gets a free block from one of the memory pools, or allocates a new one (if applicable) + template + Block *requisition_block() { + auto block = try_get_block_from_initial_pool(); + if (block != nullptr) { + return block; + } + + block = try_get_block_from_free_list(); + if (block != nullptr) { + return block; + } + + MOODYCAMEL_CONSTEXPR_IF (canAlloc == CanAlloc) { + return create(); + } else { + return nullptr; + } + } + + +#ifdef MCDBGQ_TRACKMEM + public: + struct MemStats { + size_t allocatedBlocks; + size_t usedBlocks; + size_t freeBlocks; + size_t ownedBlocksExplicit; + size_t ownedBlocksImplicit; + size_t implicitProducers; + size_t explicitProducers; + size_t elementsEnqueued; + size_t blockClassBytes; + size_t queueClassBytes; + size_t implicitBlockIndexBytes; + size_t explicitBlockIndexBytes; + + friend class ConcurrentQueue; + + private: + static MemStats getFor(ConcurrentQueue* q) + { + MemStats stats = { 0 }; + + stats.elementsEnqueued = q->size_approx(); + + auto block = q->freeList.head_unsafe(); + while (block != nullptr) { + ++stats.allocatedBlocks; + ++stats.freeBlocks; + block = block->freeListNext.load(std::memory_order_relaxed); + } + + for (auto ptr = q->producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) { + bool implicit = dynamic_cast(ptr) != nullptr; + stats.implicitProducers += implicit ? 1 : 0; + stats.explicitProducers += implicit ? 
0 : 1; + + if (implicit) { + auto prod = static_cast(ptr); + stats.queueClassBytes += sizeof(ImplicitProducer); + auto head = prod->headIndex.load(std::memory_order_relaxed); + auto tail = prod->tailIndex.load(std::memory_order_relaxed); + auto hash = prod->blockIndex.load(std::memory_order_relaxed); + if (hash != nullptr) { + for (size_t i = 0; i != hash->capacity; ++i) { + if (hash->index[i]->key.load(std::memory_order_relaxed) != ImplicitProducer::INVALID_BLOCK_BASE && hash->index[i]->value.load(std::memory_order_relaxed) != nullptr) { + ++stats.allocatedBlocks; + ++stats.ownedBlocksImplicit; + } + } + stats.implicitBlockIndexBytes += hash->capacity * sizeof(typename ImplicitProducer::BlockIndexEntry); + for (; hash != nullptr; hash = hash->prev) { + stats.implicitBlockIndexBytes += sizeof(typename ImplicitProducer::BlockIndexHeader) + hash->capacity * sizeof(typename ImplicitProducer::BlockIndexEntry*); + } + } + for (; details::circular_less_than(head, tail); head += BLOCK_SIZE) { + //auto block = prod->get_block_index_entry_for_index(head); + ++stats.usedBlocks; + } + } + else { + auto prod = static_cast(ptr); + stats.queueClassBytes += sizeof(ExplicitProducer); + auto tailBlock = prod->tailBlock; + bool wasNonEmpty = false; + if (tailBlock != nullptr) { + auto block = tailBlock; + do { + ++stats.allocatedBlocks; + if (!block->ConcurrentQueue::Block::template is_empty() || wasNonEmpty) { + ++stats.usedBlocks; + wasNonEmpty = wasNonEmpty || block != tailBlock; + } + ++stats.ownedBlocksExplicit; + block = block->next; + } while (block != tailBlock); + } + auto index = prod->blockIndex.load(std::memory_order_relaxed); + while (index != nullptr) { + stats.explicitBlockIndexBytes += sizeof(typename ExplicitProducer::BlockIndexHeader) + index->size * sizeof(typename ExplicitProducer::BlockIndexEntry); + index = static_cast(index->prev); + } + } + } + + auto freeOnInitialPool = q->initialBlockPoolIndex.load(std::memory_order_relaxed) >= q->initialBlockPoolSize ? 0 : q->initialBlockPoolSize - q->initialBlockPoolIndex.load(std::memory_order_relaxed); + stats.allocatedBlocks += freeOnInitialPool; + stats.freeBlocks += freeOnInitialPool; + + stats.blockClassBytes = sizeof(Block) * stats.allocatedBlocks; + stats.queueClassBytes += sizeof(ConcurrentQueue); + + return stats; + } + }; + + // For debugging only. Not thread-safe. + MemStats getMemStats() + { + return MemStats::getFor(this); + } + private: + friend struct MemStats; +#endif + + + ////////////////////////////////// + // Producer list manipulation + ////////////////////////////////// + + ProducerBase *recycle_or_create_producer(bool isExplicit) { + bool recycled; + return recycle_or_create_producer(isExplicit, recycled); + } + + ProducerBase *recycle_or_create_producer(bool isExplicit, bool &recycled) { +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH + debug::DebugLock lock(implicitProdMutex); +#endif + // Try to re-use one first + for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) { + if (ptr->inactive.load(std::memory_order_relaxed) && ptr->isExplicit == isExplicit) { + bool expected = true; + if (ptr->inactive.compare_exchange_strong(expected, /* desired */ false, std::memory_order_acquire, + std::memory_order_relaxed)) { + // We caught one! It's been marked as activated, the caller can have it + recycled = true; + return ptr; + } + } + } + + recycled = false; + return add_producer( + isExplicit ? 
static_cast(create(this)) : create( + this)); + } + + ProducerBase *add_producer(ProducerBase *producer) { + // Handle failed memory allocation + if (producer == nullptr) { + return nullptr; + } + + producerCount.fetch_add(1, std::memory_order_relaxed); + + // Add it to the lock-free list + auto prevTail = producerListTail.load(std::memory_order_relaxed); + do { + producer->next = prevTail; + } while (!producerListTail.compare_exchange_weak(prevTail, producer, std::memory_order_release, + std::memory_order_relaxed)); + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + if (producer->isExplicit) { + auto prevTailExplicit = explicitProducers.load(std::memory_order_relaxed); + do { + static_cast(producer)->nextExplicitProducer = prevTailExplicit; + } while (!explicitProducers.compare_exchange_weak(prevTailExplicit, static_cast(producer), std::memory_order_release, std::memory_order_relaxed)); + } + else { + auto prevTailImplicit = implicitProducers.load(std::memory_order_relaxed); + do { + static_cast(producer)->nextImplicitProducer = prevTailImplicit; + } while (!implicitProducers.compare_exchange_weak(prevTailImplicit, static_cast(producer), std::memory_order_release, std::memory_order_relaxed)); + } +#endif + + return producer; + } + + void reown_producers() { + // After another instance is moved-into/swapped-with this one, all the + // producers we stole still think their parents are the other queue. + // So fix them up! + for (auto ptr = producerListTail.load(std::memory_order_relaxed); ptr != nullptr; ptr = ptr->next_prod()) { + ptr->parent = this; + } + } + + + ////////////////////////////////// + // Implicit producer hash + ////////////////////////////////// + + struct ImplicitProducerKVP { + std::atomic key; + ImplicitProducer *value; // No need for atomicity since it's only read by the thread that sets it in the first place + + ImplicitProducerKVP() : value(nullptr) {} + + ImplicitProducerKVP(ImplicitProducerKVP &&other) + + MOODYCAMEL_NOEXCEPT + { + key.store(other.key.load(std::memory_order_relaxed), std::memory_order_relaxed); + value = other.value; + } + + inline ImplicitProducerKVP &operator=(ImplicitProducerKVP &&other) + + MOODYCAMEL_NOEXCEPT + { + swap(other); + return *this; + } + + inline void swap(ImplicitProducerKVP &other) + + MOODYCAMEL_NOEXCEPT + { + if (this != &other) { + details::swap_relaxed(key, other.key); + std::swap(value, other.value); + } + } + }; + + template + friend void moodycamel::swap(typename ConcurrentQueue::ImplicitProducerKVP &, + typename ConcurrentQueue::ImplicitProducerKVP &) + + MOODYCAMEL_NOEXCEPT; + + struct ImplicitProducerHash { + size_t capacity; + ImplicitProducerKVP *entries; + ImplicitProducerHash *prev; + }; + + inline void populate_initial_implicit_producer_hash() { + MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) { + return; + } else { + implicitProducerHashCount.store(0, std::memory_order_relaxed); + auto hash = &initialImplicitProducerHash; + hash->capacity = INITIAL_IMPLICIT_PRODUCER_HASH_SIZE; + hash->entries = &initialImplicitProducerHashEntries[0]; + for (size_t i = 0; i != INITIAL_IMPLICIT_PRODUCER_HASH_SIZE; ++i) { + initialImplicitProducerHashEntries[i].key.store(details::invalid_thread_id, + std::memory_order_relaxed); + } + hash->prev = nullptr; + implicitProducerHash.store(hash, std::memory_order_relaxed); + } + } + + void swap_implicit_producer_hashes(ConcurrentQueue &other) { + MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) { + return; + } else { + // Swap (assumes our implicit producer 
hash is initialized) + initialImplicitProducerHashEntries.swap(other.initialImplicitProducerHashEntries); + initialImplicitProducerHash.entries = &initialImplicitProducerHashEntries[0]; + other.initialImplicitProducerHash.entries = &other.initialImplicitProducerHashEntries[0]; + + details::swap_relaxed(implicitProducerHashCount, other.implicitProducerHashCount); + + details::swap_relaxed(implicitProducerHash, other.implicitProducerHash); + if (implicitProducerHash.load(std::memory_order_relaxed) == &other.initialImplicitProducerHash) { + implicitProducerHash.store(&initialImplicitProducerHash, std::memory_order_relaxed); + } else { + ImplicitProducerHash *hash; + for (hash = implicitProducerHash.load(std::memory_order_relaxed); + hash->prev != &other.initialImplicitProducerHash; hash = hash->prev) { + continue; + } + hash->prev = &initialImplicitProducerHash; + } + if (other.implicitProducerHash.load(std::memory_order_relaxed) == &initialImplicitProducerHash) { + other.implicitProducerHash.store(&other.initialImplicitProducerHash, std::memory_order_relaxed); + } else { + ImplicitProducerHash *hash; + for (hash = other.implicitProducerHash.load(std::memory_order_relaxed); + hash->prev != &initialImplicitProducerHash; hash = hash->prev) { + continue; + } + hash->prev = &other.initialImplicitProducerHash; + } + } + } + + // Only fails (returns nullptr) if memory allocation fails + ImplicitProducer *get_or_add_implicit_producer() { + // Note that since the data is essentially thread-local (key is thread ID), + // there's a reduced need for fences (memory ordering is already consistent + // for any individual thread), except for the current table itself. + + // Start by looking for the thread ID in the current and all previous hash tables. + // If it's not found, it must not be in there yet, since this same thread would + // have added it previously to one of the tables that we traversed. + + // Code and algorithm adapted from http://preshing.com/20130605/the-worlds-simplest-lock-free-hash-table + +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH + debug::DebugLock lock(implicitProdMutex); +#endif + + auto id = details::thread_id(); + auto hashedId = details::hash_thread_id(id); + + auto mainHash = implicitProducerHash.load(std::memory_order_acquire); + assert(mainHash != nullptr); // silence clang-tidy and MSVC warnings (hash cannot be null) + for (auto hash = mainHash; hash != nullptr; hash = hash->prev) { + // Look for the id in this hash + auto index = hashedId; + while (true) { // Not an infinite loop because at least one slot is free in the hash table + index &= hash->capacity - 1; + + auto probedKey = hash->entries[index].key.load(std::memory_order_relaxed); + if (probedKey == id) { + // Found it! If we had to search several hashes deep, though, we should lazily add it + // to the current main hash table to avoid the extended search next time. + // Note there's guaranteed to be room in the current hash table since every subsequent + // table implicitly reserves space for all previous tables (there's only one + // implicitProducerHashCount). 
+ auto value = hash->entries[index].value; + if (hash != mainHash) { + index = hashedId; + while (true) { + index &= mainHash->capacity - 1; + probedKey = mainHash->entries[index].key.load(std::memory_order_relaxed); + auto empty = details::invalid_thread_id; +#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED + auto reusable = details::invalid_thread_id2; + if ((probedKey == empty && mainHash->entries[index].key.compare_exchange_strong(empty, id, std::memory_order_relaxed, std::memory_order_relaxed)) || + (probedKey == reusable && mainHash->entries[index].key.compare_exchange_strong(reusable, id, std::memory_order_acquire, std::memory_order_acquire))) { +#else + if ((probedKey == empty && + mainHash->entries[index].key.compare_exchange_strong(empty, id, + std::memory_order_relaxed, + std::memory_order_relaxed))) { +#endif + mainHash->entries[index].value = value; + break; + } + ++index; + } + } + + return value; + } + if (probedKey == details::invalid_thread_id) { + break; // Not in this hash table + } + ++index; + } + } + + // Insert! + auto newCount = 1 + implicitProducerHashCount.fetch_add(1, std::memory_order_relaxed); + while (true) { + // NOLINTNEXTLINE(clang-analyzer-core.NullDereference) + if (newCount >= (mainHash->capacity >> 1) && + !implicitProducerHashResizeInProgress.test_and_set(std::memory_order_acquire)) { + // We've acquired the resize lock, try to allocate a bigger hash table. + // Note the acquire fence synchronizes with the release fence at the end of this block, and hence when + // we reload implicitProducerHash it must be the most recent version (it only gets changed within this + // locked block). + mainHash = implicitProducerHash.load(std::memory_order_acquire); + if (newCount >= (mainHash->capacity >> 1)) { + auto newCapacity = mainHash->capacity << 1; + while (newCount >= (newCapacity >> 1)) { + newCapacity <<= 1; + } + auto raw = static_cast((Traits::malloc)( + sizeof(ImplicitProducerHash) + std::alignment_of::value - 1 + + sizeof(ImplicitProducerKVP) * newCapacity)); + if (raw == nullptr) { + // Allocation failed + implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed); + implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed); + return nullptr; + } + + auto newHash = new(raw) ImplicitProducerHash; + newHash->capacity = static_cast(newCapacity); + newHash->entries = reinterpret_cast(details::align_for( + raw + sizeof(ImplicitProducerHash))); + for (size_t i = 0; i != newCapacity; ++i) { + new(newHash->entries + i) ImplicitProducerKVP; + newHash->entries[i].key.store(details::invalid_thread_id, std::memory_order_relaxed); + } + newHash->prev = mainHash; + implicitProducerHash.store(newHash, std::memory_order_release); + implicitProducerHashResizeInProgress.clear(std::memory_order_release); + mainHash = newHash; + } else { + implicitProducerHashResizeInProgress.clear(std::memory_order_release); + } + } + + // If it's < three-quarters full, add to the old one anyway so that we don't have to wait for the next table + // to finish being allocated by another thread (and if we just finished allocating above, the condition will + // always be true) + if (newCount < (mainHash->capacity >> 1) + (mainHash->capacity >> 2)) { + bool recycled; + auto producer = static_cast(recycle_or_create_producer(false, recycled)); + if (producer == nullptr) { + implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed); + return nullptr; + } + if (recycled) { + implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed); + } + +#ifdef 
MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED + producer->threadExitListener.callback = &ConcurrentQueue::implicit_producer_thread_exited_callback; + producer->threadExitListener.userData = producer; + details::ThreadExitNotifier::subscribe(&producer->threadExitListener); +#endif + + auto index = hashedId; + while (true) { + index &= mainHash->capacity - 1; + auto probedKey = mainHash->entries[index].key.load(std::memory_order_relaxed); + + auto empty = details::invalid_thread_id; +#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED + auto reusable = details::invalid_thread_id2; + if ((probedKey == empty && mainHash->entries[index].key.compare_exchange_strong(empty, id, std::memory_order_relaxed, std::memory_order_relaxed)) || + (probedKey == reusable && mainHash->entries[index].key.compare_exchange_strong(reusable, id, std::memory_order_acquire, std::memory_order_acquire))) { +#else + if ((probedKey == empty && + mainHash->entries[index].key.compare_exchange_strong(empty, id, std::memory_order_relaxed, + std::memory_order_relaxed))) { +#endif + mainHash->entries[index].value = producer; + break; + } + ++index; + } + return producer; + } + + // Hmm, the old hash is quite full and somebody else is busy allocating a new one. + // We need to wait for the allocating thread to finish (if it succeeds, we add, if not, + // we try to allocate ourselves). + mainHash = implicitProducerHash.load(std::memory_order_acquire); + } + } + +#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED + void implicit_producer_thread_exited(ImplicitProducer* producer) + { + // Remove from thread exit listeners + details::ThreadExitNotifier::unsubscribe(&producer->threadExitListener); + + // Remove from hash +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH + debug::DebugLock lock(implicitProdMutex); +#endif + auto hash = implicitProducerHash.load(std::memory_order_acquire); + assert(hash != nullptr); // The thread exit listener is only registered if we were added to a hash in the first place + auto id = details::thread_id(); + auto hashedId = details::hash_thread_id(id); + details::thread_id_t probedKey; + + // We need to traverse all the hashes just in case other threads aren't on the current one yet and are + // trying to add an entry thinking there's a free slot (because they reused a producer) + for (; hash != nullptr; hash = hash->prev) { + auto index = hashedId; + do { + index &= hash->capacity - 1; + probedKey = hash->entries[index].key.load(std::memory_order_relaxed); + if (probedKey == id) { + hash->entries[index].key.store(details::invalid_thread_id2, std::memory_order_release); + break; + } + ++index; + } while (probedKey != details::invalid_thread_id); // Can happen if the hash has changed but we weren't put back in it yet, or if we weren't added to this hash in the first place + } + + // Mark the queue as being recyclable + producer->inactive.store(true, std::memory_order_release); + } + + static void implicit_producer_thread_exited_callback(void* userData) + { + auto producer = static_cast(userData); + auto queue = producer->parent; + queue->implicit_producer_thread_exited(producer); + } +#endif + + ////////////////////////////////// + // Utility functions + ////////////////////////////////// + + template + static inline void *aligned_malloc(size_t size) { + MOODYCAMEL_CONSTEXPR_IF (std::alignment_of::value <= + std::alignment_of::value) return (Traits::malloc)(size); + else { + size_t alignment = std::alignment_of::value; + void *raw = (Traits::malloc)(size + alignment - 1 + sizeof(void *)); + if (!raw) + return nullptr; + 
char *ptr = details::align_for(reinterpret_cast(raw) + sizeof(void *)); + *(reinterpret_cast(ptr) - 1) = raw; + return ptr; + } + } + + template + static inline void aligned_free(void *ptr) { + MOODYCAMEL_CONSTEXPR_IF (std::alignment_of::value <= + std::alignment_of::value) return (Traits::free)(ptr); + else + (Traits::free)(ptr ? *(reinterpret_cast(ptr) - 1) : nullptr); + } + + template + static inline U *create_array(size_t count) { + assert(count > 0); + U *p = static_cast(aligned_malloc(sizeof(U) * count)); + if (p == nullptr) + return nullptr; + + for (size_t i = 0; i != count; ++i) + new(p + i) U(); + return p; + } + + template + static inline void destroy_array(U *p, size_t count) { + if (p != nullptr) { + assert(count > 0); + for (size_t i = count; i != 0;) + (p + --i)->~U(); + } + aligned_free(p); + } + + template + static inline U *create() { + void *p = aligned_malloc(sizeof(U)); + return p != nullptr ? new(p) U : nullptr; + } + + template + static inline U *create(A1 &&a1) { + void *p = aligned_malloc(sizeof(U)); + return p != nullptr ? new(p) U(std::forward(a1)) : nullptr; + } + + template + static inline void destroy(U *p) { + if (p != nullptr) + p->~U(); + aligned_free(p); + } + + private: + std::atomic producerListTail; + std::atomic producerCount; + + std::atomic initialBlockPoolIndex; + Block *initialBlockPool; + size_t initialBlockPoolSize; + +#ifndef MCDBGQ_USEDEBUGFREELIST + FreeList freeList; +#else + debug::DebugFreeList freeList; +#endif + + std::atomic implicitProducerHash; + std::atomic implicitProducerHashCount; // Number of slots logically used + ImplicitProducerHash initialImplicitProducerHash; + std::array initialImplicitProducerHashEntries; + std::atomic_flag implicitProducerHashResizeInProgress; + + std::atomic nextExplicitConsumerId; + std::atomic globalExplicitConsumerOffset; + +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH + debug::DebugMutex implicitProdMutex; +#endif + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + std::atomic explicitProducers; + std::atomic implicitProducers; +#endif + }; + + + template + ProducerToken::ProducerToken(ConcurrentQueue &queue) + : producer(queue.recycle_or_create_producer(true)) { + if (producer != nullptr) { + producer->token = this; + } + } + + template + ProducerToken::ProducerToken(BlockingConcurrentQueue &queue) + : producer(reinterpret_cast *>(&queue)->recycle_or_create_producer(true)) { + if (producer != nullptr) { + producer->token = this; + } + } + + template + ConsumerToken::ConsumerToken(ConcurrentQueue &queue) + : itemsConsumedFromCurrent(0), currentProducer(nullptr), desiredProducer(nullptr) { + initialOffset = queue.nextExplicitConsumerId.fetch_add(1, std::memory_order_release); + lastKnownGlobalOffset = static_cast(-1); + } + + template + ConsumerToken::ConsumerToken(BlockingConcurrentQueue &queue) + : itemsConsumedFromCurrent(0), currentProducer(nullptr), desiredProducer(nullptr) { + initialOffset = reinterpret_cast *>(&queue)->nextExplicitConsumerId.fetch_add(1, + std::memory_order_release); + lastKnownGlobalOffset = static_cast(-1); + } + + template + inline void swap(ConcurrentQueue &a, ConcurrentQueue &b) + + MOODYCAMEL_NOEXCEPT { + a. + swap(b); +} + +inline void swap(ProducerToken &a, ProducerToken &b) + +MOODYCAMEL_NOEXCEPT +{ +a. +swap(b); +} + +inline void swap(ConsumerToken &a, ConsumerToken &b) + +MOODYCAMEL_NOEXCEPT +{ +a. +swap(b); +} + +template +inline void swap(typename ConcurrentQueue::ImplicitProducerKVP &a, + typename ConcurrentQueue::ImplicitProducerKVP &b) + +MOODYCAMEL_NOEXCEPT +{ +a. 
+swap(b); +} + +} + +#if defined(_MSC_VER) && (!defined(_HAS_CXX17) || !_HAS_CXX17) +#pragma warning(pop) +#endif + +#if defined(__GNUC__) && !defined(__INTEL_COMPILER) +#pragma GCC diagnostic pop +#endif diff --git a/c_src/eLfq/eLfq.cc b/c_src/eLfq/eLfq.cc new file mode 100644 index 0000000..c5c9017 --- /dev/null +++ b/c_src/eLfq/eLfq.cc @@ -0,0 +1,3 @@ +#include "erl_nif.h" + +#include "concurrentqueue.h" diff --git a/c_src/eLfq/rebar.config b/c_src/eLfq/rebar.config new file mode 100644 index 0000000..66b7907 --- /dev/null +++ b/c_src/eLfq/rebar.config @@ -0,0 +1,3 @@ +{port_specs, [ + {"../../priv/eLfq.so", ["*.cc"]} +]}. \ No newline at end of file diff --git a/c_src/eNpc b/c_src/eNpc new file mode 100644 index 0000000000000000000000000000000000000000..4e643d5a6509797e7e60abff36560667ca90a6b5 GIT binary patch literal 25590 zcmaI71F$g7vL(80+s3zT+qP}nwr$(CZQHi(Z+q|epY!HU#GILXJ1Vj}qSnfe%81U) z=vqZgNay0@NM~qaOJ`#1N?_t-4;Aa7{@u+XLrk%5nH5GZ$42%YW>qi1f(n7Y!qBCw^_tJ4nV%KJDgJOSt zITLTIYi}X406N6Shv)zmJ_ul}Gog*}H*_~A)MFXs$&6CF0sm{g|99w+|DWsqj{qqL zLLusd{DTAo0Qi3p3{4Dde3Z4Ux0Eq>|4JN^^yFx(V`_zrlm;x5xpd0*(37<3WQs`A zU23{xC>;*VUXnF1jZuJtps)_}n_rT!PFyy(VzXI@P^L+oR8}Ja4M;_u14CHghNI)@ zUjw=W{UmkB>#|SM`JMgt{dyMq*&9;_#y`dVnD5EwaV*3jd|(xi6u_i0#ugS^##U>} zw>KJ+F~v=2-_EA3JXKeAHFhc_OS=isWJzeSWTPZ=%u_0wE>>WK+S+ScSVIi%2j*5$ zT{@QJlc>;Cb)ggySagtK*@ty92s^ck32sE9AqiSrQHedFFE>lkEaAW{mH)xfuXHsn z5!4vSjV*oIE3ts#F+uvcDIgk6jo;k z!Q+PyhqEr(?iR-Y1oD64Wgr6?X0cA$9{gh{OGr-UW@MgeHcDKb*%J zwcEwk(^bnrWk^r(K|fa4Y-1>g!OgA+ih1PN_r+^;7izVtwANmAbOr| z5xsOU7Df7@)3OJ!h+6!2`(7M$2vXN~wy~(bBL@=(y z8g_V!%iy+^?Zy9h{R2#^4Yw)x@Lam)2mHEQrgg{0$Wxo^YkeS6^xf;GYp>PAhW5y^ z7fq&|>_i)=VpiMx{5sxEhG1?i_2+m9e}+|d4`Q@7wo$XL?GF_u(XOGS}Vs{mD*OUeAkpcgf`-s3hjB{VGfdyN%a6O55|Ik}Wly zH=&dL*YIvw&V_hF7Pufd7qiXmAy4;1dMTfYLnS{|LRHkgvwDRVOJH_o{@u%uW1oF@ zQA*eEsja1=BA`7L^r(%6x76EKlVP&5_9UT+i#iy7)!B0bL?t6JoAFVZS!1FylSqlU z=oGQcGDA>Am5gEA1k>Y_{4j!e;^9r6IqnNrP>ciG#zl7vPqlGx8dR2X)xpx@YfO*^X zuM2boaz3~w-N!3P+i%oQ(p;|A|ed1gpi2Jh4Yuk`eDF2#CVDG5tW}jA-(8A2mp)z@D%S3Nf$34pH+p>#3A0DB)3%>WswFofrA>@;5$e%MRA$6j~s*AJ)nqx zoBBc(6KNJo6Z9t*+2VWTj(OVC#b7ZT(nDJ0$>kpeRD1}q0+PBrka`Avxnn5Tku;aQ zm=sG6wRGl5SBkvZ^$B2ojR{m}->%-n6T=2cNA$IBChcrmvunvAVTQb!tKvdk>Wz@m zi0>2al~CeHR4;`XR>-yUmB;28f?c)N*Nv0N8K#W~;{wfGi)yr2L2+Vtr=#8o?@lPx%xhrRQeT1H$y=~4ZQ9f=>rcy3=Hf8} zccp>WrW7pjJ@;CA0JcvDiw%WE9|oNx^A>g}=}zt?Wl5o-UaXZj=@q=1W4KM~oH;gS z$#?CQx^>u7C@&vpoXkjXES%EFjcwuCSx+y|b>$C#Hz9bPBm0S2WuVDlRL#X*eTJmj zws|}Qwv;HPpufhPOhs(k+F8VcG>~T#^uP zn?&p>|0IvF*R#ru9VZ1NWfX2jobfZa}!iwX&&RLk_9o@Fv054PJRk^hR+!eaDN3?j89GqF=PG&-H2;v?x;5 zTv}#=jrf!$RP!q;6x8D_t2u;f5v%A{z+J_5&YZl~kgg#5jKQI}d&-Khze5&9pW%fR zY7&%KiC1nsFf!h*9hw4P7a4}DsvUw9*G2qcLTD;@4L~2H8``8vv}O|( z#rGPhT<~&hhfc$}g6FC>sWf*+;a?`u^}r`SE`cEs?wzsb4u~lEFE#uuuwklV@=Teh zX+^kF@=UEL`lBF;pkxsJ^UbXXLgq;nuq!(!hrW8X@e^`pHlaGb_-TR~0-&KpQMtq; zhQ5f2`kZ!DR%!7^v;H&XjD0+4GiHNqF0F3DT!l3Y5f_7*eGU-x&=y?v$Wf7=xrP0w zH?8M`RCaQI7SoArVrU^+|H>i-eU~hFA{bfadPCM{P|{j4C&|gFS*JxW1A69OU!P-o zGypiMK1WYm#;q08#Qn&q(y})(Yu3TB=x`>B{j9m}6n0cL>HlV*XPj69R+4JgH9BF@ zMA267vj~seNEKG@&2*so(8SpaOKfSIsS3FYyEu~rV(7AKB5?V6p>g^qo5-9DJxxZ5tljNP|ZH!@o}Oea!bt9OznCW0cTF>Xv~ z8kV9Z(Uih4k)JkkGh!qfAUlrO(Yff++*u0O< zn`XfPJI_k&AgCiOfk{!bG!xHK*iE&U;Kpzd_-dQ+E>P}$`mTg+LS`_k7d_N%N@O%H zA0|6(Zavb4x@IbyR-{uXC35iDg-P;p;lx5PV`H(QY3buTyEo0n%VSw(U_nVFO#n#m z)AX*#6GjYRNP%HmfmK{EjV{|sUW zBO#JOH)Ar)F|1ma#DaMV%O90mVR-kaeN-2LfM#^D8iqx%J)yjW~{R#N@qs5 zsF#k;Il5D2n^<^TC{0_gmS7fZolH5$y%jxYHa89{Ijgm!)>$n>S?AqE<-`Z1AmA5H zot_~`Kc69eJ8W78?WN(uGq&_>n+PQ=!2C>|Ig{G8Bec#1t)n}XgwX^6NZ}Cl-vbaJ 
z88!oTfn&okTrId)4bf;1Qfd!2lEn#pfC}boK*n)-r`QS~j3lkApESuPwl7nhqY~}G zbo6q}j8oCzwI5oCg$-PLza3_i2^UmURdMW@Wh?T+32cO>n*;x7hw1SbGsLv9U@~9^ zzS2Ewloe2;;as!yZXciroD$7vx3QxonW~3QPo%?y8DE+udD}P8{dK~B3jRz-)TU7l z?qM8(Js;01I40$d!lnyHXyObIE4@cM@xyHoryI>8HblKD<}QdQCa*1BmT+*2rM^t8 zQ8q>)u<{18i-3r7vGfL|4Qx!&THlL?(1-wj)&~7~h7kbw6kKp!Z!i@#E$%^k--(RWcH7SBu0DT24oo=f*Yy*3sMfx zjp6P7pqnv-Dfj{EXpm!bkxL^2?(`1ploQnEbiiAG$`b{0aIfrI)B^>*2x!F!QdwXk zo}{q)`s=wTKyWe+r4XAUh8h?smA@R7DJS`}mxd%gvzc)e&PJJi4FrQ{JYF$`>_M)s z*!I6^-S@a<(Th}Du9sCN4$(Qfv2|~So3*!5iozY492~=u#7FKmg@x^T%~k=qr$(yP z7Giqv^1|q3!lnLbYtG;yF1(Sd(+2s6Ggy(z)MdbA9&@C^Fg@8%17B~}Z%433+QbAXe3`KU(!b)gFx>R7)xT3>MITEEaQ(`o@)fXB~iij81bZ;goM@A9`Y zWW=~d4Bv%QIR@i*qJ3U;Cue}ADkUL z9!eX!X9uwpnJ3pL;h}SRLt_^tI-|>~f-Db!+lw5(QOv2{RreHd?igwd=tW0!p=nUo z%KtdL7l@B@iFm0_%P0s|$5)iD^#?p7?Cu)jd~?M0uJFQ$)-C&LZ&h-(>h5yg+Y;yk z63swp0W4gVtkW7?3z$=Nj(mw1gxos1$DKC-V8+Jbb+!r`p+WN$v1T@U_%<}s4mIT| z5K^}vD%$Ta)sW*RVhvSK5*wtMnqWH%F~-5tjlgPb90|kKOjSeB8s5E6DmnC$VvTi$U-tP1Tw;4`r-Jb_^qYB^&RdCu(i%msN36 zIl>lU)Ly0ma|j5UF5Dc35iKzTjW?=)GF)-D2M5s<7Z087kvY<8fXAaH(-|`N&Vtbv zdZrR;k;V`+8=-^g2xuuC>uWF=7O)@Y>@ z9=&t1k&=!rP~_r<*Q<^Vx00VW(MN&olR8=(?{67@(y82(OK|W37L7lQ`cjbp;djF{ zFmF_Z$1A<46oi1^*3#DU- zpTdoX_$mFH$r-HLP^a?oDKe6WC=!E?PGI_tr~p=YNf(jz4GyyD0C`rh4$5PwdBi3r zjM^Up^`|R63PoOnrW8=S`oR)t3o9=)H7$PNvp#&@uy4pN%#q{mv|rmKgSx`v7|!?# zOC`_LLS!XmVcBgx5c^j6v5o7Cws{=GoBrs&YoH8(7rblOW}X-24vnecpDdJFocvJF zj2xayP&g8^C=`AbYwG~*C9r)P>q1B=erxRtzwSol=s8@2)kg{x*{?b1DZ~=!Wd(xO zmjXs24a%I7|F{b-?0x%kO3>|_O8pb3ek4n{r5t zuzhh49*&mtTN~caRV%SVp(gYVjyMe&v$sQf8`APUv~wsc&EaM^NfA?^Y6ubyKRzGM z+{FB4a9Xg5RkD;4gCB}}y!?s# zW5s?+aJdbvb7-(iG7w$cod)VcDQNs))c(-2b10zxHIB)a%P(X%8vgLMox@lB08X4g zkV9_o1i!FfxU@A2+#VjbbL9>4CH`0h=LeK%2Y4!;6v7|Gp8QKrXocHjSLWWO1%!w# zM*+!cGaBC~=j7jhqQ|if7`4^##nQu8;trnR9C(b`NfJ%qP5~z!H3s}SVua-f!<4RN zfT#AIsVe86y6mxdFc-LnD;`|dIqcLT?4Ahqn^d!iG&efDgyn~2L;~C5AT4KL44VCf zvyB%}e6|w&3PZ*%b_56Jl)3%kJXJh7N?#p1kpXd2wtr=IoH5^o!H`JU{Vp^^=x(`E_^YXrk-4{N#FZ?K}i84YLBo5a0v_kToDCXFmpzs%xX@oEEgEXu!IOsKS z%&u#=>V@c)vQkg6`+AQe*5XSY7ZI^8^XC|-+zxQL3t+PY080k|_6umP0CfKTH)|?2 zWYKGw3d`>uof+#3bT+83-=kn=2+J>oRo4)N$q#;n$oB8bX8%LxkRLSsUf3H~thp!b zg0p%zV!K{`El0TrfC_rd)+Q6Ftz3-Zdw;%(rg3-UN*+147KYDNJ z&5M5xLr=DQR}KJdUZfu!&JQ#iKQIJ8mqEEtQfQ7-#~UoF4mA%%bVLm6_YV=TX~3sL z^OnshzEQ`jjxQ2oUT3?@?P!^Lty@~&Oiyee1L^b~&JR{FFN_th8t{)520X&D9#MIp z{hPx+9^ma1?E{Td&9@-S8{Oq!p9Rj-xyZWjR`!d!@H*xnRB?SHxwtpcBXS*mp}05M zBfYME(HOs=DdMC)nQ*%NVK+v5zJNWhFBb@J6d=DlSNh4$e#AeYV2-v%!e4+kdRU~M zFL9u;=M=kDr-d!~?#LX}4v+AY^L5>9{N~JrDA)l4OYc*p7CsK!pzRV|Th$}0~v3<@K z^EbVM7dI**ZzQTdU~M~h9$#25w=GlO^ATx+ABz!3YYm&TFPB?ont5<*@oeRL*eiE3 zmu|}6u@!sGAGm6Lm@io>eXSok@_k7zf4h*Ku03wc-d}P9-d}bT-d%XP`f@*PeRaxb z>!QL=*Gk6D*4Xf~^wRj`^PJ^~2Y+`D^oR#5_F;Uz+--chWZF`OvCUE z59i9u%hdC0#J)t1{ABfO9{Tdr0w*fc->NVX`7#4ohayS86+WszZd|X*M}IT>|9+ji zSw4vc;BJGn%DfUUygsL()*b!)VHyAZXQSr#*GC;(8AYP`CIc@Y8xseE1mb+$Uphi) zmXbAt)gS7QA%U0#5R$N(!Z1u{?I8gaB_#%Zlm#MF;xMQPf`|eN@PL0i#@mYg_3Yl$ z+*!8Rp0CT*s#jg*hcYZJp)GPC5MESyn!dHk`wm8wd4943hm1A)U3K@WEFB z7~sJ$5ftrY0qF)^EuLC)j!SfP(Aghy*Mz7nOmc6XVCAb_Z9t2rNQ*w zHT6%>7qd3>@#xftB^<9DaC&+Y;ZWsCcGE)wxa(R@w_Id}ZqdkQPZaOp(q`=BwA6<^!R4VsIQ|#D^{1|qD>j(a z8Z66LJnB`J7!TkUE3!_vUZO2hg3J5?LGt-qnOnr9`9PqSKZxZ4Ks$>-Jg3{VqWeX` zJ^7~0ZV94buM=Jxr@%T)Fp$Lby!yww2LDO?chrGskfzx!W+q~*gUo)z*0&uT02eY~op=P2qi{!nimm z^f)DZnV&}Xfb91a1stzk2(k`*QE$B6!dLm$ea+YK=6s~Xg6qE!+2eYv+kn{_-UU!2 zYb|3T_OSaXN@*cDMzw4uhN2gP8%wyEJd@Jx0{z&QpQJUM{bmvLkO#0TQSQ`;Q<~Fw zv`o)~C)G^q26k|A(}|blhFMVz=@}5MJpMjW270kA>aO&}zzr~aDdLGI5ggPB)AoY! 
z`5ToV)7409im42h0$k?VztQRo3j z(9n+PcOvYsC@{lO?UhW4M4;x6LYsuOAr;^^#wb$78T7c3;*wq!=u3Y#DB;@qlu#sh zF-+$HNVRdM-XkXGBmV#j8e@>;*i*%HoVQU<5Jze_aD&)954G3W&7Jx zsz)Z-)5IRqpK~P0dG|6>ZfM4{cNZ>yt@Bs-A~A)jTE5NPfhb_TZ9)84xox6#_Cu3?8Q@0||yY5OU3 zFy58Ux$gAgsKT>`?1B>~Lyb9Ge*ehw}ku1}JAEmwXv6ryXPwOrK{-T%=2#m;@X) zCr_pws2B9YjEP3_Nl>;0TUzi0TUzkqi$xWm?;S# zdOIpP%!p){?u}UQyP}bhd&B%^ULN@!bJmq& zX0O5T%#Hkz-aRcjR=DyMsiN1b$2TLBCiL6Nt6(MY6h&bNQ-3~r;su4r933TP<8X=FRn~!03lAX{y8!IxW zB=UKETrcpAQrs@x4+mS@iCQH2iT-yTA&$kc9M_2OSD{!a|1H!7YQa5EUU%rnyLc-W z)~6`++Y6x&^o0974>1A4iDYkp;ypFdO^}%AU?NAMD;B!XS#ylr1;4I>{i2(lN0RN+ zXC!uN^0~OYkf809BcNh*>3aYRj!IqTU`Ek8!yVP=i|xv1uaMT z9T(oM_BOAH@M>9+xS!MW=Q2+XIu@XSCI7pCuGCdJJX^A$!{7Cn_toUuE4-Hn(c3+` zo`boWuJOAgGLez#m1@5Gzlnd{h+f!!nTN=`=sb6~Q(c-5KAgI`3aaH=iC%n10RD2F zgvQ}knb9g^zBRSbH!t&69qtB?`=7{KInuJv=G<6haaKBIeyev?V{A}leq42zX^%;H zY7?oolDdP2R}bN+MN-KnkbDW8X2M8bToRocbXB& zya8cn`=|+V5fe~}OqvJ8uWP?(&^TbWoZ=X2H~F|i6Vcugr?k_=?NYX{H#gm3ExADD za;>3-y<&e-1>WBKaA*9b% zSK^sBvzDUWRHw&PA(ApiSI&;zJUd6%W(}#8h*-1c&-!6lKv`S1(J<(jVMS5j!Ph*q zT+FjjY*_v0)R*0>g_SB&U{BH#5qX`lT1N(U0&SOMnyJnL^t=0JeEah;Vb#6PCU_M^ zVv7M@uGlF~?2xq-tXUea!(aIqsJY%ZqVE`)@=ar%x<7S_QZi|zW_)-ytS!sdb9%2# zu}hC_tA891YC!**FcxpbUXIl)=ysfG#kpSR;rtYI`^=dBZG5sS(h&jt8@qIDTEbJ# zUXJzlvdLp`6Vb9M*1Gy7nH=r2bmvttUuk;Bu{&%H+GWjnY$-lBqBbcibAs(>r!(j9 z$4qQ_hSgbG>p_03mW$Zc?zE82in*(Y)R@9>rcXTvj+BQ ziqBxH4)Kz4@Jj9&K6nfTEHTYqbLZ9b^~kci2iR0Yn^kVBbNS`Nhm@{%qRd{)$+L5qJ*|z1#N;~_kKaumqrSgVJ+oLq@Vz0s+8kIp^kQER#13{65IpsA)7oN<2R znyzYD&_qFSanCq;nwXu>R9YGq6=DS+ii<6joR-8j@8+mhQ{AbqGQ0omd<4E=wl*?l zGbSpYHqeT=H0M~_+rRXYBbwnp+pyD4tw*l$W{D$zczjDLjM8PJJTPokdno)^FL1)n#m-BM z+!v@jSeiuWE)x#@1@D?le9;`^{bX@kDXZ-lGKIfolihu=^n9qSpG4J~`T8mq)1^|f zX~t%#c(e3i9d~2(cx&>Kb=;gQ{kRr`rqMa_q!Kb^AuScQj zTh(LOa=$sAYk_7g?XjhQ*`yCqRlpOu)$aDrNt!7IiOarjtkX;p)BBd5f&T2Cyz@%; zv9&cfh%(o4Xnm|GTZJw!e*b2}5!?&WJBNOx$KCo_nksBPOY{BWbx@gI#N-p?bnYBV zMEUZjoc+YPRA+4A*AQT_nTznIdiQ#Y%$CLASZCk!!`My^SInCaiosGrV;*$_b zQsnKwyK$z=b$aZTlmOq_AN1*||MyGRyp*Mpv|fm-hUdPRFmx$t_! zs~fCc$EhB_^6h!oGL^15IQeX}`SPT5ULx#Rlv#!1cG=9wG3 z5n66|2bD>H)x+jgmy^*PT=mGIZscHoaCyFnlF@?Fo{i8sl#LpFy{^8SpzTf*T37k4 z!Q=FEjP<;|Il|}ZD+f!!=C<~GzWCI}Q2n5$^6}*fs9*G(ty?rxy>PN*7@X#4g)lh1_5jI|@e|;Ro-O*Gha>zt;XB(5 zf^ZY!^1lZ+2R)bGdB}UZ`!vsL^t-FXN;ti2$8$*EG9m0s*5_hg{XMTrJbl~@^^no-E98B8 zn-Xfc3ueO+nSn_T`2LSaK~Dzpume$o>21Uz#4ro^p`RY9P$IY^XaRN%%pl+bP{Lk5 zPqTZ&=UM$z-)@MJ(d{yH0zRj+zas0no7Tk=MQ&hBR*SF(Ugok8TLo^Vbu7$K?=%Zs zNnB6Zp%hqY?=wRP*quxl0?35F82>n-3C6@V{k|gbIeo zPfvEYFEeNV_*7`dNjLCTI$BERJDTcSY6^s`qy-JFrF3LK#IUxo6zA&ppdLsOB0>UX zEln*GGgC7%fcOwm@f*+GZAyr=wM?Cesmw~~s^Kj^jwHSJhn;|-AL6e|bYu)91C&D? 
zG^1oVJY!@;))59aE;=GAk{t#TKB!uPl0HV$>S?u!bH@N-z|`Ouk)fc`ccSz^KUv2g zpdqBiqv0VWpP?f|I{FRR8RU~DGQWRKU|Y=i`Kle@|IXn@W&EbW$didBWge?5hT+Iv zCWRg=Clck1XB2Z?h|H4vO!ye{YVhBZLr)k!f_TJsi|Z2CC9X|iAIm(Vd1Q4`ln}0|MCtz=;Cz&I1$y z0wDXdVt@zpqep{(@Wa3aVA6x5^+QPmy7~ht1rXZ<(e|TG1M2pJzXgKo6W)Uh5TFDH zh$Fxn4k*S0B=S?L!>RhO=;NxxH}l@(iJaK`D0mMu)WQ1;}=5v|KtTkKn?I$v?->50@_vN&4}N6a>x zX>znvH^R!p>dRLKO6ksUd zh{kZMb%zLlBdI&Nn zTE1A;XfsL&b(*a?ov`dwtxa`IHj34)Rk`4om+j*$nYF}IAscZ;f10w`T^G!1G-cAL zNc(8i6|HR3xHTx#^Mq>DT=~|oPJLABGVCBUw?jL!tWKV~S5zO7zLh0vtwi`vEmMhB zXgY`x@mA8LJ^myn)LczjooMegB~xK{ZdKP^*m_pRr{@LKl%gxH}xt5uej+2vhj}h!qW_n$IS-I@w zwS&&kDjmB`tgEfNauIsXdO=@=3#YQvKUY@;FGf4}W`%c2rJM>itI(uPjlgv;HO2B} zcjh#64+x2N8!+-MQO2fPfraVLxvgEXbf~Eo5=yJ|I+xEpPBq^>zlfo&Q7N=^R;yLt zhC>Sv^pd02qz$gC(k~FrV(H1hh#3SbsS?RKxl#;&_nB4`?34Lb3-Jt?_Do z-|!30jfzgn)t_z*o}TiSS?-Y04s9|@@D7QnVP~L`d%}hVU5@4?=?qZOrIo5^t0K{1 z_NjO~(hU2;BO%~pVIFjh;=9&)2lXR;!iLQU9*mN>fRyTt-&NTtfw&v85qFF-`l!W!J;y7fmLs={0DN!(-W1@A)fO;4F0K$EKU5tiW=$byc zggMw=$*DQ#6vtybTgu;l*+O=m;v`&Wk^!0uzzHTkjx8AZuc3vk-r&+BFkYU0^-$}J z1T;SI6*59|6QMH;sLnQ|=a<9$oc|Fdxp^9}({^Zt`Ws1qc5r?fmSFOrePfKkk?SBg zmf-7OYW8#A<%iqmo~{56aNP3-dvuWikF)wlaL&V_ zx4@ySY_ukp+_z0AztS+ zj3gxU^zqq|Ls>EGpoLvxD1-~8JV;=6(7@by5W&`NsMmE7jp+|=8*SPEu7zv5a3|69 z!$$+*@%Ep@fNV{10Y0f4TxgJ)D1ljI!6fvV^Zk``jFGW%-9oG(^fU5-@`K#qAWmv@H`q z{rO@G32Ig$9Jy6Rs!|Ws&1ev0gy!gKM&x;%LBPzY^LJPwJ< z978`Gi$Gnzz+B@a7O?_ zJo@;UZ;^a1K!?AkKZt1)wmJM$k#~87oylLXfX&S7SBBint&ADK{2-ZLTxmaubNVHG zP(=(mpE$O6SJsVZdY&)eGehY;c;ZOc6_0m)}Z4tN{Y*elCaP6xYKX3Q5ek;G9 z=Yofw?~g|@HoH=CDk=N7{#LQ&T`@kJeJbmaCm2_F=hux+%NdXqff#O)+lMt`o?|Jh z>^fzN|KFQcLF?;!@4Ed)J@IWzLI zF*DWne#eRAFi&tpOKwD()XP(7E>Wg5vy23)RktLt7p#>QPXWDy*cTKh(^_!vr4vh$ zyM;2V5WFJY^Nn}$+t9nlETi`mBoe+?q7jrxPhXpjpVZ0PSF{d*Z^x+<*rr^n^bH_y zyaj|?sQraVo!_d%ta|Kx@FqRSxL?%`oOgl?5}o3#=lG!FARIErhy>n6IV9s`a}pV{ zaSq1-=>!W#A=(6vM*iCPD-tQkV*bcuFbz^vMN${Tc$Y`+4iR24UOCqBv~k==7RNO2 zAnz>iDDN=q#I^CY3HWjNiMa8&fA#;4abL+_fnSkdsb862vE4$xazFgL#IF;2<96-+ zel1}~KmWsF9L)cr8~u+a_A-@g@CpV1K>42&g=qgtlQFQc{dWuNqYiD4a)P_pbC>wp zJ4IvN2#!t=5CV+=AwlaVCJqtT(@je#_qb1?&FYqAf86MHYYWu8l+i3d^=3X`wst9w zTQ6TtZ-ctkpj25nE1qIm<3bIknB!onB`SZVZu7ghO9sNVl=W`*`|0P?_j~l|$~L3J zqXI+Ryv8QNNhInAJCtt%Gi2>rk&z3>QNby85J$|DSVj@lh?WGtlo=qfW=EprsVgWk z@tG_fmCcAV;e0EviJbzQED4XCuP%wHrYKv&gglqTl%dQ>i#^eNiKQByUKnd6wNr`3 z5m{QoVy1{I$@My(38mmdT%kG2 zPL`qQs3@^Buj4P2aMZDBBju7J*l7Bl{#VBvA{B+ zp{K*aMsHO7j6Z`VNDyO+x#GxkR-Q9Od8U)eOvK%0S-iLRprg_=F@^%ROe`ZNbC~A9C-{dKRD_+W4}1y zfM^P1k0dt~>O+l%p{J{CDw%}|CtcS1U5zl5Je2D?J3~?m{0zWpChjere&}lex<{FD zLU9GKj6|v9ir%e7vuQxFfgM2CL1Iw6*9X~&yOdxg$dD2-(2KM2>2~3V1S4^nRbsph zWZ9qW;A}+uM(mFjyD0T#d4IxDYRB<4$;pt|Azs(-Q3io(( zxivy^A7H#c!!vz%vwlH60Q?|-6Ppo!bW_ydqCB3Oi~4o?o7nrJsrd1wYy%>pIWZ$f z0zJ@@n*=Uu4xki3Abs1R5D7_^L)r!Ngk2q)CfM1cMw%x15l@D+3-*TSPeI9tP*WV7 zHZjoVa-BiC5EN`XrFNXt0x$&ApEMk31+w#x{}$lnA`?c(9jSquX9@G#Cpvs! zOvFuB$wvV~LM9>`G+@u83D)J?lASiupB56FoA=0M!;ePMm;l-vlGBZ!32$`{%0pZWpqngo<;@NNE3@EK>HKVQb?u!g_xeaLQgT(Zu#nHOB$sn8{^m! z#RqF@)uBRNI~z%hRA=KvP}e1=Ny4P*^YHw3v$<@O%r9sPbfalhf1># zrETa?6H%qjjRWU9fr}!I<2*26dWdl9&^vy(;%@;=qGfhSZ`_Q;sL9l^q1uU|y?@9c zZSx|NP-Ou#WsI8c-b7YqgN(eUFE5neLRK4+?ovO^87Gbn+`{l+d7se|s1;XOHpLDJ zs?m+N4fp6@3AfpL1QOE#*3P2%OiiE*&DD{ci^R=C`j*7~_yEz`5wT_>2M%eth(koC z3x8D;^m17yi~3M}{>nfO!j}z5mdXk2&KiX+jRPn}(SImvF1EbcK2z_IrM^u939)H& z_#uneu`0u*!! z%1J0+>uDd+Da;M7UgfAeK4GO&FhW@u#0%vJq)l$zJ}_K^(2RK4wqP{5W6Yy{4+cU1 zidc)AOGj_9>RohB%I}JrY@>cWP=Gsq&hD1pj%z4Pcs7ewt9I`^1T5w3N$RIA7=s9y zBCaqkl#7ettP8tl#D~ZFLF`+{H~sQffIxN|Y>O^%H`PviYzw^XOeV7pdT1zaM6)(? 
zNb4Tk(2jjC6(de*!Hx0%_4Soeab?@qK#*X8;O_434#8aucXtUcAwVFw1ot3?y9P^w z7Vhru?uC5xy{Ze9P(To-7tiEMV5qhrJ%`MjIUu}xNSpmAh3e!UPdL?dq>gVSpG(2V zx}&T$LnO0d#UdTp;ZI@$R=AFt=Sl0G6vBU1<5whL3})S}M}%7)w*}iZUX?MPG;HY} zm71?)aqK?qiagJOm3!B;q_U7PGF#5~cO&;HXA~WubE+H;n3IK{4kLGj^v0i-9GPEM z*mcqjv8}zJS4H`GDZQb(VG&)MuRX{0@PPf@6(JmxUTLDgvisfW6T5!k6Y{_aKR2%Z z%~M+et&VH29xJEwx7z22vCrr$-aCOa#I3}SApu`6rz`g1vV;pA_}zQu28IuqCp#Li zA0H^kENz#^9acJ@&l072PXc^;yR3U#4rwj&ck+n8;e|U+BxT)MZ}tg~C6%_sRQg@4 zF+0AazUjC<=iQhvn>lT{amupI4mcy?OT<3yIw3_l*p#9+WezxgnAy5;@eng6O^amgB64UXcIPkDJId3S*qC||eYC9j^H}qVc@eX-hw+o-+UThY=^L3ieMj2p) zJLej?(_Op(nL3bfns^8|ZD4n1Ki(HN4nOXB-d8L#r$&G^AC8HWmw9wN&Sg*AL+iLL zg&&7Qi5(wrEY}XQd14Gl+V-39+TC+1{T$U+gRYOSeQs9K8E3G4Qzxc;W|riA_!Plj zOIa~{kB5zSkU2u>D`)k~iYJznS@3+~QP(_mknim9`9|XH#jg(Ryabgqht%d|eHgzb zW6cR*N=u4?=URK zPqb`z&3~w%?s*J;EBW6_s(2Gh zb#z+zX&GQ^={oZ|K{$h>OQMOY2x%&*Q>(4ovA?Laa)HYQy0c|3t=ZdKK7yV+vV)Bj z--Ys)D}TI1Rpwb-t=RTV791di8J;w+kj~DNk-yX9kM~86CdjmNddJ3E1QU<%%??J zH#W)ZSijA5vyTKME98v{{Gc0O zqhAz(2MP#?za?q7S=+h(XH(?{K=j3!YR(4P)mU=#8pmtDc7%2JVhW^$mWCRWAz?rW zMkNtY=Sp9asi~UNcjpeyk%kam7WLvS`XE3ThC2A&MBR16HR(+0 zw4smgy>Klx$i?yG$=`CDf8}oY=BeG)!g9DYljL zN+6Xm`aWouA07N!)3?wXVs>E~4}L!?H;kw*@W-W9KI^1J&U`$?cRSP&amWBRjHp;> zss8e}ehNqk`=W-bF^v>1feLk>67(c2c)mze)u5F}Mdfm8wkHj(6uz+^^JICeQ))?( z#yH5K%Rm&B+9I-yb^#YZ$pM-k*ML$YeBT?Pw=&@l~ z%Wkr%2JpPBGBYZZr-m>_ya{q4$z;MA{0Haky`(`iSAqE?INk4MbT!|GiBazGe|l?@ z;lk0=juJHXTmfc@M{C7Yhy}8yaH7b&cypy`w3EZ+Z{X72%ducc%f4+JYT$c&27}W- zjT6&wn<6+TDtPEP6IA=KmS2F2;zP4$YlR$=TS_@cG{K&bizS$q`uf0o@Qul)G$z2T z;5&mL+7!8rZIEy-`Dt*Q-w$O0lqZDU&(L*c@{HP#$nZ%Ss$Sa7f@zUcCc>HojNWn> z(QocQTy(2RM;H(yrlcaDm;)IB=(Ku@D6dwaY?{Dc zbZKNPbwpj2^>4zS>_Wz-JnUOvUb`CN++W7d;>{;~i=hFFQ>={gs zOfwjas;)`H)+Pvt&a?kK*ki?mGc40>FM2Up)j>OhRK$#Tmg<%8_Vv7Ot}hv`MEhm< zK~aJ9jSUlmnZws&GPJVQsWD_-^r3bq4!m0EN11?zcBZ8c(HpeIM%t^unhQaIQA!aB|S30`f`uB>4EZa0#} zr%*DPHkp_RQ*Y;@0sQNvM8qqs+Hru==a7$@b+4y!*cQa%KEG?Wr{!jpC;gmyHZ{hD zFUJX(KpU{Ni-HEofLFx<x9d}j3lVnTI`LI*Xe1(Qa_AaPJJ^*T?iUMq6vly2@(p$i-xVII@3nB7C$CE`i`-H;Xhl(SF(2f&FM-$b&OQV;sGj-v>r~2)agp_$)uI8OAFCk@o z90BVfogpTuh}?~%KM_}1Oo|>e99@PB&uho{+$PfkRWf~q71mh{K#H~!CB|YK)1>TM z^y$4wG-sbvS&TRaf-5H{CR9*l)$7i;T_x*C1+uEK)Rve-LPWY!RTO7gT20APUwP)# z+M}rjp7`W+0jA7QM>M6fW}320B;kQLnPLDOD3E0~^de4a=w>xzw<-~IjA%Se4wUq{ zUYZ{p&EjlgrB@{lRZ;c62`e3~5F;cfRw)&CEq0t(L?{)YZpke})%MU;_J3r3hE084 zVoel>vEwO_h7LL~YFN-4Eym%ya-Wdd&8Sd+r#RlKwbp$Q#vVsamZEFEIZ06}dC$VF zyJ`_((tyJZ3MvAvDu34r?3O!MKpyn7iZMg@L1o1~M?$Hbi)PIc0*p(pidL~{(43}c z*U$;yLJRuBDr-mgeu8c5)p8F=y;S7h=9=!cd(E_LAsR)qbUy%e;m(ar>lv)5P=LR^ zrI~sux8f>iGol{5UPLm1VmvWMRtH>o}vnu8I!lZLmRfm!=x0Yg(s@8@flXh;WTBPdJ)(2yQaoxF+^Bx{V zXYM|G2@-MCdpd5e>2NbD*D86-@1@HoeEMR#A~}|#P-<&jL5puNZE}7drTGcPo4D7@ zitsOxxK=4ggz)M+r$kH?^P5ZoQf*^xB%$y3>i~m6$KpfeK-hYBA9pHTi#SzAk1Ww` z(_S(fKmxt^86U-wiSjgALPmj`&|&$OC#+jVA$r_MAwHDzsUPMN>G}{ zqWcGN!QvyMDj!397fxm6F#lGm=}@+}D>$~N>g3tsa-yywqA5r1ZwG0kpoeYJ7 zLkYjIRs0<$JP?Y9(_@ILR+~mjL^uR(D9oYGwwUHUvv%NgF7{-QbA+H+r_nc63i|ZP z^xKeh0l@_pN9%EiG$|yskJ~C%MD`_+WEQcQB3-NH8U-AMUyO^kGew-%4RX|JcSjW2 zO;bZ*MVk|+_KD)UkkNX2GMYE*VMEp$qWsa}E6(I;9uJ}IqLXQ__$47j?(kH+N|&_`om^_<^W0D( z;U}Ug#HUbx`D1)B{}7kck`bB%hctaxo%nqr+P8Ng(J`3{gl^+vx7P?@U)DaGi;0ls z7)R`HcgWt^(#NK0Edb)>;2Z}F(D3jHV-wECd*OzvZ_75ZT&D(z#tS{;M}xtHB97dS z05hw7_fEWFyo+xVp9#;HrBTV#Pt*SB&o8%zSbzLU2L*y(6{Nan5GP<2+Y;Cgmo=L{pc7M?a%}34 zzeNKCzpFu6wc-I2VM)S!ID>AZpWSz_-<2%7>x&pVG-rR3j636OgbeR7YS70&)yvtqR7-IGNsb;DjF|x zi(TKMc`NfV$sE3yk5ALT{2jZ>OD>WwhG8J797a; zQJt+%vu@&ZVz=1M%{mv2nz}ZFM*xF^5~+HKzBR51oVXSHmsVV)ZNrB4aM7w=sv6hFy;|<*wY~&jBp$;4iVT5{1%!th@QTHEN;L-Y+su-i1^!jJ=OB&w} 
zT+*1_bKMmK0_Fr|E8uMT$(ik?57}Lo==GW;C+=npzM1fMS_q}UB+OasAOeV5 zLJ`3@a>OAlZ`ku$m?x47I?_#$xZs+9SO~r-f9=opLM5_APrB>Ov^La;PhRz`-bKiY zkNK$>O1My)haWS5oK|$J{U3qASX-krF zQ{bjMw^N-Ir`0dM8-<2s`rQ)!TxN?Cu#A3`GX_|3QSVPe6Hl} z6X5RQ4I&AxuEItX79p%e0}a6PLJbNT`AchXzGrmHA+ItsT2Yl!p-`IxwuNMmRj$(X zM!%Xpv!V5a!F`Zb+7LccSCkuOKzUcv`e}q4l_<$M(5;hd< z)Y|8KID?%>+>87jr|+)n&PZ{m%gI?FNXp5RqX$Q^bK9XkW-M~63eHpvPatl>;|E$O zFn>=w4OVH(XMojV&C}wfd=VQ@5~f$FH|58X$>|`6@M4cSt0z`3yW!ICo~7yFnx~{z z=630ijKTpF+{I_a@muE2+cg}y@m2GTl}sEDXCb>|r3L&|Csc8z+P1h(q?In??F;3! z=LBEoAl->e!`{8rm1BjCW+4R!DLjAiG$k%)!4{XY2n*dbltvdkSVyHG58&BKW<#TJ z=nbNPfrc%>0Z}Vhz5RF{JnWM4nAKCX<%yM!_og;wXp1GHQEL|zC;O(>cfTanB$qll zA~?&lrWL>UEX22pcwj@6VC0f%)pI-DfWYEPvZ+iy7nsFn(IA&REDw@0sx7I_M1L{4Qd#kmPBc5aR?! z2iBsp8<^*+at0sY|8C1L>G^H}$Ry$La5-@y{5+9d>9ELk1s+1~u&(rYzMiR=WqFnD=6m*JIX?`YRKY^{XE@4E0=ZK20Ev({wD5XD@_4hE3{9I80~ zFumsG(|Wu zh3^SKx4nVH$JYKkTO-5_1eeDtG1z7m+V>OGPe;AU8^ZI~+uIR|;vKC?$$l&8z!ENZ zGym%61sSEN@ZBkt-=RLgUxYtQBV5H&v^6NB&mc)FwBqut!8Em#x>bzCAu|m}MVOc645F(l1p1u9QF9_bNdPJNbES{|XzWBUVe-Y22q*Z^N^v%1|bt zu_rW2u|vQ$4f17Dw7vlAUqBFYNz37tq$>b+?49q!Vr6N%2zE<~$VO=vSt7 zQ39kk%QjfmCq5AH^>z0zbA2i($n;Nyqld;7((CpT4sr^U285M`+IfG_xyw z;Eq|~zPGYN=FrBE$zw7K_Xwr(dFTN-xxo%BH3l}P7s9Iz0LKK(Ec=Yf1>l~bFdW{< z-ko4)YF{okE1dYAmFl9v(or1?+RP^7CIPVwyhf%TZhJg-?0h}e5(g?)GG_rsfSwtH zB7Emuhry9$*GD^EFH5c4@+9;Hk7>d7hqV~P)YjF-mbIP$sQHZop` z%Xs3l*h73KB_dvOPiCr<%t!f7>Sshe8FoCLnQCw8!Ct#Gr(cXEQ${vf^o8szhk96w z5tO9r5|@>zg1o$%G(0?fJr;BxEkK1e2Yxrzd#Xz~V>21NY_*fo<5E8R|%7c2Pb&a3TQdtwvdAyn{ruUBJ-9v`lhl|AHe zpY%5fyFkT-awDo(7hj2)9ioaY4z>O;W9dy@ij24mo};{CE!5qIv;a<87s~CUq|T(s zZ_`}V+#Yzvc**Ps4QdvnurNz64G1uwjCR%y2xhWpC$_r|v#UB<&YKkm%~AMO$k+Ar z&&tbN5RAm*S|Z62n&_BGntvQ$p>e)Mk2KuxO+{99eU=Dneda6FJXRa}BBMXJNPi)h z5;E@5<29Z=G?;iFJuX3ZT(%d+rx+oJo=Urrk(R>VeOUr(SUk(7YQeiP*HEu)$9$7a zU#Ql3t1dJF+R49cvIkg5m@yCBZ215M=#z{F+{Nb7M8+pCV~zLIF4#K5!U#Im^JEkp zL7UBmpZCm>WzFB^7!>PZzI7FV$@(c0$h>;SM0fcG12fSR;pxqCLwhSzj(TOq=P>Od zNfQ<4+!3`Zx4WRUw@P)7=q1I5)Z$Dr+=m2wLk;oH8Zlu$i#lpwK|{6!c}c}BSNkdZ z4I3p3kyBh0kwux6y?MqeZ|%4;h&*_FhDR@rYIu!ONaIPlP%{{L6vmB zKYgoBb+V&=c;69zr&MH?4Ymyn$gUmq2kzCfqFgv*yTrl$^lKtUAt83~ti}*x=ybuz zgQiyrXQL?+W85DNqh9NkgnlbdZ%1)P&|t*RcSh%)gSa#svNRh+)Ef2Z2?ytlBQ+Yz zaqo;oY>L`@Ml000a|)*qQBP~4*Y3l7}mK+1ZuN%7*O zHigJ_NjAlS(;Jwq7aiFN_+aXP);<1K+we=mco9Azp)g+kb3cF=$>cwg|Kb(%S9gGa zPWhuhz@L#1N&a&A%TxN-Q2%$ofM0oX|10kggg?3m{L25o{r7+6^S_Y&HSVAJ|Knc& zOW=1u{$Buge+T%(4Ex_0{C5if3y}}`_|i+^HwORTP5aNe`!9vx{j`5kV18*5@C)I0 zckO>;@<$EKpOFvQ$@FhHf18uvYhr!@J^UT$uL_xeqw~jE#y=w;GMx8sbpAs58=2qF zHvWS7@jJ|4CmsKd${$+ZpOFu_D)}3gzcBtr*(t$$6_}Tm00Hsl<&*pB<=g0wU;hslJNniD literal 0 HcmV?d00001 diff --git a/c_src/eNpc.cmd b/c_src/eNpc.cmd new file mode 100644 index 0000000..6c7a1ca --- /dev/null +++ b/c_src/eNpc.cmd @@ -0,0 +1,4 @@ +@echo off +setlocal +set rebarscript=%~f0 +escript.exe "%rebarscript:.cmd=%" %* diff --git a/rebar.config b/rebar.config new file mode 100644 index 0000000..64f3e6d --- /dev/null +++ b/rebar.config @@ -0,0 +1,8 @@ +{erl_opts, [debug_info]}. +{deps, []}. + +{pre_hooks, + [{"", compile, "escript c_src/eNpc compile"}]}. + +{post_hooks, + [{"", clean, "escript c_src/eNpc clean"}]}. diff --git a/src/eLfq.app.src b/src/eLfq.app.src new file mode 100644 index 0000000..b693526 --- /dev/null +++ b/src/eLfq.app.src @@ -0,0 +1,11 @@ +{application, eLfq, + [{description, "An OTP library"}, + {vsn, "0.1.0"}, + {registered, []}, + {applications, [kernel, stdlib]}, + {env, []}, + {modules, []}, + + {licenses, ["MIT"]}, + {links, []} + ]}. 
diff --git a/src/eLfq.erl b/src/eLfq.erl
new file mode 100644
index 0000000..97dbdff
--- /dev/null
+++ b/src/eLfq.erl
@@ -0,0 +1,115 @@
+-module(eLfq).
+
+-on_load(init/0).
+
+-define(NotLoaded, erlang:nif_error({nif_not_loaded, module, ?MODULE, line, ?LINE})).
+
+-export([
+    % create the queue
+    new/0
+
+    % Allocates more memory if necessary
+    , in/2              % (item) : bool
+    , in/3              % (prod_token, item) : bool
+    , ins/2             % (item_first, count) : bool
+    , ins/3             % (prod_token, item_first, count) : bool
+
+    % Fails if not enough memory to enqueue
+    , tryIn/2           % (item) : bool
+    , tryIn/3           % (prod_token, item) : bool
+    , tryIns/2          % (item_first, count) : bool
+    , tryIns/3          % (prod_token, item_first, count) : bool
+
+    % Attempts to dequeue from the queue (never allocates)
+    , tryOut/1          % (item&) : bool
+    , tryOut/2          % (cons_token, item&) : bool
+    , tryOuts/2         % (item_first, max) : size_t
+    , tryOuts/3         % (cons_token, item_first, max) : size_t
+
+    % If you happen to know which producer you want to dequeue from
+    , tryOutByProd/2    % (prod_token, item&) : bool
+    , tryOutsByProd/3   % (prod_token, item_first, max) : size_t
+
+    % A not-necessarily-accurate count of the total number of elements
+    , size/1
+
+]).
+
+-spec init() -> ok | {error, {Reason :: load_failed | bad_lib | load | reload | upgrade | old_code, Text :: string()}}.
+init() ->
+    case code:priv_dir(?MODULE) of
+        {error, _} ->
+            case code:which(?MODULE) of
+                Filename when is_list(Filename) ->
+                    SoName = filename:join([filename:dirname(Filename), "../priv", atom_to_list(?MODULE)]);
+                _ ->
+                    SoName = filename:join("../priv", atom_to_list(?MODULE))
+            end;
+        Dir ->
+            SoName = filename:join(Dir, atom_to_list(?MODULE))
+    end,
+    erlang:load_nif(SoName, 0).
+
+-spec new() -> {ok, QueueRef :: reference()} | badarg | {error, Reason :: binary()}.
+new() ->
+    ?NotLoaded.
+
+-spec in(QueueRef :: reference(), Data :: any()) -> true | {error, Reason :: binary()}.
+in(_QueueRef, _Data) ->
+    ?NotLoaded.
+
+-spec in(QueueRef :: reference(), ProdToken :: any(), Data :: any()) -> true | {error, Reason :: binary()}.
+in(_QueueRef, _ProdToken, _Data) ->
+    ?NotLoaded.
+
+-spec ins(QueueRef :: reference(), DataList :: [any()]) -> true | {error, Reason :: binary()}.
+ins(_QueueRef, _DataList) ->
+    ?NotLoaded.
+
+-spec ins(QueueRef :: reference(), ProdToken :: any(), DataList :: [any()]) -> true | {error, Reason :: binary()}.
+ins(_QueueRef, _ProdToken, _DataList) ->
+    ?NotLoaded.
+
+-spec tryIn(QueueRef :: reference(), Data :: any()) -> true | {error, Reason :: binary()}.
+tryIn(_QueueRef, _Data) ->
+    ?NotLoaded.
+
+-spec tryIn(QueueRef :: reference(), ProdToken :: any(), Data :: any()) -> true | {error, Reason :: binary()}.
+tryIn(_QueueRef, _ProdToken, _Data) ->
+    ?NotLoaded.
+
+-spec tryIns(QueueRef :: reference(), DataList :: [any()]) -> true | {error, Reason :: binary()}.
+tryIns(_QueueRef, _DataList) ->
+    ?NotLoaded.
+
+-spec tryIns(QueueRef :: reference(), ProdToken :: any(), DataList :: [any()]) -> true | {error, Reason :: binary()}.
+tryIns(_QueueRef, _ProdToken, _DataList) ->
+    ?NotLoaded.
+
+-spec tryOut(QueueRef :: reference()) -> Data :: any() | {error, Reason :: binary()}.
+tryOut(_QueueRef) ->
+    ?NotLoaded.
+
+-spec tryOut(QueueRef :: reference(), ConsToken :: any()) -> Data :: any() | {error, Reason :: binary()}.
+tryOut(_QueueRef, _ConsToken) ->
+    ?NotLoaded.
+
+-spec tryOuts(QueueRef :: reference(), Cnt :: pos_integer()) -> DataList :: [any()] | {error, Reason :: binary()}.
+tryOuts(_QueueRef, _Cnt) ->
+    ?NotLoaded.
+
+-spec tryOuts(QueueRef :: reference(), ConsToken :: any(), Cnt :: pos_integer()) -> DataList :: [any()] | {error, Reason :: binary()}.
+tryOuts(_QueueRef, _ConsToken, _Cnt) ->
+    ?NotLoaded.
+
+-spec tryOutByProd(QueueRef :: reference(), ProdToken :: any()) -> Data :: any() | {error, Reason :: binary()}.
+tryOutByProd(_QueueRef, _ProdToken) ->
+    ?NotLoaded.
+
+-spec tryOutsByProd(QueueRef :: reference(), ProdToken :: any(), Cnt :: pos_integer()) -> DataList :: [any()] | {error, Reason :: binary()}.
+tryOutsByProd(_QueueRef, _ProdToken, _Cnt) ->
+    ?NotLoaded.
+
+-spec size(QueueRef :: reference()) -> non_neg_integer() | {error, Reason :: binary()}.
+size(_QueueRef) ->
+    ?NotLoaded.
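
For reference, a minimal usage sketch of the NIF API exported above. `eLfq_demo` and `demo/0` are hypothetical helpers, not part of this patch, and the sketch assumes (it is not shown here) that `tryOut/1` on an empty queue surfaces as `{error, _}`:

```erlang
%% Minimal usage sketch of the eLfq API; assumes an empty queue makes
%% tryOut/1 return {error, _}.
-module(eLfq_demo).
-export([demo/0]).

demo() ->
    {ok, Q} = eLfq:new(),              % create a lock-free queue reference
    true = eLfq:in(Q, hello),          % enqueue a single term
    true = eLfq:ins(Q, [1, 2, 3]),     % bulk enqueue
    Approx = eLfq:size(Q),             % not-necessarily-accurate element count
    Items = eLfq:tryOuts(Q, 4),        % dequeue up to 4 items (assumed to return a list on success)
    case eLfq:tryOut(Q) of             % queue should now be empty
        {error, _Reason} ->
            {drained, length(Items), Approx};
        Data ->
            {got, Data}
    end.
```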