From a75df1086e295003727b88c74e535ab196bcb708 Mon Sep 17 00:00:00 2001
From: SisMaker <156736github>
Date: Tue, 28 Dec 2021 01:38:38 +0800
Subject: [PATCH] =?UTF-8?q?ft:=20=E6=9B=B4=E6=96=B0#=20Allocates=20more=20?=
 =?UTF-8?q?memory=20if=20necessary=20enqueue(item)=20:=20bool=20enqueue(pr?=
 =?UTF-8?q?od=5Ftoken,=20item)=20:=20bool=20enqueue=5Fbulk(item=5Ffirst,?=
 =?UTF-8?q?=20count)=20:=20bool=20enqueue=5Fbulk(prod=5Ftoken,=20item=5Ffi?=
 =?UTF-8?q?rst,=20count)=20:=20bool?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

# Fails if not enough memory to enqueue
try_enqueue(item) : bool
try_enqueue(prod_token, item) : bool
try_enqueue_bulk(item_first, count) : bool
try_enqueue_bulk(prod_token, item_first, count) : bool

# Attempts to dequeue from the queue (never allocates)
try_dequeue(item&) : bool
try_dequeue(cons_token, item&) : bool
try_dequeue_bulk(item_first, max) : size_t
try_dequeue_bulk(cons_token, item_first, max) : size_t

# If you happen to know which producer you want to dequeue from
try_dequeue_from_producer(prod_token, item&) : bool
try_dequeue_bulk_from_producer(prod_token, item_first, max) : size_t

# A not-necessarily-accurate count of the total number of elements
size_approx() : size_t
---
 c_src/enlfq/concurrentqueue.h | 1215 ++++++++++++++++++---------------
 1 file changed, 661 insertions(+), 554 deletions(-)

diff --git a/c_src/enlfq/concurrentqueue.h b/c_src/enlfq/concurrentqueue.h
index 68f66df..d471ad2 100644
--- a/c_src/enlfq/concurrentqueue.h
+++ b/c_src/enlfq/concurrentqueue.h
@@ -5,7 +5,7 @@
 //    http://moodycamel.com/blog/2014/detailed-design-of-a-lock-free-queue
 
 // Simplified BSD license:
-// Copyright (c) 2013-2016, Cameron Desrochers.
+// Copyright (c) 2013-2020, Cameron Desrochers.
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without modification,
@@ -27,10 +27,11 @@
 // TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+// Also dual-licensed under the Boost Software License (see LICENSE.md)
 
 #pragma once
 
-#if defined(__GNUC__)
+#if defined(__GNUC__) && !defined(__INTEL_COMPILER)
 // Disable -Wconversion warnings (spuriously triggered when Traits::size_t and
 // Traits::index_t are set to < 32 bits, causing integer promotion, causing warnings
 // upon assigning any computed values)
@@ -42,6 +43,13 @@
 #endif
 #endif
 
+#if defined(_MSC_VER) && (!defined(_HAS_CXX17) || !_HAS_CXX17)
+// VS2019 with /W4 warns about constant conditional expressions but unless /std=c++17 or higher
+// does not support `if constexpr`, so we have no choice but to simply disable the warning
+#pragma warning(push)
+#pragma warning(disable: 4127)  // conditional expression is constant
+#endif
+
 #if defined(__APPLE__)
 #include "TargetConditionals.h"
 #endif
@@ -96,10 +104,10 @@ namespace moodycamel { namespace details {
 	static const thread_id_t invalid_thread_id2 = 0xFFFFFFFFU;	// Not technically guaranteed to be invalid, but is never used in practice. Note that all Win32 thread IDs are presently multiples of 4.
 	static inline thread_id_t thread_id() { return static_cast<thread_id_t>(::GetCurrentThreadId()); }
 } }
-#elif defined(__arm__) || defined(_M_ARM) || defined(__aarch64__) || (defined(__APPLE__) && TARGET_OS_IPHONE)
+#elif defined(__arm__) || defined(_M_ARM) || defined(__aarch64__) || (defined(__APPLE__) && TARGET_OS_IPHONE) || defined(MOODYCAMEL_NO_THREAD_LOCAL)
 namespace moodycamel { namespace details {
 	static_assert(sizeof(std::thread::id) == 4 || sizeof(std::thread::id) == 8, "std::thread::id is expected to be either 4 or 8 bytes");
-	
+
 	typedef std::thread::id thread_id_t;
 	static const thread_id_t invalid_thread_id;         // Default ctor creates invalid ID
 
@@ -146,10 +154,21 @@ namespace moodycamel { namespace details {
 	typedef std::uintptr_t thread_id_t;
 	static const thread_id_t invalid_thread_id  = 0;		// Address can't be nullptr
 	static const thread_id_t invalid_thread_id2 = 1;		// Member accesses off a null pointer are also generally invalid. Plus it's not aligned.
-	static inline thread_id_t thread_id() { static MOODYCAMEL_THREADLOCAL int x; return reinterpret_cast<thread_id_t>(&x); }
+	inline thread_id_t thread_id() { static MOODYCAMEL_THREADLOCAL int x; return reinterpret_cast<thread_id_t>(&x); }
 } }
 #endif
 
+// Constexpr if
+#ifndef MOODYCAMEL_CONSTEXPR_IF
+#if (defined(_MSC_VER) && defined(_HAS_CXX17) && _HAS_CXX17) || __cplusplus > 201402L
+#define MOODYCAMEL_CONSTEXPR_IF if constexpr
+#define MOODYCAMEL_MAYBE_UNUSED [[maybe_unused]]
+#else
+#define MOODYCAMEL_CONSTEXPR_IF if
+#define MOODYCAMEL_MAYBE_UNUSED
+#endif
+#endif
+
 // Exceptions
 #ifndef MOODYCAMEL_EXCEPTIONS_ENABLED
 #if (defined(_MSC_VER) && defined(_CPPUNWIND)) || (defined(__GNUC__) && defined(__EXCEPTIONS)) || (!defined(_MSC_VER) && !defined(__GNUC__))
@@ -162,8 +181,8 @@ namespace moodycamel { namespace details {
 #define MOODYCAMEL_RETHROW throw
 #define MOODYCAMEL_THROW(expr) throw (expr)
 #else
-#define MOODYCAMEL_TRY if (true)
-#define MOODYCAMEL_CATCH(...) else if (false)
+#define MOODYCAMEL_TRY MOODYCAMEL_CONSTEXPR_IF (true)
+#define MOODYCAMEL_CATCH(...) else MOODYCAMEL_CONSTEXPR_IF (false)
 #define MOODYCAMEL_RETHROW
 #define MOODYCAMEL_THROW(expr)
 #endif
@@ -204,7 +223,7 @@ namespace moodycamel { namespace details {
 #endif
 #endif
 
-// VS2012 doesn't support deleted functions. 
+// VS2012 doesn't support deleted functions.
 // In this case, we declare the function normally but don't define it. A link error will be generated if the function is called.
 #ifndef MOODYCAMEL_DELETE_FUNCTION
 #if defined(_MSC_VER) && _MSC_VER < 1800
@@ -214,6 +233,44 @@ namespace moodycamel { namespace details {
 #endif
 #endif
 
+namespace moodycamel { namespace details {
+#ifndef MOODYCAMEL_ALIGNAS
+// VS2013 doesn't support alignas or alignof, and align() requires a constant literal
+#if defined(_MSC_VER) && _MSC_VER <= 1800
+#define MOODYCAMEL_ALIGNAS(alignment) __declspec(align(alignment))
+#define MOODYCAMEL_ALIGNOF(obj) __alignof(obj)
+#define MOODYCAMEL_ALIGNED_TYPE_LIKE(T, obj) typename details::Vs2013Aligned<std::alignment_of<obj>::value, T>::type
+	template<int Align, typename T> struct Vs2013Aligned { };  // default, unsupported alignment
+	template<typename T> struct Vs2013Aligned<1, T> { typedef __declspec(align(1)) T type; };
+	template<typename T> struct Vs2013Aligned<2, T> { typedef __declspec(align(2)) T type; };
+	template<typename T> struct Vs2013Aligned<4, T> { typedef __declspec(align(4)) T type; };
+	template<typename T> struct Vs2013Aligned<8, T> { typedef __declspec(align(8)) T type; };
+	template<typename T> struct Vs2013Aligned<16, T> { typedef __declspec(align(16)) T type; };
+	template<typename T> struct Vs2013Aligned<32, T> { typedef __declspec(align(32)) T type; };
+	template<typename T> struct Vs2013Aligned<64, T> { typedef __declspec(align(64)) T type; };
+	template<typename T> struct Vs2013Aligned<128, T> { typedef __declspec(align(128)) T type; };
+	template<typename T> struct Vs2013Aligned<256, T> { typedef __declspec(align(256)) T type; };
+#else
+	template<typename T> struct identity { typedef T type; };
+#define MOODYCAMEL_ALIGNAS(alignment) alignas(alignment)
+#define MOODYCAMEL_ALIGNOF(obj) alignof(obj)
+#define MOODYCAMEL_ALIGNED_TYPE_LIKE(T, obj) alignas(alignof(obj)) typename details::identity<T>::type
+#endif
+#endif
+} }
+
+
+// TSAN can false report races in lock-free code.  To enable TSAN to be used from projects that use this one,
+// we can apply per-function compile-time suppression.
+// See https://clang.llvm.org/docs/ThreadSanitizer.html#has-feature-thread-sanitizer
+#define MOODYCAMEL_NO_TSAN
+#if defined(__has_feature)
+ #if __has_feature(thread_sanitizer)
+  #undef MOODYCAMEL_NO_TSAN
+  #define MOODYCAMEL_NO_TSAN __attribute__((no_sanitize("thread")))
+ #endif // TSAN
+#endif // TSAN
+
 // Compiler-specific likely/unlikely hints
 namespace moodycamel { namespace details {
 #if defined(__GNUC__)
@@ -264,7 +321,7 @@ struct ConcurrentQueueDefaultTraits
 {
 	// General-purpose size type. std::size_t is strongly recommended.
 	typedef std::size_t size_t;
-	
+
 	// The type used for the enqueue and dequeue indices. Must be at least as
 	// large as size_t. Should be significantly larger than the number of elements
 	// you expect to hold at once, especially if you have a high turnover rate;
@@ -276,47 +333,53 @@ struct ConcurrentQueueDefaultTraits
 	// whether the queue is lock-free with a 64-int type depends on the whether
 	// std::atomic<std::uint64_t> is lock-free, which is platform-specific.
 	typedef std::size_t index_t;
-	
+
 	// Internally, all elements are enqueued and dequeued from multi-element
 	// blocks; this is the smallest controllable unit. If you expect few elements
 	// but many producers, a smaller block size should be favoured. For few producers
 	// and/or many elements, a larger block size is preferred. A sane default
 	// is provided. Must be a power of 2.
 	static const size_t BLOCK_SIZE = 32;
-	
+
 	// For explicit producers (i.e. when using a producer token), the block is
 	// checked for being empty by iterating through a list of flags, one per element.
 	// For large block sizes, this is too inefficient, and switching to an atomic
 	// counter-based approach is faster. The switch is made for block sizes strictly
 	// larger than this threshold.
 	static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = 32;
-	
+
 	// How many full blocks can be expected for a single explicit producer? This should
 	// reflect that number's maximum for optimal performance. Must be a power of 2.
 	static const size_t EXPLICIT_INITIAL_INDEX_SIZE = 32;
-	
+
 	// How many full blocks can be expected for a single implicit producer? This should
 	// reflect that number's maximum for optimal performance. Must be a power of 2.
 	static const size_t IMPLICIT_INITIAL_INDEX_SIZE = 32;
-	
+
 	// The initial size of the hash table mapping thread IDs to implicit producers.
 	// Note that the hash is resized every time it becomes half full.
 	// Must be a power of two, and either 0 or at least 1. If 0, implicit production
 	// (using the enqueue methods without an explicit producer token) is disabled.
 	static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = 32;
-	
+
 	// Controls the number of items that an explicit consumer (i.e. one with a token)
 	// must consume before it causes all consumers to rotate and move on to the next
 	// internal queue.
 	static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE = 256;
-	
+
 	// The maximum number of elements (inclusive) that can be enqueued to a sub-queue.
 	// Enqueue operations that would cause this limit to be surpassed will fail. Note
 	// that this limit is enforced at the block level (for performance reasons), i.e.
 	// it's rounded up to the nearest block size.
 	static const size_t MAX_SUBQUEUE_SIZE = details::const_numeric_max<size_t>::value;
-	
-	
+
+	// The number of times to spin before sleeping when waiting on a semaphore.
+	// Recommended values are on the order of 1000-10000 unless the number of
+	// consumer threads exceeds the number of idle cores (in which case try 0-100).
+	// Only affects instances of the BlockingConcurrentQueue.
+	static const int MAX_SEMA_SPINS = 10000;
+
+
 #ifndef MCDBGQ_USE_RELACY
 	// Memory allocation can be customized if needed.
 	// malloc should return nullptr on failure, and handle alignment like std::malloc.
@@ -362,13 +425,13 @@ namespace details
 		ConcurrentQueueProducerTypelessBase* next;
 		std::atomic<bool> inactive;
 		ProducerToken* token;
-		
+
 		ConcurrentQueueProducerTypelessBase()
 			: next(nullptr), inactive(false), token(nullptr)
 		{
 		}
 	};
-	
+
 	template<bool use32> struct _hash_32_or_64 {
 		static inline std::uint32_t hash(std::uint32_t h)
 		{
@@ -394,14 +457,14 @@ namespace details
 		}
 	};
 	template<std::size_t size> struct hash_32_or_64 : public _hash_32_or_64<(size > 4)> {  };
-	
+
 	static inline size_t hash_thread_id(thread_id_t id)
 	{
 		static_assert(sizeof(thread_id_t) <= 8, "Expected a platform where thread IDs are at most 64-bit values");
 		return static_cast<size_t>(hash_32_or_64<sizeof(thread_id_converter<thread_id_t>::thread_id_hash_t)>::hash(
 			thread_id_converter<thread_id_t>::prehash(id)));
 	}
-	
+
 	template<typename T>
 	static inline bool circular_less_than(T a, T b)
 	{
@@ -415,7 +478,7 @@ namespace details
 #pragma warning(pop)
 #endif
 	}
-	
+
 	template<typename U>
 	static inline char* align_for(char* ptr)
 	{
@@ -439,7 +502,7 @@ namespace details
 		++x;
 		return x;
 	}
-	
+
 	template<typename T>
 	static inline void swap_relaxed(std::atomic<T>& left, std::atomic<T>& right)
 	{
@@ -447,13 +510,13 @@ namespace details
 		left.store(std::move(right.load(std::memory_order_relaxed)), std::memory_order_relaxed);
 		right.store(std::move(temp), std::memory_order_relaxed);
 	}
-	
+
 	template<typename T>
 	static inline T const& nomove(T const& x)
 	{
 		return x;
 	}
-	
+
 	template<bool Enable>
 	struct nomove_if
 	{
@@ -463,7 +526,7 @@ namespace details
 			return x;
 		}
 	};
-	
+
 	template<>
 	struct nomove_if<false>
 	{
@@ -474,19 +537,19 @@ namespace details
 			return std::forward<U>(x);
 		}
 	};
-	
+
 	template<typename It>
 	static inline auto deref_noexcept(It& it) MOODYCAMEL_NOEXCEPT -> decltype(*it)
 	{
 		return *it;
 	}
-	
+
 #if defined(__clang__) || !defined(__GNUC__) || __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)
 	template<typename T> struct is_trivially_destructible : std::is_trivially_destructible<T> { };
 #else
 	template<typename T> struct is_trivially_destructible : std::has_trivial_destructor<T> { };
 #endif
-	
+
 #ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
 #ifdef MCDBGQ_USE_RELACY
 	typedef RelacyThreadExitListener ThreadExitListener;
@@ -497,11 +560,11 @@ namespace details
 		typedef void (*callback_t)(void*);
 		callback_t callback;
 		void* userData;
-		
+
 		ThreadExitListener* next;		// reserved for use by the ThreadExitNotifier
 	};
-	
-	
+
+
 	class ThreadExitNotifier
 	{
 	public:
@@ -511,7 +574,7 @@ namespace details
 			listener->next = tlsInst.tail;
 			tlsInst.tail = listener;
 		}
-		
+
 		static void unsubscribe(ThreadExitListener* listener)
 		{
 			auto& tlsInst = instance();
@@ -524,12 +587,12 @@ namespace details
 				prev = &ptr->next;
 			}
 		}
-		
+
 	private:
 		ThreadExitNotifier() : tail(nullptr) { }
 		ThreadExitNotifier(ThreadExitNotifier const&) MOODYCAMEL_DELETE_FUNCTION;
 		ThreadExitNotifier& operator=(ThreadExitNotifier const&) MOODYCAMEL_DELETE_FUNCTION;
-		
+
 		~ThreadExitNotifier()
 		{
 			// This thread is about to exit, let everyone know!
@@ -538,20 +601,20 @@ namespace details
 				ptr->callback(ptr->userData);
 			}
 		}
-		
+
 		// Thread-local
 		static inline ThreadExitNotifier& instance()
 		{
 			static thread_local ThreadExitNotifier notifier;
 			return notifier;
 		}
-		
+
 	private:
 		ThreadExitListener* tail;
 	};
 #endif
 #endif
-	
+
 	template<typename T> struct static_is_lock_free_num { enum { value = 0 }; };
 	template<> struct static_is_lock_free_num<signed char> { enum { value = ATOMIC_CHAR_LOCK_FREE }; };
 	template<> struct static_is_lock_free_num<short> { enum { value = ATOMIC_SHORT_LOCK_FREE }; };
@@ -568,10 +631,10 @@ struct ProducerToken
 {
 	template<typename T, typename Traits>
 	explicit ProducerToken(ConcurrentQueue<T, Traits>& queue);
-	
+
 	template<typename T, typename Traits>
 	explicit ProducerToken(BlockingConcurrentQueue<T, Traits>& queue);
-	
+
 	ProducerToken(ProducerToken&& other) MOODYCAMEL_NOEXCEPT
 		: producer(other.producer)
 	{
@@ -580,13 +643,13 @@ struct ProducerToken
 			producer->token = this;
 		}
 	}
-	
+
 	inline ProducerToken& operator=(ProducerToken&& other) MOODYCAMEL_NOEXCEPT
 	{
 		swap(other);
 		return *this;
 	}
-	
+
 	void swap(ProducerToken& other) MOODYCAMEL_NOEXCEPT
 	{
 		std::swap(producer, other.producer);
@@ -597,7 +660,7 @@ struct ProducerToken
 			other.producer->token = &other;
 		}
 	}
-	
+
 	// A token is always valid unless:
 	//     1) Memory allocation failed during construction
 	//     2) It was moved via the move constructor
@@ -607,7 +670,7 @@ struct ProducerToken
 	// that the token is valid for use with a specific queue,
 	// but not which one; that's up to the user to track.
 	inline bool valid() const { return producer != nullptr; }
-	
+
 	~ProducerToken()
 	{
 		if (producer != nullptr) {
@@ -615,15 +678,15 @@ struct ProducerToken
 			producer->inactive.store(true, std::memory_order_release);
 		}
 	}
-	
+
 	// Disable copying and assignment
 	ProducerToken(ProducerToken const&) MOODYCAMEL_DELETE_FUNCTION;
 	ProducerToken& operator=(ProducerToken const&) MOODYCAMEL_DELETE_FUNCTION;
-	
+
 private:
 	template<typename T, typename Traits> friend class ConcurrentQueue;
 	friend class ConcurrentQueueTests;
-	
+
 protected:
 	details::ConcurrentQueueProducerTypelessBase* producer;
 };
@@ -633,21 +696,21 @@ struct ConsumerToken
 {
 	template<typename T, typename Traits>
 	explicit ConsumerToken(ConcurrentQueue<T, Traits>& q);
-	
+
 	template<typename T, typename Traits>
 	explicit ConsumerToken(BlockingConcurrentQueue<T, Traits>& q);
-	
+
 	ConsumerToken(ConsumerToken&& other) MOODYCAMEL_NOEXCEPT
 		: initialOffset(other.initialOffset), lastKnownGlobalOffset(other.lastKnownGlobalOffset), itemsConsumedFromCurrent(other.itemsConsumedFromCurrent), currentProducer(other.currentProducer), desiredProducer(other.desiredProducer)
 	{
 	}
-	
+
 	inline ConsumerToken& operator=(ConsumerToken&& other) MOODYCAMEL_NOEXCEPT
 	{
 		swap(other);
 		return *this;
 	}
-	
+
 	void swap(ConsumerToken& other) MOODYCAMEL_NOEXCEPT
 	{
 		std::swap(initialOffset, other.initialOffset);
@@ -656,7 +719,7 @@ struct ConsumerToken
 		std::swap(currentProducer, other.currentProducer);
 		std::swap(desiredProducer, other.desiredProducer);
 	}
-	
+
 	// Disable copying and assignment
 	ConsumerToken(ConsumerToken const&) MOODYCAMEL_DELETE_FUNCTION;
 	ConsumerToken& operator=(ConsumerToken const&) MOODYCAMEL_DELETE_FUNCTION;
@@ -664,7 +727,7 @@ struct ConsumerToken
 private:
 	template<typename T, typename Traits> friend class ConcurrentQueue;
 	friend class ConcurrentQueueTests;
-	
+
 private: // but shared with ConcurrentQueue
 	std::uint32_t initialOffset;
 	std::uint32_t lastKnownGlobalOffset;
@@ -685,10 +748,10 @@ class ConcurrentQueue
 public:
 	typedef ::moodycamel::ProducerToken producer_token_t;
 	typedef ::moodycamel::ConsumerToken consumer_token_t;
-	
+
 	typedef typename Traits::index_t index_t;
 	typedef typename Traits::size_t size_t;
-	
+
 	static const size_t BLOCK_SIZE = static_cast<size_t>(Traits::BLOCK_SIZE);
 	static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = static_cast<size_t>(Traits::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD);
 	static const size_t EXPLICIT_INITIAL_INDEX_SIZE = static_cast<size_t>(Traits::EXPLICIT_INITIAL_INDEX_SIZE);
@@ -736,7 +799,7 @@ public:
 		implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed);
 		populate_initial_implicit_producer_hash();
 		populate_initial_block_list(capacity / BLOCK_SIZE + ((capacity & (BLOCK_SIZE - 1)) == 0 ? 0 : 1));
-		
+
 #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
 		// Track all the producers using a fully-resolved typed list for
 		// each kind; this makes it possible to debug them starting from
@@ -746,7 +809,7 @@ public:
 		implicitProducers.store(nullptr, std::memory_order_relaxed);
 #endif
 	}
-	
+
 	// Computes the correct amount of pre-allocated blocks for you based
 	// on the minimum number of elements you want available at any given
 	// time, and the maximum concurrent number of each type of producer.
@@ -761,13 +824,13 @@ public:
 		populate_initial_implicit_producer_hash();
 		size_t blocks = (((minCapacity + BLOCK_SIZE - 1) / BLOCK_SIZE) - 1) * (maxExplicitProducers + 1) + 2 * (maxExplicitProducers + maxImplicitProducers);
 		populate_initial_block_list(blocks);
-		
+
 #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
 		explicitProducers.store(nullptr, std::memory_order_relaxed);
 		implicitProducers.store(nullptr, std::memory_order_relaxed);
 #endif
 	}
-	
+
 	// Note: The queue should not be accessed concurrently while it's
 	// being deleted. It's up to the user to synchronize this.
 	// This method is not thread safe.
@@ -783,9 +846,9 @@ public:
 			destroy(ptr);
 			ptr = next;
 		}
-		
+
 		// Destroy implicit producer hash tables
-		if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE != 0) {
+		MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE != 0) {
 			auto hash = implicitProducerHash.load(std::memory_order_relaxed);
 			while (hash != nullptr) {
 				auto prev = hash->prev;
@@ -799,7 +862,7 @@ public:
 				hash = prev;
 			}
 		}
-		
+
 		// Destroy global free list
 		auto block = freeList.head_unsafe();
 		while (block != nullptr) {
@@ -809,7 +872,7 @@ public:
 			}
 			block = next;
 		}
-		
+
 		// Destroy initial free list
 		destroy_array(initialBlockPool, initialBlockPoolSize);
 	}
@@ -817,7 +880,7 @@ public:
 	// Disable copying and copy assignment
 	ConcurrentQueue(ConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION;
 	ConcurrentQueue& operator=(ConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION;
-	
+
 	// Moving is supported, but note that it is *not* a thread-safe operation.
 	// Nobody can use the queue while it's being moved, and the memory effects
 	// of that move must be propagated to other threads before they can use it.
@@ -838,31 +901,31 @@ public:
 		implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed);
 		populate_initial_implicit_producer_hash();
 		swap_implicit_producer_hashes(other);
-		
+
 		other.producerListTail.store(nullptr, std::memory_order_relaxed);
 		other.producerCount.store(0, std::memory_order_relaxed);
 		other.nextExplicitConsumerId.store(0, std::memory_order_relaxed);
 		other.globalExplicitConsumerOffset.store(0, std::memory_order_relaxed);
-		
+
 #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
 		explicitProducers.store(other.explicitProducers.load(std::memory_order_relaxed), std::memory_order_relaxed);
 		other.explicitProducers.store(nullptr, std::memory_order_relaxed);
 		implicitProducers.store(other.implicitProducers.load(std::memory_order_relaxed), std::memory_order_relaxed);
 		other.implicitProducers.store(nullptr, std::memory_order_relaxed);
 #endif
-		
+
 		other.initialBlockPoolIndex.store(0, std::memory_order_relaxed);
 		other.initialBlockPoolSize = 0;
 		other.initialBlockPool = nullptr;
-		
+
 		reown_producers();
 	}
-	
+
 	inline ConcurrentQueue& operator=(ConcurrentQueue&& other) MOODYCAMEL_NOEXCEPT
 	{
 		return swap_internal(other);
 	}
-	
+
 	// Swaps this queue's state with the other's. Not thread-safe.
 	// Swapping two queues does not invalidate their tokens, however
 	// the tokens that were created for one queue must be used with
@@ -872,14 +935,14 @@ public:
 	{
 		swap_internal(other);
 	}
-	
+
 private:
 	ConcurrentQueue& swap_internal(ConcurrentQueue& other)
 	{
 		if (this == &other) {
 			return *this;
 		}
-		
+
 		details::swap_relaxed(producerListTail, other.producerListTail);
 		details::swap_relaxed(producerCount, other.producerCount);
 		details::swap_relaxed(initialBlockPoolIndex, other.initialBlockPoolIndex);
@@ -888,20 +951,20 @@ private:
 		freeList.swap(other.freeList);
 		details::swap_relaxed(nextExplicitConsumerId, other.nextExplicitConsumerId);
 		details::swap_relaxed(globalExplicitConsumerOffset, other.globalExplicitConsumerOffset);
-		
+
 		swap_implicit_producer_hashes(other);
-		
+
 		reown_producers();
 		other.reown_producers();
-		
+
 #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
 		details::swap_relaxed(explicitProducers, other.explicitProducers);
 		details::swap_relaxed(implicitProducers, other.implicitProducers);
 #endif
-		
+
 		return *this;
 	}
-	
+
 public:
 	// Enqueues a single item (by copying it).
 	// Allocates memory if required. Only fails if memory allocation fails (or implicit
@@ -910,10 +973,10 @@ public:
 	// Thread-safe.
 	inline bool enqueue(T const& item)
 	{
-		if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
-		return inner_enqueue<CanAlloc>(item);
+		MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
+		else return inner_enqueue<CanAlloc>(item);
 	}
-	
+
 	// Enqueues a single item (by moving it, if possible).
 	// Allocates memory if required. Only fails if memory allocation fails (or implicit
 	// production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0,
@@ -921,10 +984,10 @@ public:
 	// Thread-safe.
 	inline bool enqueue(T&& item)
 	{
-		if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
-		return inner_enqueue<CanAlloc>(std::move(item));
+		MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
+		else return inner_enqueue<CanAlloc>(std::move(item));
 	}
-	
+
 	// Enqueues a single item (by copying it) using an explicit producer token.
 	// Allocates memory if required. Only fails if memory allocation fails (or
 	// Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
@@ -933,7 +996,7 @@ public:
 	{
 		return inner_enqueue<CanAlloc>(token, item);
 	}
-	
+
 	// Enqueues a single item (by moving it, if possible) using an explicit producer token.
 	// Allocates memory if required. Only fails if memory allocation fails (or
 	// Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
@@ -942,7 +1005,7 @@ public:
 	{
 		return inner_enqueue<CanAlloc>(token, std::move(item));
 	}
-	
+
 	// Enqueues several items.
 	// Allocates memory if required. Only fails if memory allocation fails (or
 	// implicit production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE
@@ -952,10 +1015,10 @@ public:
 	template<typename It>
 	bool enqueue_bulk(It itemFirst, size_t count)
 	{
-		if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
-		return inner_enqueue_bulk<CanAlloc>(itemFirst, count);
+		MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
+		else return inner_enqueue_bulk<CanAlloc>(itemFirst, count);
 	}
-	
+
 	// Enqueues several items using an explicit producer token.
 	// Allocates memory if required. Only fails if memory allocation fails
 	// (or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
@@ -967,7 +1030,7 @@ public:
 	{
 		return inner_enqueue_bulk<CanAlloc>(token, itemFirst, count);
 	}
-	
+
 	// Enqueues a single item (by copying it).
 	// Does not allocate memory. Fails if not enough room to enqueue (or implicit
 	// production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE
@@ -975,10 +1038,10 @@ public:
 	// Thread-safe.
 	inline bool try_enqueue(T const& item)
 	{
-		if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
-		return inner_enqueue<CannotAlloc>(item);
+		MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
+		else return inner_enqueue<CannotAlloc>(item);
 	}
-	
+
 	// Enqueues a single item (by moving it, if possible).
 	// Does not allocate memory (except for one-time implicit producer).
 	// Fails if not enough room to enqueue (or implicit production is
@@ -986,10 +1049,10 @@ public:
 	// Thread-safe.
 	inline bool try_enqueue(T&& item)
 	{
-		if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
-		return inner_enqueue<CannotAlloc>(std::move(item));
+		MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
+		else return inner_enqueue<CannotAlloc>(std::move(item));
 	}
-	
+
 	// Enqueues a single item (by copying it) using an explicit producer token.
 	// Does not allocate memory. Fails if not enough room to enqueue.
 	// Thread-safe.
@@ -997,7 +1060,7 @@ public:
 	{
 		return inner_enqueue<CannotAlloc>(token, item);
 	}
-	
+
 	// Enqueues a single item (by moving it, if possible) using an explicit producer token.
 	// Does not allocate memory. Fails if not enough room to enqueue.
 	// Thread-safe.
@@ -1005,7 +1068,7 @@ public:
 	{
 		return inner_enqueue<CannotAlloc>(token, std::move(item));
 	}
-	
+
 	// Enqueues several items.
 	// Does not allocate memory (except for one-time implicit producer).
 	// Fails if not enough room to enqueue (or implicit production is
@@ -1016,10 +1079,10 @@ public:
 	template<typename It>
 	bool try_enqueue_bulk(It itemFirst, size_t count)
 	{
-		if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
-		return inner_enqueue_bulk<CannotAlloc>(itemFirst, count);
+		MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
+		else return inner_enqueue_bulk<CannotAlloc>(itemFirst, count);
 	}
-	
+
 	// Enqueues several items using an explicit producer token.
 	// Does not allocate memory. Fails if not enough room to enqueue.
 	// Note: Use std::make_move_iterator if the elements should be moved
@@ -1030,9 +1093,9 @@ public:
 	{
 		return inner_enqueue_bulk<CannotAlloc>(token, itemFirst, count);
 	}
-	
-	
-	
+
+
+
 	// Attempts to dequeue from the queue.
 	// Returns false if all producer streams appeared empty at the time they
 	// were checked (so, the queue is likely but not guaranteed to be empty).
@@ -1055,7 +1118,7 @@ public:
 				++nonEmptyCount;
 			}
 		}
-		
+
 		// If there was at least one non-empty queue but it appears empty at the time
 		// we try to dequeue from it, we need to make sure every queue's been tried
 		if (nonEmptyCount > 0) {
@@ -1070,7 +1133,7 @@ public:
 		}
 		return false;
 	}
-	
+
 	// Attempts to dequeue from the queue.
 	// Returns false if all producer streams appeared empty at the time they
 	// were checked (so, the queue is likely but not guaranteed to be empty).
@@ -1090,7 +1153,7 @@ public:
 		}
 		return false;
 	}
-	
+
 	// Attempts to dequeue from the queue using an explicit consumer token.
 	// Returns false if all producer streams appeared empty at the time they
 	// were checked (so, the queue is likely but not guaranteed to be empty).
@@ -1103,13 +1166,13 @@ public:
 		// If you see that the global offset has changed, you must reset your consumption counter and move to your designated place
 		// If there's no items where you're supposed to be, keep moving until you find a producer with some items
 		// If the global offset has not changed but you've run out of items to consume, move over from your current position until you find an producer with something in it
-		
+
 		if (token.desiredProducer == nullptr || token.lastKnownGlobalOffset != globalExplicitConsumerOffset.load(std::memory_order_relaxed)) {
 			if (!update_current_producer_after_rotation(token)) {
 				return false;
 			}
 		}
-		
+
 		// If there was at least one non-empty queue but it appears empty at the time
 		// we try to dequeue from it, we need to make sure every queue's been tried
 		if (static_cast<ProducerBase*>(token.currentProducer)->dequeue(item)) {
@@ -1118,7 +1181,7 @@ public:
 			}
 			return true;
 		}
-		
+
 		auto tail = producerListTail.load(std::memory_order_acquire);
 		auto ptr = static_cast<ProducerBase*>(token.currentProducer)->next_prod();
 		if (ptr == nullptr) {
@@ -1137,7 +1200,7 @@ public:
 		}
 		return false;
 	}
-	
+
 	// Attempts to dequeue several elements from the queue.
 	// Returns the number of items actually dequeued.
 	// Returns 0 if all producer streams appeared empty at the time they
@@ -1155,7 +1218,7 @@ public:
 		}
 		return count;
 	}
-	
+
 	// Attempts to dequeue several elements from the queue using an explicit consumer token.
 	// Returns the number of items actually dequeued.
 	// Returns 0 if all producer streams appeared empty at the time they
@@ -1169,7 +1232,7 @@ public:
 				return 0;
 			}
 		}
-		
+
 		size_t count = static_cast<ProducerBase*>(token.currentProducer)->dequeue_bulk(itemFirst, max);
 		if (count == max) {
 			if ((token.itemsConsumedFromCurrent += static_cast<std::uint32_t>(max)) >= EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE) {
@@ -1179,7 +1242,7 @@ public:
 		}
 		token.itemsConsumedFromCurrent += static_cast<std::uint32_t>(count);
 		max -= count;
-		
+
 		auto tail = producerListTail.load(std::memory_order_acquire);
 		auto ptr = static_cast<ProducerBase*>(token.currentProducer)->next_prod();
 		if (ptr == nullptr) {
@@ -1203,9 +1266,9 @@ public:
 		}
 		return count;
 	}
-	
-	
-	
+
+
+
 	// Attempts to dequeue from a specific producer's inner queue.
 	// If you happen to know which producer you want to dequeue from, this
 	// is significantly faster than using the general-case try_dequeue methods.
@@ -1217,7 +1280,7 @@ public:
 	{
 		return static_cast<ExplicitProducer*>(producer.producer)->dequeue(item);
 	}
-	
+
 	// Attempts to dequeue several elements from a specific producer's inner queue.
 	// Returns the number of items actually dequeued.
 	// If you happen to know which producer you want to dequeue from, this
@@ -1230,8 +1293,8 @@ public:
 	{
 		return static_cast<ExplicitProducer*>(producer.producer)->dequeue_bulk(itemFirst, max);
 	}
-	
-	
+
+
 	// Returns an estimate of the total number of elements currently in the queue. This
 	// estimate is only accurate if the queue has completely stabilized before it is called
 	// (i.e. all enqueue and dequeue operations have completed and their memory effects are
@@ -1246,12 +1309,12 @@ public:
 		}
 		return size;
 	}
-	
-	
+
+
 	// Returns true if the underlying atomic variables used by
 	// the queue are lock-free (they should be on most platforms).
 	// Thread-safe.
-	static bool is_lock_free()
+	static constexpr bool is_lock_free()
 	{
 		return
 			details::static_is_lock_free<bool>::value == 2 &&
@@ -1271,40 +1334,40 @@ private:
 	struct ImplicitProducer;
 	friend struct ImplicitProducer;
 	friend class ConcurrentQueueTests;
-		
+
 	enum AllocationMode { CanAlloc, CannotAlloc };
-	
-	
+
+
 	///////////////////////////////
 	// Queue methods
 	///////////////////////////////
-	
+
 	template<AllocationMode canAlloc, typename U>
 	inline bool inner_enqueue(producer_token_t const& token, U&& element)
 	{
 		return static_cast<ExplicitProducer*>(token.producer)->ConcurrentQueue::ExplicitProducer::template enqueue<canAlloc>(std::forward<U>(element));
 	}
-	
+
 	template<AllocationMode canAlloc, typename U>
 	inline bool inner_enqueue(U&& element)
 	{
 		auto producer = get_or_add_implicit_producer();
 		return producer == nullptr ? false : producer->ConcurrentQueue::ImplicitProducer::template enqueue<canAlloc>(std::forward<U>(element));
 	}
-	
+
 	template<AllocationMode canAlloc, typename It>
 	inline bool inner_enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count)
 	{
 		return static_cast<ExplicitProducer*>(token.producer)->ConcurrentQueue::ExplicitProducer::template enqueue_bulk<canAlloc>(itemFirst, count);
 	}
-	
+
 	template<AllocationMode canAlloc, typename It>
 	inline bool inner_enqueue_bulk(It itemFirst, size_t count)
 	{
 		auto producer = get_or_add_implicit_producer();
 		return producer == nullptr ? false : producer->ConcurrentQueue::ImplicitProducer::template enqueue_bulk<canAlloc>(itemFirst, count);
 	}
-	
+
 	inline bool update_current_producer_after_rotation(consumer_token_t& token)
 	{
 		// Ah, there's been a rotation, figure out where we should be!
@@ -1327,7 +1390,7 @@ private:
 				}
 			}
 		}
-		
+
 		std::uint32_t delta = globalOffset - token.lastKnownGlobalOffset;
 		if (delta >= prodCount) {
 			delta = delta % prodCount;
@@ -1338,27 +1401,27 @@ private:
 				token.desiredProducer = tail;
 			}
 		}
-		
+
 		token.lastKnownGlobalOffset = globalOffset;
 		token.currentProducer = token.desiredProducer;
 		token.itemsConsumedFromCurrent = 0;
 		return true;
 	}
-	
-	
+
+
 	///////////////////////////
 	// Free list
 	///////////////////////////
-	
+
 	template <typename N>
 	struct FreeListNode
 	{
 		FreeListNode() : freeListRefs(0), freeListNext(nullptr) { }
-		
+
 		std::atomic<std::uint32_t> freeListRefs;
 		std::atomic<N*> freeListNext;
 	};
-	
+
 	// A simple CAS-based lock-free free list. Not the fastest thing in the world under heavy contention, but
 	// simple and correct (assuming nodes are never freed until after the free list is destroyed), and fairly
 	// speedy under low contention.
@@ -1368,15 +1431,15 @@ private:
 		FreeList() : freeListHead(nullptr) { }
 		FreeList(FreeList&& other) : freeListHead(other.freeListHead.load(std::memory_order_relaxed)) { other.freeListHead.store(nullptr, std::memory_order_relaxed); }
 		void swap(FreeList& other) { details::swap_relaxed(freeListHead, other.freeListHead); }
-		
+
 		FreeList(FreeList const&) MOODYCAMEL_DELETE_FUNCTION;
 		FreeList& operator=(FreeList const&) MOODYCAMEL_DELETE_FUNCTION;
-		
+
 		inline void add(N* node)
 		{
-#if MCDBGQ_NOLOCKFREE_FREELIST
+#ifdef MCDBGQ_NOLOCKFREE_FREELIST
 			debug::DebugLock lock(mutex);
-#endif		
+#endif
 			// We know that the should-be-on-freelist bit is 0 at this point, so it's safe to
 			// set it using a fetch_add
 			if (node->freeListRefs.fetch_add(SHOULD_BE_ON_FREELIST, std::memory_order_acq_rel) == 0) {
@@ -1385,12 +1448,12 @@ private:
 		 		add_knowing_refcount_is_zero(node);
 			}
 		}
-		
+
 		inline N* try_get()
 		{
-#if MCDBGQ_NOLOCKFREE_FREELIST
+#ifdef MCDBGQ_NOLOCKFREE_FREELIST
 			debug::DebugLock lock(mutex);
-#endif		
+#endif
 			auto head = freeListHead.load(std::memory_order_acquire);
 			while (head != nullptr) {
 				auto prevHead = head;
@@ -1399,7 +1462,7 @@ private:
 					head = freeListHead.load(std::memory_order_acquire);
 					continue;
 				}
-				
+
 				// Good, reference count has been incremented (it wasn't at zero), which means we can read the
 				// next and not worry about it changing between now and the time we do the CAS
 				auto next = head->freeListNext.load(std::memory_order_relaxed);
@@ -1407,12 +1470,12 @@ private:
 					// Yay, got the node. This means it was on the list, which means shouldBeOnFreeList must be false no
 					// matter the refcount (because nobody else knows it's been taken off yet, it can't have been put back on).
 					assert((head->freeListRefs.load(std::memory_order_relaxed) & SHOULD_BE_ON_FREELIST) == 0);
-					
+
 					// Decrease refcount twice, once for our ref, and once for the list's ref
 					head->freeListRefs.fetch_sub(2, std::memory_order_release);
 					return head;
 				}
-				
+
 				// OK, the head must have changed on us, but we still need to decrease the refcount we increased.
 				// Note that we don't need to release any memory effects, but we do need to ensure that the reference
 				// count decrement happens-after the CAS on the head.
@@ -1421,13 +1484,13 @@ private:
 					add_knowing_refcount_is_zero(prevHead);
 				}
 			}
-			
+
 			return nullptr;
 		}
-		
+
 		// Useful for traversing the list when there's no contention (e.g. to destroy remaining nodes)
 		N* head_unsafe() const { return freeListHead.load(std::memory_order_relaxed); }
-		
+
 	private:
 		inline void add_knowing_refcount_is_zero(N* node)
 		{
@@ -1452,47 +1515,47 @@ private:
 				return;
 			}
 		}
-		
+
 	private:
 		// Implemented like a stack, but where node order doesn't matter (nodes are inserted out of order under contention)
 		std::atomic<N*> freeListHead;
-	
+
 	static const std::uint32_t REFS_MASK = 0x7FFFFFFF;
 	static const std::uint32_t SHOULD_BE_ON_FREELIST = 0x80000000;
-		
-#if MCDBGQ_NOLOCKFREE_FREELIST
+
+#ifdef MCDBGQ_NOLOCKFREE_FREELIST
 		debug::DebugMutex mutex;
 #endif
 	};
-	
-	
+
+
 	///////////////////////////
 	// Block
 	///////////////////////////
-	
+
 	enum InnerQueueContext { implicit_context = 0, explicit_context = 1 };
-	
+
 	struct Block
 	{
 		Block()
 			: next(nullptr), elementsCompletelyDequeued(0), freeListRefs(0), freeListNext(nullptr), shouldBeOnFreeList(false), dynamicallyAllocated(true)
 		{
-#if MCDBGQ_TRACKMEM
+#ifdef MCDBGQ_TRACKMEM
 			owner = nullptr;
 #endif
 		}
-		
+
 		template<InnerQueueContext context>
 		inline bool is_empty() const
 		{
-			if (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) {
+			MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) {
 				// Check flags
 				for (size_t i = 0; i < BLOCK_SIZE; ++i) {
 					if (!emptyFlags[i].load(std::memory_order_relaxed)) {
 						return false;
 					}
 				}
-				
+
 				// Aha, empty; make sure we have all other memory effects that happened before the empty flags were set
 				std::atomic_thread_fence(std::memory_order_acquire);
 				return true;
@@ -1507,12 +1570,12 @@ private:
 				return false;
 			}
 		}
-		
+
 		// Returns true if the block is now empty (does not apply in explicit context)
 		template<InnerQueueContext context>
-		inline bool set_empty(index_t i)
+		inline bool set_empty(MOODYCAMEL_MAYBE_UNUSED index_t i)
 		{
-			if (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) {
+			MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) {
 				// Set flag
 				assert(!emptyFlags[BLOCK_SIZE - 1 - static_cast<size_t>(i & static_cast<index_t>(BLOCK_SIZE - 1))].load(std::memory_order_relaxed));
 				emptyFlags[BLOCK_SIZE - 1 - static_cast<size_t>(i & static_cast<index_t>(BLOCK_SIZE - 1))].store(true, std::memory_order_release);
@@ -1525,13 +1588,13 @@ private:
 				return prevVal == BLOCK_SIZE - 1;
 			}
 		}
-		
+
 		// Sets multiple contiguous item statuses to 'empty' (assumes no wrapping and count > 0).
 		// Returns true if the block is now empty (does not apply in explicit context).
 		template<InnerQueueContext context>
-		inline bool set_many_empty(index_t i, size_t count)
+		inline bool set_many_empty(MOODYCAMEL_MAYBE_UNUSED index_t i, size_t count)
 		{
-			if (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) {
+			MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) {
 				// Set flags
 				std::atomic_thread_fence(std::memory_order_release);
 				i = BLOCK_SIZE - 1 - static_cast<size_t>(i & static_cast<index_t>(BLOCK_SIZE - 1)) - count + 1;
@@ -1548,11 +1611,11 @@ private:
 				return prevVal + count == BLOCK_SIZE;
 			}
 		}
-		
+
 		template<InnerQueueContext context>
 		inline void set_all_empty()
 		{
-			if (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) {
+			MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) {
 				// Set all flags
 				for (size_t i = 0; i != BLOCK_SIZE; ++i) {
 					emptyFlags[i].store(true, std::memory_order_relaxed);
@@ -1563,11 +1626,11 @@ private:
 				elementsCompletelyDequeued.store(BLOCK_SIZE, std::memory_order_relaxed);
 			}
 		}
-		
+
 		template<InnerQueueContext context>
 		inline void reset_empty()
 		{
-			if (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) {
+			MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) {
 				// Reset flags
 				for (size_t i = 0; i != BLOCK_SIZE; ++i) {
 					emptyFlags[i].store(false, std::memory_order_relaxed);
@@ -1578,25 +1641,13 @@ private:
 				elementsCompletelyDequeued.store(0, std::memory_order_relaxed);
 			}
 		}
-		
+
 		inline T* operator[](index_t idx) MOODYCAMEL_NOEXCEPT { return static_cast<T*>(static_cast<void*>(elements)) + static_cast<size_t>(idx & static_cast<index_t>(BLOCK_SIZE - 1)); }
 		inline T const* operator[](index_t idx) const MOODYCAMEL_NOEXCEPT { return static_cast<T const*>(static_cast<void const*>(elements)) + static_cast<size_t>(idx & static_cast<index_t>(BLOCK_SIZE - 1)); }
-		
+
 	private:
-		// IMPORTANT: This must be the first member in Block, so that if T depends on the alignment of
-		// addresses returned by malloc, that alignment will be preserved. Apparently clang actually
-		// generates code that uses this assumption for AVX instructions in some cases. Ideally, we
-		// should also align Block to the alignment of T in case it's higher than malloc's 16-byte
-		// alignment, but this is hard to do in a cross-platform way. Assert for this case:
-		static_assert(std::alignment_of<T>::value <= std::alignment_of<details::max_align_t>::value, "The queue does not support super-aligned types at this time");
-		// Additionally, we need the alignment of Block itself to be a multiple of max_align_t since
-		// otherwise the appropriate padding will not be added at the end of Block in order to make
-		// arrays of Blocks all be properly aligned (not just the first one). We use a union to force
-		// this.
-		union {
-			char elements[sizeof(T) * BLOCK_SIZE];
-			details::max_align_t dummy;
-		};
+		static_assert(std::alignment_of<T>::value <= sizeof(T), "The queue does not support types with an alignment greater than their size at this time");
+		MOODYCAMEL_ALIGNED_TYPE_LIKE(char[sizeof(T) * BLOCK_SIZE], T) elements;
 	public:
 		Block* next;
 		std::atomic<size_t> elementsCompletelyDequeued;
@@ -1606,24 +1657,24 @@ private:
 		std::atomic<Block*> freeListNext;
 		std::atomic<bool> shouldBeOnFreeList;
 		bool dynamicallyAllocated;		// Perhaps a better name for this would be 'isNotPartOfInitialBlockPool'
-		
-#if MCDBGQ_TRACKMEM
+
+#ifdef MCDBGQ_TRACKMEM
 		void* owner;
 #endif
 	};
-	static_assert(std::alignment_of<Block>::value >= std::alignment_of<details::max_align_t>::value, "Internal error: Blocks must be at least as aligned as the type they are wrapping");
+	static_assert(std::alignment_of<Block>::value >= std::alignment_of<T>::value, "Internal error: Blocks must be at least as aligned as the type they are wrapping");
 
 
-#if MCDBGQ_TRACKMEM
+#ifdef MCDBGQ_TRACKMEM
 public:
 	struct MemStats;
 private:
 #endif
-	
+
 	///////////////////////////
 	// Producer base
 	///////////////////////////
-	
+
 	struct ProducerBase : public details::ConcurrentQueueProducerTypelessBase
 	{
 		ProducerBase(ConcurrentQueue* parent_, bool isExplicit_) :
@@ -1636,9 +1687,9 @@ private:
 			parent(parent_)
 		{
 		}
-		
-		virtual ~ProducerBase() { };
-		
+
+		virtual ~ProducerBase() { }
+
 		template<typename U>
 		inline bool dequeue(U& element)
 		{
@@ -1649,7 +1700,7 @@ private:
 				return static_cast<ImplicitProducer*>(this)->dequeue(element);
 			}
 		}
-		
+
 		template<typename It>
 		inline size_t dequeue_bulk(It& itemFirst, size_t max)
 		{
@@ -1660,45 +1711,45 @@ private:
 				return static_cast<ImplicitProducer*>(this)->dequeue_bulk(itemFirst, max);
 			}
 		}
-		
+
 		inline ProducerBase* next_prod() const { return static_cast<ProducerBase*>(next); }
-		
+
 		inline size_t size_approx() const
 		{
 			auto tail = tailIndex.load(std::memory_order_relaxed);
 			auto head = headIndex.load(std::memory_order_relaxed);
 			return details::circular_less_than(head, tail) ? static_cast<size_t>(tail - head) : 0;
 		}
-		
+
 		inline index_t getTail() const { return tailIndex.load(std::memory_order_relaxed); }
 	protected:
 		std::atomic<index_t> tailIndex;		// Where to enqueue to next
 		std::atomic<index_t> headIndex;		// Where to dequeue from next
-		
+
 		std::atomic<index_t> dequeueOptimisticCount;
 		std::atomic<index_t> dequeueOvercommit;
-		
+
 		Block* tailBlock;
-		
+
 	public:
 		bool isExplicit;
 		ConcurrentQueue* parent;
-		
+
 	protected:
-#if MCDBGQ_TRACKMEM
+#ifdef MCDBGQ_TRACKMEM
 		friend struct MemStats;
 #endif
 	};
-	
-	
+
+
 	///////////////////////////
 	// Explicit queue
 	///////////////////////////
-		
+
 	struct ExplicitProducer : public ProducerBase
 	{
-		explicit ExplicitProducer(ConcurrentQueue* parent) :
-			ProducerBase(parent, true),
+		explicit ExplicitProducer(ConcurrentQueue* parent_) :
+			ProducerBase(parent_, true),
 			blockIndex(nullptr),
 			pr_blockIndexSlotsUsed(0),
 			pr_blockIndexSize(EXPLICIT_INITIAL_INDEX_SIZE >> 1),
@@ -1706,14 +1757,14 @@ private:
 			pr_blockIndexEntries(nullptr),
 			pr_blockIndexRaw(nullptr)
 		{
-			size_t poolBasedIndexSize = details::ceil_to_pow_2(parent->initialBlockPoolSize) >> 1;
+			size_t poolBasedIndexSize = details::ceil_to_pow_2(parent_->initialBlockPoolSize) >> 1;
 			if (poolBasedIndexSize > pr_blockIndexSize) {
 				pr_blockIndexSize = poolBasedIndexSize;
 			}
-			
+
 			new_block_index(0);		// This creates an index with double the number of current entries, i.e. EXPLICIT_INITIAL_INDEX_SIZE
 		}
-		
+
 		~ExplicitProducer()
 		{
 			// Destruct any elements not yet dequeued.
@@ -1732,7 +1783,7 @@ private:
 					assert(details::circular_less_than<index_t>(pr_blockIndexEntries[i].base, this->headIndex.load(std::memory_order_relaxed)));
 					halfDequeuedBlock = pr_blockIndexEntries[i].block;
 				}
-				
+
 				// Start at the head block (note the first line in the loop gives us the head from the tail on the first iteration)
 				auto block = this->tailBlock;
 				do {
@@ -1740,12 +1791,12 @@ private:
 					if (block->ConcurrentQueue::Block::template is_empty<explicit_context>()) {
 						continue;
 					}
-					
+
 					size_t i = 0;	// Offset into block
 					if (block == halfDequeuedBlock) {
 						i = static_cast<size_t>(this->headIndex.load(std::memory_order_relaxed) & static_cast<index_t>(BLOCK_SIZE - 1));
 					}
-					
+
 					// Walk through all the items in the block; if this is the tail block, we need to stop when we reach the tail index
 					auto lastValidIndex = (this->tailIndex.load(std::memory_order_relaxed) & static_cast<index_t>(BLOCK_SIZE - 1)) == 0 ? BLOCK_SIZE : static_cast<size_t>(this->tailIndex.load(std::memory_order_relaxed) & static_cast<index_t>(BLOCK_SIZE - 1));
 					while (i != BLOCK_SIZE && (block != this->tailBlock || i != lastValidIndex)) {
@@ -1753,7 +1804,7 @@ private:
 					}
 				} while (block != this->tailBlock);
 			}
-			
+
 			// Destroy all blocks that we own
 			if (this->tailBlock != nullptr) {
 				auto block = this->tailBlock;
@@ -1768,7 +1819,7 @@ private:
 					block = nextBlock;
 				} while (block != this->tailBlock);
 			}
-			
+
 			// Destroy the block indices
 			auto header = static_cast<BlockIndexHeader*>(pr_blockIndexRaw);
 			while (header != nullptr) {
@@ -1778,7 +1829,7 @@ private:
 				header = prev;
 			}
 		}
-		
+
 		template<AllocationMode allocMode, typename U>
 		inline bool enqueue(U&& element)
 		{
@@ -1789,10 +1840,10 @@ private:
 				auto startBlock = this->tailBlock;
 				auto originalBlockIndexSlotsUsed = pr_blockIndexSlotsUsed;
 				if (this->tailBlock != nullptr && this->tailBlock->next->ConcurrentQueue::Block::template is_empty<explicit_context>()) {
-					// We can re-use the block ahead of us, it's empty!					
+					// We can re-use the block ahead of us, it's empty!
 					this->tailBlock = this->tailBlock->next;
 					this->tailBlock->ConcurrentQueue::Block::template reset_empty<explicit_context>();
-					
+
 					// We'll put the block on the block index (guaranteed to be room since we're conceptually removing the
 					// last block from it first -- except instead of removing then adding, we can just overwrite).
 					// Note that there must be a valid block index here, since even if allocation failed in the ctor,
@@ -1817,18 +1868,21 @@ private:
 						// Hmm, the circular block index is already full -- we'll need
 						// to allocate a new index. Note pr_blockIndexRaw can only be nullptr if
 						// the initial allocation failed in the constructor.
-						
-						if (allocMode == CannotAlloc || !new_block_index(pr_blockIndexSlotsUsed)) {
+
+						MOODYCAMEL_CONSTEXPR_IF (allocMode == CannotAlloc) {
+							return false;
+						}
+						else if (!new_block_index(pr_blockIndexSlotsUsed)) {
 							return false;
 						}
 					}
-					
+
 					// Insert a new block in the circular linked list
 					auto newBlock = this->parent->ConcurrentQueue::template requisition_block<allocMode>();
 					if (newBlock == nullptr) {
 						return false;
 					}
-#if MCDBGQ_TRACKMEM
+#ifdef MCDBGQ_TRACKMEM
 					newBlock->owner = this;
 #endif
 					newBlock->ConcurrentQueue::Block::template reset_empty<explicit_context>();
@@ -1842,8 +1896,8 @@ private:
 					this->tailBlock = newBlock;
 					++pr_blockIndexSlotsUsed;
 				}
-				
-				if (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (nullptr) T(std::forward<U>(element)))) {
+
+				MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast<T*>(nullptr)) T(std::forward<U>(element)))) {
 					// The constructor may throw. We want the element not to appear in the queue in
 					// that case (without corrupting the queue):
 					MOODYCAMEL_TRY {
@@ -1861,27 +1915,27 @@ private:
 					(void)startBlock;
 					(void)originalBlockIndexSlotsUsed;
 				}
-				
+
 				// Add block to block index
 				auto& entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront];
 				entry.base = currentTailIndex;
 				entry.block = this->tailBlock;
 				blockIndex.load(std::memory_order_relaxed)->front.store(pr_blockIndexFront, std::memory_order_release);
 				pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1);
-				
-				if (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (nullptr) T(std::forward<U>(element)))) {
+
+				MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast<T*>(nullptr)) T(std::forward<U>(element)))) {
 					this->tailIndex.store(newTailIndex, std::memory_order_release);
 					return true;
 				}
 			}
-			
+
 			// Enqueue
 			new ((*this->tailBlock)[currentTailIndex]) T(std::forward<U>(element));
-			
+
 			this->tailIndex.store(newTailIndex, std::memory_order_release);
 			return true;
 		}
-		
+
 		template<typename U>
 		bool dequeue(U& element)
 		{
@@ -1889,10 +1943,10 @@ private:
 			auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed);
 			if (details::circular_less_than<index_t>(this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit, tail)) {
 				// Might be something to dequeue, let's give it a try
-				
+
 				// Note that this if is purely for performance purposes in the common case when the queue is
 				// empty and the values are eventually consistent -- we may enter here spuriously.
-				
+
 				// Note that whatever the values of overcommit and tail are, they are not going to change (unless we
 				// change them) and must be the same value at this point (inside the if) as when the if condition was
 				// evaluated.
@@ -1905,23 +1959,24 @@ private:
 				// unfortunately that can't be shown to be correct using only the C++11 standard.
 				// See http://stackoverflow.com/questions/18223161/what-are-the-c11-memory-ordering-guarantees-in-this-corner-case
 				std::atomic_thread_fence(std::memory_order_acquire);
-				
+
 				// Increment optimistic counter, then check if it went over the boundary
 				auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(1, std::memory_order_relaxed);
-				
+
 				// Note that since dequeueOvercommit must be <= dequeueOptimisticCount (because dequeueOvercommit is only ever
 				// incremented after dequeueOptimisticCount -- this is enforced in the `else` block below), and since we now
 				// have a version of dequeueOptimisticCount that is at least as recent as overcommit (due to the release upon
 				// incrementing dequeueOvercommit and the acquire above that synchronizes with it), overcommit <= myDequeueCount.
-				assert(overcommit <= myDequeueCount);
-				
+				// However, we can't assert this since both dequeueOptimisticCount and dequeueOvercommit may (independently)
+				// overflow; in such a case, though, the logic still holds since the difference between the two is maintained.
+
 				// Note that we reload tail here in case it changed; it will be the same value as before or greater, since
 				// this load is sequenced after (happens after) the earlier load above. This is supported by read-read
 				// coherency (as defined in the standard), explained here: http://en.cppreference.com/w/cpp/atomic/memory_order
 				tail = this->tailIndex.load(std::memory_order_acquire);
 				if ((details::likely)(details::circular_less_than<index_t>(myDequeueCount - overcommit, tail))) {
 					// Guaranteed to be at least one element to dequeue!
-					
+
 					// Get the index. Note that since there's guaranteed to be at least one element, this
 					// will never exceed tail. We need to do an acquire-release fence here since it's possible
 					// that whatever condition got us to this point was for an earlier enqueued element (that
@@ -1931,13 +1986,13 @@ private:
 					// place with the more current condition (they must have acquired a tail that is at least
 					// as recent).
 					auto index = this->headIndex.fetch_add(1, std::memory_order_acq_rel);
-					
-					
+
+
 					// Determine which block the element is in
-					
+
 					auto localBlockIndex = blockIndex.load(std::memory_order_acquire);
 					auto localBlockIndexHead = localBlockIndex->front.load(std::memory_order_acquire);
-					
+
 					// We need to be careful here about subtracting and dividing because of index wrap-around.
 					// When an index wraps, we need to preserve the sign of the offset when dividing it by the
 					// block size (in order to get a correct signed block count offset in all cases):
@@ -1945,7 +2000,7 @@ private:
 					auto blockBaseIndex = index & ~static_cast<index_t>(BLOCK_SIZE - 1);
 					auto offset = static_cast<size_t>(static_cast<typename std::make_signed<index_t>::type>(blockBaseIndex - headBase) / BLOCK_SIZE);
 					auto block = localBlockIndex->entries[(localBlockIndexHead + offset) & (localBlockIndex->size - 1)].block;
-					
+
 					// Dequeue
 					auto& el = *((*block)[index]);
 					if (!MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, element = std::move(el))) {
@@ -1954,22 +2009,22 @@ private:
 						struct Guard {
 							Block* block;
 							index_t index;
-							
+
 							~Guard()
 							{
 								(*block)[index]->~T();
 								block->ConcurrentQueue::Block::template set_empty<explicit_context>(index);
 							}
 						} guard = { block, index };
-						
-						element = std::move(el);
+
+						element = std::move(el); // NOLINT
 					}
 					else {
-						element = std::move(el);
-						el.~T();
+						element = std::move(el); // NOLINT
+						el.~T(); // NOLINT
 						block->ConcurrentQueue::Block::template set_empty<explicit_context>(index);
 					}
-					
+
 					return true;
 				}
 				else {
@@ -1977,12 +2032,12 @@ private:
 					this->dequeueOvercommit.fetch_add(1, std::memory_order_release);		// Release so that the fetch_add on dequeueOptimisticCount is guaranteed to happen before this write
 				}
 			}
-		
+
 			return false;
 		}
-		
+
 		template<AllocationMode allocMode, typename It>
-		bool enqueue_bulk(It itemFirst, size_t count)
+		bool MOODYCAMEL_NO_TSAN enqueue_bulk(It itemFirst, size_t count)
 		{
 			// First, we need to make sure we have enough room to enqueue all of the elements;
 			// this means pre-allocating blocks and putting them in the block index (but only if
@@ -1991,9 +2046,9 @@ private:
 			auto startBlock = this->tailBlock;
 			auto originalBlockIndexFront = pr_blockIndexFront;
 			auto originalBlockIndexSlotsUsed = pr_blockIndexSlotsUsed;
-			
+
 			Block* firstAllocatedBlock = nullptr;
-			
+
 			// Figure out how many blocks we'll need to allocate, and do so
 			size_t blockBaseDiff = ((startTailIndex + count - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1)) - ((startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1));
 			index_t currentTailIndex = (startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1);
@@ -2002,39 +2057,46 @@ private:
 				while (blockBaseDiff > 0 && this->tailBlock != nullptr && this->tailBlock->next != firstAllocatedBlock && this->tailBlock->next->ConcurrentQueue::Block::template is_empty<explicit_context>()) {
 					blockBaseDiff -= static_cast<index_t>(BLOCK_SIZE);
 					currentTailIndex += static_cast<index_t>(BLOCK_SIZE);
-					
+
 					this->tailBlock = this->tailBlock->next;
 					firstAllocatedBlock = firstAllocatedBlock == nullptr ? this->tailBlock : firstAllocatedBlock;
-					
+
 					auto& entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront];
 					entry.base = currentTailIndex;
 					entry.block = this->tailBlock;
 					pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1);
 				}
-				
+
 				// Now allocate as many blocks as necessary from the block pool
 				while (blockBaseDiff > 0) {
 					blockBaseDiff -= static_cast<index_t>(BLOCK_SIZE);
 					currentTailIndex += static_cast<index_t>(BLOCK_SIZE);
-					
+
 					auto head = this->headIndex.load(std::memory_order_relaxed);
 					assert(!details::circular_less_than<index_t>(currentTailIndex, head));
 					bool full = !details::circular_less_than<index_t>(head, currentTailIndex + BLOCK_SIZE) || (MAX_SUBQUEUE_SIZE != details::const_numeric_max<size_t>::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head));
 					if (pr_blockIndexRaw == nullptr || pr_blockIndexSlotsUsed == pr_blockIndexSize || full) {
-						if (allocMode == CannotAlloc || full || !new_block_index(originalBlockIndexSlotsUsed)) {
+						MOODYCAMEL_CONSTEXPR_IF (allocMode == CannotAlloc) {
 							// Failed to allocate, undo changes (but keep injected blocks)
 							pr_blockIndexFront = originalBlockIndexFront;
 							pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed;
 							this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock;
 							return false;
 						}
-						
+						else if (full || !new_block_index(originalBlockIndexSlotsUsed)) {
+							// Failed to allocate, undo changes (but keep injected blocks)
+							pr_blockIndexFront = originalBlockIndexFront;
+							pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed;
+							this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock;
+							return false;
+						}
+
 						// pr_blockIndexFront is updated inside new_block_index, so we need to
 						// update our fallback value too (since we keep the new index even if we
 						// later fail)
 						originalBlockIndexFront = originalBlockIndexSlotsUsed;
 					}
-					
+
 					// Insert a new block in the circular linked list
 					auto newBlock = this->parent->ConcurrentQueue::template requisition_block<allocMode>();
 					if (newBlock == nullptr) {
@@ -2043,8 +2105,8 @@ private:
 						this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock;
 						return false;
 					}
-					
-#if MCDBGQ_TRACKMEM
+
+#ifdef MCDBGQ_TRACKMEM
 					newBlock->owner = this;
 #endif
 					newBlock->ConcurrentQueue::Block::template set_all_empty<explicit_context>();
@@ -2057,15 +2119,15 @@ private:
 					}
 					this->tailBlock = newBlock;
 					firstAllocatedBlock = firstAllocatedBlock == nullptr ? this->tailBlock : firstAllocatedBlock;
-					
+
 					++pr_blockIndexSlotsUsed;
-					
+
 					auto& entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront];
 					entry.base = currentTailIndex;
 					entry.block = this->tailBlock;
 					pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1);
 				}
-				
+
 				// Excellent, all allocations succeeded. Reset each block's emptiness before we fill them up, and
 				// publish the new block index front
 				auto block = firstAllocatedBlock;
@@ -2076,12 +2138,12 @@ private:
 					}
 					block = block->next;
 				}
-				
-				if (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (nullptr) T(details::deref_noexcept(itemFirst)))) {
+
+				MOODYCAMEL_CONSTEXPR_IF (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast<T*>(nullptr)) T(details::deref_noexcept(itemFirst)))) {
 					blockIndex.load(std::memory_order_relaxed)->front.store((pr_blockIndexFront - 1) & (pr_blockIndexSize - 1), std::memory_order_release);
 				}
 			}
-			
+
 			// Enqueue, one block at a time
 			index_t newTailIndex = startTailIndex + static_cast<index_t>(count);
 			currentTailIndex = startTailIndex;
@@ -2092,11 +2154,11 @@ private:
 				this->tailBlock = firstAllocatedBlock;
 			}
 			while (true) {
-				auto stopIndex = (currentTailIndex & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);
+				index_t stopIndex = (currentTailIndex & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);
 				if (details::circular_less_than<index_t>(newTailIndex, stopIndex)) {
 					stopIndex = newTailIndex;
 				}
-				if (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (nullptr) T(details::deref_noexcept(itemFirst)))) {
+				MOODYCAMEL_CONSTEXPR_IF (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast<T*>(nullptr)) T(details::deref_noexcept(itemFirst)))) {
 					while (currentTailIndex != stopIndex) {
 						new ((*this->tailBlock)[currentTailIndex++]) T(*itemFirst++);
 					}
@@ -2111,7 +2173,7 @@ private:
 							// may only define a (noexcept) move constructor, and so calls to the
 							// cctor will not compile, even if they are in an if branch that will never
 							// be executed
-							new ((*this->tailBlock)[currentTailIndex]) T(details::nomove_if<(bool)!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (nullptr) T(details::deref_noexcept(itemFirst)))>::eval(*itemFirst));
+							new ((*this->tailBlock)[currentTailIndex]) T(details::nomove_if<!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast<T*>(nullptr)) T(details::deref_noexcept(itemFirst)))>::eval(*itemFirst));
 							++currentTailIndex;
 							++itemFirst;
 						}
@@ -2122,11 +2184,11 @@ private:
 						// any allocated blocks in our linked list for later, though).
 						auto constructedStopIndex = currentTailIndex;
 						auto lastBlockEnqueued = this->tailBlock;
-						
+
 						pr_blockIndexFront = originalBlockIndexFront;
 						pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed;
 						this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock;
-						
+
 						if (!details::is_trivially_destructible<T>::value) {
 							auto block = startBlock;
 							if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0) {
@@ -2150,22 +2212,23 @@ private:
 						MOODYCAMEL_RETHROW;
 					}
 				}
-				
+
 				if (this->tailBlock == endBlock) {
 					assert(currentTailIndex == newTailIndex);
 					break;
 				}
 				this->tailBlock = this->tailBlock->next;
 			}
-			
-			if (!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (nullptr) T(details::deref_noexcept(itemFirst))) && firstAllocatedBlock != nullptr) {
-				blockIndex.load(std::memory_order_relaxed)->front.store((pr_blockIndexFront - 1) & (pr_blockIndexSize - 1), std::memory_order_release);
+
+			MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast<T*>(nullptr)) T(details::deref_noexcept(itemFirst)))) {
+				if (firstAllocatedBlock != nullptr)
+					blockIndex.load(std::memory_order_relaxed)->front.store((pr_blockIndexFront - 1) & (pr_blockIndexSize - 1), std::memory_order_release);
 			}
-			
+
 			this->tailIndex.store(newTailIndex, std::memory_order_release);
 			return true;
 		}
-		
+
 		template<typename It>
 		size_t dequeue_bulk(It& itemFirst, size_t max)
 		{
@@ -2175,10 +2238,9 @@ private:
 			if (details::circular_less_than<size_t>(0, desiredCount)) {
 				desiredCount = desiredCount < max ? desiredCount : max;
 				std::atomic_thread_fence(std::memory_order_acquire);
-				
+
 				auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(desiredCount, std::memory_order_relaxed);
-				assert(overcommit <= myDequeueCount);
-				
+
 				tail = this->tailIndex.load(std::memory_order_acquire);
 				auto actualCount = static_cast<size_t>(tail - (myDequeueCount - overcommit));
 				if (details::circular_less_than<size_t>(0, actualCount)) {
@@ -2186,25 +2248,25 @@ private:
 					if (actualCount < desiredCount) {
 						this->dequeueOvercommit.fetch_add(desiredCount - actualCount, std::memory_order_release);
 					}
-					
+
 					// Get the first index. Note that since there's guaranteed to be at least actualCount elements, this
 					// will never exceed tail.
 					auto firstIndex = this->headIndex.fetch_add(actualCount, std::memory_order_acq_rel);
-					
+
 					// Determine which block the first element is in
 					auto localBlockIndex = blockIndex.load(std::memory_order_acquire);
 					auto localBlockIndexHead = localBlockIndex->front.load(std::memory_order_acquire);
-					
+
 					auto headBase = localBlockIndex->entries[localBlockIndexHead].base;
 					auto firstBlockBaseIndex = firstIndex & ~static_cast<index_t>(BLOCK_SIZE - 1);
 					auto offset = static_cast<size_t>(static_cast<typename std::make_signed<index_t>::type>(firstBlockBaseIndex - headBase) / BLOCK_SIZE);
 					auto indexIndex = (localBlockIndexHead + offset) & (localBlockIndex->size - 1);
-					
+
 					// Iterate the blocks and dequeue
 					auto index = firstIndex;
 					do {
 						auto firstIndexInBlock = index;
-						auto endIndex = (index & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);
+						index_t endIndex = (index & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);
 						endIndex = details::circular_less_than<index_t>(firstIndex + static_cast<index_t>(actualCount), endIndex) ? firstIndex + static_cast<index_t>(actualCount) : endIndex;
 						auto block = localBlockIndex->entries[indexIndex].block;
 						if (MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, details::deref_noexcept(itemFirst) = std::move((*(*block)[index])))) {
@@ -2236,19 +2298,19 @@ private:
 									}
 									block->ConcurrentQueue::Block::template set_many_empty<explicit_context>(firstIndexInBlock, static_cast<size_t>(endIndex - firstIndexInBlock));
 									indexIndex = (indexIndex + 1) & (localBlockIndex->size - 1);
-									
+
 									firstIndexInBlock = index;
 									endIndex = (index & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);
 									endIndex = details::circular_less_than<index_t>(firstIndex + static_cast<index_t>(actualCount), endIndex) ? firstIndex + static_cast<index_t>(actualCount) : endIndex;
 								} while (index != firstIndex + actualCount);
-								
+
 								MOODYCAMEL_RETHROW;
 							}
 						}
 						block->ConcurrentQueue::Block::template set_many_empty<explicit_context>(firstIndexInBlock, static_cast<size_t>(endIndex - firstIndexInBlock));
 						indexIndex = (indexIndex + 1) & (localBlockIndex->size - 1);
 					} while (index != firstIndex + actualCount);
-					
+
 					return actualCount;
 				}
 				else {
@@ -2256,17 +2318,17 @@ private:
 					this->dequeueOvercommit.fetch_add(desiredCount, std::memory_order_release);
 				}
 			}
-			
+
 			return 0;
 		}
-		
+
 	private:
 		struct BlockIndexEntry
 		{
 			index_t base;
 			Block* block;
 		};
-		
+
 		struct BlockIndexHeader
 		{
 			size_t size;
@@ -2274,12 +2336,12 @@ private:
 			BlockIndexEntry* entries;
 			void* prev;
 		};
-		
-		
+
+
 		bool new_block_index(size_t numberOfFilledSlotsToExpose)
 		{
 			auto prevBlockSizeMask = pr_blockIndexSize - 1;
-			
+
 			// Create the new block
 			pr_blockIndexSize <<= 1;
 			auto newRawPtr = static_cast<char*>((Traits::malloc)(sizeof(BlockIndexHeader) + std::alignment_of<BlockIndexEntry>::value - 1 + sizeof(BlockIndexEntry) * pr_blockIndexSize));
@@ -2287,9 +2349,9 @@ private:
 				pr_blockIndexSize >>= 1;		// Reset to allow graceful retry
 				return false;
 			}
-			
+
 			auto newBlockIndexEntries = reinterpret_cast<BlockIndexEntry*>(details::align_for<BlockIndexEntry>(newRawPtr + sizeof(BlockIndexHeader)));
-			
+
 			// Copy in all the old indices, if any
 			size_t j = 0;
 			if (pr_blockIndexSlotsUsed != 0) {
@@ -2299,72 +2361,72 @@ private:
 					i = (i + 1) & prevBlockSizeMask;
 				} while (i != pr_blockIndexFront);
 			}
-			
+
 			// Update everything
 			auto header = new (newRawPtr) BlockIndexHeader;
 			header->size = pr_blockIndexSize;
 			header->front.store(numberOfFilledSlotsToExpose - 1, std::memory_order_relaxed);
 			header->entries = newBlockIndexEntries;
 			header->prev = pr_blockIndexRaw;		// we link the new block to the old one so we can free it later
-			
+
 			pr_blockIndexFront = j;
 			pr_blockIndexEntries = newBlockIndexEntries;
 			pr_blockIndexRaw = newRawPtr;
 			blockIndex.store(header, std::memory_order_release);
-			
+
 			return true;
 		}
-		
+
 	private:
 		std::atomic<BlockIndexHeader*> blockIndex;
-		
+
 		// To be used by producer only -- consumer must use the ones in referenced by blockIndex
 		size_t pr_blockIndexSlotsUsed;
 		size_t pr_blockIndexSize;
 		size_t pr_blockIndexFront;		// Next slot (not current)
 		BlockIndexEntry* pr_blockIndexEntries;
 		void* pr_blockIndexRaw;
-		
+
 #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
 	public:
 		ExplicitProducer* nextExplicitProducer;
 	private:
 #endif
-		
-#if MCDBGQ_TRACKMEM
+
+#ifdef MCDBGQ_TRACKMEM
 		friend struct MemStats;
 #endif
 	};
-	
-	
+
+
 	//////////////////////////////////
 	// Implicit queue
 	//////////////////////////////////
-	
+
 	struct ImplicitProducer : public ProducerBase
-	{			
-		ImplicitProducer(ConcurrentQueue* parent) :
-			ProducerBase(parent, false),
+	{
+		ImplicitProducer(ConcurrentQueue* parent_) :
+			ProducerBase(parent_, false),
 			nextBlockIndexCapacity(IMPLICIT_INITIAL_INDEX_SIZE),
 			blockIndex(nullptr)
 		{
 			new_block_index();
 		}
-		
+
 		~ImplicitProducer()
 		{
 			// Note that since we're in the destructor we can assume that all enqueue/dequeue operations
 			// completed already; this means that all undequeued elements are placed contiguously across
 			// contiguous blocks, and that only the first and last remaining blocks can be only partially
 			// empty (all other remaining blocks must be completely full).
-			
+
 #ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
 			// Unregister ourselves for thread termination notification
 			if (!this->inactive.load(std::memory_order_relaxed)) {
 				details::ThreadExitNotifier::unsubscribe(&threadExitListener);
 			}
 #endif
-			
+
 			// Destroy all remaining elements!
 			auto tail = this->tailIndex.load(std::memory_order_relaxed);
 			auto index = this->headIndex.load(std::memory_order_relaxed);
@@ -2377,10 +2439,10 @@ private:
 						// Free the old block
 						this->parent->add_block_to_free_list(block);
 					}
-					
+
 					block = get_block_index_entry_for_index(index)->value.load(std::memory_order_relaxed);
 				}
-				
+
 				((*block)[index])->~T();
 				++index;
 			}
@@ -2390,7 +2452,7 @@ private:
 			if (this->tailBlock != nullptr && (forceFreeLastBlock || (tail & static_cast<index_t>(BLOCK_SIZE - 1)) != 0)) {
 				this->parent->add_block_to_free_list(this->tailBlock);
 			}
-			
+
 			// Destroy block index
 			auto localBlockIndex = blockIndex.load(std::memory_order_relaxed);
 			if (localBlockIndex != nullptr) {
@@ -2405,7 +2467,7 @@ private:
 				} while (localBlockIndex != nullptr);
 			}
 		}
-		
+
 		template<AllocationMode allocMode, typename U>
 		inline bool enqueue(U&& element)
 		{
@@ -2418,7 +2480,7 @@ private:
 				if (!details::circular_less_than<index_t>(head, currentTailIndex + BLOCK_SIZE) || (MAX_SUBQUEUE_SIZE != details::const_numeric_max<size_t>::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head))) {
 					return false;
 				}
-#if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
+#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
 				debug::DebugLock lock(mutex);
 #endif
 				// Find out where we'll be inserting this block in the block index
@@ -2426,7 +2488,7 @@ private:
 				if (!insert_block_index_entry<allocMode>(idxEntry, currentTailIndex)) {
 					return false;
 				}
-				
+
 				// Get ahold of a new block
 				auto newBlock = this->parent->ConcurrentQueue::template requisition_block<allocMode>();
 				if (newBlock == nullptr) {
@@ -2434,12 +2496,12 @@ private:
 					idxEntry->value.store(nullptr, std::memory_order_relaxed);
 					return false;
 				}
-#if MCDBGQ_TRACKMEM
+#ifdef MCDBGQ_TRACKMEM
 				newBlock->owner = this;
 #endif
 				newBlock->ConcurrentQueue::Block::template reset_empty<implicit_context>();
-				
-				if (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (nullptr) T(std::forward<U>(element)))) {
+
+				MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast<T*>(nullptr)) T(std::forward<U>(element)))) {
 					// May throw, try to insert now before we publish the fact that we have this new block
 					MOODYCAMEL_TRY {
 						new ((*newBlock)[currentTailIndex]) T(std::forward<U>(element));
@@ -2451,25 +2513,25 @@ private:
 						MOODYCAMEL_RETHROW;
 					}
 				}
-				
+
 				// Insert the new block into the index
 				idxEntry->value.store(newBlock, std::memory_order_relaxed);
-				
+
 				this->tailBlock = newBlock;
-				
-				if (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (nullptr) T(std::forward<U>(element)))) {
+
+				MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast<T*>(nullptr)) T(std::forward<U>(element)))) {
 					this->tailIndex.store(newTailIndex, std::memory_order_release);
 					return true;
 				}
 			}
-			
+
 			// Enqueue
 			new ((*this->tailBlock)[currentTailIndex]) T(std::forward<U>(element));
-			
+
 			this->tailIndex.store(newTailIndex, std::memory_order_release);
 			return true;
 		}
-		
+
 		template<typename U>
 		bool dequeue(U& element)
 		{
@@ -2478,22 +2540,21 @@ private:
 			index_t overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed);
 			if (details::circular_less_than<index_t>(this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit, tail)) {
 				std::atomic_thread_fence(std::memory_order_acquire);
-				
+
 				index_t myDequeueCount = this->dequeueOptimisticCount.fetch_add(1, std::memory_order_relaxed);
-				assert(overcommit <= myDequeueCount);
 				tail = this->tailIndex.load(std::memory_order_acquire);
 				if ((details::likely)(details::circular_less_than<index_t>(myDequeueCount - overcommit, tail))) {
 					index_t index = this->headIndex.fetch_add(1, std::memory_order_acq_rel);
-					
+
 					// Determine which block the element is in
 					auto entry = get_block_index_entry_for_index(index);
-					
+
 					// Dequeue
 					auto block = entry->value.load(std::memory_order_relaxed);
 					auto& el = *((*block)[index]);
-					
+
 					if (!MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, element = std::move(el))) {
-#if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
+#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
 						// Note: Acquiring the mutex with every dequeue instead of only when a block
 						// is released is very sub-optimal, but it is, after all, purely debug code.
 						debug::DebugLock lock(producer->mutex);
@@ -2503,7 +2564,7 @@ private:
 							index_t index;
 							BlockIndexEntry* entry;
 							ConcurrentQueue* parent;
-							
+
 							~Guard()
 							{
 								(*block)[index]->~T();
@@ -2513,16 +2574,16 @@ private:
 								}
 							}
 						} guard = { block, index, entry, this->parent };
-						
-						element = std::move(el);
+
+						element = std::move(el); // NOLINT
 					}
 					else {
-						element = std::move(el);
-						el.~T();
-					
+						element = std::move(el); // NOLINT
+						el.~T(); // NOLINT
+
 						if (block->ConcurrentQueue::Block::template set_empty<implicit_context>(index)) {
 							{
-#if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
+#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
 								debug::DebugLock lock(mutex);
 #endif
 								// Add the block back into the global free pool (and remove from block index)
@@ -2531,45 +2592,49 @@ private:
 							this->parent->add_block_to_free_list(block);		// releases the above store
 						}
 					}
-					
+
 					return true;
 				}
 				else {
 					this->dequeueOvercommit.fetch_add(1, std::memory_order_release);
 				}
 			}
-		
+
 			return false;
 		}
-		
+
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable: 4706)  // assignment within conditional expression
+#endif
 		template<AllocationMode allocMode, typename It>
 		bool enqueue_bulk(It itemFirst, size_t count)
 		{
 			// First, we need to make sure we have enough room to enqueue all of the elements;
 			// this means pre-allocating blocks and putting them in the block index (but only if
 			// all the allocations succeeded).
-			
+
 			// Note that the tailBlock we start off with may not be owned by us any more;
 			// this happens if it was filled up exactly to the top (setting tailIndex to
 			// the first index of the next block which is not yet allocated), then dequeued
 			// completely (putting it on the free list) before we enqueue again.
-			
+
 			index_t startTailIndex = this->tailIndex.load(std::memory_order_relaxed);
 			auto startBlock = this->tailBlock;
 			Block* firstAllocatedBlock = nullptr;
 			auto endBlock = this->tailBlock;
-			
+
 			// Figure out how many blocks we'll need to allocate, and do so
 			size_t blockBaseDiff = ((startTailIndex + count - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1)) - ((startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1));
 			index_t currentTailIndex = (startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1);
 			if (blockBaseDiff > 0) {
-#if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
+#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
 				debug::DebugLock lock(mutex);
 #endif
 				do {
 					blockBaseDiff -= static_cast<index_t>(BLOCK_SIZE);
 					currentTailIndex += static_cast<index_t>(BLOCK_SIZE);
-					
+
 					// Find out where we'll be inserting this block in the block index
 					BlockIndexEntry* idxEntry = nullptr;  // initialization here unnecessary but compiler can't always tell
 					Block* newBlock;
@@ -2577,6 +2642,7 @@ private:
 					auto head = this->headIndex.load(std::memory_order_relaxed);
 					assert(!details::circular_less_than<index_t>(currentTailIndex, head));
 					bool full = !details::circular_less_than<index_t>(head, currentTailIndex + BLOCK_SIZE) || (MAX_SUBQUEUE_SIZE != details::const_numeric_max<size_t>::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head));
+
 					if (full || !(indexInserted = insert_block_index_entry<allocMode>(idxEntry, currentTailIndex)) || (newBlock = this->parent->ConcurrentQueue::template requisition_block<allocMode>()) == nullptr) {
 						// Index allocation or block allocation failed; revert any other allocations
 						// and index insertions done so far for this operation
@@ -2593,19 +2659,19 @@ private:
 						}
 						this->parent->add_blocks_to_free_list(firstAllocatedBlock);
 						this->tailBlock = startBlock;
-						
+
 						return false;
 					}
-					
-#if MCDBGQ_TRACKMEM
+
+#ifdef MCDBGQ_TRACKMEM
 					newBlock->owner = this;
 #endif
 					newBlock->ConcurrentQueue::Block::template reset_empty<implicit_context>();
 					newBlock->next = nullptr;
-					
+
 					// Insert the new block into the index
 					idxEntry->value.store(newBlock, std::memory_order_relaxed);
-					
+
 					// Store the chain of blocks so that we can undo if later allocations fail,
 					// and so that we can find the blocks when we do the actual enqueueing
 					if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) != 0 || firstAllocatedBlock != nullptr) {
@@ -2617,7 +2683,7 @@ private:
 					firstAllocatedBlock = firstAllocatedBlock == nullptr ? newBlock : firstAllocatedBlock;
 				} while (blockBaseDiff > 0);
 			}
-			
+
 			// Enqueue, one block at a time
 			index_t newTailIndex = startTailIndex + static_cast<index_t>(count);
 			currentTailIndex = startTailIndex;
@@ -2627,11 +2693,11 @@ private:
 				this->tailBlock = firstAllocatedBlock;
 			}
 			while (true) {
-				auto stopIndex = (currentTailIndex & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);
+				index_t stopIndex = (currentTailIndex & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);
 				if (details::circular_less_than<index_t>(newTailIndex, stopIndex)) {
 					stopIndex = newTailIndex;
 				}
-				if (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (nullptr) T(details::deref_noexcept(itemFirst)))) {
+				MOODYCAMEL_CONSTEXPR_IF (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast<T*>(nullptr)) T(details::deref_noexcept(itemFirst)))) {
 					while (currentTailIndex != stopIndex) {
 						new ((*this->tailBlock)[currentTailIndex++]) T(*itemFirst++);
 					}
@@ -2639,7 +2705,7 @@ private:
 				else {
 					MOODYCAMEL_TRY {
 						while (currentTailIndex != stopIndex) {
-							new ((*this->tailBlock)[currentTailIndex]) T(details::nomove_if<(bool)!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (nullptr) T(details::deref_noexcept(itemFirst)))>::eval(*itemFirst));
+							new ((*this->tailBlock)[currentTailIndex]) T(details::nomove_if<!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast<T*>(nullptr)) T(details::deref_noexcept(itemFirst)))>::eval(*itemFirst));
 							++currentTailIndex;
 							++itemFirst;
 						}
@@ -2647,7 +2713,7 @@ private:
 					MOODYCAMEL_CATCH (...) {
 						auto constructedStopIndex = currentTailIndex;
 						auto lastBlockEnqueued = this->tailBlock;
-						
+
 						if (!details::is_trivially_destructible<T>::value) {
 							auto block = startBlock;
 							if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0) {
@@ -2668,7 +2734,7 @@ private:
 								block = block->next;
 							}
 						}
-						
+
 						currentTailIndex = (startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1);
 						for (auto block = firstAllocatedBlock; block != nullptr; block = block->next) {
 							currentTailIndex += static_cast<index_t>(BLOCK_SIZE);
@@ -2681,7 +2747,7 @@ private:
 						MOODYCAMEL_RETHROW;
 					}
 				}
-				
+
 				if (this->tailBlock == endBlock) {
 					assert(currentTailIndex == newTailIndex);
 					break;
@@ -2691,7 +2757,10 @@ private:
 			this->tailIndex.store(newTailIndex, std::memory_order_release);
 			return true;
 		}
-		
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+
 		template<typename It>
 		size_t dequeue_bulk(It& itemFirst, size_t max)
 		{
@@ -2701,10 +2770,9 @@ private:
 			if (details::circular_less_than<size_t>(0, desiredCount)) {
 				desiredCount = desiredCount < max ? desiredCount : max;
 				std::atomic_thread_fence(std::memory_order_acquire);
-				
+
 				auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(desiredCount, std::memory_order_relaxed);
-				assert(overcommit <= myDequeueCount);
-				
+
 				tail = this->tailIndex.load(std::memory_order_acquire);
 				auto actualCount = static_cast<size_t>(tail - (myDequeueCount - overcommit));
 				if (details::circular_less_than<size_t>(0, actualCount)) {
@@ -2712,20 +2780,20 @@ private:
 					if (actualCount < desiredCount) {
 						this->dequeueOvercommit.fetch_add(desiredCount - actualCount, std::memory_order_release);
 					}
-					
+
 					// Get the first index. Note that since there's guaranteed to be at least actualCount elements, this
 					// will never exceed tail.
 					auto firstIndex = this->headIndex.fetch_add(actualCount, std::memory_order_acq_rel);
-					
+
 					// Iterate the blocks and dequeue
 					auto index = firstIndex;
 					BlockIndexHeader* localBlockIndex;
 					auto indexIndex = get_block_index_index_for_index(index, localBlockIndex);
 					do {
 						auto blockStartIndex = index;
-						auto endIndex = (index & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);
+						index_t endIndex = (index & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);
 						endIndex = details::circular_less_than<index_t>(firstIndex + static_cast<index_t>(actualCount), endIndex) ? firstIndex + static_cast<index_t>(actualCount) : endIndex;
-						
+
 						auto entry = localBlockIndex->index[indexIndex];
 						auto block = entry->value.load(std::memory_order_relaxed);
 						if (MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, details::deref_noexcept(itemFirst) = std::move((*(*block)[index])))) {
@@ -2753,27 +2821,27 @@ private:
 									while (index != endIndex) {
 										(*block)[index++]->~T();
 									}
-									
+
 									if (block->ConcurrentQueue::Block::template set_many_empty<implicit_context>(blockStartIndex, static_cast<size_t>(endIndex - blockStartIndex))) {
-#if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
+#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
 										debug::DebugLock lock(mutex);
 #endif
 										entry->value.store(nullptr, std::memory_order_relaxed);
 										this->parent->add_block_to_free_list(block);
 									}
 									indexIndex = (indexIndex + 1) & (localBlockIndex->capacity - 1);
-									
+
 									blockStartIndex = index;
 									endIndex = (index & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);
 									endIndex = details::circular_less_than<index_t>(firstIndex + static_cast<index_t>(actualCount), endIndex) ? firstIndex + static_cast<index_t>(actualCount) : endIndex;
 								} while (index != firstIndex + actualCount);
-								
+
 								MOODYCAMEL_RETHROW;
 							}
 						}
 						if (block->ConcurrentQueue::Block::template set_many_empty<implicit_context>(blockStartIndex, static_cast<size_t>(endIndex - blockStartIndex))) {
 							{
-#if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
+#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
 								debug::DebugLock lock(mutex);
 #endif
 								// Note that the set_many_empty above did a release, meaning that anybody who acquires the block
@@ -2784,27 +2852,27 @@ private:
 						}
 						indexIndex = (indexIndex + 1) & (localBlockIndex->capacity - 1);
 					} while (index != firstIndex + actualCount);
-					
+
 					return actualCount;
 				}
 				else {
 					this->dequeueOvercommit.fetch_add(desiredCount, std::memory_order_release);
 				}
 			}
-			
+
 			return 0;
 		}
-		
+
 	private:
 		// The block size must be > 1, so any number with the low bit set is an invalid block base index
 		static const index_t INVALID_BLOCK_BASE = 1;
-		
+
 		struct BlockIndexEntry
 		{
 			std::atomic<index_t> key;
 			std::atomic<Block*> value;
 		};
-		
+
 		struct BlockIndexHeader
 		{
 			size_t capacity;
@@ -2813,7 +2881,7 @@ private:
 			BlockIndexEntry** index;
 			BlockIndexHeader* prev;
 		};
-		
+
 		template<AllocationMode allocMode>
 		inline bool insert_block_index_entry(BlockIndexEntry*& idxEntry, index_t blockStartIndex)
 		{
@@ -2821,45 +2889,50 @@ private:
 			if (localBlockIndex == nullptr) {
 				return false;  // this can happen if new_block_index failed in the constructor
 			}
-			auto newTail = (localBlockIndex->tail.load(std::memory_order_relaxed) + 1) & (localBlockIndex->capacity - 1);
+			size_t newTail = (localBlockIndex->tail.load(std::memory_order_relaxed) + 1) & (localBlockIndex->capacity - 1);
 			idxEntry = localBlockIndex->index[newTail];
 			if (idxEntry->key.load(std::memory_order_relaxed) == INVALID_BLOCK_BASE ||
 				idxEntry->value.load(std::memory_order_relaxed) == nullptr) {
-				
+
 				idxEntry->key.store(blockStartIndex, std::memory_order_relaxed);
 				localBlockIndex->tail.store(newTail, std::memory_order_release);
 				return true;
 			}
-			
+
 			// No room in the old block index, try to allocate another one!
-			if (allocMode == CannotAlloc || !new_block_index()) {
+			MOODYCAMEL_CONSTEXPR_IF (allocMode == CannotAlloc) {
 				return false;
 			}
-			localBlockIndex = blockIndex.load(std::memory_order_relaxed);
-			newTail = (localBlockIndex->tail.load(std::memory_order_relaxed) + 1) & (localBlockIndex->capacity - 1);
-			idxEntry = localBlockIndex->index[newTail];
-			assert(idxEntry->key.load(std::memory_order_relaxed) == INVALID_BLOCK_BASE);
-			idxEntry->key.store(blockStartIndex, std::memory_order_relaxed);
-			localBlockIndex->tail.store(newTail, std::memory_order_release);
-			return true;
+			else if (!new_block_index()) {
+				return false;
+			}
+			else {
+				localBlockIndex = blockIndex.load(std::memory_order_relaxed);
+				newTail = (localBlockIndex->tail.load(std::memory_order_relaxed) + 1) & (localBlockIndex->capacity - 1);
+				idxEntry = localBlockIndex->index[newTail];
+				assert(idxEntry->key.load(std::memory_order_relaxed) == INVALID_BLOCK_BASE);
+				idxEntry->key.store(blockStartIndex, std::memory_order_relaxed);
+				localBlockIndex->tail.store(newTail, std::memory_order_release);
+				return true;
+			}
 		}
-		
+
 		inline void rewind_block_index_tail()
 		{
 			auto localBlockIndex = blockIndex.load(std::memory_order_relaxed);
 			localBlockIndex->tail.store((localBlockIndex->tail.load(std::memory_order_relaxed) - 1) & (localBlockIndex->capacity - 1), std::memory_order_relaxed);
 		}
-		
+
 		inline BlockIndexEntry* get_block_index_entry_for_index(index_t index) const
 		{
 			BlockIndexHeader* localBlockIndex;
 			auto idx = get_block_index_index_for_index(index, localBlockIndex);
 			return localBlockIndex->index[idx];
 		}
-		
+
 		inline size_t get_block_index_index_for_index(index_t index, BlockIndexHeader*& localBlockIndex) const
 		{
-#if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
+#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
 			debug::DebugLock lock(mutex);
 #endif
 			index &= ~static_cast<index_t>(BLOCK_SIZE - 1);
@@ -2874,7 +2947,7 @@ private:
 			assert(localBlockIndex->index[idx]->key.load(std::memory_order_relaxed) == index && localBlockIndex->index[idx]->value.load(std::memory_order_relaxed) != nullptr);
 			return idx;
 		}
-		
+
 		bool new_block_index()
 		{
 			auto prev = blockIndex.load(std::memory_order_relaxed);
@@ -2887,7 +2960,7 @@ private:
 			if (raw == nullptr) {
 				return false;
 			}
-			
+
 			auto header = new (raw) BlockIndexHeader;
 			auto entries = reinterpret_cast<BlockIndexEntry*>(details::align_for<BlockIndexEntry>(raw + sizeof(BlockIndexHeader)));
 			auto index = reinterpret_cast<BlockIndexEntry**>(details::align_for<BlockIndexEntry*>(reinterpret_cast<char*>(entries) + sizeof(BlockIndexEntry) * entryCount));
@@ -2911,14 +2984,14 @@ private:
 			header->index = index;
 			header->capacity = nextBlockIndexCapacity;
 			header->tail.store((prevCapacity - 1) & (nextBlockIndexCapacity - 1), std::memory_order_relaxed);
-			
+
 			blockIndex.store(header, std::memory_order_release);
-			
+
 			nextBlockIndexCapacity <<= 1;
-			
+
 			return true;
 		}
-		
+
 	private:
 		size_t nextBlockIndexCapacity;
 		std::atomic<BlockIndexHeader*> blockIndex;
@@ -2928,26 +3001,26 @@ private:
 		details::ThreadExitListener threadExitListener;
 	private:
 #endif
-		
+
 #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
 	public:
 		ImplicitProducer* nextImplicitProducer;
 	private:
 #endif
 
-#if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
+#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
 		mutable debug::DebugMutex mutex;
 #endif
-#if MCDBGQ_TRACKMEM
+#ifdef MCDBGQ_TRACKMEM
 		friend struct MemStats;
 #endif
 	};
-	
-	
+
+
 	//////////////////////////////////
 	// Block pool manipulation
 	//////////////////////////////////
-	
+
 	void populate_initial_block_list(size_t blockCount)
 	{
 		initialBlockPoolSize = blockCount;
@@ -2955,7 +3028,7 @@ private:
 			initialBlockPool = nullptr;
 			return;
 		}
-		
+
 		initialBlockPool = create_array<Block>(blockCount);
 		if (initialBlockPool == nullptr) {
 			initialBlockPoolSize = 0;
@@ -2964,26 +3037,26 @@ private:
 			initialBlockPool[i].dynamicallyAllocated = false;
 		}
 	}
-	
+
 	inline Block* try_get_block_from_initial_pool()
 	{
 		if (initialBlockPoolIndex.load(std::memory_order_relaxed) >= initialBlockPoolSize) {
 			return nullptr;
 		}
-		
+
 		auto index = initialBlockPoolIndex.fetch_add(1, std::memory_order_relaxed);
-		
+
 		return index < initialBlockPoolSize ? (initialBlockPool + index) : nullptr;
 	}
-	
+
 	inline void add_block_to_free_list(Block* block)
 	{
-#if MCDBGQ_TRACKMEM
+#ifdef MCDBGQ_TRACKMEM
 		block->owner = nullptr;
 #endif
 		freeList.add(block);
 	}
-	
+
 	inline void add_blocks_to_free_list(Block* block)
 	{
 		while (block != nullptr) {
@@ -2992,12 +3065,12 @@ private:
 			block = next;
 		}
 	}
-	
+
 	inline Block* try_get_block_from_free_list()
 	{
 		return freeList.try_get();
 	}
-	
+
 	// Gets a free block from one of the memory pools, or allocates a new one (if applicable)
 	template<AllocationMode canAlloc>
 	Block* requisition_block()
@@ -3006,21 +3079,22 @@ private:
 		if (block != nullptr) {
 			return block;
 		}
-		
+
 		block = try_get_block_from_free_list();
 		if (block != nullptr) {
 			return block;
 		}
-		
-		if (canAlloc == CanAlloc) {
+
+		MOODYCAMEL_CONSTEXPR_IF (canAlloc == CanAlloc) {
 			return create<Block>();
 		}
-		
-		return nullptr;
+		else {
+			return nullptr;
+		}
 	}
-	
 
-#if MCDBGQ_TRACKMEM
+
+#ifdef MCDBGQ_TRACKMEM
 	public:
 		struct MemStats {
 			size_t allocatedBlocks;
@@ -3035,28 +3109,28 @@ private:
 			size_t queueClassBytes;
 			size_t implicitBlockIndexBytes;
 			size_t explicitBlockIndexBytes;
-			
+
 			friend class ConcurrentQueue;
-			
+
 		private:
 			static MemStats getFor(ConcurrentQueue* q)
 			{
 				MemStats stats = { 0 };
-				
+
 				stats.elementsEnqueued = q->size_approx();
-			
+
 				auto block = q->freeList.head_unsafe();
 				while (block != nullptr) {
 					++stats.allocatedBlocks;
 					++stats.freeBlocks;
 					block = block->freeListNext.load(std::memory_order_relaxed);
 				}
-				
+
 				for (auto ptr = q->producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) {
 					bool implicit = dynamic_cast<ImplicitProducer*>(ptr) != nullptr;
 					stats.implicitProducers += implicit ? 1 : 0;
 					stats.explicitProducers += implicit ? 0 : 1;
-					
+
 					if (implicit) {
 						auto prod = static_cast<ImplicitProducer*>(ptr);
 						stats.queueClassBytes += sizeof(ImplicitProducer);
@@ -3104,18 +3178,18 @@ private:
 						}
 					}
 				}
-				
+
 				auto freeOnInitialPool = q->initialBlockPoolIndex.load(std::memory_order_relaxed) >= q->initialBlockPoolSize ? 0 : q->initialBlockPoolSize - q->initialBlockPoolIndex.load(std::memory_order_relaxed);
 				stats.allocatedBlocks += freeOnInitialPool;
 				stats.freeBlocks += freeOnInitialPool;
-				
+
 				stats.blockClassBytes = sizeof(Block) * stats.allocatedBlocks;
 				stats.queueClassBytes += sizeof(ConcurrentQueue);
-				
+
 				return stats;
 			}
 		};
-		
+
 		// For debugging only. Not thread-safe.
 		MemStats getMemStats()
 		{
@@ -3124,21 +3198,21 @@ private:
 	private:
 		friend struct MemStats;
 #endif
-	
-	
+
+
 	//////////////////////////////////
 	// Producer list manipulation
-	//////////////////////////////////	
-	
+	//////////////////////////////////
+
 	ProducerBase* recycle_or_create_producer(bool isExplicit)
 	{
 		bool recycled;
 		return recycle_or_create_producer(isExplicit, recycled);
 	}
-	
+
 	ProducerBase* recycle_or_create_producer(bool isExplicit, bool& recycled)
 	{
-#if MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH
+#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH
 		debug::DebugLock lock(implicitProdMutex);
 #endif
 		// Try to re-use one first
@@ -3152,26 +3226,26 @@ private:
 				}
 			}
 		}
-		
+
 		recycled = false;
 		return add_producer(isExplicit ? static_cast<ProducerBase*>(create<ExplicitProducer>(this)) : create<ImplicitProducer>(this));
 	}
-	
+
 	ProducerBase* add_producer(ProducerBase* producer)
 	{
 		// Handle failed memory allocation
 		if (producer == nullptr) {
 			return nullptr;
 		}
-		
+
 		producerCount.fetch_add(1, std::memory_order_relaxed);
-		
+
 		// Add it to the lock-free list
 		auto prevTail = producerListTail.load(std::memory_order_relaxed);
 		do {
 			producer->next = prevTail;
 		} while (!producerListTail.compare_exchange_weak(prevTail, producer, std::memory_order_release, std::memory_order_relaxed));
-		
+
 #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
 		if (producer->isExplicit) {
 			auto prevTailExplicit = explicitProducers.load(std::memory_order_relaxed);
@@ -3186,10 +3260,10 @@ private:
 			} while (!implicitProducers.compare_exchange_weak(prevTailImplicit, static_cast<ImplicitProducer*>(producer), std::memory_order_release, std::memory_order_relaxed));
 		}
 #endif
-		
+
 		return producer;
 	}
-	
+
 	void reown_producers()
 	{
 		// After another instance is moved-into/swapped-with this one, all the
@@ -3199,31 +3273,31 @@ private:
 			ptr->parent = this;
 		}
 	}
-	
-	
+
+
 	//////////////////////////////////
 	// Implicit producer hash
 	//////////////////////////////////
-	
+
 	struct ImplicitProducerKVP
 	{
 		std::atomic<details::thread_id_t> key;
 		ImplicitProducer* value;		// No need for atomicity since it's only read by the thread that sets it in the first place
-		
+
 		ImplicitProducerKVP() : value(nullptr) { }
-		
+
 		ImplicitProducerKVP(ImplicitProducerKVP&& other) MOODYCAMEL_NOEXCEPT
 		{
 			key.store(other.key.load(std::memory_order_relaxed), std::memory_order_relaxed);
 			value = other.value;
 		}
-		
+
 		inline ImplicitProducerKVP& operator=(ImplicitProducerKVP&& other) MOODYCAMEL_NOEXCEPT
 		{
 			swap(other);
 			return *this;
 		}
-		
+
 		inline void swap(ImplicitProducerKVP& other) MOODYCAMEL_NOEXCEPT
 		{
 			if (this != &other) {
@@ -3232,93 +3306,100 @@ private:
 			}
 		}
 	};
-	
+
 	template<typename XT, typename XTraits>
 	friend void moodycamel::swap(typename ConcurrentQueue<XT, XTraits>::ImplicitProducerKVP&, typename ConcurrentQueue<XT, XTraits>::ImplicitProducerKVP&) MOODYCAMEL_NOEXCEPT;
-	
+
 	struct ImplicitProducerHash
 	{
 		size_t capacity;
 		ImplicitProducerKVP* entries;
 		ImplicitProducerHash* prev;
 	};
-	
+
 	inline void populate_initial_implicit_producer_hash()
 	{
-		if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return;
-		
-		implicitProducerHashCount.store(0, std::memory_order_relaxed);
-		auto hash = &initialImplicitProducerHash;
-		hash->capacity = INITIAL_IMPLICIT_PRODUCER_HASH_SIZE;
-		hash->entries = &initialImplicitProducerHashEntries[0];
-		for (size_t i = 0; i != INITIAL_IMPLICIT_PRODUCER_HASH_SIZE; ++i) {
-			initialImplicitProducerHashEntries[i].key.store(details::invalid_thread_id, std::memory_order_relaxed);
+		MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) {
+			return;
+		}
+		else {
+			implicitProducerHashCount.store(0, std::memory_order_relaxed);
+			auto hash = &initialImplicitProducerHash;
+			hash->capacity = INITIAL_IMPLICIT_PRODUCER_HASH_SIZE;
+			hash->entries = &initialImplicitProducerHashEntries[0];
+			for (size_t i = 0; i != INITIAL_IMPLICIT_PRODUCER_HASH_SIZE; ++i) {
+				initialImplicitProducerHashEntries[i].key.store(details::invalid_thread_id, std::memory_order_relaxed);
+			}
+			hash->prev = nullptr;
+			implicitProducerHash.store(hash, std::memory_order_relaxed);
 		}
-		hash->prev = nullptr;
-		implicitProducerHash.store(hash, std::memory_order_relaxed);
 	}
-	
+
 	void swap_implicit_producer_hashes(ConcurrentQueue& other)
 	{
-		if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return;
-		
-		// Swap (assumes our implicit producer hash is initialized)
-		initialImplicitProducerHashEntries.swap(other.initialImplicitProducerHashEntries);
-		initialImplicitProducerHash.entries = &initialImplicitProducerHashEntries[0];
-		other.initialImplicitProducerHash.entries = &other.initialImplicitProducerHashEntries[0];
-		
-		details::swap_relaxed(implicitProducerHashCount, other.implicitProducerHashCount);
-		
-		details::swap_relaxed(implicitProducerHash, other.implicitProducerHash);
-		if (implicitProducerHash.load(std::memory_order_relaxed) == &other.initialImplicitProducerHash) {
-			implicitProducerHash.store(&initialImplicitProducerHash, std::memory_order_relaxed);
+		MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) {
+			return;
 		}
 		else {
-			ImplicitProducerHash* hash;
-			for (hash = implicitProducerHash.load(std::memory_order_relaxed); hash->prev != &other.initialImplicitProducerHash; hash = hash->prev) {
-				continue;
+			// Swap (assumes our implicit producer hash is initialized)
+			initialImplicitProducerHashEntries.swap(other.initialImplicitProducerHashEntries);
+			initialImplicitProducerHash.entries = &initialImplicitProducerHashEntries[0];
+			other.initialImplicitProducerHash.entries = &other.initialImplicitProducerHashEntries[0];
+
+			details::swap_relaxed(implicitProducerHashCount, other.implicitProducerHashCount);
+
+			details::swap_relaxed(implicitProducerHash, other.implicitProducerHash);
+			if (implicitProducerHash.load(std::memory_order_relaxed) == &other.initialImplicitProducerHash) {
+				implicitProducerHash.store(&initialImplicitProducerHash, std::memory_order_relaxed);
 			}
-			hash->prev = &initialImplicitProducerHash;
-		}
-		if (other.implicitProducerHash.load(std::memory_order_relaxed) == &initialImplicitProducerHash) {
-			other.implicitProducerHash.store(&other.initialImplicitProducerHash, std::memory_order_relaxed);
-		}
-		else {
-			ImplicitProducerHash* hash;
-			for (hash = other.implicitProducerHash.load(std::memory_order_relaxed); hash->prev != &initialImplicitProducerHash; hash = hash->prev) {
-				continue;
+			else {
+				ImplicitProducerHash* hash;
+				for (hash = implicitProducerHash.load(std::memory_order_relaxed); hash->prev != &other.initialImplicitProducerHash; hash = hash->prev) {
+					continue;
+				}
+				hash->prev = &initialImplicitProducerHash;
+			}
+			if (other.implicitProducerHash.load(std::memory_order_relaxed) == &initialImplicitProducerHash) {
+				other.implicitProducerHash.store(&other.initialImplicitProducerHash, std::memory_order_relaxed);
+			}
+			else {
+				ImplicitProducerHash* hash;
+				for (hash = other.implicitProducerHash.load(std::memory_order_relaxed); hash->prev != &initialImplicitProducerHash; hash = hash->prev) {
+					continue;
+				}
+				hash->prev = &other.initialImplicitProducerHash;
 			}
-			hash->prev = &other.initialImplicitProducerHash;
 		}
 	}
-	
+
 	// Only fails (returns nullptr) if memory allocation fails
 	ImplicitProducer* get_or_add_implicit_producer()
 	{
 		// Note that since the data is essentially thread-local (key is thread ID),
 		// there's a reduced need for fences (memory ordering is already consistent
 		// for any individual thread), except for the current table itself.
-		
+
 		// Start by looking for the thread ID in the current and all previous hash tables.
 		// If it's not found, it must not be in there yet, since this same thread would
 		// have added it previously to one of the tables that we traversed.
-		
+
 		// Code and algorithm adapted from http://preshing.com/20130605/the-worlds-simplest-lock-free-hash-table
-		
-#if MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH
+
+#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH
 		debug::DebugLock lock(implicitProdMutex);
 #endif
-		
+
 		auto id = details::thread_id();
 		auto hashedId = details::hash_thread_id(id);
-		
+
 		auto mainHash = implicitProducerHash.load(std::memory_order_acquire);
+		assert(mainHash != nullptr);  // silence clang-tidy and MSVC warnings (hash cannot be null)
 		for (auto hash = mainHash; hash != nullptr; hash = hash->prev) {
 			// Look for the id in this hash
 			auto index = hashedId;
 			while (true) {		// Not an infinite loop because at least one slot is free in the hash table
 				index &= hash->capacity - 1;
-				
+
 				auto probedKey = hash->entries[index].key.load(std::memory_order_relaxed);
 				if (probedKey == id) {
 					// Found it! If we had to search several hashes deep, though, we should lazily add it
@@ -3346,7 +3427,7 @@ private:
 							++index;
 						}
 					}
-					
+
 					return value;
 				}
 				if (probedKey == details::invalid_thread_id) {
@@ -3355,10 +3436,11 @@ private:
 				++index;
 			}
 		}
-		
+
 		// Insert!
 		auto newCount = 1 + implicitProducerHashCount.fetch_add(1, std::memory_order_relaxed);
 		while (true) {
+			// NOLINTNEXTLINE(clang-analyzer-core.NullDereference)
 			if (newCount >= (mainHash->capacity >> 1) && !implicitProducerHashResizeInProgress.test_and_set(std::memory_order_acquire)) {
 				// We've acquired the resize lock, try to allocate a bigger hash table.
 				// Note the acquire fence synchronizes with the release fence at the end of this block, and hence when
@@ -3377,9 +3459,9 @@ private:
 						implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed);
 						return nullptr;
 					}
-					
+
 					auto newHash = new (raw) ImplicitProducerHash;
-					newHash->capacity = newCapacity;
+					newHash->capacity = static_cast<size_t>(newCapacity);
 					newHash->entries = reinterpret_cast<ImplicitProducerKVP*>(details::align_for<ImplicitProducerKVP>(raw + sizeof(ImplicitProducerHash)));
 					for (size_t i = 0; i != newCapacity; ++i) {
 						new (newHash->entries + i) ImplicitProducerKVP;
@@ -3394,7 +3476,7 @@ private:
 					implicitProducerHashResizeInProgress.clear(std::memory_order_release);
 				}
 			}
-			
+
 			// If it's < three-quarters full, add to the old one anyway so that we don't have to wait for the next table
 			// to finish being allocated by another thread (and if we just finished allocating above, the condition will
 			// always be true)
@@ -3408,18 +3490,18 @@ private:
 				if (recycled) {
 					implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed);
 				}
-				
+
 #ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
 				producer->threadExitListener.callback = &ConcurrentQueue::implicit_producer_thread_exited_callback;
 				producer->threadExitListener.userData = producer;
 				details::ThreadExitNotifier::subscribe(&producer->threadExitListener);
 #endif
-				
+
 				auto index = hashedId;
 				while (true) {
 					index &= mainHash->capacity - 1;
 					auto probedKey = mainHash->entries[index].key.load(std::memory_order_relaxed);
-					
+
 					auto empty = details::invalid_thread_id;
 #ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
 					auto reusable = details::invalid_thread_id2;
@@ -3435,22 +3517,22 @@ private:
 				}
 				return producer;
 			}
-			
+
 			// Hmm, the old hash is quite full and somebody else is busy allocating a new one.
 			// We need to wait for the allocating thread to finish (if it succeeds, we add, if not,
 			// we try to allocate ourselves).
 			mainHash = implicitProducerHash.load(std::memory_order_acquire);
 		}
 	}
-	
+
 #ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
 	void implicit_producer_thread_exited(ImplicitProducer* producer)
 	{
 		// Remove from thread exit listeners
 		details::ThreadExitNotifier::unsubscribe(&producer->threadExitListener);
-		
+
 		// Remove from hash
-#if MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH
+#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH
 		debug::DebugLock lock(implicitProdMutex);
 #endif
 		auto hash = implicitProducerHash.load(std::memory_order_acquire);
@@ -3458,7 +3540,7 @@ private:
 		auto id = details::thread_id();
 		auto hashedId = details::hash_thread_id(id);
 		details::thread_id_t probedKey;
-		
+
 		// We need to traverse all the hashes just in case other threads aren't on the current one yet and are
 		// trying to add an entry thinking there's a free slot (because they reused a producer)
 		for (; hash != nullptr; hash = hash->prev) {
@@ -3473,11 +3555,11 @@ private:
 				++index;
 			} while (probedKey != details::invalid_thread_id);		// Can happen if the hash has changed but we weren't put back in it yet, or if we weren't added to this hash in the first place
 		}
-		
+
 		// Mark the queue as being recyclable
 		producer->inactive.store(true, std::memory_order_release);
 	}
-	
+
 	static void implicit_producer_thread_exited_callback(void* userData)
 	{
 		auto producer = static_cast<ImplicitProducer*>(userData);
@@ -3485,88 +3567,109 @@ private:
 		queue->implicit_producer_thread_exited(producer);
 	}
 #endif
-	
+
 	//////////////////////////////////
 	// Utility functions
 	//////////////////////////////////
-	
+
+	template<typename TAlign>
+	static inline void* aligned_malloc(size_t size)
+	{
+		MOODYCAMEL_CONSTEXPR_IF (std::alignment_of<TAlign>::value <= std::alignment_of<details::max_align_t>::value)
+			return (Traits::malloc)(size);
+		else {
+			size_t alignment = std::alignment_of<TAlign>::value;
+			void* raw = (Traits::malloc)(size + alignment - 1 + sizeof(void*));
+			if (!raw)
+				return nullptr;
+			char* ptr = details::align_for<TAlign>(reinterpret_cast<char*>(raw) + sizeof(void*));
+			*(reinterpret_cast<void**>(ptr) - 1) = raw;
+			return ptr;
+		}
+	}
+
+	template<typename TAlign>
+	static inline void aligned_free(void* ptr)
+	{
+		MOODYCAMEL_CONSTEXPR_IF (std::alignment_of<TAlign>::value <= std::alignment_of<details::max_align_t>::value)
+			return (Traits::free)(ptr);
+		else
+			(Traits::free)(ptr ? *(reinterpret_cast<void**>(ptr) - 1) : nullptr);
+	}
+
 	template<typename U>
 	static inline U* create_array(size_t count)
 	{
 		assert(count > 0);
-		auto p = static_cast<U*>((Traits::malloc)(sizeof(U) * count));
-		if (p == nullptr) {
+		U* p = static_cast<U*>(aligned_malloc<U>(sizeof(U) * count));
+		if (p == nullptr)
 			return nullptr;
-		}
-		
-		for (size_t i = 0; i != count; ++i) {
+
+		for (size_t i = 0; i != count; ++i)
 			new (p + i) U();
-		}
 		return p;
 	}
-	
+
 	template<typename U>
 	static inline void destroy_array(U* p, size_t count)
 	{
 		if (p != nullptr) {
 			assert(count > 0);
-			for (size_t i = count; i != 0; ) {
+			for (size_t i = count; i != 0; )
 				(p + --i)->~U();
-			}
-			(Traits::free)(p);
 		}
+		aligned_free<U>(p);
 	}
-	
+
 	template<typename U>
 	static inline U* create()
 	{
-		auto p = (Traits::malloc)(sizeof(U));
+		void* p = aligned_malloc<U>(sizeof(U));
 		return p != nullptr ? new (p) U : nullptr;
 	}
-	
+
 	template<typename U, typename A1>
 	static inline U* create(A1&& a1)
 	{
-		auto p = (Traits::malloc)(sizeof(U));
+		void* p = aligned_malloc<U>(sizeof(U));
 		return p != nullptr ? new (p) U(std::forward<A1>(a1)) : nullptr;
 	}
-	
+
 	template<typename U>
 	static inline void destroy(U* p)
 	{
-		if (p != nullptr) {
+		if (p != nullptr)
 			p->~U();
-		}
-		(Traits::free)(p);
+		aligned_free<U>(p);
 	}
 
 private:
 	std::atomic<ProducerBase*> producerListTail;
 	std::atomic<std::uint32_t> producerCount;
-	
+
 	std::atomic<size_t> initialBlockPoolIndex;
 	Block* initialBlockPool;
 	size_t initialBlockPoolSize;
-	
-#if !MCDBGQ_USEDEBUGFREELIST
+
+#ifndef MCDBGQ_USEDEBUGFREELIST
 	FreeList<Block> freeList;
 #else
 	debug::DebugFreeList<Block> freeList;
 #endif
-	
+
 	std::atomic<ImplicitProducerHash*> implicitProducerHash;
 	std::atomic<size_t> implicitProducerHashCount;		// Number of slots logically used
 	ImplicitProducerHash initialImplicitProducerHash;
 	std::array<ImplicitProducerKVP, INITIAL_IMPLICIT_PRODUCER_HASH_SIZE> initialImplicitProducerHashEntries;
 	std::atomic_flag implicitProducerHashResizeInProgress;
-	
+
 	std::atomic<std::uint32_t> nextExplicitConsumerId;
 	std::atomic<std::uint32_t> globalExplicitConsumerOffset;
-	
-#if MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH
+
+#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH
 	debug::DebugMutex implicitProdMutex;
 #endif
-	
+
 #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
 	std::atomic<ExplicitProducer*> explicitProducers;
 	std::atomic<ImplicitProducer*> implicitProducers;
@@ -3597,7 +3700,7 @@ ConsumerToken::ConsumerToken(ConcurrentQueue<T, Traits>& queue)
 	: itemsConsumedFromCurrent(0), currentProducer(nullptr), desiredProducer(nullptr)
 {
 	initialOffset = queue.nextExplicitConsumerId.fetch_add(1, std::memory_order_release);
-	lastKnownGlobalOffset = -1;
+	lastKnownGlobalOffset = static_cast<std::uint32_t>(-1);
 }
 
 template<typename T, typename Traits>
@@ -3605,7 +3708,7 @@ ConsumerToken::ConsumerToken(BlockingConcurrentQueue<T, Traits>& queue)
 	: itemsConsumedFromCurrent(0), currentProducer(nullptr), desiredProducer(nullptr)
 {
 	initialOffset = reinterpret_cast<ConcurrentQueue<T, Traits>*>(&queue)->nextExplicitConsumerId.fetch_add(1, std::memory_order_release);
-	lastKnownGlobalOffset = -1;
+	lastKnownGlobalOffset = static_cast<std::uint32_t>(-1);
 }
 
 template<typename T, typename Traits>
@@ -3632,6 +3735,10 @@ inline void swap(typename ConcurrentQueue<T, Traits>::ImplicitProducerKVP& a, ty
 
 }
 
-#if defined(__GNUC__)
+#if defined(_MSC_VER) && (!defined(_HAS_CXX17) || !_HAS_CXX17)
+#pragma warning(pop)
+#endif
+
+#if defined(__GNUC__) && !defined(__INTEL_COMPILER)
 #pragma GCC diagnostic pop
 #endif