diff --git a/libs/common/Numerics/TensorPrimitives.IBinaryOperator.cs b/libs/common/Numerics/TensorPrimitives.IBinaryOperator.cs
new file mode 100644
index 0000000000..3c94b67b4a
--- /dev/null
+++ b/libs/common/Numerics/TensorPrimitives.IBinaryOperator.cs
@@ -0,0 +1,49 @@
+﻿// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT license.
+
+using System.Diagnostics;
+using System.Numerics;
+using System.Runtime.CompilerServices;
+using System.Runtime.Intrinsics;
+
+namespace Garnet.common.Numerics
+{
+    public static unsafe partial class TensorPrimitives
+    {
+        /// <summary>x &amp; y</summary>
+        public readonly struct BitwiseAndOperator<T> : IBinaryOperator<T> where T : IBitwiseOperators<T, T, T>
+        {
+            public static T Invoke(T x, T y) => x & y;
+            public static Vector128<T> Invoke(Vector128<T> x, Vector128<T> y) => x & y;
+            public static Vector256<T> Invoke(Vector256<T> x, Vector256<T> y) => x & y;
+            public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> y) => x & y;
+        }
+
+        /// <summary>x | y</summary>
+        public readonly struct BitwiseOrOperator<T> : IBinaryOperator<T> where T : IBitwiseOperators<T, T, T>
+        {
+            public static T Invoke(T x, T y) => x | y;
+            public static Vector128<T> Invoke(Vector128<T> x, Vector128<T> y) => x | y;
+            public static Vector256<T> Invoke(Vector256<T> x, Vector256<T> y) => x | y;
+            public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> y) => x | y;
+        }
+
+        /// <summary>x ^ y</summary>
+        public readonly struct BitwiseXorOperator<T> : IBinaryOperator<T> where T : IBitwiseOperators<T, T, T>
+        {
+            public static T Invoke(T x, T y) => x ^ y;
+            public static Vector128<T> Invoke(Vector128<T> x, Vector128<T> y) => x ^ y;
+            public static Vector256<T> Invoke(Vector256<T> x, Vector256<T> y) => x ^ y;
+            public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> y) => x ^ y;
+        }
+
+        /// <summary>Operator that takes two input values and returns a single value.</summary>
+        public interface IBinaryOperator<T>
+        {
+            static abstract T Invoke(T x, T y);
+            static abstract Vector128<T> Invoke(Vector128<T> x, Vector128<T> y);
+            static abstract Vector256<T> Invoke(Vector256<T> x, Vector256<T> y);
+            static abstract Vector512<T> Invoke(Vector512<T> x, Vector512<T> y);
+        }
+    }
+}
\ No newline at end of file
diff --git a/libs/server/Resp/Bitmap/BitmapManagerBitOp.cs b/libs/server/Resp/Bitmap/BitmapManagerBitOp.cs
index 6917e25993..4f6a3d8b33 100644
--- a/libs/server/Resp/Bitmap/BitmapManagerBitOp.cs
+++ b/libs/server/Resp/Bitmap/BitmapManagerBitOp.cs
@@ -2,12 +2,16 @@
 // Licensed under the MIT license.
 
 using System.Runtime.Intrinsics;
-using System.Runtime.Intrinsics.X86;
 using Garnet.common;
-
+using static Garnet.common.Numerics.TensorPrimitives;
 
 namespace Garnet.server
 {
+    // TODO: Guard Vector### logic behind IsSupported & IsHardwareAccelerated
+    // TODO: Add Vector512 & Vector128 paths atleast
+    // TODO: Get rid of "IBinaryOperator<ulong>" scalar logic?
+    // FÒLLOW-UP: Non-temporal stores after sizes larger than 256KB (like in TensorPrimitives)
+    // FÒLLOW-UP: Investigate alignment -> overlapping & jump-table (like in TensorPrimitives)
     public unsafe partial class BitmapManager
     {
         /// <summary>
@@ -26,16 +30,16 @@ public static bool BitOpMainUnsafeMultiKey(byte* dstPtr, int dstLen, byte** srcS
             switch (bitop)
             {
                 case (byte)BitmapOperation.NOT:
-                    __bitop_multikey_simdX256_not(dstPtr, dstLen, srcStartPtrs[0], srcEndPtrs[0] - srcStartPtrs[0]);
+                    InvokeSingleKeyBitwiseNot(dstPtr, dstLen, srcStartPtrs[0], srcEndPtrs[0] - srcStartPtrs[0]);
                     break;
                 case (byte)BitmapOperation.AND:
-                    __bitop_multikey_simdX256_and(dstPtr, dstLen, srcStartPtrs, srcEndPtrs, srcKeyCount, minSize);
+                    InvokeMultiKeyBitwise<BitwiseAndOperator<byte>, BitwiseAndOperator<ulong>>(dstPtr, dstLen, srcStartPtrs, srcEndPtrs, srcKeyCount, minSize);
                     break;
                 case (byte)BitmapOperation.OR:
-                    __bitop_multikey_simdX256_or(dstPtr, dstLen, srcStartPtrs, srcEndPtrs, srcKeyCount, minSize);
+                    InvokeMultiKeyBitwise<BitwiseOrOperator<byte>, BitwiseOrOperator<ulong>>(dstPtr, dstLen, srcStartPtrs, srcEndPtrs, srcKeyCount, minSize);
                     break;
                 case (byte)BitmapOperation.XOR:
-                    __bitop_multikey_simdX256_xor(dstPtr, dstLen, srcStartPtrs, srcEndPtrs, srcKeyCount, minSize);
+                    InvokeMultiKeyBitwise<BitwiseXorOperator<byte>, BitwiseXorOperator<ulong>>(dstPtr, dstLen, srcStartPtrs, srcEndPtrs, srcKeyCount, minSize);
                     break;
                 default:
                     throw new GarnetException("Unsupported BitOp command");
@@ -44,285 +48,87 @@ public static bool BitOpMainUnsafeMultiKey(byte* dstPtr, int dstLen, byte** srcS
         }
 
         /// <summary>
-        /// Negation bitop implementation using 256-wide SIMD registers.
+        /// Invokes unary bitwise-NOT operation for single source key using hardware accelerated SIMD intrinsics when possible.
         /// </summary>
         /// <param name="dstPtr">Output buffer to write BitOp result</param>
         /// <param name="dstLen">Output buffer length.</param>
         /// <param name="srcBitmap">Pointer to source bitmap.</param>
         /// <param name="srcLen">Source bitmap length.</param>
-        private static void __bitop_multikey_simdX256_not(byte* dstPtr, long dstLen, byte* srcBitmap, long srcLen)
+        private static void InvokeSingleKeyBitwiseNot(byte* dstPtr, long dstLen, byte* srcBitmap, long srcLen)
         {
-            int batchSize = 8 * 32;
             long slen = srcLen;
-            long stail = slen & (batchSize - 1);
+            long remainder = slen & ((Vector256<byte>.Count * 8) - 1);
 
             //iterate using srcBitmap because always dstLen >= srcLen 
             byte* srcCurr = srcBitmap;
-            byte* srcEnd = srcCurr + (slen - stail);
+            byte* srcEnd = srcCurr + (slen - remainder);
             byte* dstCurr = dstPtr;
 
-            #region 8x32
-            while (srcCurr < srcEnd)
-            {
-                Vector256<byte> d00 = Avx.LoadVector256(srcCurr);
-                Vector256<byte> d01 = Avx.LoadVector256(srcCurr + 32);
-                Vector256<byte> d02 = Avx.LoadVector256(srcCurr + 64);
-                Vector256<byte> d03 = Avx.LoadVector256(srcCurr + 96);
-                Vector256<byte> d04 = Avx.LoadVector256(srcCurr + 128);
-                Vector256<byte> d05 = Avx.LoadVector256(srcCurr + 160);
-                Vector256<byte> d06 = Avx.LoadVector256(srcCurr + 192);
-                Vector256<byte> d07 = Avx.LoadVector256(srcCurr + 224);
-
-                Avx.Store(dstCurr, Avx2.Xor(d00, Vector256<byte>.AllBitsSet));
-                Avx.Store(dstCurr + 32, Avx2.Xor(d01, Vector256<byte>.AllBitsSet));
-                Avx.Store(dstCurr + 64, Avx2.Xor(d02, Vector256<byte>.AllBitsSet));
-                Avx.Store(dstCurr + 96, Avx2.Xor(d03, Vector256<byte>.AllBitsSet));
-                Avx.Store(dstCurr + 128, Avx2.Xor(d04, Vector256<byte>.AllBitsSet));
-                Avx.Store(dstCurr + 160, Avx2.Xor(d05, Vector256<byte>.AllBitsSet));
-                Avx.Store(dstCurr + 192, Avx2.Xor(d06, Vector256<byte>.AllBitsSet));
-                Avx.Store(dstCurr + 224, Avx2.Xor(d07, Vector256<byte>.AllBitsSet));
-
-                srcCurr += batchSize;
-                dstCurr += batchSize;
-            }
-            if (stail == 0) return;
-            #endregion
-
-            #region 1x32
-            slen = stail;
-            batchSize = 1 * 32;
-            stail = slen & (batchSize - 1);
-            srcEnd = srcCurr + (slen - stail);
             while (srcCurr < srcEnd)
             {
-                Vector256<byte> d00 = Avx.LoadVector256(srcCurr);
-                Avx.Store(dstCurr, Avx2.Xor(d00, Vector256<byte>.AllBitsSet));
-                srcCurr += batchSize;
-                dstCurr += batchSize;
+                var d00 = Vector256.Load(srcCurr);
+                var d01 = Vector256.Load(srcCurr + Vector256<byte>.Count);
+                var d02 = Vector256.Load(srcCurr + (Vector256<byte>.Count * 2));
+                var d03 = Vector256.Load(srcCurr + (Vector256<byte>.Count * 3));
+                var d04 = Vector256.Load(srcCurr + (Vector256<byte>.Count * 4));
+                var d05 = Vector256.Load(srcCurr + (Vector256<byte>.Count * 5));
+                var d06 = Vector256.Load(srcCurr + (Vector256<byte>.Count * 6));
+                var d07 = Vector256.Load(srcCurr + (Vector256<byte>.Count * 7));
+
+                Vector256.Store(~d00, dstCurr);
+                Vector256.Store(~d01, dstCurr + Vector256<byte>.Count);
+                Vector256.Store(~d02, dstCurr + Vector256<byte>.Count * 2);
+                Vector256.Store(~d03, dstCurr + Vector256<byte>.Count * 3);
+                Vector256.Store(~d04, dstCurr + Vector256<byte>.Count * 4);
+                Vector256.Store(~d05, dstCurr + Vector256<byte>.Count * 5);
+                Vector256.Store(~d06, dstCurr + Vector256<byte>.Count * 6);
+                Vector256.Store(~d07, dstCurr + Vector256<byte>.Count * 7);
+
+                srcCurr += Vector256<byte>.Count * 8;
+                dstCurr += Vector256<byte>.Count * 8;
             }
-            if (stail == 0) return;
-            #endregion
-
-            #region 4x8
-            slen = stail;
-            batchSize = 4 * 8;
-            stail = slen & (batchSize - 1);
-            srcEnd = srcCurr + (slen - stail);
+            if (remainder == 0) return;
+
+            slen = remainder;
+            remainder = slen & (Vector256<byte>.Count - 1);
+            srcEnd = srcCurr + (slen - remainder);
             while (srcCurr < srcEnd)
             {
-                long d00 = *(long*)(srcCurr);
-                long d01 = *(long*)(srcCurr + 8);
-                long d02 = *(long*)(srcCurr + 16);
-                long d03 = *(long*)(srcCurr + 24);
-
-                *(long*)dstCurr = ~d00;
-                *(long*)(dstCurr + 8) = ~d01;
-                *(long*)(dstCurr + 16) = ~d02;
-                *(long*)(dstCurr + 24) = ~d03;
-
-                srcCurr += batchSize;
-                dstCurr += batchSize;
+                Vector256.Store(~Vector256.Load(srcCurr), dstCurr);
+
+                srcCurr += Vector256<byte>.Count;
+                dstCurr += Vector256<byte>.Count;
             }
-            if (stail == 0) return;
-            #endregion
-
-            #region 1x8
-            slen = stail;
-            batchSize = 8;
-            stail = slen & (batchSize - 1);
-            srcEnd = srcCurr + (slen - stail);
+            if (remainder == 0) return;
+
+            slen = remainder;
+            remainder = slen & (sizeof(ulong) - 1);
+            srcEnd = srcCurr + (slen - remainder);
             while (srcCurr < srcEnd)
             {
-                long d00 = *(long*)(srcCurr);
+                *(ulong*)dstCurr = ~*(ulong*)srcCurr;
 
-                *(long*)dstCurr = ~d00;
-
-                srcCurr += batchSize;
-                dstCurr += batchSize;
+                srcCurr += sizeof(ulong);
+                dstCurr += sizeof(ulong);
             }
-            if (stail == 0) return;
-            #endregion
-
-            if (stail >= 7) dstCurr[6] = (byte)(~srcCurr[6]);
-            if (stail >= 6) dstCurr[5] = (byte)(~srcCurr[5]);
-            if (stail >= 5) dstCurr[4] = (byte)(~srcCurr[4]);
-            if (stail >= 4) dstCurr[3] = (byte)(~srcCurr[3]);
-            if (stail >= 3) dstCurr[2] = (byte)(~srcCurr[2]);
-            if (stail >= 2) dstCurr[1] = (byte)(~srcCurr[1]);
-            if (stail >= 1) dstCurr[0] = (byte)(~srcCurr[0]);
+            if (remainder == 0) return;
+
+            if (remainder >= 7) dstCurr[6] = (byte)~srcCurr[6];
+            if (remainder >= 6) dstCurr[5] = (byte)~srcCurr[5];
+            if (remainder >= 5) dstCurr[4] = (byte)~srcCurr[4];
+            if (remainder >= 4) dstCurr[3] = (byte)~srcCurr[3];
+            if (remainder >= 3) dstCurr[2] = (byte)~srcCurr[2];
+            if (remainder >= 2) dstCurr[1] = (byte)~srcCurr[1];
+            if (remainder >= 1) dstCurr[0] = (byte)~srcCurr[0];
         }
 
-        /// <summary>
-        /// AND bitop implementation using 256-wide SIMD registers.
-        /// </summary>
-        /// <param name="dstPtr">Output buffer to write BitOp result</param>
-        /// <param name="dstLen">Output buffer length.</param>
-        /// <param name="srcStartPtrs">Pointer to start of bitmap sources.</param>
-        /// <param name="srcEndPtrs">Pointer to end of bitmap sources</param>
-        /// <param name="srcKeyCount">Number of source keys.</param>
-        /// <param name="minSize">Minimum size of source bitmaps.</param>
-        private static void __bitop_multikey_simdX256_and(byte* dstPtr, int dstLen, byte** srcStartPtrs, byte** srcEndPtrs, int srcKeyCount, int minSize)
+        public static void GenericCodeGenDebugAid(int dstLen, int srcKeyCount, int minSize)
         {
-            int batchSize = 8 * 32;
-            long slen = minSize;
-            long stail = slen & (batchSize - 1);
-
-            byte* dstCurr = dstPtr;
-            byte* dstEnd = dstCurr + (slen - stail);
-
-            #region 8x32
-            while (dstCurr < dstEnd)
-            {
-                Vector256<byte> d00 = Avx.LoadVector256(srcStartPtrs[0]);
-                Vector256<byte> d01 = Avx.LoadVector256(srcStartPtrs[0] + 32);
-                Vector256<byte> d02 = Avx.LoadVector256(srcStartPtrs[0] + 64);
-                Vector256<byte> d03 = Avx.LoadVector256(srcStartPtrs[0] + 96);
-                Vector256<byte> d04 = Avx.LoadVector256(srcStartPtrs[0] + 128);
-                Vector256<byte> d05 = Avx.LoadVector256(srcStartPtrs[0] + 160);
-                Vector256<byte> d06 = Avx.LoadVector256(srcStartPtrs[0] + 192);
-                Vector256<byte> d07 = Avx.LoadVector256(srcStartPtrs[0] + 224);
-                srcStartPtrs[0] += batchSize;
-                for (int i = 1; i < srcKeyCount; i++)
-                {
-                    Vector256<byte> s00 = Avx.LoadVector256(srcStartPtrs[i]);
-                    Vector256<byte> s01 = Avx.LoadVector256(srcStartPtrs[i] + 32);
-                    Vector256<byte> s02 = Avx.LoadVector256(srcStartPtrs[i] + 64);
-                    Vector256<byte> s03 = Avx.LoadVector256(srcStartPtrs[i] + 96);
-                    Vector256<byte> s04 = Avx.LoadVector256(srcStartPtrs[i] + 128);
-                    Vector256<byte> s05 = Avx.LoadVector256(srcStartPtrs[i] + 160);
-                    Vector256<byte> s06 = Avx.LoadVector256(srcStartPtrs[i] + 192);
-                    Vector256<byte> s07 = Avx.LoadVector256(srcStartPtrs[i] + 224);
-
-                    d00 = Avx2.And(d00, s00);
-                    d01 = Avx2.And(d01, s01);
-                    d02 = Avx2.And(d02, s02);
-                    d03 = Avx2.And(d03, s03);
-                    d04 = Avx2.And(d04, s04);
-                    d05 = Avx2.And(d05, s05);
-                    d06 = Avx2.And(d06, s06);
-                    d07 = Avx2.And(d07, s07);
-                    srcStartPtrs[i] += batchSize;
-                }
-
-                Avx.Store(dstCurr, d00);
-                Avx.Store(dstCurr + 32, d01);
-                Avx.Store(dstCurr + 64, d02);
-                Avx.Store(dstCurr + 96, d03);
-                Avx.Store(dstCurr + 128, d04);
-                Avx.Store(dstCurr + 160, d05);
-                Avx.Store(dstCurr + 192, d06);
-                Avx.Store(dstCurr + 224, d07);
-
-                dstCurr += batchSize;
-            }
-            if (stail == 0) goto fillTail;
-            #endregion
-
-            #region 1x32
-            slen = stail;
-            batchSize = 1 * 32;
-            stail = slen & (batchSize - 1);
-            dstEnd = dstCurr + (slen - stail);
-
-            while (dstCurr < dstEnd)
-            {
-                Vector256<byte> d00 = Avx.LoadVector256(srcStartPtrs[0]);
-                srcStartPtrs[0] += batchSize;
-                for (int i = 1; i < srcKeyCount; i++)
-                {
-                    Vector256<byte> s00 = Avx.LoadVector256(srcStartPtrs[i]);
-                    d00 = Avx2.And(d00, s00);
-                    srcStartPtrs[i] += batchSize;
-                }
-                Avx.Store(dstCurr, d00);
-                dstCurr += batchSize;
-            }
-            if (stail == 0) goto fillTail;
-            #endregion
-
-            #region scalar_4x8
-            slen = stail;
-            batchSize = 4 * 8;
-            stail = slen & (batchSize - 1);
-            dstEnd = dstCurr + (slen - stail);
-            while (dstCurr < dstEnd)
-            {
-                long d00 = *(long*)(srcStartPtrs[0]);
-                long d01 = *(long*)(srcStartPtrs[0] + 8);
-                long d02 = *(long*)(srcStartPtrs[0] + 16);
-                long d03 = *(long*)(srcStartPtrs[0] + 24);
-                srcStartPtrs[0] += batchSize;
-                for (int i = 1; i < srcKeyCount; i++)
-                {
-                    d00 &= *(long*)(srcStartPtrs[i]);
-                    d01 &= *(long*)(srcStartPtrs[i] + 8);
-                    d02 &= *(long*)(srcStartPtrs[i] + 16);
-                    d03 &= *(long*)(srcStartPtrs[i] + 24);
-                    srcStartPtrs[i] += batchSize;
-                }
-
-                *(long*)dstCurr = d00;
-                *(long*)(dstCurr + 8) = d01;
-                *(long*)(dstCurr + 16) = d02;
-                *(long*)(dstCurr + 24) = d03;
-                dstCurr += batchSize;
-            }
-            if (stail == 0) goto fillTail;
-            #endregion  
-
-            #region scalar_1x8
-            slen = stail;
-            batchSize = 8;
-            stail = slen & (batchSize - 1);
-            dstEnd = dstCurr + (slen - stail);
-            while (dstCurr < dstEnd)
-            {
-                long d00 = *(long*)(srcStartPtrs[0]);
-                srcStartPtrs[0] += batchSize;
-                for (int i = 1; i < srcKeyCount; i++)
-                {
-                    d00 &= *(long*)(srcStartPtrs[i]);
-                    srcStartPtrs[i] += batchSize;
-                }
-                *(long*)dstCurr = d00;
-                dstCurr += batchSize;
-            }
-        #endregion
-
-        fillTail:
-            #region scalar_1x1    
-            byte* dstMaxEnd = dstPtr + dstLen;
-            int offset = 0;
-            while (dstCurr < dstMaxEnd)
-            {
-                byte d00;
-                if (srcStartPtrs[0] + offset < srcEndPtrs[0])
-                    d00 = srcStartPtrs[0][offset];
-                else
-                {
-                    d00 = 0;
-                    goto writeBack;
-                }
-
-                for (int i = 1; i < srcKeyCount; i++)
-                {
-                    if (srcStartPtrs[i] + offset < srcEndPtrs[i])
-                        d00 &= srcStartPtrs[i][offset];
-                    else
-                    {
-                        d00 = 0;
-                        goto writeBack;
-                    }
-                }
-            writeBack:
-                *dstCurr++ = d00;
-                offset++;
-            }
-            #endregion
+            InvokeMultiKeyBitwise<BitwiseAndOperator<byte>, BitwiseAndOperator<ulong>>((byte*)0, dstLen, (byte**)0, (byte**)0, srcKeyCount, minSize);
         }
 
         /// <summary>
-        /// OR bitop implementation using 256-wide SIMD registers.
+        /// Invokes bitwise bit-operation for multiple keys using hardware accelerated SIMD intrinsics when possible.
         /// </summary>
         /// <param name="dstPtr">Output buffer to write BitOp result</param>
         /// <param name="dstLen">Output buffer length.</param>
@@ -330,321 +136,149 @@ private static void __bitop_multikey_simdX256_and(byte* dstPtr, int dstLen, byte
         /// <param name="srcEndPtrs">Pointer to end of bitmap sources</param>
         /// <param name="srcKeyCount">Number of source keys.</param>
         /// <param name="minSize">Minimum size of source bitmaps.</param>
-        private static void __bitop_multikey_simdX256_or(byte* dstPtr, int dstLen, byte** srcStartPtrs, byte** srcEndPtrs, int srcKeyCount, int minSize)
+        private static void InvokeMultiKeyBitwise<TBinaryOperator, TBinaryOperator2>(byte* dstPtr, int dstLen, byte** srcStartPtrs, byte** srcEndPtrs, int srcKeyCount, int minSize)
+            where TBinaryOperator : struct, IBinaryOperator<byte>
+            where TBinaryOperator2 : struct, IBinaryOperator<ulong>
         {
-            int batchSize = 8 * 32;
             long slen = minSize;
-            long stail = slen & (batchSize - 1);
+            var remainder = slen & ((Vector256<byte>.Count * 8) - 1);
 
-            byte* dstCurr = dstPtr;
-            byte* dstEnd = dstCurr + (slen - stail);
+            var dstEndPtr = dstPtr + dstLen;
+            var dstBatchEndPtr = dstPtr + (slen - remainder);
 
-            #region 8x32
-            while (dstCurr < dstEnd)
+            ref var firstKeyPtr = ref srcStartPtrs[0];
+
+            while (dstPtr < dstBatchEndPtr)
             {
-                Vector256<byte> d00 = Avx.LoadVector256(srcStartPtrs[0]);
-                Vector256<byte> d01 = Avx.LoadVector256(srcStartPtrs[0] + 32);
-                Vector256<byte> d02 = Avx.LoadVector256(srcStartPtrs[0] + 64);
-                Vector256<byte> d03 = Avx.LoadVector256(srcStartPtrs[0] + 96);
-                Vector256<byte> d04 = Avx.LoadVector256(srcStartPtrs[0] + 128);
-                Vector256<byte> d05 = Avx.LoadVector256(srcStartPtrs[0] + 160);
-                Vector256<byte> d06 = Avx.LoadVector256(srcStartPtrs[0] + 192);
-                Vector256<byte> d07 = Avx.LoadVector256(srcStartPtrs[0] + 224);
-                srcStartPtrs[0] += batchSize;
-                for (int i = 1; i < srcKeyCount; i++)
+                var d00 = Vector256.Load(firstKeyPtr);
+                var d01 = Vector256.Load(firstKeyPtr + Vector256<byte>.Count);
+                var d02 = Vector256.Load(firstKeyPtr + (Vector256<byte>.Count * 2));
+                var d03 = Vector256.Load(firstKeyPtr + (Vector256<byte>.Count * 3));
+                var d04 = Vector256.Load(firstKeyPtr + (Vector256<byte>.Count * 4));
+                var d05 = Vector256.Load(firstKeyPtr + (Vector256<byte>.Count * 5));
+                var d06 = Vector256.Load(firstKeyPtr + (Vector256<byte>.Count * 6));
+                var d07 = Vector256.Load(firstKeyPtr + (Vector256<byte>.Count * 7));
+
+                firstKeyPtr += Vector256<byte>.Count * 8;
+
+                for (var i = 1; i < srcKeyCount; i++)
                 {
-                    Vector256<byte> s00 = Avx.LoadVector256(srcStartPtrs[i]);
-                    Vector256<byte> s01 = Avx.LoadVector256(srcStartPtrs[i] + 32);
-                    Vector256<byte> s02 = Avx.LoadVector256(srcStartPtrs[i] + 64);
-                    Vector256<byte> s03 = Avx.LoadVector256(srcStartPtrs[i] + 96);
-                    Vector256<byte> s04 = Avx.LoadVector256(srcStartPtrs[i] + 128);
-                    Vector256<byte> s05 = Avx.LoadVector256(srcStartPtrs[i] + 160);
-                    Vector256<byte> s06 = Avx.LoadVector256(srcStartPtrs[i] + 192);
-                    Vector256<byte> s07 = Avx.LoadVector256(srcStartPtrs[i] + 224);
-
-                    d00 = Avx2.Or(d00, s00);
-                    d01 = Avx2.Or(d01, s01);
-                    d02 = Avx2.Or(d02, s02);
-                    d03 = Avx2.Or(d03, s03);
-                    d04 = Avx2.Or(d04, s04);
-                    d05 = Avx2.Or(d05, s05);
-                    d06 = Avx2.Or(d06, s06);
-                    d07 = Avx2.Or(d07, s07);
-                    srcStartPtrs[i] += batchSize;
+                    ref var keyStartPtr = ref srcStartPtrs[i];
+
+                    var s00 = Vector256.Load(keyStartPtr);
+                    var s01 = Vector256.Load(keyStartPtr + Vector256<byte>.Count);
+                    var s02 = Vector256.Load(keyStartPtr + (Vector256<byte>.Count * 2));
+                    var s03 = Vector256.Load(keyStartPtr + (Vector256<byte>.Count * 3));
+                    var s04 = Vector256.Load(keyStartPtr + (Vector256<byte>.Count * 4));
+                    var s05 = Vector256.Load(keyStartPtr + (Vector256<byte>.Count * 5));
+                    var s06 = Vector256.Load(keyStartPtr + (Vector256<byte>.Count * 6));
+                    var s07 = Vector256.Load(keyStartPtr + (Vector256<byte>.Count * 7));
+
+                    d00 = TBinaryOperator.Invoke(d00, s00);
+                    d01 = TBinaryOperator.Invoke(d01, s01);
+                    d02 = TBinaryOperator.Invoke(d02, s02);
+                    d03 = TBinaryOperator.Invoke(d03, s03);
+                    d04 = TBinaryOperator.Invoke(d04, s04);
+                    d05 = TBinaryOperator.Invoke(d05, s05);
+                    d06 = TBinaryOperator.Invoke(d06, s06);
+                    d07 = TBinaryOperator.Invoke(d07, s07);
+
+                    keyStartPtr += Vector256<byte>.Count * 8;
                 }
 
-                Avx.Store(dstCurr, d00);
-                Avx.Store(dstCurr + 32, d01);
-                Avx.Store(dstCurr + 64, d02);
-                Avx.Store(dstCurr + 96, d03);
-                Avx.Store(dstCurr + 128, d04);
-                Avx.Store(dstCurr + 160, d05);
-                Avx.Store(dstCurr + 192, d06);
-                Avx.Store(dstCurr + 224, d07);
+                Vector256.Store(d00, dstPtr);
+                Vector256.Store(d01, dstPtr + Vector256<byte>.Count);
+                Vector256.Store(d02, dstPtr + Vector256<byte>.Count * 2);
+                Vector256.Store(d03, dstPtr + Vector256<byte>.Count * 3);
+                Vector256.Store(d04, dstPtr + Vector256<byte>.Count * 4);
+                Vector256.Store(d05, dstPtr + Vector256<byte>.Count * 5);
+                Vector256.Store(d06, dstPtr + Vector256<byte>.Count * 6);
+                Vector256.Store(d07, dstPtr + Vector256<byte>.Count * 7);
 
-                dstCurr += batchSize;
+                dstPtr += Vector256<byte>.Count * 8;
             }
-            if (stail == 0) goto fillTail;
-            #endregion
+            if (remainder == 0) goto fillTail;
 
-            #region 1x32
-            slen = stail;
-            batchSize = 1 * 32;
-            stail = slen & (batchSize - 1);
-            dstEnd = dstCurr + (slen - stail);
+            slen = remainder;
+            remainder = slen & (Vector256<byte>.Count - 1);
+            dstBatchEndPtr = dstPtr + (slen - remainder);
 
-            while (dstCurr < dstEnd)
+            while (dstPtr < dstBatchEndPtr)
             {
-                Vector256<byte> d00 = Avx.LoadVector256(srcStartPtrs[0]);
-                srcStartPtrs[0] += batchSize;
-                for (int i = 1; i < srcKeyCount; i++)
-                {
-                    Vector256<byte> s00 = Avx.LoadVector256(srcStartPtrs[i]);
-                    d00 = Avx2.Or(d00, s00);
-                    srcStartPtrs[i] += batchSize;
-                }
-                Avx.Store(dstCurr, d00);
-                dstCurr += batchSize;
-            }
-            if (stail == 0) goto fillTail;
-            #endregion
-
-            #region scalar_4x8
-            slen = stail;
-            batchSize = 4 * 8;
-            stail = slen & (batchSize - 1);
-            dstEnd = dstCurr + (slen - stail);
-            while (dstCurr < dstEnd)
-            {
-                long d00 = *(long*)(srcStartPtrs[0]);
-                long d01 = *(long*)(srcStartPtrs[0] + 8);
-                long d02 = *(long*)(srcStartPtrs[0] + 16);
-                long d03 = *(long*)(srcStartPtrs[0] + 24);
-                srcStartPtrs[0] += batchSize;
-                for (int i = 1; i < srcKeyCount; i++)
-                {
-                    d00 |= *(long*)(srcStartPtrs[i]);
-                    d01 |= *(long*)(srcStartPtrs[i] + 8);
-                    d02 |= *(long*)(srcStartPtrs[i] + 16);
-                    d03 |= *(long*)(srcStartPtrs[i] + 24);
-                    srcStartPtrs[i] += batchSize;
-                }
+                var d00 = Vector256.Load(firstKeyPtr);
+                firstKeyPtr += Vector256<byte>.Count;
 
-                *(long*)dstCurr = d00;
-                *(long*)(dstCurr + 8) = d01;
-                *(long*)(dstCurr + 16) = d02;
-                *(long*)(dstCurr + 24) = d03;
-                dstCurr += batchSize;
-            }
-            if (stail == 0) goto fillTail;
-            #endregion
-
-            #region scalar_1x8
-            slen = stail;
-            batchSize = 8;
-            stail = slen & (batchSize - 1);
-            dstEnd = dstCurr + (slen - stail);
-            while (dstCurr < dstEnd)
-            {
-                long d00 = *(long*)(srcStartPtrs[0]);
-                srcStartPtrs[0] += batchSize;
-                for (int i = 1; i < srcKeyCount; i++)
+                for (var i = 1; i < srcKeyCount; i++)
                 {
-                    d00 |= *(long*)(srcStartPtrs[i]);
-                    srcStartPtrs[i] += batchSize;
-                }
-                *(long*)dstCurr = d00;
-                dstCurr += batchSize;
-            }
-        #endregion
-
-        fillTail:
-            #region scalar_1x1    
-            byte* dstMaxEnd = dstPtr + dstLen;
-            int offset = 0;
-            while (dstCurr < dstMaxEnd)
-            {
-                byte d00 = 0;
-                if (srcStartPtrs[0] + offset < srcEndPtrs[0])
-                {
-                    d00 = srcStartPtrs[0][offset];
-                    if (d00 == 0xff) goto writeBack;
-                }
-
-                for (int i = 1; i < srcKeyCount; i++)
-                {
-                    if (srcStartPtrs[i] + offset < srcEndPtrs[i])
-                    {
-                        d00 |= srcStartPtrs[i][offset];
-                        if (d00 == 0xff) goto writeBack;
-                    }
-                }
-            writeBack:
-                *dstCurr++ = d00;
-                offset++;
-            }
-            #endregion
-        }
-
-        /// <summary>
-        /// XOR bitop implementation using 256-wide SIMD registers.
-        /// </summary>
-        /// <param name="dstPtr">Output buffer to write BitOp result</param>
-        /// <param name="dstLen">Output buffer length.</param>
-        /// <param name="srcStartPtrs">Pointer to start of bitmap sources.</param>
-        /// <param name="srcEndPtrs">Pointer to end of bitmap sources</param>
-        /// <param name="srcKeyCount">Number of source keys.</param>
-        /// <param name="minSize">Minimum size of source bitmaps.</param>
-        private static void __bitop_multikey_simdX256_xor(byte* dstPtr, int dstLen, byte** srcStartPtrs, byte** srcEndPtrs, int srcKeyCount, int minSize)
-        {
-            int batchSize = 8 * 32;
-            long slen = minSize;
-            long stail = slen & (batchSize - 1);
-
-            byte* dstCurr = dstPtr;
-            byte* dstEnd = dstCurr + (slen - stail);
+                    var s00 = Vector256.Load(srcStartPtrs[i]);
+                    d00 = TBinaryOperator.Invoke(d00, s00);
 
-            #region 8x32
-            while (dstCurr < dstEnd)
-            {
-                Vector256<byte> d00 = Avx.LoadVector256(srcStartPtrs[0]);
-                Vector256<byte> d01 = Avx.LoadVector256(srcStartPtrs[0] + 32);
-                Vector256<byte> d02 = Avx.LoadVector256(srcStartPtrs[0] + 64);
-                Vector256<byte> d03 = Avx.LoadVector256(srcStartPtrs[0] + 96);
-                Vector256<byte> d04 = Avx.LoadVector256(srcStartPtrs[0] + 128);
-                Vector256<byte> d05 = Avx.LoadVector256(srcStartPtrs[0] + 160);
-                Vector256<byte> d06 = Avx.LoadVector256(srcStartPtrs[0] + 192);
-                Vector256<byte> d07 = Avx.LoadVector256(srcStartPtrs[0] + 224);
-                srcStartPtrs[0] += batchSize;
-                for (int i = 1; i < srcKeyCount; i++)
-                {
-                    Vector256<byte> s00 = Avx.LoadVector256(srcStartPtrs[i]);
-                    Vector256<byte> s01 = Avx.LoadVector256(srcStartPtrs[i] + 32);
-                    Vector256<byte> s02 = Avx.LoadVector256(srcStartPtrs[i] + 64);
-                    Vector256<byte> s03 = Avx.LoadVector256(srcStartPtrs[i] + 96);
-                    Vector256<byte> s04 = Avx.LoadVector256(srcStartPtrs[i] + 128);
-                    Vector256<byte> s05 = Avx.LoadVector256(srcStartPtrs[i] + 160);
-                    Vector256<byte> s06 = Avx.LoadVector256(srcStartPtrs[i] + 192);
-                    Vector256<byte> s07 = Avx.LoadVector256(srcStartPtrs[i] + 224);
-
-                    d00 = Avx2.Xor(d00, s00);
-                    d01 = Avx2.Xor(d01, s01);
-                    d02 = Avx2.Xor(d02, s02);
-                    d03 = Avx2.Xor(d03, s03);
-                    d04 = Avx2.Xor(d04, s04);
-                    d05 = Avx2.Xor(d05, s05);
-                    d06 = Avx2.Xor(d06, s06);
-                    d07 = Avx2.Xor(d07, s07);
-                    srcStartPtrs[i] += batchSize;
+                    srcStartPtrs[i] += Vector256<byte>.Count;
                 }
 
-                Avx.Store(dstCurr, d00);
-                Avx.Store(dstCurr + 32, d01);
-                Avx.Store(dstCurr + 64, d02);
-                Avx.Store(dstCurr + 96, d03);
-                Avx.Store(dstCurr + 128, d04);
-                Avx.Store(dstCurr + 160, d05);
-                Avx.Store(dstCurr + 192, d06);
-                Avx.Store(dstCurr + 224, d07);
+                Vector256.Store(d00, dstPtr);
 
-                dstCurr += batchSize;
+                dstPtr += Vector256<byte>.Count;
             }
-            #endregion
+            if (remainder == 0) goto fillTail;
 
-            #region 1x32
-            slen = stail;
-            batchSize = 1 * 32;
-            stail = slen & (batchSize - 1);
-            dstEnd = dstCurr + (slen - stail);
+            slen = remainder;
+            remainder = slen & (sizeof(ulong) - 1);
+            dstBatchEndPtr = dstPtr + (slen - remainder);
 
-            while (dstCurr < dstEnd)
+            while (dstPtr < dstBatchEndPtr)
             {
-                Vector256<byte> d00 = Avx.LoadVector256(srcStartPtrs[0]);
-                srcStartPtrs[0] += batchSize;
-                for (int i = 1; i < srcKeyCount; i++)
-                {
-                    Vector256<byte> s00 = Avx.LoadVector256(srcStartPtrs[i]);
-                    d00 = Avx2.Xor(d00, s00);
-                    srcStartPtrs[i] += batchSize;
-                }
-                Avx.Store(dstCurr, d00);
-                dstCurr += batchSize;
-            }
-            #endregion
-
-            #region scalar_4x8
-            slen = stail;
-            batchSize = 4 * 8;
-            stail = slen & (batchSize - 1);
-            dstEnd = dstCurr + (slen - stail);
-            while (dstCurr < dstEnd)
-            {
-                long d00 = *(long*)(srcStartPtrs[0]);
-                long d01 = *(long*)(srcStartPtrs[0] + 8);
-                long d02 = *(long*)(srcStartPtrs[0] + 16);
-                long d03 = *(long*)(srcStartPtrs[0] + 24);
-                srcStartPtrs[0] += batchSize;
-                for (int i = 1; i < srcKeyCount; i++)
-                {
-                    d00 ^= *(long*)(srcStartPtrs[i]);
-                    d01 ^= *(long*)(srcStartPtrs[i] + 8);
-                    d02 ^= *(long*)(srcStartPtrs[i] + 16);
-                    d03 ^= *(long*)(srcStartPtrs[i] + 24);
-                    srcStartPtrs[i] += batchSize;
-                }
+                ulong d00 = *(ulong*)firstKeyPtr;
+                firstKeyPtr += sizeof(ulong);
 
-                *(long*)dstCurr = d00;
-                *(long*)(dstCurr + 8) = d01;
-                *(long*)(dstCurr + 16) = d02;
-                *(long*)(dstCurr + 24) = d03;
-                dstCurr += batchSize;
-            }
-            if (stail == 0) goto fillTail;
-            #endregion
-
-            #region scalar_1x8
-            slen = stail;
-            batchSize = 8;
-            stail = slen & (batchSize - 1);
-            dstEnd = dstCurr + (slen - stail);
-            while (dstCurr < dstEnd)
-            {
-                long d00 = *(long*)(srcStartPtrs[0]);
-                srcStartPtrs[0] += batchSize;
-                for (int i = 1; i < srcKeyCount; i++)
+                for (var i = 1; i < srcKeyCount; i++)
                 {
-                    d00 ^= *(long*)(srcStartPtrs[i]);
-                    srcStartPtrs[i] += batchSize;
+                    d00 = TBinaryOperator2.Invoke(d00, *(ulong*)srcStartPtrs[i]);
+                    srcStartPtrs[i] += sizeof(ulong);
                 }
-                *(long*)dstCurr = d00;
-                dstCurr += batchSize;
+
+                *(ulong*)dstPtr = d00;
+                dstPtr += sizeof(ulong);
             }
-        #endregion
 
         fillTail:
-            #region scalar_1x1    
-            byte* dstMaxEnd = dstPtr + dstLen;
-            while (dstCurr < dstMaxEnd)
+            while (dstPtr < dstEndPtr)
             {
                 byte d00 = 0;
-                if (srcStartPtrs[0] < srcEndPtrs[0])
+
+                if (firstKeyPtr < srcEndPtrs[0])
                 {
-                    d00 = *srcStartPtrs[0];
-                    srcStartPtrs[0]++;
+                    d00 = *firstKeyPtr;
+                    firstKeyPtr++;
                 }
 
-                for (int i = 1; i < srcKeyCount; i++)
+                for (var i = 1; i < srcKeyCount; i++)
                 {
                     if (srcStartPtrs[i] < srcEndPtrs[i])
                     {
-                        d00 ^= *srcStartPtrs[i];
+                        d00 = TBinaryOperator.Invoke(d00, *srcStartPtrs[i]);
                         srcStartPtrs[i]++;
                     }
+                    else
+                    {
+                        if (typeof(TBinaryOperator) == typeof(BitwiseAndOperator<byte>))
+                        {
+                            d00 = 0;
+                        }
+                        else if (typeof(TBinaryOperator) == typeof(BitwiseOrOperator<byte>))
+                        {
+                            // nop
+                        }
+                        else if (typeof(TBinaryOperator) == typeof(BitwiseXorOperator<byte>))
+                        {
+                            // TODO: I _think_ there's a error in this logic and we should have here:
+                            // d00 ^= 0;
+                        }
+                    }
                 }
-                *dstCurr++ = d00;
+
+                *dstPtr++ = d00;
             }
-            #endregion
         }
-
     }
 }
\ No newline at end of file
diff --git a/test/Garnet.test/GarnetBitmapTests.cs b/test/Garnet.test/GarnetBitmapTests.cs
index 627a4984d4..11a2327417 100644
--- a/test/Garnet.test/GarnetBitmapTests.cs
+++ b/test/Garnet.test/GarnetBitmapTests.cs
@@ -32,14 +32,9 @@ public void TearDown()
             TestUtils.DeleteDirectory(TestUtils.MethodTestDir);
         }
 
-        private long LongRandom() => ((long)this.r.Next() << 32) | (long)this.r.Next();
+        private long LongRandom() => r.NextInt64();
 
-        private ulong ULongRandom()
-        {
-            ulong lsb = (ulong)(this.r.Next());
-            ulong msb = (ulong)(this.r.Next()) << 32;
-            return (msb | lsb);
-        }
+        private ulong ULongRandom() => (ulong)r.NextInt64(long.MinValue, long.MaxValue);
 
         private unsafe long ResponseToLong(byte[] response, int offset)
         {
@@ -879,16 +874,18 @@ public void BitmapSimpleBitOpTests()
             }
         }
 
-        private static void InitBitmap(ref byte[] dst, byte[] srcA, bool invert = false)
+        private static byte[] CopyBitmap(byte[] sourceBitmap, bool invert = false)
         {
-            dst = new byte[srcA.Length];
+            var dst = new byte[sourceBitmap.Length];
             if (invert)
-                for (int i = 0; i < srcA.Length; i++) dst[i] = (byte)~srcA[i];
+                for (int i = 0; i < sourceBitmap.Length; i++) dst[i] = (byte)~sourceBitmap[i];
             else
-                for (int i = 0; i < srcA.Length; i++) dst[i] = srcA[i];
+                sourceBitmap.AsSpan().CopyTo(dst);
+
+            return dst;
         }
 
-        private static void ApplyBitop(ref byte[] dst, byte[] srcA, Func<byte, byte, byte> f8)
+        private static void ApplyBitop(ref byte[] dst, byte[] srcA, Func<byte, byte, byte> op)
         {
             if (dst.Length < srcA.Length)
             {
@@ -899,12 +896,12 @@ private static void ApplyBitop(ref byte[] dst, byte[] srcA, Func<byte, byte, byt
 
             for (int i = 0; i < srcA.Length; i++)
             {
-                dst[i] = f8(dst[i], srcA[i]);
+                dst[i] = op(dst[i], srcA[i]);
             }
 
             for (int i = srcA.Length; i < dst.Length; i++)
             {
-                dst[i] = f8(dst[i], 0);
+                dst[i] = op(dst[i], 0);
             }
         }
 
@@ -930,7 +927,6 @@ public void BitmapSimpleVarLenBitOpTests()
             byte[] dataB = new byte[r.Next(1, maxBytes)];
             byte[] dataC = new byte[r.Next(1, maxBytes)];
             byte[] dataD = new byte[r.Next(1, maxBytes)];
-            byte[] dataX = null;
 
             for (int j = 0; j < bitwiseOps.Length; j++)
             {
@@ -946,25 +942,17 @@ public void BitmapSimpleVarLenBitOpTests()
                     db.StringSet(c, dataC);
                     db.StringSet(d, dataD);
 
-                    Func<byte, byte, byte> f8 = null;
-                    switch (bitwiseOps[j])
-                    {
-                        case Bitwise.And:
-                            f8 = (a, b) => (byte)(a & b);
-                            break;
-                        case Bitwise.Or:
-                            f8 = (a, b) => (byte)(a | b);
-                            break;
-                        case Bitwise.Xor:
-                            f8 = (a, b) => (byte)(a ^ b);
-                            break;
-                    }
+                    Func<byte, byte, byte> op = bitwiseOps[j] switch
+                    { 
+                        Bitwise.And => static (a, b) => (byte)(a & b),
+                        Bitwise.Or => static (a, b) => (byte)(a | b),
+                        Bitwise.Xor => static (a, b) => (byte)(a ^ b)
+                    };
 
-                    dataX = null;
-                    InitBitmap(ref dataX, dataA);
-                    ApplyBitop(ref dataX, dataB, f8);
-                    ApplyBitop(ref dataX, dataC, f8);
-                    ApplyBitop(ref dataX, dataD, f8);
+                    byte[] dataX = CopyBitmap(dataA);
+                    ApplyBitop(ref dataX, dataB, op);
+                    ApplyBitop(ref dataX, dataC, op);
+                    ApplyBitop(ref dataX, dataD, op);
 
                     long size = db.StringBitOperation(bitwiseOps[j], x, keys);
                     ClassicAssert.AreEqual(size, dataX.Length);
@@ -1032,7 +1020,7 @@ public void BitmapSimpleBitOpVarLenGrowingSizeTests()
             string x = "x";
 
             byte[] dataA, dataB, dataC, dataD;
-            byte[] dataX;
+
             int minSize = 512;
             Bitwise[] bitwiseOps = [Bitwise.And, Bitwise.Or, Bitwise.Xor, Bitwise.And, Bitwise.Or, Bitwise.Xor];
             RedisKey[] keys = [a, b, c, d];
@@ -1042,15 +1030,14 @@ public void BitmapSimpleBitOpVarLenGrowingSizeTests()
             {
                 dataA = new byte[r.Next(minSize, minSize + 32)];
                 r.NextBytes(dataA);
-                db.StringSet(a, dataA);
+                byte[] expectedX = CopyBitmap(dataA, invert: true);
 
-                dataX = null;
-                InitBitmap(ref dataX, dataA, true);
+                db.StringSet(a, dataA);
                 long size = db.StringBitOperation(Bitwise.Not, x, a);
-                ClassicAssert.AreEqual(size, dataX.Length);
+                ClassicAssert.AreEqual(expectedX.Length, size);
 
-                byte[] expectedX = db.StringGet(x);
-                ClassicAssert.AreEqual(dataX, expectedX);
+                byte[] actualX = db.StringGet(x);
+                ClassicAssert.AreEqual(expectedX, actualX);
             }
 
             //Test AND, OR, XOR
@@ -1062,8 +1049,7 @@ public void BitmapSimpleBitOpVarLenGrowingSizeTests()
                     dataB = new byte[r.Next(minSize, minSize + 16)]; minSize = dataB.Length;
                     dataC = new byte[r.Next(minSize, minSize + 16)]; minSize = dataC.Length;
                     dataD = new byte[r.Next(minSize, minSize + 16)]; minSize = dataD.Length;
-                    minSize = 17;
-
+                    
                     r.NextBytes(dataA);
                     r.NextBytes(dataB);
                     r.NextBytes(dataC);
@@ -1074,32 +1060,24 @@ public void BitmapSimpleBitOpVarLenGrowingSizeTests()
                     db.StringSet(c, dataC);
                     db.StringSet(d, dataD);
 
-                    Func<byte, byte, byte> f8 = null;
-                    switch (bitwiseOps[j])
+                    Func<byte, byte, byte> op = bitwiseOps[j] switch
                     {
-                        case Bitwise.And:
-                            f8 = (a, b) => (byte)(a & b);
-                            break;
-                        case Bitwise.Or:
-                            f8 = (a, b) => (byte)(a | b);
-                            break;
-                        case Bitwise.Xor:
-                            f8 = (a, b) => (byte)(a ^ b);
-                            break;
-                    }
+                        Bitwise.And => static (a, b) => (byte)(a & b),
+                        Bitwise.Or => static (a, b) => (byte)(a | b),
+                        Bitwise.Xor => static (a, b) => (byte)(a ^ b)
+                    };
 
-                    dataX = null;
-                    InitBitmap(ref dataX, dataA);
-                    ApplyBitop(ref dataX, dataB, f8);
-                    ApplyBitop(ref dataX, dataC, f8);
-                    ApplyBitop(ref dataX, dataD, f8);
+                    byte[] expectedX = CopyBitmap(dataA);
+                    ApplyBitop(ref expectedX, dataB, op);
+                    ApplyBitop(ref expectedX, dataC, op);
+                    ApplyBitop(ref expectedX, dataD, op);
 
                     long size = db.StringBitOperation(bitwiseOps[j], x, keys);
-                    ClassicAssert.AreEqual(size, dataX.Length);
-                    byte[] expectedX = db.StringGet(x);
+                    ClassicAssert.AreEqual(expectedX.Length, size);
+                    byte[] dataX = db.StringGet(x);
 
-                    ClassicAssert.AreEqual(expectedX.Length, dataX.Length);
-                    ClassicAssert.AreEqual(dataX, expectedX);
+                    ClassicAssert.AreEqual(expectedX.Length, expectedX.Length);
+                    ClassicAssert.AreEqual(expectedX, dataX);
                 }
             }
         }