diff --git a/src/libraries/System.Collections/src/System/Collections/BitArray.cs b/src/libraries/System.Collections/src/System/Collections/BitArray.cs index 9f8531a9e818a8..925f175f7fcf8f 100644 --- a/src/libraries/System.Collections/src/System/Collections/BitArray.cs +++ b/src/libraries/System.Collections/src/System/Collections/BitArray.cs @@ -4,9 +4,11 @@ using System.Buffers.Binary; using System.Diagnostics; using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.X86; using System.Runtime.Intrinsics.Arm; +using Internal.Runtime.CompilerServices; namespace System.Collections { @@ -145,81 +147,32 @@ public unsafe BitArray(bool[] values) // (true for any non-zero values, false for 0) - any values between 2-255 will be interpreted as false. // Instead, We compare with zeroes (== false) then negate the result to ensure compatibility. - if (Avx2.IsSupported) + ref byte value = ref Unsafe.As(ref MemoryMarshal.GetArrayDataReference(values)); + + if (Vector256.IsHardwareAccelerated) { - // JIT does not support code hoisting for SIMD yet - Vector256 zero = Vector256.Zero; - fixed (bool* ptr = values) + for (; (i + Vector256ByteCount) <= (uint)values.Length; i += Vector256ByteCount) { - for (; (i + Vector256ByteCount) <= (uint)values.Length; i += Vector256ByteCount) - { - Vector256 vector = Avx.LoadVector256((byte*)ptr + i); - Vector256 isFalse = Avx2.CompareEqual(vector, zero); - int result = Avx2.MoveMask(isFalse); - m_array[i / 32u] = ~result; - } + Vector256 vector = Vector256.LoadUnsafe(ref value, i); + Vector256 isFalse = Vector256.Equals(vector, Vector256.Zero); + + uint result = isFalse.ExtractMostSignificantBits(); + m_array[i / 32u] = (int)(~result); } } - else if (Sse2.IsSupported) + else if (Vector128.IsHardwareAccelerated) { - // JIT does not support code hoisting for SIMD yet - Vector128 zero = Vector128.Zero; - fixed (bool* ptr = values) + for (; (i + Vector128ByteCount * 2u) <= (uint)values.Length; i += Vector128ByteCount * 2u) { - for (; (i + Vector128ByteCount * 2u) <= (uint)values.Length; i += Vector128ByteCount * 2u) - { - Vector128 lowerVector = Sse2.LoadVector128((byte*)ptr + i); - Vector128 lowerIsFalse = Sse2.CompareEqual(lowerVector, zero); - int lowerPackedIsFalse = Sse2.MoveMask(lowerIsFalse); + Vector128 lowerVector = Vector128.LoadUnsafe(ref value, i); + Vector128 lowerIsFalse = Vector128.Equals(lowerVector, Vector128.Zero); + uint lowerResult = lowerIsFalse.ExtractMostSignificantBits(); - Vector128 upperVector = Sse2.LoadVector128((byte*)ptr + i + Vector128.Count); - Vector128 upperIsFalse = Sse2.CompareEqual(upperVector, zero); - int upperPackedIsFalse = Sse2.MoveMask(upperIsFalse); + Vector128 upperVector = Vector128.LoadUnsafe(ref value, i + Vector128ByteCount); + Vector128 upperIsFalse = Vector128.Equals(upperVector, Vector128.Zero); + uint upperResult = upperIsFalse.ExtractMostSignificantBits(); - m_array[i / 32u] = ~((upperPackedIsFalse << 16) | lowerPackedIsFalse); - } - } - } - else if (AdvSimd.Arm64.IsSupported) - { - // JIT does not support code hoisting for SIMD yet - // However comparison against zero can be replaced to cmeq against zero (vceqzq_s8) - // See dotnet/runtime#33972 for details - Vector128 zero = Vector128.Zero; - Vector128 bitMask128 = BitConverter.IsLittleEndian ? - Vector128.Create(0x80402010_08040201).AsByte() : - Vector128.Create(0x01020408_10204080).AsByte(); - - fixed (bool* ptr = values) - { - for (; (i + Vector128ByteCount * 2u) <= (uint)values.Length; i += Vector128ByteCount * 2u) - { - // Same logic as SSE2 path, however we lack MoveMask (equivalent) instruction - // As a workaround, mask out the relevant bit after comparison - // and combine by ORing all of them together (In this case, adding all of them does the same thing) - Vector128 lowerVector = AdvSimd.LoadVector128((byte*)ptr + i); - Vector128 lowerIsFalse = AdvSimd.CompareEqual(lowerVector, zero); - Vector128 bitsExtracted1 = AdvSimd.And(lowerIsFalse, bitMask128); - bitsExtracted1 = AdvSimd.Arm64.AddPairwise(bitsExtracted1, bitsExtracted1); - bitsExtracted1 = AdvSimd.Arm64.AddPairwise(bitsExtracted1, bitsExtracted1); - bitsExtracted1 = AdvSimd.Arm64.AddPairwise(bitsExtracted1, bitsExtracted1); - Vector128 lowerPackedIsFalse = bitsExtracted1.AsInt16(); - - Vector128 upperVector = AdvSimd.LoadVector128((byte*)ptr + i + Vector128.Count); - Vector128 upperIsFalse = AdvSimd.CompareEqual(upperVector, zero); - Vector128 bitsExtracted2 = AdvSimd.And(upperIsFalse, bitMask128); - bitsExtracted2 = AdvSimd.Arm64.AddPairwise(bitsExtracted2, bitsExtracted2); - bitsExtracted2 = AdvSimd.Arm64.AddPairwise(bitsExtracted2, bitsExtracted2); - bitsExtracted2 = AdvSimd.Arm64.AddPairwise(bitsExtracted2, bitsExtracted2); - Vector128 upperPackedIsFalse = bitsExtracted2.AsInt16(); - - int result = AdvSimd.Arm64.ZipLow(lowerPackedIsFalse, upperPackedIsFalse).AsInt32().ToScalar(); - if (!BitConverter.IsLittleEndian) - { - result = BinaryPrimitives.ReverseEndianness(result); - } - m_array[i / 32u] = ~result; - } + m_array[i / 32u] = (int)(~((upperResult << 16) | lowerResult)); } } @@ -400,43 +353,24 @@ public unsafe BitArray And(BitArray value) } uint i = 0; - if (Avx2.IsSupported) - { - fixed (int* leftPtr = thisArray) - fixed (int* rightPtr = valueArray) - { - for (; i < (uint)count - (Vector256IntCount - 1u); i += Vector256IntCount) - { - Vector256 leftVec = Avx.LoadVector256(leftPtr + i); - Vector256 rightVec = Avx.LoadVector256(rightPtr + i); - Avx.Store(leftPtr + i, Avx2.And(leftVec, rightVec)); - } - } - } - else if (Sse2.IsSupported) + + ref int left = ref MemoryMarshal.GetArrayDataReference(thisArray); + ref int right = ref MemoryMarshal.GetArrayDataReference(valueArray); + + if (Vector256.IsHardwareAccelerated) { - fixed (int* leftPtr = thisArray) - fixed (int* rightPtr = valueArray) + for (; i < (uint)count - (Vector256IntCount - 1u); i += Vector256IntCount) { - for (; i < (uint)count - (Vector128IntCount - 1u); i += Vector128IntCount) - { - Vector128 leftVec = Sse2.LoadVector128(leftPtr + i); - Vector128 rightVec = Sse2.LoadVector128(rightPtr + i); - Sse2.Store(leftPtr + i, Sse2.And(leftVec, rightVec)); - } + Vector256 result = Vector256.LoadUnsafe(ref left, i) & Vector256.LoadUnsafe(ref right, i); + result.StoreUnsafe(ref left, i); } } - else if (AdvSimd.IsSupported) + else if (Vector128.IsHardwareAccelerated) { - fixed (int* leftPtr = thisArray) - fixed (int* rightPtr = valueArray) + for (; i < (uint)count - (Vector128IntCount - 1u); i += Vector128IntCount) { - for (; i < (uint)count - (Vector128IntCount - 1u); i += Vector128IntCount) - { - Vector128 leftVec = AdvSimd.LoadVector128(leftPtr + i); - Vector128 rightVec = AdvSimd.LoadVector128(rightPtr + i); - AdvSimd.Store(leftPtr + i, AdvSimd.And(leftVec, rightVec)); - } + Vector128 result = Vector128.LoadUnsafe(ref left, i) & Vector128.LoadUnsafe(ref right, i); + result.StoreUnsafe(ref left, i); } } @@ -486,43 +420,24 @@ public unsafe BitArray Or(BitArray value) } uint i = 0; - if (Avx2.IsSupported) - { - fixed (int* leftPtr = thisArray) - fixed (int* rightPtr = valueArray) - { - for (; i < (uint)count - (Vector256IntCount - 1u); i += Vector256IntCount) - { - Vector256 leftVec = Avx.LoadVector256(leftPtr + i); - Vector256 rightVec = Avx.LoadVector256(rightPtr + i); - Avx.Store(leftPtr + i, Avx2.Or(leftVec, rightVec)); - } - } - } - else if (Sse2.IsSupported) + + ref int left = ref MemoryMarshal.GetArrayDataReference(thisArray); + ref int right = ref MemoryMarshal.GetArrayDataReference(valueArray); + + if (Vector256.IsHardwareAccelerated) { - fixed (int* leftPtr = thisArray) - fixed (int* rightPtr = valueArray) + for (; i < (uint)count - (Vector256IntCount - 1u); i += Vector256IntCount) { - for (; i < (uint)count - (Vector128IntCount - 1u); i += Vector128IntCount) - { - Vector128 leftVec = Sse2.LoadVector128(leftPtr + i); - Vector128 rightVec = Sse2.LoadVector128(rightPtr + i); - Sse2.Store(leftPtr + i, Sse2.Or(leftVec, rightVec)); - } + Vector256 result = Vector256.LoadUnsafe(ref left, i) | Vector256.LoadUnsafe(ref right, i); + result.StoreUnsafe(ref left, i); } } - else if (AdvSimd.IsSupported) + else if (Vector128.IsHardwareAccelerated) { - fixed (int* leftPtr = thisArray) - fixed (int* rightPtr = valueArray) + for (; i < (uint)count - (Vector128IntCount - 1u); i += Vector128IntCount) { - for (; i < (uint)count - (Vector128IntCount - 1u); i += Vector128IntCount) - { - Vector128 leftVec = AdvSimd.LoadVector128(leftPtr + i); - Vector128 rightVec = AdvSimd.LoadVector128(rightPtr + i); - AdvSimd.Store(leftPtr + i, AdvSimd.Or(leftVec, rightVec)); - } + Vector128 result = Vector128.LoadUnsafe(ref left, i) | Vector128.LoadUnsafe(ref right, i); + result.StoreUnsafe(ref left, i); } } @@ -572,43 +487,24 @@ public unsafe BitArray Xor(BitArray value) } uint i = 0; - if (Avx2.IsSupported) + + ref int left = ref MemoryMarshal.GetArrayDataReference(thisArray); + ref int right = ref MemoryMarshal.GetArrayDataReference(valueArray); + + if (Vector256.IsHardwareAccelerated) { - fixed (int* leftPtr = m_array) - fixed (int* rightPtr = value.m_array) + for (; i < (uint)count - (Vector256IntCount - 1u); i += Vector256IntCount) { - for (; i < (uint)count - (Vector256IntCount - 1u); i += Vector256IntCount) - { - Vector256 leftVec = Avx.LoadVector256(leftPtr + i); - Vector256 rightVec = Avx.LoadVector256(rightPtr + i); - Avx.Store(leftPtr + i, Avx2.Xor(leftVec, rightVec)); - } + Vector256 result = Vector256.LoadUnsafe(ref left, i) ^ Vector256.LoadUnsafe(ref right, i); + result.StoreUnsafe(ref left, i); } } - else if (Sse2.IsSupported) + else if (Vector128.IsHardwareAccelerated) { - fixed (int* leftPtr = thisArray) - fixed (int* rightPtr = valueArray) + for (; i < (uint)count - (Vector128IntCount - 1u); i += Vector128IntCount) { - for (; i < (uint)count - (Vector128IntCount - 1u); i += Vector128IntCount) - { - Vector128 leftVec = Sse2.LoadVector128(leftPtr + i); - Vector128 rightVec = Sse2.LoadVector128(rightPtr + i); - Sse2.Store(leftPtr + i, Sse2.Xor(leftVec, rightVec)); - } - } - } - else if (AdvSimd.IsSupported) - { - fixed (int* leftPtr = thisArray) - fixed (int* rightPtr = valueArray) - { - for (; i < (uint)count - (Vector128IntCount - 1u); i += Vector128IntCount) - { - Vector128 leftVec = AdvSimd.LoadVector128(leftPtr + i); - Vector128 rightVec = AdvSimd.LoadVector128(rightPtr + i); - AdvSimd.Store(leftPtr + i, AdvSimd.Xor(leftVec, rightVec)); - } + Vector128 result = Vector128.LoadUnsafe(ref left, i) ^ Vector128.LoadUnsafe(ref right, i); + result.StoreUnsafe(ref left, i); } } @@ -650,39 +546,23 @@ public unsafe BitArray Not() } uint i = 0; - if (Avx2.IsSupported) - { - Vector256 ones = Vector256.Create(-1); - fixed (int* ptr = thisArray) - { - for (; i < (uint)count - (Vector256IntCount - 1u); i += Vector256IntCount) - { - Vector256 vec = Avx.LoadVector256(ptr + i); - Avx.Store(ptr + i, Avx2.Xor(vec, ones)); - } - } - } - else if (Sse2.IsSupported) + + ref int value = ref MemoryMarshal.GetArrayDataReference(thisArray); + + if (Vector256.IsHardwareAccelerated) { - Vector128 ones = Vector128.Create(-1); - fixed (int* ptr = thisArray) + for (; i < (uint)count - (Vector256IntCount - 1u); i += Vector256IntCount) { - for (; i < (uint)count - (Vector128IntCount - 1u); i += Vector128IntCount) - { - Vector128 vec = Sse2.LoadVector128(ptr + i); - Sse2.Store(ptr + i, Sse2.Xor(vec, ones)); - } + Vector256 result = ~Vector256.LoadUnsafe(ref value, i); + result.StoreUnsafe(ref value, i); } } - else if (AdvSimd.IsSupported) + else if (Vector128.IsHardwareAccelerated) { - fixed (int* leftPtr = thisArray) + for (; i < (uint)count - (Vector128IntCount - 1u); i += Vector128IntCount) { - for (; i < (uint)count - (Vector128IntCount - 1u); i += Vector128IntCount) - { - Vector128 leftVec = AdvSimd.LoadVector128(leftPtr + i); - AdvSimd.Store(leftPtr + i, AdvSimd.Not(leftVec)); - } + Vector128 result = ~Vector128.LoadUnsafe(ref value, i); + result.StoreUnsafe(ref value, i); } }