From 80ebee4e5f7a66776f460840a1546196c95df143 Mon Sep 17 00:00:00 2001 From: PaulusParssinen Date: Wed, 8 May 2024 01:55:53 +0300 Subject: [PATCH] Fix incorrect quantization approach --- libs/server/Objects/SortedSetGeo/GeoHash.cs | 83 +++++++++++++-------- test/Garnet.test/GeoHashTests.cs | 56 +++++++------- 2 files changed, 80 insertions(+), 59 deletions(-) diff --git a/libs/server/Objects/SortedSetGeo/GeoHash.cs b/libs/server/Objects/SortedSetGeo/GeoHash.cs index 485ddd54e67..c1ef7de04bb 100644 --- a/libs/server/Objects/SortedSetGeo/GeoHash.cs +++ b/libs/server/Objects/SortedSetGeo/GeoHash.cs @@ -2,6 +2,7 @@ // Licensed under the MIT license. using System; +using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; #if NET8_0_OR_GREATER @@ -17,10 +18,12 @@ namespace Garnet.server /// public static class GeoHash { - // Constraints from EPSG:900913 / EPSG:3785 / OSGEO:41001 + // Constraints from WGS 84 / Pseudo-Mercator (EPSG:3857) private const double LongitudeMin = -180.0; private const double LongitudeMax = 180.0; + // TODO: These are "wrong" in a sense that EPSG:3857 + // says that the latitude should be from -85.05112878 to 85.05112878 private const double LatitudeMin = -90.0; private const double LatitudeMax = 90.0; @@ -45,14 +48,11 @@ public static long GeoToLongValue(double latitude, double longitude) return -1L; } - const double LatToUnitRangeReciprocal = 1 / 180.0; - const double LonToUnitRangeReciprocal = 1 / 360.0; - // Credits to https://mmcloughlin.com/posts/geohash-assembly for the quantization approach! - // The coordinates are quantized by first mapping them to the unit interval [0, 1] and + // The coordinates are quantized by first mapping them to the unit interval [0.0, 1.0] and // then multiplying the 2^32. For example, to get 32-bit quantized integer representation of the latitude - // which is in range [-90, 90] we would do: + // which is in range [-90.0, 90.0] we would do: // // latQuantized = floor(2.0^32 * (latitude + 90.0) / 180.0) // @@ -62,33 +62,40 @@ public static long GeoToLongValue(double latitude, double longitude) // // floor(2^52 * x) where x = (latitude + 90.0) / 180.0. // - // and further that the value of this can be read from binary representation of x+1. + // and further that the result of this calculation can be read simply from + // the IEEE-754 binary representation of x + 1.0. - // In otherwords to quantize, we need to first map value to unit range [0, 1]. - // We achieve this by multiplying: [-value, value] * rangeReciprocal = [-0.5, 0.5] - // Then by adding 1.5, we shift the value range to [1, 2], - // for which the IEEE-754 double-precision representation is as follows: - // - // (-1)^sign * 2^(exp-1023) * (1.0 + significand/2^52) - // where s=0, exp=1023, significand=floor(2^52 * x) - // - // Now we can read value of floor(2^52 * x) directly from binary representation of 1.0 + x, - // where the now "quantized" value is stored as the 32 most significant bits of the signicand! - static uint Quantize(double value, double rangeReciprocal) => (uint)(BitConverter.DoubleToUInt64Bits(Math.FusedMultiplyAdd(value, rangeReciprocal, 1.5)) >> 20); + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static uint Quantize(double value, double rangeReciprocal) + { + // In otherwords; we need to first map value to unit range [0.0, 1.0]. + // We achieve this by multiplying [-value, value] by rangeReciprocal, giving us value in range [-0.5, 0.5] + // Then by adding 1.5, we shift the value range to [1.0, 2.0], + // for which the IEEE-754 double-precision representation is as follows: + // + // (-1)^sign * 2^(exp-1023) * (1.0 + significand/2^52) + // where sign=0, exp=1023, significand=floor(2^52 * x), for x in [1.0, 2.0) + // + // Now we can read value of floor(2^52 * x) directly from binary representation of y = 1.0 + x, + // where the now "quantized" value is stored as the 32 most significant bits of the signicand! + var y = BitConverter.DoubleToUInt64Bits(Math.FusedMultiplyAdd(value, rangeReciprocal, 1.5)) >> 20; + + // Except we need to handle the corner-case where value rounds to the maximumm the range: 2.0 + // + // We do this branchlessly by comparing the 64-bit binary representation to 0x40000000000 (e=1024). + // This way the JIT will emit cmp + cmov for x86-64 and cmp + csel for arm64 instead of branch. + return y >= 0x40000000000 ? uint.MaxValue : (uint)y; + } + + const double LatToUnitRangeReciprocal = 1 / 180.0; + const double LonToUnitRangeReciprocal = 1 / 360.0; var latQuantized = Quantize(latitude, LatToUnitRangeReciprocal); var lonQuantized = Quantize(longitude, LonToUnitRangeReciprocal); - // Morton encode the quantized values, i.e. - // latQuantBits = xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx - // lonQuantBits = yyyyyyyy yyyyyyyy yyyyyyyy yyyyyyyy - + // Morton encode the quantized values var result = MortonEncode(x: latQuantized, y: lonQuantized); - // Now: - // resultBits = xyxyxyxy xyxyxyxy xyxyxyxy xyxyxyxy - // xyxyxyxy xyxyxyxy xyxyxyxy xyxyxyxy - // Shift to 52-bit precision. return (long)(result >> ((sizeof(ulong) * 8) - BitsOfPrecision)); } @@ -105,12 +112,12 @@ public static (double Latitude, double Longitude) GetCoordinatesFromLong(long ha // Credits to https://github.com/georust/geohash for the hash de-quantization method! static double Dequantize(uint quantizedValue, double rangeMax) { - // Construct the IEEE-754 double-precision representation of the value, which is in range [1, 2] + // Construct the IEEE-754 double-precision representation of the value, which is in range [1, 2) var value = BitConverter.UInt64BitsToDouble(((ulong)quantizedValue << 20) | (1023UL << 52)); // Now: - // (2*rangeMax) * ([1, 2]-1) = [0, 2*rangeMax] - // [0, 2*rangeMax] - rangeMax = [-rangeMax, rangeMax] + // (2*rangeMax) * ([1.0, 2.0) - 1.0) = [0.0, 2*rangeMax) + // [0.0, 2*rangeMax) - rangeMax = [-rangeMax, rangeMax) return Math.FusedMultiplyAdd(rangeMax + rangeMax, value - 1.0, -rangeMax); } @@ -143,7 +150,19 @@ static double Dequantize(uint quantizedValue, double rangeMax) /// /// The x-coordinate to encode. /// The y-coordinate to encode. - /// A ulong value representing the Morton encoding of the given coordinates. + /// + /// A 64-bit value representing the Morton encoding of the given coordinates. + /// i.e. in binary representation, for: + /// + /// x = xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx + /// y = yyyyyyyy yyyyyyyy yyyyyyyy yyyyyyyy + /// + /// Method returns: + /// + /// yxyxyxyx yxyxyxyx yxyxyxyx yxyxyxyx + /// yxyxyxyx yxyxyxyx yxyxyxyx yxyxyxyx + /// + /// [MethodImpl(MethodImplOptions.AggressiveInlining)] private static ulong MortonEncode(uint x, uint y) { @@ -159,7 +178,7 @@ static ulong Spread(uint x) } #if NET8_0_OR_GREATER - // You may wonder, why is this also guarded behind AVX512F in addition to BMI2? + // One may ask: Why is this also guarded behind AVX512F in addition to BMI2? // The answer is that on AMD platforms before Zen 3, the PDEP (and PEXT) are implemented in microcode // and work bit-by-bit basis. It has been measured[^1] that for every bit set in the mask operand, // there is 8~ uops issued, meaning that we would do 32 bits per mask * 8 uops * 2 = 512~ uops in total for the encoding instead of just 1 uop. @@ -183,7 +202,7 @@ static ulong Spread(uint x) /// This is essentially a bit de-interleaving operation where the even and odd bits of are "squashed" into separate 32-bit values representing the x- and y-coordinates respectively. /// /// The 64-bit value to decode. - /// A tuple of values representing the x- and y-coordinates decoded from the given Morton code. + /// A tuple of 32-bit values representing the x- and y-coordinates decoded from the given Morton code. [MethodImpl(MethodImplOptions.AggressiveInlining)] private static (uint X, uint Y) MortonDecode(ulong x) { diff --git a/test/Garnet.test/GeoHashTests.cs b/test/Garnet.test/GeoHashTests.cs index 94e006cf4b8..ab84e3bec50 100644 --- a/test/Garnet.test/GeoHashTests.cs +++ b/test/Garnet.test/GeoHashTests.cs @@ -2,6 +2,7 @@ // Licensed under the MIT license. using System; +using System.Globalization; using Garnet.server; using NUnit.Framework; @@ -10,43 +11,42 @@ namespace Garnet.test public class GeoHashTests { [Test] - [TestCase(0.0, 0.0, 0xC000000000000)] - [TestCase(30.5388942218, 104.0555758833, 4024744861876082)] - [TestCase(27.988056, 86.925278, 3636631039000829)] - [TestCase(37.502669, 15.087269, 3476216502357864)] - [TestCase(38.115556, 13.361389, 3476004292229755)] - [TestCase(38.918250, -77.427944, 1787100258949719)] - [TestCase(47.642219912251285, -122.14205560231471, 1557413161902764)] - [TestCase(-89.0, -179.0, 111258114535)] - // TODO: Investigate corner-cases - // [TestCase(-90.0, -180.0, ?)] - // [TestCase(90.0, 180.0, 0)] - [TestCase(-90.0, -180.0, 0)] - public void CanEncodeAndDecodeCoordinates( - double latitude, - double longitude, - long expectedHashInteger) + [TestCase(30.5388942218, 104.0555758833)] + [TestCase(27.988056, 86.925278)] + [TestCase(37.502669, 15.087269)] + [TestCase(38.115556, 13.361389)] + [TestCase(38.918250, -77.427944)] + [TestCase(-90.0, -180.0)] + [TestCase(0.0, 0.0)] + [TestCase(double.Epsilon, double.Epsilon)] + [TestCase(-double.Epsilon, -double.Epsilon)] + [TestCase(90.0, 180.0)] + [TestCase(89.99999999999999, 179.99999999999997)] // double.BitDecrement((Lat/Long)Max) + public void CanEncodeAndDecodeCoordinates(double latitude, double longitude) { + const double Epsilon = 0.00001; + var hashinteger = GeoHash.GeoToLongValue(latitude, longitude); - Assert.AreEqual(expectedHashInteger, hashinteger); var (actualLatitude, actualLongitude) = GeoHash.GetCoordinatesFromLong(hashinteger); - const double Epsilon = 0.00001; - Assert.IsTrue(Math.Abs(latitude - actualLatitude) <= Epsilon); - Assert.IsTrue(Math.Abs(longitude - actualLongitude) <= Epsilon); + var latError = Math.Abs(latitude - actualLatitude); + var lonError = Math.Abs(longitude - actualLongitude); + + Assert.IsTrue(latError <= Epsilon, "Math.Abs(latError)=" + latError.ToString("F16", CultureInfo.InvariantCulture)); + Assert.IsTrue(lonError <= Epsilon, "Math.Abs(lonError)=" + latError.ToString("F16", CultureInfo.InvariantCulture)); } [Test] + [TestCase(30.5388942218, 104.0555758833, 4024744861876082, "wm3vxz6vywh")] [TestCase(27.988056, 86.925278, 3636631039000829, "tuvz4p141z8")] [TestCase(37.502669, 15.087269, 3476216502357864, "sqdtr74hyu0")] [TestCase(38.115556, 13.361389, 3476004292229755, "sqc8b49rnys")] [TestCase(38.918250, -77.427944, 1787100258949719, "dqbvqhfenps")] - [TestCase(-89.0, -179.0, 111258114535, "000twy01mts")] - // TODO: Investigate corner-cases - // [TestCase(-90.0, -180.0, ?, ")] - // [TestCase(0.0, 0.0, 0xC000000000000, "7zzzzzzzzzz")] - [TestCase(90.0, 180.0, 0, "00000000000")] + [TestCase(0.0, 0.0, 0xC000000000000, "s0000000000")] + [TestCase(-90.0, -180.0, 0, "00000000000")] + [TestCase(90.0, 180.0, 0xFFFFFFFFFFFFF, "zzzzzzzzzzs")] + [TestCase(89.99999999999999, 179.99999999999997, 0xFFFFFFFFFFFFF, "zzzzzzzzzzs")] public void CanEncodeAndDecodeCoordinatesWithGeoHashCode( double latitude, double longitude, @@ -57,8 +57,10 @@ public void CanEncodeAndDecodeCoordinatesWithGeoHashCode( var hash = GeoHash.GetGeoHashCode(hashInteger); Assert.AreEqual(expectedHashInteger, hashInteger); - // Only first 10 characters = 50-bits - Assert.AreEqual(expectedHash[..10], hash[..10]); + + // Note: while we are comparing the entire textual representation of geohash (11 characters) + // we are comparing in 52-bit precision, not 55-bit that is expected from GeoHash standard. + Assert.AreEqual(expectedHash, hash); } } } \ No newline at end of file