Skip to content

Commit

Permalink
Fix incorrect quantization approach
Browse files Browse the repository at this point in the history
  • Loading branch information
PaulusParssinen committed May 7, 2024
1 parent f364a96 commit 80ebee4
Show file tree
Hide file tree
Showing 2 changed files with 80 additions and 59 deletions.
83 changes: 51 additions & 32 deletions libs/server/Objects/SortedSetGeo/GeoHash.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// Licensed under the MIT license.

using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
#if NET8_0_OR_GREATER
Expand All @@ -17,10 +18,12 @@ namespace Garnet.server
/// </summary>
public static class GeoHash
{
// Constraints from EPSG:900913 / EPSG:3785 / OSGEO:41001
// Constraints from WGS 84 / Pseudo-Mercator (EPSG:3857)
private const double LongitudeMin = -180.0;
private const double LongitudeMax = 180.0;

// TODO: These are "wrong" in a sense that EPSG:3857
// says that the latitude should be from -85.05112878 to 85.05112878
private const double LatitudeMin = -90.0;
private const double LatitudeMax = 90.0;

Expand All @@ -45,14 +48,11 @@ public static long GeoToLongValue(double latitude, double longitude)
return -1L;
}

const double LatToUnitRangeReciprocal = 1 / 180.0;
const double LonToUnitRangeReciprocal = 1 / 360.0;

// Credits to https://mmcloughlin.com/posts/geohash-assembly for the quantization approach!

// The coordinates are quantized by first mapping them to the unit interval [0, 1] and
// The coordinates are quantized by first mapping them to the unit interval [0.0, 1.0] and
// then multiplying the 2^32. For example, to get 32-bit quantized integer representation of the latitude
// which is in range [-90, 90] we would do:
// which is in range [-90.0, 90.0] we would do:
//
// latQuantized = floor(2.0^32 * (latitude + 90.0) / 180.0)
//
Expand All @@ -62,33 +62,40 @@ public static long GeoToLongValue(double latitude, double longitude)
//
// floor(2^52 * x) where x = (latitude + 90.0) / 180.0.
//
// and further that the value of this can be read from binary representation of x+1.
// and further that the result of this calculation can be read simply from
// the IEEE-754 binary representation of x + 1.0.

// In otherwords to quantize, we need to first map value to unit range [0, 1].
// We achieve this by multiplying: [-value, value] * rangeReciprocal = [-0.5, 0.5]
// Then by adding 1.5, we shift the value range to [1, 2],
// for which the IEEE-754 double-precision representation is as follows:
//
// (-1)^sign * 2^(exp-1023) * (1.0 + significand/2^52)
// where s=0, exp=1023, significand=floor(2^52 * x)
//
// Now we can read value of floor(2^52 * x) directly from binary representation of 1.0 + x,
// where the now "quantized" value is stored as the 32 most significant bits of the signicand!
static uint Quantize(double value, double rangeReciprocal) => (uint)(BitConverter.DoubleToUInt64Bits(Math.FusedMultiplyAdd(value, rangeReciprocal, 1.5)) >> 20);
[MethodImpl(MethodImplOptions.AggressiveInlining)]
static uint Quantize(double value, double rangeReciprocal)
{
// In otherwords; we need to first map value to unit range [0.0, 1.0].
// We achieve this by multiplying [-value, value] by rangeReciprocal, giving us value in range [-0.5, 0.5]
// Then by adding 1.5, we shift the value range to [1.0, 2.0],
// for which the IEEE-754 double-precision representation is as follows:
//
// (-1)^sign * 2^(exp-1023) * (1.0 + significand/2^52)
// where sign=0, exp=1023, significand=floor(2^52 * x), for x in [1.0, 2.0)
//
// Now we can read value of floor(2^52 * x) directly from binary representation of y = 1.0 + x,
// where the now "quantized" value is stored as the 32 most significant bits of the signicand!
var y = BitConverter.DoubleToUInt64Bits(Math.FusedMultiplyAdd(value, rangeReciprocal, 1.5)) >> 20;

// Except we need to handle the corner-case where value rounds to the maximumm the range: 2.0
//
// We do this branchlessly by comparing the 64-bit binary representation to 0x40000000000 (e=1024).
// This way the JIT will emit cmp + cmov for x86-64 and cmp + csel for arm64 instead of branch.
return y >= 0x40000000000 ? uint.MaxValue : (uint)y;
}

const double LatToUnitRangeReciprocal = 1 / 180.0;
const double LonToUnitRangeReciprocal = 1 / 360.0;

var latQuantized = Quantize(latitude, LatToUnitRangeReciprocal);
var lonQuantized = Quantize(longitude, LonToUnitRangeReciprocal);

// Morton encode the quantized values, i.e.
// latQuantBits = xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx
// lonQuantBits = yyyyyyyy yyyyyyyy yyyyyyyy yyyyyyyy

// Morton encode the quantized values
var result = MortonEncode(x: latQuantized, y: lonQuantized);

// Now:
// resultBits = xyxyxyxy xyxyxyxy xyxyxyxy xyxyxyxy
// xyxyxyxy xyxyxyxy xyxyxyxy xyxyxyxy

// Shift to 52-bit precision.
return (long)(result >> ((sizeof(ulong) * 8) - BitsOfPrecision));
}
Expand All @@ -105,12 +112,12 @@ public static (double Latitude, double Longitude) GetCoordinatesFromLong(long ha
// Credits to https://github.com/georust/geohash for the hash de-quantization method!
static double Dequantize(uint quantizedValue, double rangeMax)
{
// Construct the IEEE-754 double-precision representation of the value, which is in range [1, 2]
// Construct the IEEE-754 double-precision representation of the value, which is in range [1, 2)
var value = BitConverter.UInt64BitsToDouble(((ulong)quantizedValue << 20) | (1023UL << 52));

// Now:
// (2*rangeMax) * ([1, 2]-1) = [0, 2*rangeMax]
// [0, 2*rangeMax] - rangeMax = [-rangeMax, rangeMax]
// (2*rangeMax) * ([1.0, 2.0) - 1.0) = [0.0, 2*rangeMax)
// [0.0, 2*rangeMax) - rangeMax = [-rangeMax, rangeMax)
return Math.FusedMultiplyAdd(rangeMax + rangeMax, value - 1.0, -rangeMax);
}

Expand Down Expand Up @@ -143,7 +150,19 @@ static double Dequantize(uint quantizedValue, double rangeMax)
/// </summary>
/// <param name="x">The x-coordinate to encode.</param>
/// <param name="y">The y-coordinate to encode.</param>
/// <returns>A ulong value representing the Morton encoding of the given coordinates.</returns>
/// <returns>
/// A 64-bit value representing the Morton encoding of the given coordinates.
/// i.e. in binary representation, for:
/// <code>
/// x = xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx
/// y = yyyyyyyy yyyyyyyy yyyyyyyy yyyyyyyy
/// </code>
/// Method returns:
/// <code>
/// yxyxyxyx yxyxyxyx yxyxyxyx yxyxyxyx
/// yxyxyxyx yxyxyxyx yxyxyxyx yxyxyxyx
/// </code>
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static ulong MortonEncode(uint x, uint y)
{
Expand All @@ -159,7 +178,7 @@ static ulong Spread(uint x)
}

#if NET8_0_OR_GREATER
// You may wonder, why is this also guarded behind AVX512F in addition to BMI2?
// One may ask: Why is this also guarded behind AVX512F in addition to BMI2?
// The answer is that on AMD platforms before Zen 3, the PDEP (and PEXT) are implemented in microcode
// and work bit-by-bit basis. It has been measured[^1] that for every bit set in the mask operand,
// there is 8~ uops issued, meaning that we would do 32 bits per mask * 8 uops * 2 = 512~ uops in total for the encoding instead of just 1 uop.
Expand All @@ -183,7 +202,7 @@ static ulong Spread(uint x)
/// This is essentially a bit de-interleaving operation where the even and odd bits of <paramref name="x"/> are "squashed" into separate 32-bit values representing the x- and y-coordinates respectively.
/// </summary>
/// <param name="x">The 64-bit value to decode.</param>
/// <returns>A tuple of values representing the x- and y-coordinates decoded from the given Morton code.</returns>
/// <returns>A tuple of 32-bit values representing the x- and y-coordinates decoded from the given Morton code.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static (uint X, uint Y) MortonDecode(ulong x)
{
Expand Down
56 changes: 29 additions & 27 deletions test/Garnet.test/GeoHashTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// Licensed under the MIT license.

using System;
using System.Globalization;
using Garnet.server;
using NUnit.Framework;

Expand All @@ -10,43 +11,42 @@ namespace Garnet.test
public class GeoHashTests
{
[Test]
[TestCase(0.0, 0.0, 0xC000000000000)]
[TestCase(30.5388942218, 104.0555758833, 4024744861876082)]
[TestCase(27.988056, 86.925278, 3636631039000829)]
[TestCase(37.502669, 15.087269, 3476216502357864)]
[TestCase(38.115556, 13.361389, 3476004292229755)]
[TestCase(38.918250, -77.427944, 1787100258949719)]
[TestCase(47.642219912251285, -122.14205560231471, 1557413161902764)]
[TestCase(-89.0, -179.0, 111258114535)]
// TODO: Investigate corner-cases
// [TestCase(-90.0, -180.0, ?)]
// [TestCase(90.0, 180.0, 0)]
[TestCase(-90.0, -180.0, 0)]
public void CanEncodeAndDecodeCoordinates(
double latitude,
double longitude,
long expectedHashInteger)
[TestCase(30.5388942218, 104.0555758833)]
[TestCase(27.988056, 86.925278)]
[TestCase(37.502669, 15.087269)]
[TestCase(38.115556, 13.361389)]
[TestCase(38.918250, -77.427944)]
[TestCase(-90.0, -180.0)]
[TestCase(0.0, 0.0)]
[TestCase(double.Epsilon, double.Epsilon)]
[TestCase(-double.Epsilon, -double.Epsilon)]
[TestCase(90.0, 180.0)]
[TestCase(89.99999999999999, 179.99999999999997)] // double.BitDecrement((Lat/Long)Max)
public void CanEncodeAndDecodeCoordinates(double latitude, double longitude)
{
const double Epsilon = 0.00001;

var hashinteger = GeoHash.GeoToLongValue(latitude, longitude);
Assert.AreEqual(expectedHashInteger, hashinteger);

var (actualLatitude, actualLongitude) = GeoHash.GetCoordinatesFromLong(hashinteger);

const double Epsilon = 0.00001;
Assert.IsTrue(Math.Abs(latitude - actualLatitude) <= Epsilon);
Assert.IsTrue(Math.Abs(longitude - actualLongitude) <= Epsilon);
var latError = Math.Abs(latitude - actualLatitude);
var lonError = Math.Abs(longitude - actualLongitude);

Assert.IsTrue(latError <= Epsilon, "Math.Abs(latError)=" + latError.ToString("F16", CultureInfo.InvariantCulture));
Assert.IsTrue(lonError <= Epsilon, "Math.Abs(lonError)=" + latError.ToString("F16", CultureInfo.InvariantCulture));
}

[Test]
[TestCase(30.5388942218, 104.0555758833, 4024744861876082, "wm3vxz6vywh")]
[TestCase(27.988056, 86.925278, 3636631039000829, "tuvz4p141z8")]
[TestCase(37.502669, 15.087269, 3476216502357864, "sqdtr74hyu0")]
[TestCase(38.115556, 13.361389, 3476004292229755, "sqc8b49rnys")]
[TestCase(38.918250, -77.427944, 1787100258949719, "dqbvqhfenps")]
[TestCase(-89.0, -179.0, 111258114535, "000twy01mts")]
// TODO: Investigate corner-cases
// [TestCase(-90.0, -180.0, ?, ")]
// [TestCase(0.0, 0.0, 0xC000000000000, "7zzzzzzzzzz")]
[TestCase(90.0, 180.0, 0, "00000000000")]
[TestCase(0.0, 0.0, 0xC000000000000, "s0000000000")]
[TestCase(-90.0, -180.0, 0, "00000000000")]
[TestCase(90.0, 180.0, 0xFFFFFFFFFFFFF, "zzzzzzzzzzs")]
[TestCase(89.99999999999999, 179.99999999999997, 0xFFFFFFFFFFFFF, "zzzzzzzzzzs")]
public void CanEncodeAndDecodeCoordinatesWithGeoHashCode(
double latitude,
double longitude,
Expand All @@ -57,8 +57,10 @@ public void CanEncodeAndDecodeCoordinatesWithGeoHashCode(
var hash = GeoHash.GetGeoHashCode(hashInteger);

Assert.AreEqual(expectedHashInteger, hashInteger);
// Only first 10 characters = 50-bits
Assert.AreEqual(expectedHash[..10], hash[..10]);

// Note: while we are comparing the entire textual representation of geohash (11 characters)
// we are comparing in 52-bit precision, not 55-bit that is expected from GeoHash standard.
Assert.AreEqual(expectedHash, hash);
}
}
}

0 comments on commit 80ebee4

Please sign in to comment.