
Maths for HLSL BxDFs (template cmath, tgmath) #803

Open. Wants to merge 48 commits into base: master.

Conversation

keptsecret (Contributor)
No description provided.

@keptsecret keptsecret changed the base branch from master to cpp_compat_intrinsics_refactor December 16, 2024 07:16
Base automatically changed from cpp_compat_intrinsics_refactor to master December 18, 2024 10:37
Comment on lines 343 to 396
namespace impl
{
struct bitFields // need template?
{
using this_t = bitFields;

static this_t create(uint32_t base, uint32_t value, uint32_t offset, uint32_t count)
{
this_t retval;
retval.base = base;
retval.value = value;
retval.offset = offset;
retval.count = count;
return retval;
}

uint32_t __insert()
{
const uint32_t shifted_masked_value = (value & ((0x1u << count) - 1u)) << offset;
const uint32_t lo = base & ((0x1u << offset) - 1u);
const uint32_t hi = base ^ lo;
return (hi << count) | shifted_masked_value | lo;
}

uint32_t __overwrite()
{
#ifdef __HLSL_VERSION
return spirv::bitFieldInsert<uint32_t>(base, value, offset, count);
#else
// TODO: double check implementation
const uint32_t shifted_masked_value = ~(0xffffffffu << count) << offset;
base &= ~shifted_masked_value;
return base | (value << offset);
#endif
}

uint32_t base;
uint32_t value;
uint32_t offset;
uint32_t count;
};
}

uint32_t bitFieldOverwrite(uint32_t base, uint32_t value, uint32_t offset, uint32_t count)
{
impl::bitFields b = impl::bitFields::create(base, value, offset, count);
return b.__overwrite();
}

uint32_t bitFieldInsert(uint32_t base, uint32_t value, uint32_t offset, uint32_t count)
{
impl::bitFields b = impl::bitFields::create(base, value, offset, count);
return b.__insert();
}


but we have glm and spirv for this?

Contributor Author:

The situation with bitfieldInsert is confusing to me. This mostly reflects the GLSL version we have:

uint nbl_glsl_bitfieldOverwrite(in uint base, in uint value, in uint offset, in uint count)
{
return bitfieldInsert(base,value,int(offset),int(count));
}
uint nbl_glsl_bitfieldInsert_impl(in uint base, in uint shifted_masked_value, in uint lo, in uint count)
{
const uint hi = base^lo;
return (hi<<count)|shifted_masked_value|lo;
}
uint nbl_glsl_bitfieldInsert(in uint base, uint value, in uint offset, in uint count)
{
const uint shifted_masked_value = (value&((0x1u<<count)-1u))<<offset;
return nbl_glsl_bitfieldInsert_impl(base,shifted_masked_value,base&((0x1u<<offset)-1u),count);
}

Contributor Author:

But there are also these in glsl_compat/core.hlsl:

template<typename T>
T bitfieldExtract( T val, uint32_t offsetBits, uint32_t numBits )
{
return impl::bitfieldExtract<T, is_signed<T>::value, is_integral<T>::value>::__call(val,offsetBits,numBits);
}
template<typename T>
T bitfieldInsert(T base, T insert, uint32_t offset, uint32_t bits)
{
return spirv::bitFieldInsert<T>(base, insert, offset, bits);
}
template<typename T>
T bitfieldReverse(T value)
{
return spirv::bitFieldReverse<T>(value);
}

So maybe I can remove this one? But then there's no bitfieldOverwrite and I don't know if they do the same thing as the GLSL version.


@Przemog1 is responsible here, but in general:

There should be the SPIR-V intrinsic in the spirv namespace which provides this GLSL builtin to HLSL.

Then there should be an implementation in C++ which does the same thing; it's very likely that glm already has one.

And finally an nbl::hlsl:: namespace function which calls the SPIR-V or C++ version depending on the language (__HLSL_VERSION).

do not remove anything, coordinate with Przemek.

Comment on lines 500 to 574
namespace impl
{
template<typename T, uint16_t M, uint16_t N, uint16_t P>
struct applyChainRule4D
{
static matrix<T, P, M> __call(matrix<T, N, M> dFdG, matrix<T, P, N> dGdR)
{
#ifdef __HLSL_VERSION
return mul(dFdG, dGdR);
#else
return dFdG * dGdR; // glm
#endif
}
};

template<typename T, uint16_t M, uint16_t N>
struct applyChainRule3D : applyChainRule4D<T,M,N,1>
{
static vector<T, N> __call(matrix<T, N, M> dFdG, vector<T, N> dGdR)
{
#ifdef __HLSL_VERSION
return mul(dFdG, dGdR);
#else
return dFdG * dGdR; // glm
#endif
}
};

template<typename T, uint16_t M>
struct applyChainRule2D : applyChainRule4D<T,M,1,1>
{
static vector<T, M> __call(vector<T, M> dFdG, T dGdR)
{
#ifdef __HLSL_VERSION
return mul(dFdG, dGdR);
#else
return dFdG * dGdR; // glm
#endif
}
};

template<typename T>
struct applyChainRule1D : applyChainRule4D<T,1,1,1>
{
static T __call(T dFdG, T dGdR)
{
return dFdG * dGdR;
}
};
}

// possible to derive M,N,P automatically?
template<typename T, uint16_t M, uint16_t N, uint16_t P NBL_FUNC_REQUIRES(is_scalar_v<T> && M>1 && N>1 && P>1)
matrix<T, P, M> applyChainRule(matrix<T, N, M> dFdG, matrix<T, P, N> dGdR)
{
return impl::applyChainRule4D<T,M,N,P>::__call(dFdG, dGdR);
}

template<typename T, uint16_t M, uint16_t N NBL_FUNC_REQUIRES(is_scalar_v<T> && M>1 && N>1)
vector<T, N> applyChainRule(matrix<T, N, M> dFdG, vector<T, N> dGdR)
{
return impl::applyChainRule3D<T,M,N>::__call(dFdG, dGdR);
}

template<typename T, uint16_t M NBL_FUNC_REQUIRES(is_scalar_v<T> && M>1)
vector<T, M> applyChainRule(vector<T, M> dFdG, T dGdR)
{
return impl::applyChainRule2D<T,M>::__call(dFdG, dGdR);
}

template<typename T NBL_FUNC_REQUIRES(is_scalar_v<T>)
T applyChainRule(T dFdG, T dGdR)
{
return impl::applyChainRule1D<T>::__call(dFdG, dGdR);
}
Member:

I thought this was doable with just a simple

template<typename T, uint16_t M, uint16_t N, uint16_t P NBL_FUNC_REQUIRES(is_scalar_v<T> && M>1 && N>1 && P>1)
matrix<T,M,P> applyChainRule(matrix<T,N,M> dFdG, matrix<T,M,P> dGdR)
{
    return mul(dFdG,dGdR);
}

which just calls mul

remember GLSL was silly and had NxM on the matrix names backwards as MxN

Also in HLSL you can have 1xN or Nx1 matrix types; in GLSL you couldn't, so you can remove the overloads with vector and the >1 dimension requirements
https://godbolt.org/z/qoTj5xPMG

Contributor Author:

GLM uses * for matrix multiplications, doesn't it?
Could change to use mul in #804

Contributor Author:

Also don't need partial spec?


GLM uses * for matrix multiplications, doesn't it? Could change to use mul in #804

we have our own mul now, for both C++ and HLSL


Also don't need partial spec?

There's no custom/specialized behaviour to enable, the only thing we could do is maybe allow a different Matrix type.

But that can be noted as a TODO in a comment.

Comment on lines +36 to +38
// TOOD: is this doing what it should be?
template<typename T>
struct lp_norm<T,1,false>
Member:

yes, it's correct, but you'll never hit it because 1 is odd: wrong boolean value

{
static scalar_type_t<T> __sum(const T v)
{
return dot<T>(v, v); // TODO: wait for overloaded dot?


use the nabla templated dot and we're good


static scalar_type_t<T> __call(const T v)
{
return sqrt<T>(__sum(v));


wrong template param to sqrt should be the scalar_type instead of T

Comment on lines +20 to +21
template<typename T, uint32_t LP, bool Odd=(LP&0x1)>
struct lp_norm;


two general nitpicks:

  1. use the functional.hlsl array getters instead of operator[]
  2. use the vector_traits to establish the scalar_type_t because the one from type_traits is only a trait for native vectors

Comment on lines +84 to +93
template <typename T NBL_FUNC_REQUIRES(is_scalar_v<T>)
vector<T, 3> reflect(vector<T, 3> I, vector<T, 3> N, T NdotI)
{
return N * 2.0f * NdotI - I;
}

template <typename T NBL_FUNC_REQUIRES(is_scalar_v<T>)
vector<T, 3> reflect(vector<T, 3> I, vector<T, 3> N)
{
T NdotI = dot<T>(N, I);
Member:

  1. your dot won't compile, because it's dot<VectorT> not dot<ScalarT>
  2. I'd probably require /*Vectorial<T> && */vector_traits<T>::Dimension==3 instead of taking T and slapping it in a vector (perf note, vector_traits existing is same as checking Vectorial but will make Clang/DXC choke a lot less)

Comment on lines +98 to +134
namespace impl
{
template<typename T>
struct orientedEtas;

template<>
struct orientedEtas<float>
{
static bool __call(NBL_REF_ARG(float) orientedEta, NBL_REF_ARG(float) rcpOrientedEta, float NdotI, float eta)
{
const bool backside = NdotI < 0.0;
const float rcpEta = 1.0 / eta;
orientedEta = backside ? rcpEta : eta;
rcpOrientedEta = backside ? eta : rcpEta;
return backside;
}
};

template<>
struct orientedEtas<float32_t3>
{
static bool __call(NBL_REF_ARG(float32_t3) orientedEta, NBL_REF_ARG(float32_t3) rcpOrientedEta, float NdotI, float32_t3 eta)
{
const bool backside = NdotI < 0.0;
const float32_t3 rcpEta = (float32_t3)1.0 / eta;
orientedEta = backside ? rcpEta:eta;
rcpOrientedEta = backside ? eta:rcpEta;
return backside;
}
};
}

template<typename T NBL_FUNC_REQUIRES(is_scalar_v<T> || is_vector_v<T>)
bool getOrientedEtas(NBL_REF_ARG(T) orientedEta, NBL_REF_ARG(T) rcpOrientedEta, scalar_type_t<T> NdotI, T eta)
{
return impl::orientedEtas<T>::__call(orientedEta, rcpOrientedEta, NdotI, eta);
}
Member:

this stuff belongs in bxdf/common or even bxdf/fresnel

CC: @keptsecret

Comment on lines +215 to +266
}

template<typename T NBL_FUNC_REQUIRES(is_scalar_v<T>)
vector<T,3> refract(vector<T,3> I, vector<T,3> N, bool backside, T NdotI, T NdotI2, T rcpOrientedEta, T rcpOrientedEta2)
{
impl::refract<T> r = impl::refract<T>::create(I, N, backside, NdotI, NdotI2, rcpOrientedEta, rcpOrientedEta2);
return r.doRefract();
}

template<typename T NBL_FUNC_REQUIRES(is_scalar_v<T>)
vector<T,3> refract(vector<T,3> I, vector<T,3> N, T NdotI, T eta)
{
impl::refract<T> r = impl::refract<T>::create(I, N, NdotI, eta);
return r.doRefract();
}

template<typename T NBL_FUNC_REQUIRES(is_scalar_v<T>)
vector<T,3> refract(vector<T,3> I, vector<T,3> N, T eta)
{
impl::refract<T> r = impl::refract<T>::create(I, N, eta);
return r.doRefract();
}

template<typename T NBL_FUNC_REQUIRES(is_scalar_v<T>)
T reflectRefract_computeNdotT(bool backside, T NdotI2, T rcpOrientedEta2)
{
impl::refract<T> r;
r.NdotI2 = NdotI2;
r.rcpOrientedEta2 = rcpOrientedEta2;
r.backside = backside;
return r.computeNdotT();
}

template<typename T NBL_FUNC_REQUIRES(is_scalar_v<T>)
vector<T,3> reflectRefract_impl(bool _refract, vector<T,3> _I, vector<T,3> _N, T _NdotI, T _NdotTorR, T _rcpOrientedEta)
{
return impl::refract<T>::doReflectRefract(_refract, _I, _N, _NdotI, _NdotTorR, _rcpOrientedEta);
}

template<typename T NBL_FUNC_REQUIRES(is_scalar_v<T>)
vector<T,3> reflectRefract(bool _refract, vector<T,3> I, vector<T,3> N, bool backside, T NdotI, T NdotI2, T rcpOrientedEta, T rcpOrientedEta2)
{
impl::refract<T> r = impl::refract<T>::create(I, N, backside, NdotI, NdotI2, rcpOrientedEta, rcpOrientedEta2);
return r.doReflectRefract(_refract);
}

template<typename T NBL_FUNC_REQUIRES(is_scalar_v<T>)
vector<T,3> reflectRefract(bool _refract, vector<T,3> I, vector<T,3> N, T NdotI, T eta)
{
impl::refract<T> r = impl::refract<T>::create(I, N, NdotI, eta);
return r.doReflectRefract(_refract);
}
Member:

remove them all and take the struct out of the impl namespace; the only reason (as I've explained to @keptsecret) so many overloads existed is because we didn't have structs & methods in HLSL.

Now that we have, we will only use structs for complex things like this.

Also this is fresnel-related stuff and should be somewhere in bxdf/fresnel, not regular common math

Comment on lines +212 to +213
T rcpOrientedEta;
T rcpOrientedEta2;


stuff that deals with refractive indices definitely belongs in bxdf/fresnel

{
c = cos<T>(theta);
s = sqrt<T>(1.0-c*c);
s = (theta < 0.0) ? -s : s; // TODO: test with XOR


make that signflip or negate function that takes a bool, and then use it here

Przemog1 (Contributor), Jan 7, 2025:

there is flipSign function in the hlsl/ieee754.hlsl file


there is flipSign function in the hlsl/ieee754.hlsl file

the flipSign needs a bool arg to tell whether to flip or not



// valid only for `theta` in [-PI,PI]
template <typename T NBL_FUNC_REQUIRES(is_scalar_v<T>)


you should change most is_scalar_v into a bool concept that matches float scalars and also allows emulated_float64_t

Also scalar matches integers and booleans, on which sin and cos don't make sense


Contributor:

however, the name of this function will be changed to FloatingPointLikeScalar in the next commit

Comment on lines 278 to 279
template <typename T NBL_FUNC_REQUIRES(is_scalar_v<T>)
matrix<T, 3, 2> frisvad(vector<T, 3> n) // TODO: confirm dimensions of matrix


frankly I'd make it void frisvad(const T normal, NBL_REF_ARG(T) tangent, NBL_REF_ARG(T) bitangent) requiring that vector_traits<T>::Dimension==3

the reason is that this way it's more semantically sound, instead of scratching your head over which row/column of the matrix is which (in the original GLSL, the first column, i.e. the first vector you get with operator[] or provide with the constructor, was the tangent)

We had this conversation with @keptsecret on his thread and PR

Also https://github.com/Devsh-Graphics-Programming/Nabla/pull/803/files#r1904982026

Comment on lines +283 to +284
return (n.z < -0.9999999) ? matrix<T, 2, 3>(vector<T, 3>(0.0,-1.0,0.0), vector<T, 3>(-1.0,0.0,0.0)) :
matrix<T, 2, 3>(vector<T, 3>(1.0-n.x*n.x*a, b, -n.x), vector<T, 3>(b, 1.0-n.y*n.y*a, -n.y));


also use an if instead of ?: we get no perf diff, but much better legibility (also the n.z condition can be moved up)

Comment on lines +308 to +337
// @ return abs(x) if cond==true, max(x,0.0) otherwise
template <typename T NBL_FUNC_REQUIRES(is_scalar_v<T> || is_vector_v<T>)
T conditionalAbsOrMax(bool cond, T x, T limit);

template <>
float conditionalAbsOrMax<float>(bool cond, float x, float limit)
{
const float condAbs = asfloat(asuint(x) & uint(cond ? 0x7fFFffFFu : 0xffFFffFFu));
return max(condAbs,limit);
}

template <>
float32_t2 conditionalAbsOrMax<float32_t2>(bool cond, float32_t2 x, float32_t2 limit)
{
const float32_t2 condAbs = asfloat(asuint(x) & select(cond, (uint32_t2)0x7fFFffFFu, (uint32_t2)0xffFFffFFu));
return max(condAbs,limit);
}

template <>
float32_t3 conditionalAbsOrMax<float32_t3>(bool cond, float32_t3 x, float32_t3 limit)
{
const float32_t3 condAbs = asfloat(asuint(x) & select(cond, (uint32_t3)0x7fFFffFFu, (uint32_t3)0xffFFffFFu));
return max(condAbs,limit);
}

template <>
float32_t4 conditionalAbsOrMax<float32_t4>(bool cond, float32_t4 x, float32_t4 limit)
{
const float32_t4 condAbs = asfloat(asuint(x) & select(cond, (uint32_t4)0x7fFFffFFu, (uint32_t4)0xffFFffFFu));
return max(condAbs,limit);


Require that T is floating point (matching emulated_float too), leave signed integer impl as a TODO

because the condAbs is not constexpr, you can use our nbl::hlsl::bit_cast

you can use nbl::hlsl::promote to turn a single scalar into a vector.

Also use mix instead of select then it should all work cross C++ and HLSL

Comment on lines +71 to +77
template<typename T, uint32_t LP NBL_FUNC_REQUIRES(LP>0)
scalar_type_t<T> lpNormPreroot(NBL_CONST_REF_ARG(T) v)
{
return impl::lp_norm<T,LP>::__sum(v);
}

template<typename T, uint32_t LP>


you probably want to require that vector_traits<T>::scalar_type is a float (including emulated float)

matrix<T, 3, 2> frisvad(vector<T, 3> n) // TODO: confirm dimensions of matrix
matrix<T, 2, 3> frisvad(vector<T, 3> n)


dont use matrix its semantically confusing

const AsUint toAsUint = ieee754::impl::bitCastToUintType(to);
const AsUint fromAsUint = ieee754::impl::bitCastToUintType(from);

return bit_cast<FloatingPoint>(toAsUint | extractSignPreserveBitPattern(from));


this does not copy sign, because you need to clear the sign bit on toAsUint first

right now it will make `to` negative if either of the inputs is negative

btw are you sure you want to force that FloatingPoint is non-emulated and scalar? It could work on any float even emulated 64

}

template <typename FloatingPoint NBL_FUNC_REQUIRES(hlsl::is_floating_point_v<FloatingPoint>&& hlsl::is_scalar_v<FloatingPoint>)
NBL_CONSTEXPR_INLINE_FUNC FloatingPoint flipSign(FloatingPoint val)


bool indicating whether to flip would be super useful actually and it would always get optimized away if const

btw are you sure you want to force that FloatingPoint is non-emulated and scalar? It could work on any float even emulated 64

Comment on lines +14 to +16
#ifndef __HLSL_VERSION
#include <bitset>
#endif


is this needed here?

Comment on lines +44 to +58
template<typename FloatingPoint>
[[vk::ext_instruction(GLSLstd450::GLSLstd450Pow, "GLSL.std.450")]]
enable_if_t<is_floating_point<FloatingPoint>::value && !is_matrix_v<FloatingPoint>, FloatingPoint> pow(FloatingPoint lhs, FloatingPoint rhs);

template<typename FloatingPoint>
[[vk::ext_instruction(GLSLstd450::GLSLstd450Exp, "GLSL.std.450")]]
enable_if_t<is_floating_point<FloatingPoint>::value && !is_matrix_v<FloatingPoint>, FloatingPoint> exp(FloatingPoint val);

template<typename FloatingPoint>
[[vk::ext_instruction(GLSLstd450::GLSLstd450Exp2, "GLSL.std.450")]]
enable_if_t<is_floating_point<FloatingPoint>::value && !is_matrix_v<FloatingPoint>, FloatingPoint> exp2(FloatingPoint val);

template<typename FloatingPoint>
[[vk::ext_instruction(GLSLstd450::GLSLstd450Log, "GLSL.std.450")]]
enable_if_t<is_floating_point<FloatingPoint>::value && !is_matrix_v<FloatingPoint>, FloatingPoint> log(FloatingPoint val);


pow, exp and log don't take float64 without an extension

also as per our discord conversation, we need 5 named concepts in spirv namespace so the enable_if_t are clearer and easier to compare with SPIR-V spec

Comment on lines +26 to +100
template<typename FloatingPoint NBL_FUNC_REQUIRES(concepts::FloatingPointLikeScalar<FloatingPoint>)
inline FloatingPoint erf(FloatingPoint x)
{
return tgmath_impl::erf_helper<FloatingPoint>::__call(x);
}

template<typename FloatingPoint NBL_FUNC_REQUIRES(concepts::FloatingPointLikeScalar<FloatingPoint>)
inline FloatingPoint erfInv(FloatingPoint x)
{
return tgmath_impl::erfInv_helper<FloatingPoint>::__call(x);
}

template<typename T NBL_FUNC_REQUIRES(concepts::FloatingPointLikeScalar<T> || concepts::Vectorial<T>)
inline T floor(NBL_CONST_REF_ARG(T) val)
{
return tgmath_impl::floor_helper<T>::__call(val);
}

template<typename T, typename U NBL_FUNC_REQUIRES((concepts::FloatingPointLikeScalar<T> || concepts::FloatingPointLikeVectorial<T>) && (concepts::FloatingPointLikeScalar<U> || is_same_v<U, bool>))
inline T lerp(NBL_CONST_REF_ARG(T) x, NBL_CONST_REF_ARG(T) y, NBL_CONST_REF_ARG(U) a)
{
return tgmath_impl::lerp_helper<T, U>::__call(x, y, a);
}

template<typename FloatingPoint NBL_FUNC_REQUIRES(concepts::FloatingPointLikeScalar<FloatingPoint>)
inline bool isnan(NBL_CONST_REF_ARG(FloatingPoint) val)
{
return tgmath_impl::isnan_helper<FloatingPoint>::__call(val);
}

template<typename V NBL_FUNC_REQUIRES(concepts::FloatingPointLikeVectorial<V>)
inline vector<bool, vector_traits<V>::Dimension> isnan(NBL_CONST_REF_ARG(V) val)
{
return tgmath_impl::isnan_helper<V>::__call(val);
}

template<typename FloatingPoint NBL_FUNC_REQUIRES(concepts::FloatingPointLikeScalar<FloatingPoint>)
inline FloatingPoint isinf(NBL_CONST_REF_ARG(FloatingPoint) val)
{
return tgmath_impl::isinf_helper<FloatingPoint>::__call(val);
}

template<typename V NBL_FUNC_REQUIRES(concepts::FloatingPointLikeVectorial<V>)
inline vector<bool, vector_traits<V>::Dimension> isinf(NBL_CONST_REF_ARG(V) val)
{
return tgmath_impl::isinf_helper<V>::__call(val);
}

template<typename T NBL_FUNC_REQUIRES(concepts::FloatingPointLikeScalar<T> || concepts::FloatingPointLikeVectorial<T>)
inline T pow(NBL_CONST_REF_ARG(T) x, NBL_CONST_REF_ARG(T) y)
{
return tgmath_impl::pow_helper<T>::__call(x, y);
}

template<typename T NBL_FUNC_REQUIRES(concepts::FloatingPointLikeScalar<T> || concepts::FloatingPointLikeVectorial<T>)
inline T exp(NBL_CONST_REF_ARG(T) x)
{
return tgmath_impl::exp_helper<T>::__call(x);
}


template<typename T NBL_FUNC_REQUIRES(concepts::FloatingPointLikeScalar<T> || concepts::Vectorial<T>)
inline T exp2(NBL_CONST_REF_ARG(T) x)
{
return tgmath_impl::exp2_helper<T>::__call(x);
}

template<typename T NBL_FUNC_REQUIRES(concepts::FloatingPointLikeScalar<T> || concepts::FloatingPointLikeVectorial<T>)
inline T log(NBL_CONST_REF_ARG(T) x)
{
return tgmath_impl::log_helper<T>::__call(x);
}

template<typename T NBL_FUNC_REQUIRES(concepts::FloatingPointLikeScalar<T> || concepts::SignedIntegral<T> || concepts::FloatingPointLikeVectorial<T> || concepts::SignedIntVectorial<T>)


maybe don't put any constraints on the functions, and let the helpers do all the error reporting (no point doing it twice) as long as each _helper declares NBL_STRUCT_CONSTRAINABLE in the EMPTY forward declaration of the primary template

Comment on lines +51 to +73
inline bool isnan(NBL_CONST_REF_ARG(FloatingPoint) val)
{
return tgmath_impl::isnan_helper<FloatingPoint>::__call(val);
}

template<typename V NBL_FUNC_REQUIRES(concepts::FloatingPointLikeVectorial<V>)
inline vector<bool, vector_traits<V>::Dimension> isnan(NBL_CONST_REF_ARG(V) val)
{
return tgmath_impl::isnan_helper<V>::__call(val);
}

template<typename FloatingPoint NBL_FUNC_REQUIRES(concepts::FloatingPointLikeScalar<FloatingPoint>)
inline FloatingPoint isinf(NBL_CONST_REF_ARG(FloatingPoint) val)
{
return tgmath_impl::isinf_helper<FloatingPoint>::__call(val);
}

template<typename V NBL_FUNC_REQUIRES(concepts::FloatingPointLikeVectorial<V>)
inline vector<bool, vector_traits<V>::Dimension> isinf(NBL_CONST_REF_ARG(V) val)
{
return tgmath_impl::isinf_helper<V>::__call(val);
}


instead of overload, maybe you just want to require that this _helper declares a using return_t = ?

Comment on lines +11 to +25
namespace nbl
{
namespace hlsl
{
namespace tgmath_impl
{

template<typename T, typename U NBL_STRUCT_CONSTRAINABLE>
struct lerp_helper;

#ifdef __HLSL_VERSION
#define MIX_FUNCTION spirv::fMix
#else
#define MIX_FUNCTION glm::mix
#endif


as per our discord conversation, every helper should have a FIRST partial specialization done with is_same_v<decltype(spirv::...),expected_Return_type> constraint in a #ifdef __HLSL_VERSION block

then the C++ versions and cross-language adaptors for emulated should come

inline bool isnan_uint_impl(UnsignedInteger val)
{
using AsFloat = typename float_of_size<sizeof(UnsignedInteger)>::type;
return bool((ieee754::extractBiasedExponent<UnsignedInteger>(val) == ieee754::traits<AsFloat>::specialValueExp) && (val & ieee754::traits<AsFloat>::mantissaMask));


there's a faster check:

  1. AND with (numeric_limits<UnsignedInteger>::max>>1) - clear the sign mask
  2. check for > (specialValueExp<<mantissaBits)

Comment on lines +101 to +122
template<typename FloatingPoint>
NBL_PARTIAL_REQ_TOP(concepts::FloatingPointScalar<FloatingPoint>)
struct isnan_helper<FloatingPoint NBL_PARTIAL_REQ_BOT(concepts::FloatingPointScalar<FloatingPoint>) >
{
static bool __call(NBL_CONST_REF_ARG(FloatingPoint) x)
{
#ifdef __HLSL_VERSION
return spirv::isNan<FloatingPoint>(x);
#else
// GCC and Clang will always return false from a call to std::isnan when fast math is enabled;
// this implementation will always return the appropriate output regardless of whether fast math is enabled or not
using AsUint = typename unsigned_integer_of_size<sizeof(FloatingPoint)>::type;
return tgmath_impl::isnan_uint_impl(reinterpret_cast<const AsUint&>(x));
#endif
}
};

template<typename V>
NBL_PARTIAL_REQ_TOP(concepts::Vectorial<V>)
struct isnan_helper<V NBL_PARTIAL_REQ_BOT(concepts::Vectorial<V>) >
{
using output_t = vector<bool, hlsl::vector_traits<V>::Dimension>;


as per the discord convo, it's best if there's a whole separate SPIR-V specialization which is not predicated on a concept but on whether the SPIR-V intrinsic is callable/compiles with that type

and the C++ specialization can be just for a scalar.

So you have 2 specializations in two different parts of __HLSL_VERSION block, instead of one that tries to reconcile incompatible concepts


Ideally I'd keep the functions without SPIR-V intrinsic equivalents in a separate file or near the bottom of this one

Comment on lines +224 to +231
// ERF

template<typename T NBL_STRUCT_CONSTRAINABLE>
struct erf_helper;

template<typename FloatingPoint>
NBL_PARTIAL_REQ_TOP(concepts::FloatingPointScalar<FloatingPoint>)
struct erf_helper<FloatingPoint NBL_PARTIAL_REQ_BOT(concepts::FloatingPointScalar<FloatingPoint>) >
Member:

the non-spirv forwarding functions can be done however you like, most sane is probably to do the per-channel broadcast specs as macros for quick partial spec

the Scalar constraint you have here is fine, but not for the potentially SPIR-V vector perfect forwarding ones above

Comment on lines +236 to +241
const FloatingPoint a1 = 0.254829592;
const FloatingPoint a2 = -0.284496736;
const FloatingPoint a3 = 1.421413741;
const FloatingPoint a4 = -1.453152027;
const FloatingPoint a5 = 1.061405429;
const FloatingPoint p = 0.3275911;


pay attention to 2 things:

  1. whether the constant literal has all the digits needed (float32 and float64 often need different digits)
  2. to get true FP64 constants in HLSL you need to use a stupid suffix; there's actually an NBL_ macro you need to place around them because DXC is holding back on implementing explicit sized fp16, fp32, fp64 suffixes on float literals

Also, usually an implementation of a complex math function is different for float16, float32 and float64 (different levels of polynomial approximation/coefficients and number of steps/iterations) so I'd have a separate partial spec in preprocessor blocks of __HLSL_VERSION

@devshgraphicsprogramming (Member):
intrinsics.hlsl and functions.hlsl I'll review some other day
