-
Notifications
You must be signed in to change notification settings - Fork 59
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Maths for HLSL BxDFs (template cmath, tgmath) #803
base: master
Are you sure you want to change the base?
Conversation
namespace impl | ||
{ | ||
struct bitFields // need template? | ||
{ | ||
using this_t = bitFields; | ||
|
||
static this_t create(uint32_t base, uint32_t value, uint32_t offset, uint32_t count) | ||
{ | ||
this_t retval; | ||
retval.base = base; | ||
retval.value = value; | ||
retval.offset = offset; | ||
retval.count = count; | ||
return retval; | ||
} | ||
|
||
uint32_t __insert() | ||
{ | ||
const uint32_t shifted_masked_value = (value & ((0x1u << count) - 1u)) << offset; | ||
const uint32_t lo = base & ((0x1u << offset) - 1u); | ||
const uint32_t hi = base ^ lo; | ||
return (hi << count) | shifted_masked_value | lo; | ||
} | ||
|
||
uint32_t __overwrite() | ||
{ | ||
#ifdef __HLSL_VERSION | ||
return spirv::bitFieldInsert<uint32_t>(base, value, offset, count); | ||
#else | ||
// TODO: double check implementation | ||
const uint32_t shifted_masked_value = ~(0xffffffffu << count) << offset; | ||
base &= ~shifted_masked_value; | ||
return base | (value << offset); | ||
#endif | ||
} | ||
|
||
uint32_t base; | ||
uint32_t value; | ||
uint32_t offset; | ||
uint32_t count; | ||
}; | ||
} | ||
|
||
uint32_t bitFieldOverwrite(uint32_t base, uint32_t value, uint32_t offset, uint32_t count) | ||
{ | ||
impl::bitFields b = impl::bitFields::create(base, value, offset, count); | ||
return b.__overwrite(); | ||
} | ||
|
||
uint32_t bitFieldInsert(uint32_t base, uint32_t value, uint32_t offset, uint32_t count) | ||
{ | ||
impl::bitFields b = impl::bitFields::create(base, value, offset, count); | ||
return b.__insert(); | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
but we have glm and spirv for this ?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The situation with bitfieldInsert
is confusing to me. This mostly reflects the GLSL version we have:
Nabla/include/nbl/builtin/glsl/math/functions.glsl
Lines 263 to 277 in c7bdd2a
uint nbl_glsl_bitfieldOverwrite(in uint base, in uint value, in uint offset, in uint count) | |
{ | |
return bitfieldInsert(base,value,int(offset),int(count)); | |
} | |
uint nbl_glsl_bitfieldInsert_impl(in uint base, in uint shifted_masked_value, in uint lo, in uint count) | |
{ | |
const uint hi = base^lo; | |
return (hi<<count)|shifted_masked_value|lo; | |
} | |
uint nbl_glsl_bitfieldInsert(in uint base, uint value, in uint offset, in uint count) | |
{ | |
const uint shifted_masked_value = (value&((0x1u<<count)-1u))<<offset; | |
return nbl_glsl_bitfieldInsert_impl(base,shifted_masked_value,base&((0x1u<<offset)-1u),count); | |
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
But there's also these ones in glsl_compat/core.hlsl
:
Nabla/include/nbl/builtin/hlsl/glsl_compat/core.hlsl
Lines 210 to 226 in c7bdd2a
template<typename T> | |
T bitfieldExtract( T val, uint32_t offsetBits, uint32_t numBits ) | |
{ | |
return impl::bitfieldExtract<T, is_signed<T>::value, is_integral<T>::value>::__call(val,offsetBits,numBits); | |
} | |
template<typename T> | |
T bitfieldInsert(T base, T insert, uint32_t offset, uint32_t bits) | |
{ | |
return spirv::bitFieldInsert<T>(base, insert, offset, bits); | |
} | |
template<typename T> | |
T bitfieldReverse(T value) | |
{ | |
return spirv::bitFieldReverse<T>(value); | |
} |
So maybe I can remove this one? But then there's no bitfieldOverwrite
and I don't know if they do the same thing as the GLSL version.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@Przemog1 is responsible here, but in general:
There's should be the SPIR-V intrinsic in spirv
namespace which provides this GLSL builtin to HLSL.
Then there should be an implementation in C++ which does the same thing, its very likely that glm
already has one.
And finally a nbl::hlsl::
namespace function which calls the SPIR-V or C++ depending on the language __HLSL_VERSION
do not remove anything, coordinate with Przemek.
namespace impl | ||
{ | ||
template<typename T, uint16_t M, uint16_t N, uint16_t P> | ||
struct applyChainRule4D | ||
{ | ||
static matrix<T, P, M> __call(matrix<T, N, M> dFdG, matrix<T, P, N> dGdR) | ||
{ | ||
#ifdef __HLSL_VERSION | ||
return mul(dFdG, dGdR); | ||
#else | ||
return dFdG * dGdR; // glm | ||
#endif | ||
} | ||
}; | ||
|
||
template<typename T, uint16_t M, uint16_t N> | ||
struct applyChainRule3D : applyChainRule4D<T,M,N,1> | ||
{ | ||
static vector<T, N> __call(matrix<T, N, M> dFdG, vector<T, N> dGdR) | ||
{ | ||
#ifdef __HLSL_VERSION | ||
return mul(dFdG, dGdR); | ||
#else | ||
return dFdG * dGdR; // glm | ||
#endif | ||
} | ||
}; | ||
|
||
template<typename T, uint16_t M> | ||
struct applyChainRule2D : applyChainRule4D<T,M,1,1> | ||
{ | ||
static vector<T, M> __call(vector<T, M> dFdG, T dGdR) | ||
{ | ||
#ifdef __HLSL_VERSION | ||
return mul(dFdG, dGdR); | ||
#else | ||
return dFdG * dGdR; // glm | ||
#endif | ||
} | ||
}; | ||
|
||
template<typename T> | ||
struct applyChainRule1D : applyChainRule4D<T,1,1,1> | ||
{ | ||
static T __call(T dFdG, T dGdR) | ||
{ | ||
return dFdG * dGdR; | ||
} | ||
}; | ||
} | ||
|
||
// possible to derive M,N,P automatically? | ||
template<typename T, uint16_t M, uint16_t N, uint16_t P NBL_FUNC_REQUIRES(is_scalar_v<T> && M>1 && N>1 && P>1) | ||
matrix<T, P, M> applyChainRule(matrix<T, N, M> dFdG, matrix<T, P, N> dGdR) | ||
{ | ||
return impl::applyChainRule4D<T,M,N,P>::__call(dFdG, dGdR); | ||
} | ||
|
||
template<typename T, uint16_t M, uint16_t N NBL_FUNC_REQUIRES(is_scalar_v<T> && M>1 && N>1) | ||
vector<T, N> applyChainRule(matrix<T, N, M> dFdG, vector<T, N> dGdR) | ||
{ | ||
return impl::applyChainRule3D<T,M,N>::__call(dFdG, dGdR); | ||
} | ||
|
||
template<typename T, uint16_t M NBL_FUNC_REQUIRES(is_scalar_v<T> && M>1) | ||
vector<T, M> applyChainRule(vector<T, M> dFdG, T dGdR) | ||
{ | ||
return impl::applyChainRule2D<T,M>::__call(dFdG, dGdR); | ||
} | ||
|
||
template<typename T NBL_FUNC_REQUIRES(is_scalar_v<T>) | ||
T applyChainRule(T dFdG, T dGdR) | ||
{ | ||
return impl::applyChainRule1D<T>::__call(dFdG, dGdR); | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I thought this wa doable through just a simple
template<typename T, uint16_t M, uint16_t N, uint16_t P NBL_FUNC_REQUIRES(is_scalar_v<T> && M>1 && N>1 && P>1)
matrix<T,M,P> applyChainRule(matrix<T,N,M> dFdG, matrix<T,M,P> dGdR)
{
return mul(dFdG,dGdR);
}
which just call mul
remember GLSL was silly and had NxM on the matrix names backwards as MxN
Also in HLSL you can have 1x or x1 matrix types, in GLSL you couldn't so you can remove the overloads with vector
and the >1
dimension requirements
https://godbolt.org/z/qoTj5xPMG
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
GLM uses *
for matrix multiplications, doesn't it?
Could change to use mul
in #804
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Also don't need partial spec?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
GLM uses
*
for matrix multiplications, doesn't it? Could change to usemul
in #804
we have our own mul
now, for both C++ and HLSL
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Also don't need partial spec?
There's no custom/specialized behaviour to enable, the only thing we could do is maybe allow a different Matrix
type.
But that can be noted as a TODO in a comment.
// TOOD: is this doing what it should be? | ||
template<typename T> | ||
struct lp_norm<T,1,false> |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yes its correct but you'll never hit it because 1 is Odd, wrong boolean value
{ | ||
static scalar_type_t<T> __sum(const T v) | ||
{ | ||
return dot<T>(v, v); // TODO: wait for overloaded dot? |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
use the nabla templated dot and we're good
|
||
static scalar_type_t<T> __call(const T v) | ||
{ | ||
return sqrt<T>(__sum(v)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
wrong template param to sqrt
should be the scalar_type instead of T
template<typename T, uint32_t LP, bool Odd=(LP&0x1)> | ||
struct lp_norm; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
to general nitpicks:
- use the
functional.hlsl
array getters instead ofoperator[]
- use the
vector_traits
to establish thescalar_type_t
because the one fromtype_traits
is only a trait for native vectors
template <typename T NBL_FUNC_REQUIRES(is_scalar_v<T>) | ||
vector<T, 3> reflect(vector<T, 3> I, vector<T, 3> N, T NdotI) | ||
{ | ||
return N * 2.0f * NdotI - I; | ||
} | ||
|
||
template <typename T NBL_FUNC_REQUIRES(is_scalar_v<T>) | ||
vector<T, 3> reflect(vector<T, 3> I, vector<T, 3> N) | ||
{ | ||
T NdotI = dot<T>(N, I); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
- your
dot
wont compile, because itsdot<VectorT>
notdot<ScalarT>
- I'd probably require
/*Vectorial<T> && */vector_traits<T>::Dimension==3
instead of takingT
and slapping it in avector
(perf note, vector_traits existing is same as checking Vectorial but will make Clang/DXC choke a lot less)
namespace impl | ||
{ | ||
template<typename T> | ||
struct orientedEtas; | ||
|
||
template<> | ||
struct orientedEtas<float> | ||
{ | ||
static bool __call(NBL_REF_ARG(float) orientedEta, NBL_REF_ARG(float) rcpOrientedEta, float NdotI, float eta) | ||
{ | ||
const bool backside = NdotI < 0.0; | ||
const float rcpEta = 1.0 / eta; | ||
orientedEta = backside ? rcpEta : eta; | ||
rcpOrientedEta = backside ? eta : rcpEta; | ||
return backside; | ||
} | ||
}; | ||
|
||
template<> | ||
struct orientedEtas<float32_t3> | ||
{ | ||
static bool __call(NBL_REF_ARG(float32_t3) orientedEta, NBL_REF_ARG(float32_t3) rcpOrientedEta, float NdotI, float32_t3 eta) | ||
{ | ||
const bool backside = NdotI < 0.0; | ||
const float32_t3 rcpEta = (float32_t3)1.0 / eta; | ||
orientedEta = backside ? rcpEta:eta; | ||
rcpOrientedEta = backside ? eta:rcpEta; | ||
return backside; | ||
} | ||
}; | ||
} | ||
|
||
template<typename T NBL_FUNC_REQUIRES(is_scalar_v<T> || is_vector_v<T>) | ||
bool getOrientedEtas(NBL_REF_ARG(T) orientedEta, NBL_REF_ARG(T) rcpOrientedEta, scalar_type_t<T> NdotI, T eta) | ||
{ | ||
return impl::orientedEtas<T>::__call(orientedEta, rcpOrientedEta, NdotI, eta); | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this stuff belongs in bxdf/common
or even bxdf/fresnel
CC: @keptsecret
} | ||
|
||
template<typename T NBL_FUNC_REQUIRES(is_scalar_v<T>) | ||
vector<T,3> refract(vector<T,3> I, vector<T,3> N, bool backside, T NdotI, T NdotI2, T rcpOrientedEta, T rcpOrientedEta2) | ||
{ | ||
impl::refract<T> r = impl::refract<T>::create(I, N, backside, NdotI, NdotI2, rcpOrientedEta, rcpOrientedEta2); | ||
return r.doRefract(); | ||
} | ||
|
||
template<typename T NBL_FUNC_REQUIRES(is_scalar_v<T>) | ||
vector<T,3> refract(vector<T,3> I, vector<T,3> N, T NdotI, T eta) | ||
{ | ||
impl::refract<T> r = impl::refract<T>::create(I, N, NdotI, eta); | ||
return r.doRefract(); | ||
} | ||
|
||
template<typename T NBL_FUNC_REQUIRES(is_scalar_v<T>) | ||
vector<T,3> refract(vector<T,3> I, vector<T,3> N, T eta) | ||
{ | ||
impl::refract<T> r = impl::refract<T>::create(I, N, eta); | ||
return r.doRefract(); | ||
} | ||
|
||
template<typename T NBL_FUNC_REQUIRES(is_scalar_v<T>) | ||
T reflectRefract_computeNdotT(bool backside, T NdotI2, T rcpOrientedEta2) | ||
{ | ||
impl::refract<T> r; | ||
r.NdotI2 = NdotI2; | ||
r.rcpOrientedEta2 = rcpOrientedEta2; | ||
r.backside = backside; | ||
return r.computeNdotT(); | ||
} | ||
|
||
template<typename T NBL_FUNC_REQUIRES(is_scalar_v<T>) | ||
vector<T,3> reflectRefract_impl(bool _refract, vector<T,3> _I, vector<T,3> _N, T _NdotI, T _NdotTorR, T _rcpOrientedEta) | ||
{ | ||
return impl::refract<T>::doReflectRefract(_refract, _I, _N, _NdotI, _NdotTorR, _rcpOrientedEta); | ||
} | ||
|
||
template<typename T NBL_FUNC_REQUIRES(is_scalar_v<T>) | ||
vector<T,3> reflectRefract(bool _refract, vector<T,3> I, vector<T,3> N, bool backside, T NdotI, T NdotI2, T rcpOrientedEta, T rcpOrientedEta2) | ||
{ | ||
impl::refract<T> r = impl::refract<T>::create(I, N, backside, NdotI, NdotI2, rcpOrientedEta, rcpOrientedEta2); | ||
return r.doReflectRefract(_refract); | ||
} | ||
|
||
template<typename T NBL_FUNC_REQUIRES(is_scalar_v<T>) | ||
vector<T,3> reflectRefract(bool _refract, vector<T,3> I, vector<T,3> N, T NdotI, T eta) | ||
{ | ||
impl::refract<T> r = impl::refract<T>::create(I, N, NdotI, eta); | ||
return r.doReflectRefract(_refract); | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
remove them all and take the struct out of the impl
namespace, the only reason (as I've explained to @keptsecret ) so many overloads existed is because we didn't have structs & methods in HLSL.
Now that we have, we will oonly use structs for complex things like this.
Also this is fresnel related stuff and should be somewhere in bxdf/fresnel
not regular common math
T rcpOrientedEta; | ||
T rcpOrientedEta2; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
stuff that deals with refractive indices definitely belongs in bxdf/fresnel
{ | ||
c = cos<T>(theta); | ||
s = sqrt<T>(1.0-c*c); | ||
s = (theta < 0.0) ? -s : s; // TODO: test with XOR |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
make that signflip
or negate
funuction that takes a bool, and then use it here
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
there is flipSign
function in the hlsl/ieee754.hlsl
file
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
there is
flipSign
function in thehlsl/ieee754.hlsl
file
the flipSign
needs a bool arg to tell whether to flip or not
|
||
|
||
// valid only for `theta` in [-PI,PI] | ||
template <typename T NBL_FUNC_REQUIRES(is_scalar_v<T>) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
you should change most is_scalar_v
into a bool concept that matches float scalars and also allows emulated_float64_t
Also scalar
matches integers and booleans, on which sin and cos don't make sense
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I've created such concept already
https://github.com/Devsh-Graphics-Programming/Nabla/blob/maths_for_bxdfs_hlsl/include/nbl/builtin/hlsl/concepts/core.hlsl#L103-L105
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
however name of this function will be changed to with next commit FloatingPointLikeScalar
template <typename T NBL_FUNC_REQUIRES(is_scalar_v<T>) | ||
matrix<T, 3, 2> frisvad(vector<T, 3> n) // TODO: confirm dimensions of matrix |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
frankly I'd make it void frisvad(const T normal, NBL_REF_ARG(T) tangent, NBL_REF_ARG(T) bitangent)
requiring that vector_traits<T>::Dimension==3
the reason is that this way its more semantically sound, instead of trying to scratch your head what row/column of the matrix is what (in the original GLSL, the first column - the first vector you get with operator[]
or provide with the constructor was the tangent)
We had this conversation with @keptsecret on his thread and PR
Also https://github.com/Devsh-Graphics-Programming/Nabla/pull/803/files#r1904982026
return (n.z < -0.9999999) ? matrix<T, 2, 3>(vector<T, 3>(0.0,-1.0,0.0), vector<T, 3>(-1.0,0.0,0.0)) : | ||
matrix<T, 2, 3>(vector<T, 3>(1.0-n.x*n.x*a, b, -n.x), vector<T, 3>(b, 1.0-n.y*n.y*a, -n.y)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
also use an if instead of ?
we get no perf diff, but much better legibility (also the n.z
condition can be moved up)
// @ return abs(x) if cond==true, max(x,0.0) otherwise | ||
template <typename T NBL_FUNC_REQUIRES(is_scalar_v<T> || is_vector_v<T>) | ||
T conditionalAbsOrMax(bool cond, T x, T limit); | ||
|
||
template <> | ||
float conditionalAbsOrMax<float>(bool cond, float x, float limit) | ||
{ | ||
const float condAbs = asfloat(asuint(x) & uint(cond ? 0x7fFFffFFu : 0xffFFffFFu)); | ||
return max(condAbs,limit); | ||
} | ||
|
||
template <> | ||
float32_t2 conditionalAbsOrMax<float32_t2>(bool cond, float32_t2 x, float32_t2 limit) | ||
{ | ||
const float32_t2 condAbs = asfloat(asuint(x) & select(cond, (uint32_t2)0x7fFFffFFu, (uint32_t2)0xffFFffFFu)); | ||
return max(condAbs,limit); | ||
} | ||
|
||
template <> | ||
float32_t3 conditionalAbsOrMax<float32_t3>(bool cond, float32_t3 x, float32_t3 limit) | ||
{ | ||
const float32_t3 condAbs = asfloat(asuint(x) & select(cond, (uint32_t3)0x7fFFffFFu, (uint32_t3)0xffFFffFFu)); | ||
return max(condAbs,limit); | ||
} | ||
|
||
template <> | ||
float32_t4 conditionalAbsOrMax<float32_t4>(bool cond, float32_t4 x, float32_t4 limit) | ||
{ | ||
const float32_t4 condAbs = asfloat(asuint(x) & select(cond, (uint32_t4)0x7fFFffFFu, (uint32_t4)0xffFFffFFu)); | ||
return max(condAbs,limit); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Require that T
is floating point (matching emulated_float too), leave signed integer impl as a TODO
because the condAbs
is not constexpr, you can use our nbl::hlsl::bit_cast
you can use nbl::hlsl::promote
to turn a single scalar into a vector.
Also use mix
instead of select
then it should all work cross C++ and HLSL
template<typename T, uint32_t LP NBL_FUNC_REQUIRES(LP>0) | ||
scalar_type_t<T> lpNormPreroot(NBL_CONST_REF_ARG(T) v) | ||
{ | ||
return impl::lp_norm<T,LP>::__sum(v); | ||
} | ||
|
||
template<typename T, uint32_t LP> |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
you probably want to require that vector_traits<T>::scalar_type
is a float (including emulated float)
matrix<T, 3, 2> frisvad(vector<T, 3> n) // TODO: confirm dimensions of matrix | ||
matrix<T, 2, 3> frisvad(vector<T, 3> n) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
dont use matrix
its semantically confusing
const AsUint toAsUint = ieee754::impl::bitCastToUintType(to); | ||
const AsUint fromAsUint = ieee754::impl::bitCastToUintType(from); | ||
|
||
return bit_cast<FloatingPoint>(toAsUint | extractSignPreserveBitPattern(from)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this does not copy sign, because you need to clear the sign bit on toAsUint
first
right now it will make to
negative if any of the inputs are negative
btw are you sure you want to force that FloatingPoint
is non-emulated and scalar? It could work on any float even emulated 64
} | ||
|
||
template <typename FloatingPoint NBL_FUNC_REQUIRES(hlsl::is_floating_point_v<FloatingPoint>&& hlsl::is_scalar_v<FloatingPoint>) | ||
NBL_CONSTEXPR_INLINE_FUNC FloatingPoint flipSign(FloatingPoint val) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
bool indicating whether to flip would be super useful actually and it would always get optimized away if const
btw are you sure you want to force that FloatingPoint is non-emulated and scalar? It could work on any float even emulated 64
#ifndef __HLSL_VERSION | ||
#include <bitset> | ||
#endif |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
is this needed here?
template<typename FloatingPoint> | ||
[[vk::ext_instruction(GLSLstd450::GLSLstd450Pow, "GLSL.std.450")]] | ||
enable_if_t<is_floating_point<FloatingPoint>::value && !is_matrix_v<FloatingPoint>, FloatingPoint> pow(FloatingPoint lhs, FloatingPoint rhs); | ||
|
||
template<typename FloatingPoint> | ||
[[vk::ext_instruction(GLSLstd450::GLSLstd450Exp, "GLSL.std.450")]] | ||
enable_if_t<is_floating_point<FloatingPoint>::value && !is_matrix_v<FloatingPoint>, FloatingPoint> exp(FloatingPoint val); | ||
|
||
template<typename FloatingPoint> | ||
[[vk::ext_instruction(GLSLstd450::GLSLstd450Exp2, "GLSL.std.450")]] | ||
enable_if_t<is_floating_point<FloatingPoint>::value && !is_matrix_v<FloatingPoint>, FloatingPoint> exp2(FloatingPoint val); | ||
|
||
template<typename FloatingPoint> | ||
[[vk::ext_instruction(GLSLstd450::GLSLstd450Log, "GLSL.std.450")]] | ||
enable_if_t<is_floating_point<FloatingPoint>::value && !is_matrix_v<FloatingPoint>, FloatingPoint> log(FloatingPoint val); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
pow, exp and log don't take float64 without an extension
also as per our discord conversation, we need 5 named concepts in spirv
namespace so the enable_if_t
are clearer and easier to compare with SPIR-V spec
template<typename FloatingPoint NBL_FUNC_REQUIRES(concepts::FloatingPointLikeScalar<FloatingPoint>) | ||
inline FloatingPoint erf(FloatingPoint x) | ||
{ | ||
return tgmath_impl::erf_helper<FloatingPoint>::__call(x); | ||
} | ||
|
||
template<typename FloatingPoint NBL_FUNC_REQUIRES(concepts::FloatingPointLikeScalar<FloatingPoint>) | ||
inline FloatingPoint erfInv(FloatingPoint x) | ||
{ | ||
return tgmath_impl::erfInv_helper<FloatingPoint>::__call(x); | ||
} | ||
|
||
template<typename T NBL_FUNC_REQUIRES(concepts::FloatingPointLikeScalar<T> || concepts::Vectorial<T>) | ||
inline T floor(NBL_CONST_REF_ARG(T) val) | ||
{ | ||
return tgmath_impl::floor_helper<T>::__call(val); | ||
} | ||
|
||
template<typename T, typename U NBL_FUNC_REQUIRES((concepts::FloatingPointLikeScalar<T> || concepts::FloatingPointLikeVectorial<T>) && (concepts::FloatingPointLikeScalar<U> || is_same_v<U, bool>)) | ||
inline T lerp(NBL_CONST_REF_ARG(T) x, NBL_CONST_REF_ARG(T) y, NBL_CONST_REF_ARG(U) a) | ||
{ | ||
return tgmath_impl::lerp_helper<T, U>::__call(x, y, a); | ||
} | ||
|
||
template<typename FloatingPoint NBL_FUNC_REQUIRES(concepts::FloatingPointLikeScalar<FloatingPoint>) | ||
inline bool isnan(NBL_CONST_REF_ARG(FloatingPoint) val) | ||
{ | ||
return tgmath_impl::isnan_helper<FloatingPoint>::__call(val); | ||
} | ||
|
||
template<typename V NBL_FUNC_REQUIRES(concepts::FloatingPointLikeVectorial<V>) | ||
inline vector<bool, vector_traits<V>::Dimension> isnan(NBL_CONST_REF_ARG(V) val) | ||
{ | ||
return tgmath_impl::isnan_helper<V>::__call(val); | ||
} | ||
|
||
template<typename FloatingPoint NBL_FUNC_REQUIRES(concepts::FloatingPointLikeScalar<FloatingPoint>) | ||
inline FloatingPoint isinf(NBL_CONST_REF_ARG(FloatingPoint) val) | ||
{ | ||
return tgmath_impl::isinf_helper<FloatingPoint>::__call(val); | ||
} | ||
|
||
template<typename V NBL_FUNC_REQUIRES(concepts::FloatingPointLikeVectorial<V>) | ||
inline vector<bool, vector_traits<V>::Dimension> isinf(NBL_CONST_REF_ARG(V) val) | ||
{ | ||
return tgmath_impl::isinf_helper<V>::__call(val); | ||
} | ||
|
||
template<typename T NBL_FUNC_REQUIRES(concepts::FloatingPointLikeScalar<T> || concepts::FloatingPointLikeVectorial<T>) | ||
inline T pow(NBL_CONST_REF_ARG(T) x, NBL_CONST_REF_ARG(T) y) | ||
{ | ||
return tgmath_impl::pow_helper<T>::__call(x, y); | ||
} | ||
|
||
template<typename T NBL_FUNC_REQUIRES(concepts::FloatingPointLikeScalar<T> || concepts::FloatingPointLikeVectorial<T>) | ||
inline T exp(NBL_CONST_REF_ARG(T) x) | ||
{ | ||
return tgmath_impl::exp_helper<T>::__call(x); | ||
} | ||
|
||
|
||
template<typename T NBL_FUNC_REQUIRES(concepts::FloatingPointLikeScalar<T> || concepts::Vectorial<T>) | ||
inline T exp2(NBL_CONST_REF_ARG(T) x) | ||
{ | ||
return tgmath_impl::exp2_helper<T>::__call(x); | ||
} | ||
|
||
template<typename T NBL_FUNC_REQUIRES(concepts::FloatingPointLikeScalar<T> || concepts::FloatingPointLikeVectorial<T>) | ||
inline T log(NBL_CONST_REF_ARG(T) x) | ||
{ | ||
return tgmath_impl::log_helper<T>::__call(x); | ||
} | ||
|
||
template<typename T NBL_FUNC_REQUIRES(concepts::FloatingPointLikeScalar<T> || concepts::SignedIntegral<T> || concepts::FloatingPointLikeVectorial<T> || concepts::SignedIntVectorial<T>) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
maybe don't put any constraints on the functions, and let the helpers do all the error reporting (no point doing it twice) as long as each _helper
declares NBL_STRUCT_CONSTRAINABLE
in the EMPTY forward declaration of the primary template
inline bool isnan(NBL_CONST_REF_ARG(FloatingPoint) val) | ||
{ | ||
return tgmath_impl::isnan_helper<FloatingPoint>::__call(val); | ||
} | ||
|
||
template<typename V NBL_FUNC_REQUIRES(concepts::FloatingPointLikeVectorial<V>) | ||
inline vector<bool, vector_traits<V>::Dimension> isnan(NBL_CONST_REF_ARG(V) val) | ||
{ | ||
return tgmath_impl::isnan_helper<V>::__call(val); | ||
} | ||
|
||
template<typename FloatingPoint NBL_FUNC_REQUIRES(concepts::FloatingPointLikeScalar<FloatingPoint>) | ||
inline FloatingPoint isinf(NBL_CONST_REF_ARG(FloatingPoint) val) | ||
{ | ||
return tgmath_impl::isinf_helper<FloatingPoint>::__call(val); | ||
} | ||
|
||
template<typename V NBL_FUNC_REQUIRES(concepts::FloatingPointLikeVectorial<V>) | ||
inline vector<bool, vector_traits<V>::Dimension> isinf(NBL_CONST_REF_ARG(V) val) | ||
{ | ||
return tgmath_impl::isinf_helper<V>::__call(val); | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
instead of overload, maybe you just want to require that this _helper
declares a using return_t =
?
namespace nbl | ||
{ | ||
namespace hlsl | ||
{ | ||
namespace tgmath_impl | ||
{ | ||
|
||
template<typename T, typename U NBL_STRUCT_CONSTRAINABLE> | ||
struct lerp_helper; | ||
|
||
#ifdef __HLSL_VERSION | ||
#define MIX_FUNCTION spirv::fMix | ||
#else | ||
#define MIX_FUNCTION glm::mix | ||
#endif |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
as per our discord conversation, every helper should have a FIRST partial specialization done with is_same_v<decltype(spirv::...),expected_Return_type>
constraint in a #ifdef __HLSL_VERSION
block
then the C++ versions and cross-language adaptors for emulated should come
inline bool isnan_uint_impl(UnsignedInteger val) | ||
{ | ||
using AsFloat = typename float_of_size<sizeof(UnsignedInteger)>::type; | ||
return bool((ieee754::extractBiasedExponent<UnsignedInteger>(val) == ieee754::traits<AsFloat>::specialValueExp) && (val & ieee754::traits<AsFloat>::mantissaMask)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
there's a faster check:
- AND with
(numeric_limits<UnsignedInteger>::max>>1)
- clear the sign mask - check for
> (specialValueExp<<mantissaBits)
template<typename FloatingPoint> | ||
NBL_PARTIAL_REQ_TOP(concepts::FloatingPointScalar<FloatingPoint>) | ||
struct isnan_helper<FloatingPoint NBL_PARTIAL_REQ_BOT(concepts::FloatingPointScalar<FloatingPoint>) > | ||
{ | ||
static bool __call(NBL_CONST_REF_ARG(FloatingPoint) x) | ||
{ | ||
#ifdef __HLSL_VERSION | ||
return spirv::isNan<FloatingPoint>(x); | ||
#else | ||
// GCC and Clang will always return false with call to std::isnan when fast math is enabled, | ||
// this implementation will always return appropriate output regardless is fas math is enabled or not | ||
using AsUint = typename unsigned_integer_of_size<sizeof(FloatingPoint)>::type; | ||
return tgmath_impl::isnan_uint_impl(reinterpret_cast<const AsUint&>(x)); | ||
#endif | ||
} | ||
}; | ||
|
||
template<typename V> | ||
NBL_PARTIAL_REQ_TOP(concepts::Vectorial<V>) | ||
struct isnan_helper<V NBL_PARTIAL_REQ_BOT(concepts::Vectorial<V>) > | ||
{ | ||
using output_t = vector<bool, hlsl::vector_traits<V>::Dimension>; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
as per the discord convom, its best if there's a whole separate SPIR-V specialization which is not predicated on a concept but if the SPIR-V intrinsic is callable/compiles with that type
and the C++ specialization can be just for a scalar.
So you have 2 specializations in two different parts of __HLSL_VERSION
block, instead of one that tries to reconcile incompatible concepts
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ideally I'd keep the functions without SPIR-V intrinsic equivalents in a separate file or near the bottom of this one
// ERF | ||
|
||
template<typename T NBL_STRUCT_CONSTRAINABLE> | ||
struct erf_helper; | ||
|
||
template<typename FloatingPoint> | ||
NBL_PARTIAL_REQ_TOP(concepts::FloatingPointScalar<FloatingPoint>) | ||
struct erf_helper<FloatingPoint NBL_PARTIAL_REQ_BOT(concepts::FloatingPointScalar<FloatingPoint>) > |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
the non-spirv forwarding functions can be done however you like, most sane is probably to do the per-channel broadcast specs as macros for quick partial spec
the Scalar constraint you have here is fine, but not for the potentially SPIR-V vector perfect forwarding ones above
const FloatingPoint a1 = 0.254829592; | ||
const FloatingPoint a2 = -0.284496736; | ||
const FloatingPoint a3 = 1.421413741; | ||
const FloatingPoint a4 = -1.453152027; | ||
const FloatingPoint a5 = 1.061405429; | ||
const FloatingPoint p = 0.3275911; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
pay attention to 2 things:
- whether the constant literal has all the digits needed (float32 and float64 often have different digits
- to get true FP64 constants in HLSL you need to use a stupid suffix, there's actually an
NBL_
macro you need to place around them cause DXC is holding back on implementing explicit sized fp16, fp32, fp64 suffices on float literals
Also, usually an implementation of a complex math function is different for float16, float32 and float64 (different levels of polynomial approximation/coefficients and number of steps/iterations) so I'd have a separate partial spec in preprocessor blocks of `__HLSL_VERSION
|
No description provided.