EXPERIMENTAL: Metal backend (#441)
This is not a continuation of the Metal backend; this is simply bringing the branch up to date and merging it as-is behind an experiment. --------- Co-authored-by: Isaac Marovitz <isaacryu@icloud.com> Co-authored-by: Samuliak <samuliak77@gmail.com> Co-authored-by: SamoZ256 <96914946+SamoZ256@users.noreply.github.com> Co-authored-by: Isaac Marovitz <42140194+IsaacMarovitz@users.noreply.github.com> Co-authored-by: riperiperi <rhy3756547@hotmail.com> Co-authored-by: Gabriel A <gab.dark.100@gmail.com>
This commit is contained in:
@@ -0,0 +1,5 @@
|
||||
// Returns the index of the least significant set bit of x, or -1 when x is zero.
// T may be any integer scalar or vector type accepted by the Metal builtins
// ctz() and select(); every operation below is applied per component.
template<typename T>
inline T findLSB(T x)
{
    const T notFound = T(-1);
    const T trailingZeros = ctz(x);

    // Metal's select(a, b, c) picks b where c is true and a otherwise,
    // so a zero input maps to the "not found" sentinel.
    return select(trailingZeros, notFound, x == T(0));
}
|
||||
@@ -0,0 +1,5 @@
|
||||
// Signed find-most-significant-bit.
//
// For x >= 0 this returns the index of the highest set bit; for x < 0 it
// returns the index of the highest *cleared* bit (the GLSL findMSB rule for
// signed integers). Returns -1 when no such bit exists, i.e. for 0 and -1.
// T is expected to be a signed integer scalar or vector type accepted by the
// Metal builtins clz() and select().
template<typename T>
inline T findMSBS32(T x)
{
    // The highest zero bit of a negative value is the highest set bit of its
    // bitwise complement, and ~x == -1 - x in two's complement.
    T v = select(x, T(-1) - x, x < T(0));

    // clz(T(0)) is used as the bit width of T, so width - (clz(v) + 1) is the
    // index of the top set bit of v. select() substitutes -1 when v is zero.
    return select(clz(T(0)) - (clz(v) + T(1)), T(-1), v == T(0));
}
|
||||
@@ -0,0 +1,6 @@
|
||||
// Unsigned find-most-significant-bit.
//
// Returns the index of the highest set bit of x, treating x purely as a bit
// pattern, or -1 (all bits set for unsigned T) when x is zero.
// No sign-dependent complement is applied: for unsigned T such a step can
// never trigger, and for a signed T carrying an unsigned bit pattern it would
// give the wrong index whenever the top bit is set.
template<typename T>
inline T findMSBU32(T x)
{
    // clz(T(0)) is used as the bit width of T, so width - (clz(x) + 1) is the
    // index of the top set bit. select() substitutes -1 when x is zero.
    return select(clz(T(0)) - (clz(x) + T(1)), T(-1), x == T(0));
}
|
||||
@@ -0,0 +1,10 @@
|
||||
namespace Ryujinx.Graphics.Shader.CodeGen.Msl
{
    /// <summary>
    /// Names of the MSL helper functions, as they are spelled in generated shader code.
    /// </summary>
    /// <remarks>
    /// Declared as constants so the names cannot be reassigned at runtime.
    /// </remarks>
    static class HelperFunctionNames
    {
        public const string FindLSB = "findLSB";
        public const string FindMSBS32 = "findMSBS32";
        public const string FindMSBU32 = "findMSBU32";

        public const string SwizzleAdd = "swizzleAdd";
    }
}
|
||||
@@ -0,0 +1,14 @@
|
||||
// Computes l + r through a single fused multiply-add (1 * l + r).
// The function is marked optnone so the compiler does not optimize the
// fma call away or rewrite it.
template<typename T>
[[clang::optnone]] T PreciseFAdd(T l, T r) {
    const T unit = T(1);
    return fma(unit, l, r);
}
|
||||
|
||||
// Computes l - r through a single fused multiply-add (-1 * r + l).
// The function is marked optnone so the compiler does not optimize the
// fma call away or rewrite it.
template<typename T>
[[clang::optnone]] T PreciseFSub(T l, T r) {
    const T negativeUnit = T(-1);
    return fma(negativeUnit, r, l);
}
|
||||
|
||||
// Computes l * r through a single fused multiply-add (l * r + -0).
// The function is marked optnone so the compiler does not optimize the
// fma call away or rewrite it.
//
// The addend is negative zero rather than positive zero: under
// round-to-nearest, p + (-0) == p for every p, including p == -0, whereas
// p + (+0) would turn a negative-zero product into +0 and lose its sign.
template<typename T>
[[clang::optnone]] T PreciseFMul(T l, T r) {
    return fma(l, r, T(-0.0));
}
|
||||
@@ -0,0 +1,7 @@
|
||||
// Combines x and y as (x * sx + y * sy), where the pair (sx, sy) is chosen
// per thread from a 2-bit selector packed in `mask`.
//
// The selector for a thread is taken from bits [2k, 2k+1] of `mask`, where k
// is the low two bits of thread_index_in_simdgroup. Selector values map to:
//   0 ->  x + y
//   1 -> -x + y
//   2 ->  x - y
//   3 ->      y   (x is multiplied by 0.0)
float swizzleAdd(float x, float y, int mask, uint thread_index_in_simdgroup)
{
    // Multipliers applied to x and y, indexed by the 2-bit selector.
    const float4 xScale = float4(1.0, -1.0, 1.0, 0.0);
    const float4 yScale = float4(1.0, 1.0, -1.0, 1.0);

    // Position of this thread within its group of four consecutive threads.
    const int laneInQuad = int(thread_index_in_simdgroup & 3u);

    // Each lane owns two consecutive bits of the mask.
    const int selector = (mask >> (laneInQuad * 2)) & 3;

    return x * xScale[selector] + y * yScale[selector];
}
|
||||
Reference in New Issue
Block a user