Implement VP9 loop filtering
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
using Ryujinx.Common.Memory;
|
||||
using Ryujinx.Common.Memory;
|
||||
using Ryujinx.Graphics.Nvdec.Vp9.Common;
|
||||
using System.Diagnostics;
|
||||
using System.Runtime.CompilerServices;
|
||||
@@ -75,17 +75,16 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
Vector128<int> zero = Vector128<int>.Zero;
|
||||
Vector128<int> const64 = Vector128.Create(64);
|
||||
|
||||
ulong x, y;
|
||||
src -= SubpelTaps / 2 - 1;
|
||||
src -= (SubpelTaps / 2) - 1;
|
||||
|
||||
fixed (Array8<short>* xFilter = xFilters)
|
||||
{
|
||||
Vector128<short> vfilter = Sse2.LoadVector128((short*)xFilter + (uint)(x0Q4 & SubpelMask) * 8);
|
||||
Vector128<short> vfilter = Sse2.LoadVector128((short*)xFilter + ((uint)(x0Q4 & SubpelMask) * 8));
|
||||
|
||||
for (y = 0; y < (uint)h; ++y)
|
||||
for (ulong y = 0; y < (uint)h; ++y)
|
||||
{
|
||||
ulong srcOffset = (uint)x0Q4 >> SubpelBits;
|
||||
for (x = 0; x < (uint)w; x += 4)
|
||||
for (ulong x = 0; x < (uint)w; x += 4)
|
||||
{
|
||||
Vector128<short> vsrc0 = Sse41.ConvertToVector128Int16(&src[srcOffset + x]);
|
||||
Vector128<short> vsrc1 = Sse41.ConvertToVector128Int16(&src[srcOffset + x + 1]);
|
||||
@@ -94,8 +93,10 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
|
||||
Vector128<int> sum0123 = MultiplyAddAdjacent(vsrc0, vsrc1, vsrc2, vsrc3, vfilter, zero);
|
||||
|
||||
Sse.StoreScalar((float*)&dst[x], PackUnsignedSaturate(RoundShift(sum0123, const64), zero).AsSingle());
|
||||
Sse.StoreScalar((float*)&dst[x],
|
||||
PackUnsignedSaturate(RoundShift(sum0123, const64), zero).AsSingle());
|
||||
}
|
||||
|
||||
src += srcStride;
|
||||
dst += dstStride;
|
||||
}
|
||||
@@ -117,22 +118,20 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
if (Sse41.IsSupported && UseIntrinsics && xStepQ4 == 1 << SubpelBits)
|
||||
{
|
||||
ConvolveHorizSse41(src, srcStride, dst, dstStride, xFilters, x0Q4, w, h);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
int x, y;
|
||||
src -= SubpelTaps / 2 - 1;
|
||||
src -= (SubpelTaps / 2) - 1;
|
||||
|
||||
for (y = 0; y < h; ++y)
|
||||
for (int y = 0; y < h; ++y)
|
||||
{
|
||||
int xQ4 = x0Q4;
|
||||
for (x = 0; x < w; ++x)
|
||||
for (int x = 0; x < w; ++x)
|
||||
{
|
||||
byte* srcX = &src[xQ4 >> SubpelBits];
|
||||
ref Array8<short> xFilter = ref xFilters[xQ4 & SubpelMask];
|
||||
int k, sum = 0;
|
||||
for (k = 0; k < SubpelTaps; ++k)
|
||||
int sum = 0;
|
||||
for (int k = 0; k < SubpelTaps; ++k)
|
||||
{
|
||||
sum += srcX[k] * xFilter[k];
|
||||
}
|
||||
@@ -140,6 +139,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
dst[x] = BitUtils.ClipPixel(BitUtils.RoundPowerOfTwo(sum, FilterBits));
|
||||
xQ4 += xStepQ4;
|
||||
}
|
||||
|
||||
src += srcStride;
|
||||
dst += dstStride;
|
||||
}
|
||||
@@ -156,25 +156,26 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
int w,
|
||||
int h)
|
||||
{
|
||||
int x, y;
|
||||
src -= SubpelTaps / 2 - 1;
|
||||
src -= (SubpelTaps / 2) - 1;
|
||||
|
||||
for (y = 0; y < h; ++y)
|
||||
for (int y = 0; y < h; ++y)
|
||||
{
|
||||
int xQ4 = x0Q4;
|
||||
for (x = 0; x < w; ++x)
|
||||
for (int x = 0; x < w; ++x)
|
||||
{
|
||||
byte* srcX = &src[xQ4 >> SubpelBits];
|
||||
ref Array8<short> xFilter = ref xFilters[xQ4 & SubpelMask];
|
||||
int k, sum = 0;
|
||||
for (k = 0; k < SubpelTaps; ++k)
|
||||
int sum = 0;
|
||||
for (int k = 0; k < SubpelTaps; ++k)
|
||||
{
|
||||
sum += srcX[k] * xFilter[k];
|
||||
}
|
||||
|
||||
dst[x] = (byte)BitUtils.RoundPowerOfTwo(dst[x] + BitUtils.ClipPixel(BitUtils.RoundPowerOfTwo(sum, FilterBits)), 1);
|
||||
dst[x] = (byte)BitUtils.RoundPowerOfTwo(
|
||||
dst[x] + BitUtils.ClipPixel(BitUtils.RoundPowerOfTwo(sum, FilterBits)), 1);
|
||||
xQ4 += xStepQ4;
|
||||
}
|
||||
|
||||
src += srcStride;
|
||||
dst += dstStride;
|
||||
}
|
||||
@@ -203,18 +204,17 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
srcStride * 6,
|
||||
srcStride * 7);
|
||||
|
||||
ulong x, y;
|
||||
src -= srcStride * (SubpelTaps / 2 - 1);
|
||||
src -= srcStride * ((SubpelTaps / 2) - 1);
|
||||
|
||||
fixed (Array8<short>* yFilter = yFilters)
|
||||
{
|
||||
Vector128<short> vfilter = Sse2.LoadVector128((short*)yFilter + (uint)(y0Q4 & SubpelMask) * 8);
|
||||
Vector128<short> vfilter = Sse2.LoadVector128((short*)yFilter + ((uint)(y0Q4 & SubpelMask) * 8));
|
||||
|
||||
ulong srcBaseY = (uint)y0Q4 >> SubpelBits;
|
||||
for (y = 0; y < (uint)h; ++y)
|
||||
for (ulong y = 0; y < (uint)h; ++y)
|
||||
{
|
||||
ulong srcOffset = (srcBaseY + y) * (uint)srcStride;
|
||||
for (x = 0; x < (uint)w; x += 4)
|
||||
for (ulong x = 0; x < (uint)w; x += 4)
|
||||
{
|
||||
Vector256<int> vsrc = Avx2.GatherVector256((uint*)&src[srcOffset + x], indices, 1).AsInt32();
|
||||
|
||||
@@ -240,8 +240,10 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
|
||||
Vector128<int> sum0123 = MultiplyAddAdjacent(vsrc0, vsrc1, vsrc2, vsrc3, vfilter, zero);
|
||||
|
||||
Sse.StoreScalar((float*)&dst[x], PackUnsignedSaturate(RoundShift(sum0123, const64), zero).AsSingle());
|
||||
Sse.StoreScalar((float*)&dst[x],
|
||||
PackUnsignedSaturate(RoundShift(sum0123, const64), zero).AsSingle());
|
||||
}
|
||||
|
||||
dst += dstStride;
|
||||
}
|
||||
}
|
||||
@@ -262,22 +264,20 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
if (Avx2.IsSupported && UseIntrinsics && yStepQ4 == 1 << SubpelBits)
|
||||
{
|
||||
ConvolveVertAvx2(src, srcStride, dst, dstStride, yFilters, y0Q4, w, h);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
int x, y;
|
||||
src -= srcStride * (SubpelTaps / 2 - 1);
|
||||
src -= srcStride * ((SubpelTaps / 2) - 1);
|
||||
|
||||
for (x = 0; x < w; ++x)
|
||||
for (int x = 0; x < w; ++x)
|
||||
{
|
||||
int yQ4 = y0Q4;
|
||||
for (y = 0; y < h; ++y)
|
||||
for (int y = 0; y < h; ++y)
|
||||
{
|
||||
byte* srcY = &src[(yQ4 >> SubpelBits) * srcStride];
|
||||
ref Array8<short> yFilter = ref yFilters[yQ4 & SubpelMask];
|
||||
int k, sum = 0;
|
||||
for (k = 0; k < SubpelTaps; ++k)
|
||||
int sum = 0;
|
||||
for (int k = 0; k < SubpelTaps; ++k)
|
||||
{
|
||||
sum += srcY[k * srcStride] * yFilter[k];
|
||||
}
|
||||
@@ -285,6 +285,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
dst[y * dstStride] = BitUtils.ClipPixel(BitUtils.RoundPowerOfTwo(sum, FilterBits));
|
||||
yQ4 += yStepQ4;
|
||||
}
|
||||
|
||||
++src;
|
||||
++dst;
|
||||
}
|
||||
@@ -301,18 +302,17 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
int w,
|
||||
int h)
|
||||
{
|
||||
int x, y;
|
||||
src -= srcStride * (SubpelTaps / 2 - 1);
|
||||
src -= srcStride * ((SubpelTaps / 2) - 1);
|
||||
|
||||
for (x = 0; x < w; ++x)
|
||||
for (int x = 0; x < w; ++x)
|
||||
{
|
||||
int yQ4 = y0Q4;
|
||||
for (y = 0; y < h; ++y)
|
||||
for (int y = 0; y < h; ++y)
|
||||
{
|
||||
byte* srcY = &src[(yQ4 >> SubpelBits) * srcStride];
|
||||
ref Array8<short> yFilter = ref yFilters[yQ4 & SubpelMask];
|
||||
int k, sum = 0;
|
||||
for (k = 0; k < SubpelTaps; ++k)
|
||||
int sum = 0;
|
||||
for (int k = 0; k < SubpelTaps; ++k)
|
||||
{
|
||||
sum += srcY[k * srcStride] * yFilter[k];
|
||||
}
|
||||
@@ -321,6 +321,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
dst[y * dstStride] + BitUtils.ClipPixel(BitUtils.RoundPowerOfTwo(sum, FilterBits)), 1);
|
||||
yQ4 += yStepQ4;
|
||||
}
|
||||
|
||||
++src;
|
||||
++dst;
|
||||
}
|
||||
@@ -420,15 +421,16 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
// ==> yStepQ4 = 64. Since w and h are at most 16, the temp buffer is still
|
||||
// big enough.
|
||||
byte* temp = stackalloc byte[64 * 135];
|
||||
int intermediateHeight = (((h - 1) * yStepQ4 + y0Q4) >> SubpelBits) + SubpelTaps;
|
||||
int intermediateHeight = ((((h - 1) * yStepQ4) + y0Q4) >> SubpelBits) + SubpelTaps;
|
||||
|
||||
Debug.Assert(w <= 64);
|
||||
Debug.Assert(h <= 64);
|
||||
Debug.Assert(yStepQ4 <= 32 || (yStepQ4 <= 64 && h <= 32));
|
||||
Debug.Assert(xStepQ4 <= 64);
|
||||
|
||||
ConvolveHoriz(src - srcStride * (SubpelTaps / 2 - 1), srcStride, temp, 64, filter, x0Q4, xStepQ4, w, intermediateHeight);
|
||||
ConvolveVert(temp + 64 * (SubpelTaps / 2 - 1), 64, dst, dstStride, filter, y0Q4, yStepQ4, w, h);
|
||||
ConvolveHoriz(src - (srcStride * ((SubpelTaps / 2) - 1)), srcStride, temp, 64, filter, x0Q4, xStepQ4, w,
|
||||
intermediateHeight);
|
||||
ConvolveVert(temp + (64 * ((SubpelTaps / 2) - 1)), 64, dst, dstStride, filter, y0Q4, yStepQ4, w, h);
|
||||
}
|
||||
|
||||
public static unsafe void Convolve8Avg(
|
||||
@@ -489,11 +491,9 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
int w,
|
||||
int h)
|
||||
{
|
||||
int x, y;
|
||||
|
||||
for (y = 0; y < h; ++y)
|
||||
for (int y = 0; y < h; ++y)
|
||||
{
|
||||
for (x = 0; x < w; ++x)
|
||||
for (int x = 0; x < w; ++x)
|
||||
{
|
||||
dst[x] = (byte)BitUtils.RoundPowerOfTwo(dst[x] + src[x], 1);
|
||||
}
|
||||
@@ -611,18 +611,17 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
int h,
|
||||
int bd)
|
||||
{
|
||||
int x, y;
|
||||
src -= SubpelTaps / 2 - 1;
|
||||
src -= (SubpelTaps / 2) - 1;
|
||||
|
||||
for (y = 0; y < h; ++y)
|
||||
for (int y = 0; y < h; ++y)
|
||||
{
|
||||
int xQ4 = x0Q4;
|
||||
for (x = 0; x < w; ++x)
|
||||
for (int x = 0; x < w; ++x)
|
||||
{
|
||||
ushort* srcX = &src[xQ4 >> SubpelBits];
|
||||
ref Array8<short> xFilter = ref xFilters[xQ4 & SubpelMask];
|
||||
int k, sum = 0;
|
||||
for (k = 0; k < SubpelTaps; ++k)
|
||||
int sum = 0;
|
||||
for (int k = 0; k < SubpelTaps; ++k)
|
||||
{
|
||||
sum += srcX[k] * xFilter[k];
|
||||
}
|
||||
@@ -630,6 +629,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
dst[x] = BitUtils.ClipPixelHighbd(BitUtils.RoundPowerOfTwo(sum, FilterBits), bd);
|
||||
xQ4 += xStepQ4;
|
||||
}
|
||||
|
||||
src += srcStride;
|
||||
dst += dstStride;
|
||||
}
|
||||
@@ -647,25 +647,26 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
int h,
|
||||
int bd)
|
||||
{
|
||||
int x, y;
|
||||
src -= SubpelTaps / 2 - 1;
|
||||
src -= (SubpelTaps / 2) - 1;
|
||||
|
||||
for (y = 0; y < h; ++y)
|
||||
for (int y = 0; y < h; ++y)
|
||||
{
|
||||
int xQ4 = x0Q4;
|
||||
for (x = 0; x < w; ++x)
|
||||
for (int x = 0; x < w; ++x)
|
||||
{
|
||||
ushort* srcX = &src[xQ4 >> SubpelBits];
|
||||
ref Array8<short> xFilter = ref xFilters[xQ4 & SubpelMask];
|
||||
int k, sum = 0;
|
||||
for (k = 0; k < SubpelTaps; ++k)
|
||||
int sum = 0;
|
||||
for (int k = 0; k < SubpelTaps; ++k)
|
||||
{
|
||||
sum += srcX[k] * xFilter[k];
|
||||
}
|
||||
|
||||
dst[x] = (ushort)BitUtils.RoundPowerOfTwo(dst[x] + BitUtils.ClipPixelHighbd(BitUtils.RoundPowerOfTwo(sum, FilterBits), bd), 1);
|
||||
dst[x] = (ushort)BitUtils.RoundPowerOfTwo(
|
||||
dst[x] + BitUtils.ClipPixelHighbd(BitUtils.RoundPowerOfTwo(sum, FilterBits), bd), 1);
|
||||
xQ4 += xStepQ4;
|
||||
}
|
||||
|
||||
src += srcStride;
|
||||
dst += dstStride;
|
||||
}
|
||||
@@ -683,18 +684,17 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
int h,
|
||||
int bd)
|
||||
{
|
||||
int x, y;
|
||||
src -= srcStride * (SubpelTaps / 2 - 1);
|
||||
src -= srcStride * ((SubpelTaps / 2) - 1);
|
||||
|
||||
for (x = 0; x < w; ++x)
|
||||
for (int x = 0; x < w; ++x)
|
||||
{
|
||||
int yQ4 = y0Q4;
|
||||
for (y = 0; y < h; ++y)
|
||||
for (int y = 0; y < h; ++y)
|
||||
{
|
||||
ushort* srcY = &src[(yQ4 >> SubpelBits) * srcStride];
|
||||
ref Array8<short> yFilter = ref yFilters[yQ4 & SubpelMask];
|
||||
int k, sum = 0;
|
||||
for (k = 0; k < SubpelTaps; ++k)
|
||||
int sum = 0;
|
||||
for (int k = 0; k < SubpelTaps; ++k)
|
||||
{
|
||||
sum += srcY[k * srcStride] * yFilter[k];
|
||||
}
|
||||
@@ -702,6 +702,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
dst[y * dstStride] = BitUtils.ClipPixelHighbd(BitUtils.RoundPowerOfTwo(sum, FilterBits), bd);
|
||||
yQ4 += yStepQ4;
|
||||
}
|
||||
|
||||
++src;
|
||||
++dst;
|
||||
}
|
||||
@@ -719,26 +720,27 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
int h,
|
||||
int bd)
|
||||
{
|
||||
int x, y;
|
||||
src -= srcStride * (SubpelTaps / 2 - 1);
|
||||
src -= srcStride * ((SubpelTaps / 2) - 1);
|
||||
|
||||
for (x = 0; x < w; ++x)
|
||||
for (int x = 0; x < w; ++x)
|
||||
{
|
||||
int yQ4 = y0Q4;
|
||||
for (y = 0; y < h; ++y)
|
||||
for (int y = 0; y < h; ++y)
|
||||
{
|
||||
ushort* srcY = &src[(yQ4 >> SubpelBits) * srcStride];
|
||||
ref Array8<short> yFilter = ref yFilters[yQ4 & SubpelMask];
|
||||
int k, sum = 0;
|
||||
for (k = 0; k < SubpelTaps; ++k)
|
||||
int sum = 0;
|
||||
for (int k = 0; k < SubpelTaps; ++k)
|
||||
{
|
||||
sum += srcY[k * srcStride] * yFilter[k];
|
||||
}
|
||||
|
||||
dst[y * dstStride] = (ushort)BitUtils.RoundPowerOfTwo(
|
||||
dst[y * dstStride] + BitUtils.ClipPixelHighbd(BitUtils.RoundPowerOfTwo(sum, FilterBits), bd), 1);
|
||||
dst[y * dstStride] + BitUtils.ClipPixelHighbd(BitUtils.RoundPowerOfTwo(sum, FilterBits), bd),
|
||||
1);
|
||||
yQ4 += yStepQ4;
|
||||
}
|
||||
|
||||
++src;
|
||||
++dst;
|
||||
}
|
||||
@@ -771,15 +773,17 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
// --Require an additional SubpelTaps rows for the 8-tap filter tails.
|
||||
// --((64 - 1) * 32 + 15) >> 4 + 8 = 135.
|
||||
ushort* temp = stackalloc ushort[64 * 135];
|
||||
int intermediateHeight = (((h - 1) * yStepQ4 + y0Q4) >> SubpelBits) + SubpelTaps;
|
||||
int intermediateHeight = ((((h - 1) * yStepQ4) + y0Q4) >> SubpelBits) + SubpelTaps;
|
||||
|
||||
Debug.Assert(w <= 64);
|
||||
Debug.Assert(h <= 64);
|
||||
Debug.Assert(yStepQ4 <= 32);
|
||||
Debug.Assert(xStepQ4 <= 32);
|
||||
|
||||
HighbdConvolveHoriz(src - srcStride * (SubpelTaps / 2 - 1), srcStride, temp, 64, filter, x0Q4, xStepQ4, w, intermediateHeight, bd);
|
||||
HighbdConvolveVert(temp + 64 * (SubpelTaps / 2 - 1), 64, dst, dstStride, filter, y0Q4, yStepQ4, w, h, bd);
|
||||
HighbdConvolveHoriz(src - (srcStride * ((SubpelTaps / 2) - 1)), srcStride, temp, 64, filter, x0Q4, xStepQ4,
|
||||
w, intermediateHeight, bd);
|
||||
HighbdConvolveVert(temp + (64 * ((SubpelTaps / 2) - 1)), 64, dst, dstStride, filter, y0Q4, yStepQ4, w, h,
|
||||
bd);
|
||||
}
|
||||
|
||||
public static unsafe void HighbdConvolve8Horiz(
|
||||
@@ -928,11 +932,9 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
int h,
|
||||
int bd)
|
||||
{
|
||||
int x, y;
|
||||
|
||||
for (y = 0; y < h; ++y)
|
||||
for (int y = 0; y < h; ++y)
|
||||
{
|
||||
for (x = 0; x < w; ++x)
|
||||
for (int x = 0; x < w; ++x)
|
||||
{
|
||||
dst[x] = (ushort)BitUtils.RoundPowerOfTwo(dst[x] + src[x], 1);
|
||||
}
|
||||
@@ -942,4 +944,4 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -9,4 +9,4 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
public const int SubpelShifts = 1 << SubpelBits;
|
||||
public const int SubpelTaps = 8;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
using Ryujinx.Graphics.Nvdec.Vp9.Common;
|
||||
using Ryujinx.Graphics.Nvdec.Vp9.Common;
|
||||
|
||||
namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
{
|
||||
@@ -6,22 +6,22 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
{
|
||||
private static unsafe ref byte Dst(byte* dst, int stride, int x, int y)
|
||||
{
|
||||
return ref dst[x + y * stride];
|
||||
return ref dst[x + (y * stride)];
|
||||
}
|
||||
|
||||
private static unsafe ref ushort Dst(ushort* dst, int stride, int x, int y)
|
||||
{
|
||||
return ref dst[x + y * stride];
|
||||
return ref dst[x + (y * stride)];
|
||||
}
|
||||
|
||||
private static byte Avg3(byte a, byte b, byte c)
|
||||
{
|
||||
return (byte)((a + 2 * b + c + 2) >> 2);
|
||||
return (byte)((a + (2 * b) + c + 2) >> 2);
|
||||
}
|
||||
|
||||
private static ushort Avg3(ushort a, ushort b, ushort c)
|
||||
{
|
||||
return (ushort)((a + 2 * b + c + 2) >> 2);
|
||||
return (ushort)((a + (2 * b) + c + 2) >> 2);
|
||||
}
|
||||
|
||||
private static byte Avg2(byte a, byte b)
|
||||
@@ -51,9 +51,8 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
|
||||
private static unsafe void D207Predictor(byte* dst, int stride, int bs, byte* above, byte* left)
|
||||
{
|
||||
int r, c;
|
||||
// First column
|
||||
for (r = 0; r < bs - 1; ++r)
|
||||
for (int r = 0; r < bs - 1; ++r)
|
||||
{
|
||||
dst[r * stride] = Avg2(left[r], left[r + 1]);
|
||||
}
|
||||
@@ -62,7 +61,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
dst++;
|
||||
|
||||
// Second column
|
||||
for (r = 0; r < bs - 2; ++r)
|
||||
for (int r = 0; r < bs - 2; ++r)
|
||||
{
|
||||
dst[r * stride] = Avg3(left[r], left[r + 1], left[r + 2]);
|
||||
}
|
||||
@@ -72,16 +71,16 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
dst++;
|
||||
|
||||
// Rest of last row
|
||||
for (c = 0; c < bs - 2; ++c)
|
||||
for (int c = 0; c < bs - 2; ++c)
|
||||
{
|
||||
dst[(bs - 1) * stride + c] = left[bs - 1];
|
||||
dst[((bs - 1) * stride) + c] = left[bs - 1];
|
||||
}
|
||||
|
||||
for (r = bs - 2; r >= 0; --r)
|
||||
for (int r = bs - 2; r >= 0; --r)
|
||||
{
|
||||
for (c = 0; c < bs - 2; ++c)
|
||||
for (int c = 0; c < bs - 2; ++c)
|
||||
{
|
||||
dst[r * stride + c] = dst[(r + 1) * stride + c - 2];
|
||||
dst[(r * stride) + c] = dst[((r + 1) * stride) + c - 2];
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -103,19 +102,18 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
|
||||
private static unsafe void D63Predictor(byte* dst, int stride, int bs, byte* above, byte* left)
|
||||
{
|
||||
int r, c;
|
||||
int size;
|
||||
for (c = 0; c < bs; ++c)
|
||||
for (int c = 0; c < bs; ++c)
|
||||
{
|
||||
dst[c] = Avg2(above[c], above[c + 1]);
|
||||
dst[stride + c] = Avg3(above[c], above[c + 1], above[c + 2]);
|
||||
}
|
||||
for (r = 2, size = bs - 2; r < bs; r += 2, --size)
|
||||
|
||||
for (int r = 2, size = bs - 2; r < bs; r += 2, --size)
|
||||
{
|
||||
MemoryUtil.Copy(dst + (r + 0) * stride, dst + (r >> 1), size);
|
||||
MemoryUtil.Fill(dst + (r + 0) * stride + size, above[bs - 1], bs - size);
|
||||
MemoryUtil.Copy(dst + (r + 1) * stride, dst + stride + (r >> 1), size);
|
||||
MemoryUtil.Fill(dst + (r + 1) * stride + size, above[bs - 1], bs - size);
|
||||
MemoryUtil.Copy(dst + ((r + 0) * stride), dst + (r >> 1), size);
|
||||
MemoryUtil.Fill(dst + ((r + 0) * stride) + size, above[bs - 1], bs - size);
|
||||
MemoryUtil.Copy(dst + ((r + 1) * stride), dst + stride + (r >> 1), size);
|
||||
MemoryUtil.Fill(dst + ((r + 1) * stride) + size, above[bs - 1], bs - size);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -138,15 +136,15 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
{
|
||||
byte aboveRight = above[bs - 1];
|
||||
byte* dstRow0 = dst;
|
||||
int x, size;
|
||||
|
||||
for (x = 0; x < bs - 1; ++x)
|
||||
for (int x = 0; x < bs - 1; ++x)
|
||||
{
|
||||
dst[x] = Avg3(above[x], above[x + 1], above[x + 2]);
|
||||
}
|
||||
|
||||
dst[bs - 1] = aboveRight;
|
||||
dst += stride;
|
||||
for (x = 1, size = bs - 2; x < bs; ++x, --size)
|
||||
for (int x = 1, size = bs - 2; x < bs; ++x, --size)
|
||||
{
|
||||
MemoryUtil.Copy(dst, dstRow0 + x, size);
|
||||
MemoryUtil.Fill(dst + size, aboveRight, x + 1);
|
||||
@@ -171,10 +169,8 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
|
||||
private static unsafe void D117Predictor(byte* dst, int stride, int bs, byte* above, byte* left)
|
||||
{
|
||||
int r, c;
|
||||
|
||||
// First row
|
||||
for (c = 0; c < bs; c++)
|
||||
for (int c = 0; c < bs; c++)
|
||||
{
|
||||
dst[c] = Avg2(above[c - 1], above[c]);
|
||||
}
|
||||
@@ -183,7 +179,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
|
||||
// Second row
|
||||
dst[0] = Avg3(left[0], above[-1], above[0]);
|
||||
for (c = 1; c < bs; c++)
|
||||
for (int c = 1; c < bs; c++)
|
||||
{
|
||||
dst[c] = Avg3(above[c - 2], above[c - 1], above[c]);
|
||||
}
|
||||
@@ -192,17 +188,17 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
|
||||
// The rest of first col
|
||||
dst[0] = Avg3(above[-1], left[0], left[1]);
|
||||
for (r = 3; r < bs; ++r)
|
||||
for (int r = 3; r < bs; ++r)
|
||||
{
|
||||
dst[(r - 2) * stride] = Avg3(left[r - 3], left[r - 2], left[r - 1]);
|
||||
}
|
||||
|
||||
// The rest of the block
|
||||
for (r = 2; r < bs; ++r)
|
||||
for (int r = 2; r < bs; ++r)
|
||||
{
|
||||
for (c = 1; c < bs; c++)
|
||||
for (int c = 1; c < bs; c++)
|
||||
{
|
||||
dst[c] = dst[-2 * stride + c - 1];
|
||||
dst[c] = dst[(-2 * stride) + c - 1];
|
||||
}
|
||||
|
||||
dst += stride;
|
||||
@@ -226,26 +222,26 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
|
||||
private static unsafe void D135Predictor(byte* dst, int stride, int bs, byte* above, byte* left)
|
||||
{
|
||||
int i;
|
||||
byte* border = stackalloc byte[32 + 32 - 1]; // outer border from bottom-left to top-right
|
||||
|
||||
// Dst(dst, stride, bs, bs - 2)[0], i.e., border starting at bottom-left
|
||||
for (i = 0; i < bs - 2; ++i)
|
||||
for (int i = 0; i < bs - 2; ++i)
|
||||
{
|
||||
border[i] = Avg3(left[bs - 3 - i], left[bs - 2 - i], left[bs - 1 - i]);
|
||||
}
|
||||
|
||||
border[bs - 2] = Avg3(above[-1], left[0], left[1]);
|
||||
border[bs - 1] = Avg3(left[0], above[-1], above[0]);
|
||||
border[bs - 0] = Avg3(above[-1], above[0], above[1]);
|
||||
// dst[0][2, size), i.e., remaining top border ascending
|
||||
for (i = 0; i < bs - 2; ++i)
|
||||
for (int i = 0; i < bs - 2; ++i)
|
||||
{
|
||||
border[bs + 1 + i] = Avg3(above[i], above[i + 1], above[i + 2]);
|
||||
}
|
||||
|
||||
for (i = 0; i < bs; ++i)
|
||||
for (int i = 0; i < bs; ++i)
|
||||
{
|
||||
MemoryUtil.Copy(dst + i * stride, border + bs - 1 - i, bs);
|
||||
MemoryUtil.Copy(dst + (i * stride), border + bs - 1 - i, bs);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -266,9 +262,8 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
|
||||
private static unsafe void D153Predictor(byte* dst, int stride, int bs, byte* above, byte* left)
|
||||
{
|
||||
int r, c;
|
||||
dst[0] = Avg2(above[-1], left[0]);
|
||||
for (r = 1; r < bs; r++)
|
||||
for (int r = 1; r < bs; r++)
|
||||
{
|
||||
dst[r * stride] = Avg2(left[r - 1], left[r]);
|
||||
}
|
||||
@@ -277,23 +272,23 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
|
||||
dst[0] = Avg3(left[0], above[-1], above[0]);
|
||||
dst[stride] = Avg3(above[-1], left[0], left[1]);
|
||||
for (r = 2; r < bs; r++)
|
||||
for (int r = 2; r < bs; r++)
|
||||
{
|
||||
dst[r * stride] = Avg3(left[r - 2], left[r - 1], left[r]);
|
||||
}
|
||||
|
||||
dst++;
|
||||
|
||||
for (c = 0; c < bs - 2; c++)
|
||||
for (int c = 0; c < bs - 2; c++)
|
||||
{
|
||||
dst[c] = Avg3(above[c - 1], above[c], above[c + 1]);
|
||||
}
|
||||
|
||||
dst += stride;
|
||||
|
||||
for (r = 1; r < bs; ++r)
|
||||
for (int r = 1; r < bs; ++r)
|
||||
{
|
||||
for (c = 0; c < bs - 2; c++)
|
||||
for (int c = 0; c < bs - 2; c++)
|
||||
{
|
||||
dst[c] = dst[-stride + c - 2];
|
||||
}
|
||||
@@ -324,9 +319,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
|
||||
private static unsafe void VPredictor(byte* dst, int stride, int bs, byte* above, byte* left)
|
||||
{
|
||||
int r;
|
||||
|
||||
for (r = 0; r < bs; r++)
|
||||
for (int r = 0; r < bs; r++)
|
||||
{
|
||||
MemoryUtil.Copy(dst, above, bs);
|
||||
dst += stride;
|
||||
@@ -355,43 +348,40 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
|
||||
private static unsafe void HPredictor(byte* dst, int stride, int bs, byte* above, byte* left)
|
||||
{
|
||||
int r;
|
||||
|
||||
for (r = 0; r < bs; r++)
|
||||
for (int r = 0; r < bs; r++)
|
||||
{
|
||||
MemoryUtil.Fill(dst, left[r], bs);
|
||||
dst += stride;
|
||||
}
|
||||
}
|
||||
|
||||
public static unsafe void TMPredictor4x4(byte* dst, int stride, byte* above, byte* left)
|
||||
public static unsafe void TmPredictor4x4(byte* dst, int stride, byte* above, byte* left)
|
||||
{
|
||||
TMPredictor(dst, stride, 4, above, left);
|
||||
TmPredictor(dst, stride, 4, above, left);
|
||||
}
|
||||
|
||||
public static unsafe void TMPredictor8x8(byte* dst, int stride, byte* above, byte* left)
|
||||
public static unsafe void TmPredictor8x8(byte* dst, int stride, byte* above, byte* left)
|
||||
{
|
||||
TMPredictor(dst, stride, 8, above, left);
|
||||
TmPredictor(dst, stride, 8, above, left);
|
||||
}
|
||||
|
||||
public static unsafe void TMPredictor16x16(byte* dst, int stride, byte* above, byte* left)
|
||||
public static unsafe void TmPredictor16x16(byte* dst, int stride, byte* above, byte* left)
|
||||
{
|
||||
TMPredictor(dst, stride, 16, above, left);
|
||||
TmPredictor(dst, stride, 16, above, left);
|
||||
}
|
||||
|
||||
public static unsafe void TMPredictor32x32(byte* dst, int stride, byte* above, byte* left)
|
||||
public static unsafe void TmPredictor32x32(byte* dst, int stride, byte* above, byte* left)
|
||||
{
|
||||
TMPredictor(dst, stride, 32, above, left);
|
||||
TmPredictor(dst, stride, 32, above, left);
|
||||
}
|
||||
|
||||
private static unsafe void TMPredictor(byte* dst, int stride, int bs, byte* above, byte* left)
|
||||
private static unsafe void TmPredictor(byte* dst, int stride, int bs, byte* above, byte* left)
|
||||
{
|
||||
int r, c;
|
||||
int yTopLeft = above[-1];
|
||||
|
||||
for (r = 0; r < bs; r++)
|
||||
for (int r = 0; r < bs; r++)
|
||||
{
|
||||
for (c = 0; c < bs; c++)
|
||||
for (int c = 0; c < bs; c++)
|
||||
{
|
||||
dst[c] = BitUtils.ClipPixel(left[r] + above[c] - yTopLeft);
|
||||
}
|
||||
@@ -422,9 +412,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
|
||||
private static unsafe void Dc128Predictor(byte* dst, int stride, int bs, byte* above, byte* left)
|
||||
{
|
||||
int r;
|
||||
|
||||
for (r = 0; r < bs; r++)
|
||||
for (int r = 0; r < bs; r++)
|
||||
{
|
||||
MemoryUtil.Fill(dst, (byte)128, bs);
|
||||
dst += stride;
|
||||
@@ -453,16 +441,16 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
|
||||
private static unsafe void DcLeftPredictor(byte* dst, int stride, int bs, byte* above, byte* left)
|
||||
{
|
||||
int i, r, expectedDc, sum = 0;
|
||||
int expectedDc, sum = 0;
|
||||
|
||||
for (i = 0; i < bs; i++)
|
||||
for (int i = 0; i < bs; i++)
|
||||
{
|
||||
sum += left[i];
|
||||
}
|
||||
|
||||
expectedDc = (sum + (bs >> 1)) / bs;
|
||||
|
||||
for (r = 0; r < bs; r++)
|
||||
for (int r = 0; r < bs; r++)
|
||||
{
|
||||
MemoryUtil.Fill(dst, (byte)expectedDc, bs);
|
||||
dst += stride;
|
||||
@@ -491,16 +479,16 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
|
||||
private static unsafe void DcTopPredictor(byte* dst, int stride, int bs, byte* above, byte* left)
|
||||
{
|
||||
int i, r, expectedDc, sum = 0;
|
||||
int expectedDc, sum = 0;
|
||||
|
||||
for (i = 0; i < bs; i++)
|
||||
for (int i = 0; i < bs; i++)
|
||||
{
|
||||
sum += above[i];
|
||||
}
|
||||
|
||||
expectedDc = (sum + (bs >> 1)) / bs;
|
||||
|
||||
for (r = 0; r < bs; r++)
|
||||
for (int r = 0; r < bs; r++)
|
||||
{
|
||||
MemoryUtil.Fill(dst, (byte)expectedDc, bs);
|
||||
dst += stride;
|
||||
@@ -529,10 +517,10 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
|
||||
private static unsafe void DcPredictor(byte* dst, int stride, int bs, byte* above, byte* left)
|
||||
{
|
||||
int i, r, expectedDc, sum = 0;
|
||||
int expectedDc, sum = 0;
|
||||
int count = 2 * bs;
|
||||
|
||||
for (i = 0; i < bs; i++)
|
||||
for (int i = 0; i < bs; i++)
|
||||
{
|
||||
sum += above[i];
|
||||
sum += left[i];
|
||||
@@ -540,7 +528,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
|
||||
expectedDc = (sum + (count >> 1)) / count;
|
||||
|
||||
for (r = 0; r < bs; r++)
|
||||
for (int r = 0; r < bs; r++)
|
||||
{
|
||||
MemoryUtil.Fill(dst, (byte)expectedDc, bs);
|
||||
dst += stride;
|
||||
@@ -555,10 +543,10 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
byte k = left[2];
|
||||
byte l = left[3];
|
||||
|
||||
MemoryUtil.Fill(dst + stride * 0, Avg3(h, I, j), 4);
|
||||
MemoryUtil.Fill(dst + stride * 1, Avg3(I, j, k), 4);
|
||||
MemoryUtil.Fill(dst + stride * 2, Avg3(j, k, l), 4);
|
||||
MemoryUtil.Fill(dst + stride * 3, Avg3(k, l, l), 4);
|
||||
MemoryUtil.Fill(dst + (stride * 0), Avg3(h, I, j), 4);
|
||||
MemoryUtil.Fill(dst + (stride * 1), Avg3(I, j, k), 4);
|
||||
MemoryUtil.Fill(dst + (stride * 2), Avg3(j, k, l), 4);
|
||||
MemoryUtil.Fill(dst + (stride * 3), Avg3(k, l, l), 4);
|
||||
}
|
||||
|
||||
public static unsafe void VePredictor4x4(byte* dst, int stride, byte* above, byte* left)
|
||||
@@ -574,9 +562,9 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
dst[1] = Avg3(I, j, k);
|
||||
dst[2] = Avg3(j, k, l);
|
||||
dst[3] = Avg3(k, l, m);
|
||||
MemoryUtil.Copy(dst + stride * 1, dst, 4);
|
||||
MemoryUtil.Copy(dst + stride * 2, dst, 4);
|
||||
MemoryUtil.Copy(dst + stride * 3, dst, 4);
|
||||
MemoryUtil.Copy(dst + (stride * 1), dst, 4);
|
||||
MemoryUtil.Copy(dst + (stride * 2), dst, 4);
|
||||
MemoryUtil.Copy(dst + (stride * 3), dst, 4);
|
||||
}
|
||||
|
||||
public static unsafe void D207Predictor4x4(byte* dst, int stride, byte* above, byte* left)
|
||||
@@ -591,7 +579,8 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
Dst(dst, stride, 1, 0) = Avg3(I, j, k);
|
||||
Dst(dst, stride, 3, 0) = Dst(dst, stride, 1, 1) = Avg3(j, k, l);
|
||||
Dst(dst, stride, 3, 1) = Dst(dst, stride, 1, 2) = Avg3(k, l, l);
|
||||
Dst(dst, stride, 3, 2) = Dst(dst, stride, 2, 2) = Dst(dst, stride, 0, 3) = Dst(dst, stride, 1, 3) = Dst(dst, stride, 2, 3) = Dst(dst, stride, 3, 3) = l;
|
||||
Dst(dst, stride, 3, 2) = Dst(dst, stride, 2, 2) = Dst(dst, stride, 0, 3) =
|
||||
Dst(dst, stride, 1, 3) = Dst(dst, stride, 2, 3) = Dst(dst, stride, 3, 3) = l;
|
||||
}
|
||||
|
||||
public static unsafe void D63Predictor4x4(byte* dst, int stride, byte* above, byte* left)
|
||||
@@ -616,7 +605,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
Dst(dst, stride, 3, 3) = Avg3(e, f, g); // Differs from vp8
|
||||
}
|
||||
|
||||
public static unsafe void D63ePredictor4x4(byte* dst, int stride, byte* above, byte* left)
|
||||
public static unsafe void D63EPredictor4x4(byte* dst, int stride, byte* above, byte* left)
|
||||
{
|
||||
byte a = above[0];
|
||||
byte b = above[1];
|
||||
@@ -652,13 +641,14 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
Dst(dst, stride, 0, 0) = Avg3(a, b, c);
|
||||
Dst(dst, stride, 1, 0) = Dst(dst, stride, 0, 1) = Avg3(b, c, d);
|
||||
Dst(dst, stride, 2, 0) = Dst(dst, stride, 1, 1) = Dst(dst, stride, 0, 2) = Avg3(c, d, e);
|
||||
Dst(dst, stride, 3, 0) = Dst(dst, stride, 2, 1) = Dst(dst, stride, 1, 2) = Dst(dst, stride, 0, 3) = Avg3(d, e, f);
|
||||
Dst(dst, stride, 3, 0) =
|
||||
Dst(dst, stride, 2, 1) = Dst(dst, stride, 1, 2) = Dst(dst, stride, 0, 3) = Avg3(d, e, f);
|
||||
Dst(dst, stride, 3, 1) = Dst(dst, stride, 2, 2) = Dst(dst, stride, 1, 3) = Avg3(e, f, g);
|
||||
Dst(dst, stride, 3, 2) = Dst(dst, stride, 2, 3) = Avg3(f, g, h);
|
||||
Dst(dst, stride, 3, 3) = h; // differs from vp8
|
||||
}
|
||||
|
||||
public static unsafe void D45ePredictor4x4(byte* dst, int stride, byte* above, byte* left)
|
||||
public static unsafe void D45EPredictor4x4(byte* dst, int stride, byte* above, byte* left)
|
||||
{
|
||||
byte a = above[0];
|
||||
byte b = above[1];
|
||||
@@ -671,7 +661,8 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
Dst(dst, stride, 0, 0) = Avg3(a, b, c);
|
||||
Dst(dst, stride, 1, 0) = Dst(dst, stride, 0, 1) = Avg3(b, c, d);
|
||||
Dst(dst, stride, 2, 0) = Dst(dst, stride, 1, 1) = Dst(dst, stride, 0, 2) = Avg3(c, d, e);
|
||||
Dst(dst, stride, 3, 0) = Dst(dst, stride, 2, 1) = Dst(dst, stride, 1, 2) = Dst(dst, stride, 0, 3) = Avg3(d, e, f);
|
||||
Dst(dst, stride, 3, 0) =
|
||||
Dst(dst, stride, 2, 1) = Dst(dst, stride, 1, 2) = Dst(dst, stride, 0, 3) = Avg3(d, e, f);
|
||||
Dst(dst, stride, 3, 1) = Dst(dst, stride, 2, 2) = Dst(dst, stride, 1, 3) = Avg3(e, f, g);
|
||||
Dst(dst, stride, 3, 2) = Dst(dst, stride, 2, 3) = Avg3(f, g, h);
|
||||
Dst(dst, stride, 3, 3) = Avg3(g, h, h);
|
||||
@@ -714,7 +705,8 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
Dst(dst, stride, 0, 3) = Avg3(j, k, l);
|
||||
Dst(dst, stride, 1, 3) = Dst(dst, stride, 0, 2) = Avg3(I, j, k);
|
||||
Dst(dst, stride, 2, 3) = Dst(dst, stride, 1, 2) = Dst(dst, stride, 0, 1) = Avg3(x, I, j);
|
||||
Dst(dst, stride, 3, 3) = Dst(dst, stride, 2, 2) = Dst(dst, stride, 1, 1) = Dst(dst, stride, 0, 0) = Avg3(a, x, I);
|
||||
Dst(dst, stride, 3, 3) =
|
||||
Dst(dst, stride, 2, 2) = Dst(dst, stride, 1, 1) = Dst(dst, stride, 0, 0) = Avg3(a, x, I);
|
||||
Dst(dst, stride, 3, 2) = Dst(dst, stride, 2, 1) = Dst(dst, stride, 1, 0) = Avg3(b, a, x);
|
||||
Dst(dst, stride, 3, 1) = Dst(dst, stride, 2, 0) = Avg3(c, b, a);
|
||||
Dst(dst, stride, 3, 0) = Avg3(d, c, b);
|
||||
@@ -758,38 +750,39 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
HighbdD207Predictor(dst, stride, 32, above, left, bd);
|
||||
}
|
||||
|
||||
private static unsafe void HighbdD207Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd)
|
||||
private static unsafe void HighbdD207Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left,
|
||||
int bd)
|
||||
{
|
||||
int r, c;
|
||||
|
||||
// First column.
|
||||
for (r = 0; r < bs - 1; ++r)
|
||||
for (int r = 0; r < bs - 1; ++r)
|
||||
{
|
||||
dst[r * stride] = Avg2(left[r], left[r + 1]);
|
||||
}
|
||||
|
||||
dst[(bs - 1) * stride] = left[bs - 1];
|
||||
dst++;
|
||||
|
||||
// Second column.
|
||||
for (r = 0; r < bs - 2; ++r)
|
||||
for (int r = 0; r < bs - 2; ++r)
|
||||
{
|
||||
dst[r * stride] = Avg3(left[r], left[r + 1], left[r + 2]);
|
||||
}
|
||||
|
||||
dst[(bs - 2) * stride] = Avg3(left[bs - 2], left[bs - 1], left[bs - 1]);
|
||||
dst[(bs - 1) * stride] = left[bs - 1];
|
||||
dst++;
|
||||
|
||||
// Rest of last row.
|
||||
for (c = 0; c < bs - 2; ++c)
|
||||
for (int c = 0; c < bs - 2; ++c)
|
||||
{
|
||||
dst[(bs - 1) * stride + c] = left[bs - 1];
|
||||
dst[((bs - 1) * stride) + c] = left[bs - 1];
|
||||
}
|
||||
|
||||
for (r = bs - 2; r >= 0; --r)
|
||||
for (int r = bs - 2; r >= 0; --r)
|
||||
{
|
||||
for (c = 0; c < bs - 2; ++c)
|
||||
for (int c = 0; c < bs - 2; ++c)
|
||||
{
|
||||
dst[r * stride + c] = dst[(r + 1) * stride + c - 2];
|
||||
dst[(r * stride) + c] = dst[((r + 1) * stride) + c - 2];
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -809,21 +802,21 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
HighbdD63Predictor(dst, stride, 32, above, left, bd);
|
||||
}
|
||||
|
||||
private static unsafe void HighbdD63Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd)
|
||||
private static unsafe void HighbdD63Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left,
|
||||
int bd)
|
||||
{
|
||||
int r, c;
|
||||
int size;
|
||||
for (c = 0; c < bs; ++c)
|
||||
for (int c = 0; c < bs; ++c)
|
||||
{
|
||||
dst[c] = Avg2(above[c], above[c + 1]);
|
||||
dst[stride + c] = Avg3(above[c], above[c + 1], above[c + 2]);
|
||||
}
|
||||
for (r = 2, size = bs - 2; r < bs; r += 2, --size)
|
||||
|
||||
for (int r = 2, size = bs - 2; r < bs; r += 2, --size)
|
||||
{
|
||||
MemoryUtil.Copy(dst + (r + 0) * stride, dst + (r >> 1), size);
|
||||
MemoryUtil.Fill(dst + (r + 0) * stride + size, above[bs - 1], bs - size);
|
||||
MemoryUtil.Copy(dst + (r + 1) * stride, dst + stride + (r >> 1), size);
|
||||
MemoryUtil.Fill(dst + (r + 1) * stride + size, above[bs - 1], bs - size);
|
||||
MemoryUtil.Copy(dst + ((r + 0) * stride), dst + (r >> 1), size);
|
||||
MemoryUtil.Fill(dst + ((r + 0) * stride) + size, above[bs - 1], bs - size);
|
||||
MemoryUtil.Copy(dst + ((r + 1) * stride), dst + stride + (r >> 1), size);
|
||||
MemoryUtil.Fill(dst + ((r + 1) * stride) + size, above[bs - 1], bs - size);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -842,19 +835,20 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
HighbdD45Predictor(dst, stride, 32, above, left, bd);
|
||||
}
|
||||
|
||||
private static unsafe void HighbdD45Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd)
|
||||
private static unsafe void HighbdD45Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left,
|
||||
int bd)
|
||||
{
|
||||
ushort aboveRight = above[bs - 1];
|
||||
ushort* dstRow0 = dst;
|
||||
int x, size;
|
||||
|
||||
for (x = 0; x < bs - 1; ++x)
|
||||
for (int x = 0; x < bs - 1; ++x)
|
||||
{
|
||||
dst[x] = Avg3(above[x], above[x + 1], above[x + 2]);
|
||||
}
|
||||
|
||||
dst[bs - 1] = aboveRight;
|
||||
dst += stride;
|
||||
for (x = 1, size = bs - 2; x < bs; ++x, --size)
|
||||
for (int x = 1, size = bs - 2; x < bs; ++x, --size)
|
||||
{
|
||||
MemoryUtil.Copy(dst, dstRow0 + x, size);
|
||||
MemoryUtil.Fill(dst + size, aboveRight, x + 1);
|
||||
@@ -877,12 +871,11 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
HighbdD117Predictor(dst, stride, 32, above, left, bd);
|
||||
}
|
||||
|
||||
private static unsafe void HighbdD117Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd)
|
||||
private static unsafe void HighbdD117Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left,
|
||||
int bd)
|
||||
{
|
||||
int r, c;
|
||||
|
||||
// First row
|
||||
for (c = 0; c < bs; c++)
|
||||
for (int c = 0; c < bs; c++)
|
||||
{
|
||||
dst[c] = Avg2(above[c - 1], above[c]);
|
||||
}
|
||||
@@ -891,7 +884,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
|
||||
// Second row
|
||||
dst[0] = Avg3(left[0], above[-1], above[0]);
|
||||
for (c = 1; c < bs; c++)
|
||||
for (int c = 1; c < bs; c++)
|
||||
{
|
||||
dst[c] = Avg3(above[c - 2], above[c - 1], above[c]);
|
||||
}
|
||||
@@ -900,17 +893,17 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
|
||||
// The rest of first col
|
||||
dst[0] = Avg3(above[-1], left[0], left[1]);
|
||||
for (r = 3; r < bs; ++r)
|
||||
for (int r = 3; r < bs; ++r)
|
||||
{
|
||||
dst[(r - 2) * stride] = Avg3(left[r - 3], left[r - 2], left[r - 1]);
|
||||
}
|
||||
|
||||
// The rest of the block
|
||||
for (r = 2; r < bs; ++r)
|
||||
for (int r = 2; r < bs; ++r)
|
||||
{
|
||||
for (c = 1; c < bs; c++)
|
||||
for (int c = 1; c < bs; c++)
|
||||
{
|
||||
dst[c] = dst[-2 * stride + c - 1];
|
||||
dst[c] = dst[(-2 * stride) + c - 1];
|
||||
}
|
||||
|
||||
dst += stride;
|
||||
@@ -932,28 +925,29 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
HighbdD135Predictor(dst, stride, 32, above, left, bd);
|
||||
}
|
||||
|
||||
private static unsafe void HighbdD135Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd)
|
||||
private static unsafe void HighbdD135Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left,
|
||||
int bd)
|
||||
{
|
||||
int i;
|
||||
ushort* border = stackalloc ushort[32 + 32 - 1]; // Outer border from bottom-left to top-right
|
||||
|
||||
// Dst(dst, stride, bs, bs - 2)[0], i.e., border starting at bottom-left
|
||||
for (i = 0; i < bs - 2; ++i)
|
||||
for (int i = 0; i < bs - 2; ++i)
|
||||
{
|
||||
border[i] = Avg3(left[bs - 3 - i], left[bs - 2 - i], left[bs - 1 - i]);
|
||||
}
|
||||
|
||||
border[bs - 2] = Avg3(above[-1], left[0], left[1]);
|
||||
border[bs - 1] = Avg3(left[0], above[-1], above[0]);
|
||||
border[bs - 0] = Avg3(above[-1], above[0], above[1]);
|
||||
// dst[0][2, size), i.e., remaining top border ascending
|
||||
for (i = 0; i < bs - 2; ++i)
|
||||
for (int i = 0; i < bs - 2; ++i)
|
||||
{
|
||||
border[bs + 1 + i] = Avg3(above[i], above[i + 1], above[i + 2]);
|
||||
}
|
||||
|
||||
for (i = 0; i < bs; ++i)
|
||||
for (int i = 0; i < bs; ++i)
|
||||
{
|
||||
MemoryUtil.Copy(dst + i * stride, border + bs - 1 - i, bs);
|
||||
MemoryUtil.Copy(dst + (i * stride), border + bs - 1 - i, bs);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -972,11 +966,11 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
HighbdD153Predictor(dst, stride, 32, above, left, bd);
|
||||
}
|
||||
|
||||
private static unsafe void HighbdD153Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd)
|
||||
private static unsafe void HighbdD153Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left,
|
||||
int bd)
|
||||
{
|
||||
int r, c;
|
||||
dst[0] = Avg2(above[-1], left[0]);
|
||||
for (r = 1; r < bs; r++)
|
||||
for (int r = 1; r < bs; r++)
|
||||
{
|
||||
dst[r * stride] = Avg2(left[r - 1], left[r]);
|
||||
}
|
||||
@@ -985,23 +979,23 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
|
||||
dst[0] = Avg3(left[0], above[-1], above[0]);
|
||||
dst[stride] = Avg3(above[-1], left[0], left[1]);
|
||||
for (r = 2; r < bs; r++)
|
||||
for (int r = 2; r < bs; r++)
|
||||
{
|
||||
dst[r * stride] = Avg3(left[r - 2], left[r - 1], left[r]);
|
||||
}
|
||||
|
||||
dst++;
|
||||
|
||||
for (c = 0; c < bs - 2; c++)
|
||||
for (int c = 0; c < bs - 2; c++)
|
||||
{
|
||||
dst[c] = Avg3(above[c - 1], above[c], above[c + 1]);
|
||||
}
|
||||
|
||||
dst += stride;
|
||||
|
||||
for (r = 1; r < bs; ++r)
|
||||
for (int r = 1; r < bs; ++r)
|
||||
{
|
||||
for (c = 0; c < bs - 2; c++)
|
||||
for (int c = 0; c < bs - 2; c++)
|
||||
{
|
||||
dst[c] = dst[-stride + c - 2];
|
||||
}
|
||||
@@ -1030,10 +1024,10 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
HighbdVPredictor(dst, stride, 32, above, left, bd);
|
||||
}
|
||||
|
||||
private static unsafe void HighbdVPredictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd)
|
||||
private static unsafe void HighbdVPredictor(ushort* dst, int stride, int bs, ushort* above, ushort* left,
|
||||
int bd)
|
||||
{
|
||||
int r;
|
||||
for (r = 0; r < bs; r++)
|
||||
for (int r = 0; r < bs; r++)
|
||||
{
|
||||
MemoryUtil.Copy(dst, above, bs);
|
||||
dst += stride;
|
||||
@@ -1060,44 +1054,44 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
HighbdHPredictor(dst, stride, 32, above, left, bd);
|
||||
}
|
||||
|
||||
private static unsafe void HighbdHPredictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd)
|
||||
private static unsafe void HighbdHPredictor(ushort* dst, int stride, int bs, ushort* above, ushort* left,
|
||||
int bd)
|
||||
{
|
||||
int r;
|
||||
for (r = 0; r < bs; r++)
|
||||
for (int r = 0; r < bs; r++)
|
||||
{
|
||||
MemoryUtil.Fill(dst, left[r], bs);
|
||||
dst += stride;
|
||||
}
|
||||
}
|
||||
|
||||
public static unsafe void HighbdTMPredictor4x4(ushort* dst, int stride, ushort* above, ushort* left, int bd)
|
||||
public static unsafe void HighbdTmPredictor4x4(ushort* dst, int stride, ushort* above, ushort* left, int bd)
|
||||
{
|
||||
HighbdTMPredictor(dst, stride, 4, above, left, bd);
|
||||
HighbdTmPredictor(dst, stride, 4, above, left, bd);
|
||||
}
|
||||
|
||||
public static unsafe void HighbdTMPredictor8x8(ushort* dst, int stride, ushort* above, ushort* left, int bd)
|
||||
public static unsafe void HighbdTmPredictor8x8(ushort* dst, int stride, ushort* above, ushort* left, int bd)
|
||||
{
|
||||
HighbdTMPredictor(dst, stride, 8, above, left, bd);
|
||||
HighbdTmPredictor(dst, stride, 8, above, left, bd);
|
||||
}
|
||||
|
||||
public static unsafe void HighbdTMPredictor16x16(ushort* dst, int stride, ushort* above, ushort* left, int bd)
|
||||
public static unsafe void HighbdTmPredictor16x16(ushort* dst, int stride, ushort* above, ushort* left, int bd)
|
||||
{
|
||||
HighbdTMPredictor(dst, stride, 16, above, left, bd);
|
||||
HighbdTmPredictor(dst, stride, 16, above, left, bd);
|
||||
}
|
||||
|
||||
public static unsafe void HighbdTMPredictor32x32(ushort* dst, int stride, ushort* above, ushort* left, int bd)
|
||||
public static unsafe void HighbdTmPredictor32x32(ushort* dst, int stride, ushort* above, ushort* left, int bd)
|
||||
{
|
||||
HighbdTMPredictor(dst, stride, 32, above, left, bd);
|
||||
HighbdTmPredictor(dst, stride, 32, above, left, bd);
|
||||
}
|
||||
|
||||
private static unsafe void HighbdTMPredictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd)
|
||||
private static unsafe void HighbdTmPredictor(ushort* dst, int stride, int bs, ushort* above, ushort* left,
|
||||
int bd)
|
||||
{
|
||||
int r, c;
|
||||
int yTopLeft = above[-1];
|
||||
|
||||
for (r = 0; r < bs; r++)
|
||||
for (int r = 0; r < bs; r++)
|
||||
{
|
||||
for (c = 0; c < bs; c++)
|
||||
for (int c = 0; c < bs; c++)
|
||||
{
|
||||
dst[c] = BitUtils.ClipPixelHighbd(left[r] + above[c] - yTopLeft, bd);
|
||||
}
|
||||
@@ -1116,21 +1110,22 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
HighbdDc128Predictor(dst, stride, 8, above, left, bd);
|
||||
}
|
||||
|
||||
public static unsafe void HighbdDc128Predictor16x16(ushort* dst, int stride, ushort* above, ushort* left, int bd)
|
||||
public static unsafe void HighbdDc128Predictor16x16(ushort* dst, int stride, ushort* above, ushort* left,
|
||||
int bd)
|
||||
{
|
||||
HighbdDc128Predictor(dst, stride, 16, above, left, bd);
|
||||
}
|
||||
|
||||
public static unsafe void HighbdDc128Predictor32x32(ushort* dst, int stride, ushort* above, ushort* left, int bd)
|
||||
public static unsafe void HighbdDc128Predictor32x32(ushort* dst, int stride, ushort* above, ushort* left,
|
||||
int bd)
|
||||
{
|
||||
HighbdDc128Predictor(dst, stride, 32, above, left, bd);
|
||||
}
|
||||
|
||||
private static unsafe void HighbdDc128Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd)
|
||||
private static unsafe void HighbdDc128Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left,
|
||||
int bd)
|
||||
{
|
||||
int r;
|
||||
|
||||
for (r = 0; r < bs; r++)
|
||||
for (int r = 0; r < bs; r++)
|
||||
{
|
||||
MemoryUtil.Fill(dst, (ushort)(128 << (bd - 8)), bs);
|
||||
dst += stride;
|
||||
@@ -1147,28 +1142,31 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
HighbdDcLeftPredictor(dst, stride, 8, above, left, bd);
|
||||
}
|
||||
|
||||
public static unsafe void HighbdDcLeftPredictor16x16(ushort* dst, int stride, ushort* above, ushort* left, int bd)
|
||||
public static unsafe void HighbdDcLeftPredictor16x16(ushort* dst, int stride, ushort* above, ushort* left,
|
||||
int bd)
|
||||
{
|
||||
HighbdDcLeftPredictor(dst, stride, 16, above, left, bd);
|
||||
}
|
||||
|
||||
public static unsafe void HighbdDcLeftPredictor32x32(ushort* dst, int stride, ushort* above, ushort* left, int bd)
|
||||
public static unsafe void HighbdDcLeftPredictor32x32(ushort* dst, int stride, ushort* above, ushort* left,
|
||||
int bd)
|
||||
{
|
||||
HighbdDcLeftPredictor(dst, stride, 32, above, left, bd);
|
||||
}
|
||||
|
||||
private static unsafe void HighbdDcLeftPredictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd)
|
||||
private static unsafe void HighbdDcLeftPredictor(ushort* dst, int stride, int bs, ushort* above, ushort* left,
|
||||
int bd)
|
||||
{
|
||||
int i, r, expectedDc, sum = 0;
|
||||
int expectedDc, sum = 0;
|
||||
|
||||
for (i = 0; i < bs; i++)
|
||||
for (int i = 0; i < bs; i++)
|
||||
{
|
||||
sum += left[i];
|
||||
}
|
||||
|
||||
expectedDc = (sum + (bs >> 1)) / bs;
|
||||
|
||||
for (r = 0; r < bs; r++)
|
||||
for (int r = 0; r < bs; r++)
|
||||
{
|
||||
MemoryUtil.Fill(dst, (ushort)expectedDc, bs);
|
||||
dst += stride;
|
||||
@@ -1185,28 +1183,31 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
HighbdDcTopPredictor(dst, stride, 8, above, left, bd);
|
||||
}
|
||||
|
||||
public static unsafe void HighbdDcTopPredictor16x16(ushort* dst, int stride, ushort* above, ushort* left, int bd)
|
||||
public static unsafe void HighbdDcTopPredictor16x16(ushort* dst, int stride, ushort* above, ushort* left,
|
||||
int bd)
|
||||
{
|
||||
HighbdDcTopPredictor(dst, stride, 16, above, left, bd);
|
||||
}
|
||||
|
||||
public static unsafe void HighbdDcTopPredictor32x32(ushort* dst, int stride, ushort* above, ushort* left, int bd)
|
||||
public static unsafe void HighbdDcTopPredictor32x32(ushort* dst, int stride, ushort* above, ushort* left,
|
||||
int bd)
|
||||
{
|
||||
HighbdDcTopPredictor(dst, stride, 32, above, left, bd);
|
||||
}
|
||||
|
||||
private static unsafe void HighbdDcTopPredictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd)
|
||||
private static unsafe void HighbdDcTopPredictor(ushort* dst, int stride, int bs, ushort* above, ushort* left,
|
||||
int bd)
|
||||
{
|
||||
int i, r, expectedDc, sum = 0;
|
||||
int expectedDc, sum = 0;
|
||||
|
||||
for (i = 0; i < bs; i++)
|
||||
for (int i = 0; i < bs; i++)
|
||||
{
|
||||
sum += above[i];
|
||||
}
|
||||
|
||||
expectedDc = (sum + (bs >> 1)) / bs;
|
||||
|
||||
for (r = 0; r < bs; r++)
|
||||
for (int r = 0; r < bs; r++)
|
||||
{
|
||||
MemoryUtil.Fill(dst, (ushort)expectedDc, bs);
|
||||
dst += stride;
|
||||
@@ -1233,12 +1234,13 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
HighbdDcPredictor(dst, stride, 32, above, left, bd);
|
||||
}
|
||||
|
||||
private static unsafe void HighbdDcPredictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd)
|
||||
private static unsafe void HighbdDcPredictor(ushort* dst, int stride, int bs, ushort* above, ushort* left,
|
||||
int bd)
|
||||
{
|
||||
int i, r, expectedDc, sum = 0;
|
||||
int expectedDc, sum = 0;
|
||||
int count = 2 * bs;
|
||||
|
||||
for (i = 0; i < bs; i++)
|
||||
for (int i = 0; i < bs; i++)
|
||||
{
|
||||
sum += above[i];
|
||||
sum += left[i];
|
||||
@@ -1246,7 +1248,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
|
||||
expectedDc = (sum + (count >> 1)) / count;
|
||||
|
||||
for (r = 0; r < bs; r++)
|
||||
for (int r = 0; r < bs; r++)
|
||||
{
|
||||
MemoryUtil.Fill(dst, (ushort)expectedDc, bs);
|
||||
dst += stride;
|
||||
@@ -1265,7 +1267,8 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
Dst(dst, stride, 1, 0) = Avg3(I, j, k);
|
||||
Dst(dst, stride, 3, 0) = Dst(dst, stride, 1, 1) = Avg3(j, k, l);
|
||||
Dst(dst, stride, 3, 1) = Dst(dst, stride, 1, 2) = Avg3(k, l, l);
|
||||
Dst(dst, stride, 3, 2) = Dst(dst, stride, 2, 2) = Dst(dst, stride, 0, 3) = Dst(dst, stride, 1, 3) = Dst(dst, stride, 2, 3) = Dst(dst, stride, 3, 3) = l;
|
||||
Dst(dst, stride, 3, 2) = Dst(dst, stride, 2, 2) = Dst(dst, stride, 0, 3) =
|
||||
Dst(dst, stride, 1, 3) = Dst(dst, stride, 2, 3) = Dst(dst, stride, 3, 3) = l;
|
||||
}
|
||||
|
||||
public static unsafe void HighbdD63Predictor4x4(ushort* dst, int stride, ushort* above, ushort* left, int bd)
|
||||
@@ -1303,7 +1306,8 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
Dst(dst, stride, 0, 0) = Avg3(a, b, c);
|
||||
Dst(dst, stride, 1, 0) = Dst(dst, stride, 0, 1) = Avg3(b, c, d);
|
||||
Dst(dst, stride, 2, 0) = Dst(dst, stride, 1, 1) = Dst(dst, stride, 0, 2) = Avg3(c, d, e);
|
||||
Dst(dst, stride, 3, 0) = Dst(dst, stride, 2, 1) = Dst(dst, stride, 1, 2) = Dst(dst, stride, 0, 3) = Avg3(d, e, f);
|
||||
Dst(dst, stride, 3, 0) =
|
||||
Dst(dst, stride, 2, 1) = Dst(dst, stride, 1, 2) = Dst(dst, stride, 0, 3) = Avg3(d, e, f);
|
||||
Dst(dst, stride, 3, 1) = Dst(dst, stride, 2, 2) = Dst(dst, stride, 1, 3) = Avg3(e, f, g);
|
||||
Dst(dst, stride, 3, 2) = Dst(dst, stride, 2, 3) = Avg3(f, g, h);
|
||||
Dst(dst, stride, 3, 3) = h; // Differs from vp8
|
||||
@@ -1346,7 +1350,8 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
Dst(dst, stride, 0, 3) = Avg3(j, k, l);
|
||||
Dst(dst, stride, 1, 3) = Dst(dst, stride, 0, 2) = Avg3(I, j, k);
|
||||
Dst(dst, stride, 2, 3) = Dst(dst, stride, 1, 2) = Dst(dst, stride, 0, 1) = Avg3(x, I, j);
|
||||
Dst(dst, stride, 3, 3) = Dst(dst, stride, 2, 2) = Dst(dst, stride, 1, 1) = Dst(dst, stride, 0, 0) = Avg3(a, x, I);
|
||||
Dst(dst, stride, 3, 3) =
|
||||
Dst(dst, stride, 2, 2) = Dst(dst, stride, 1, 1) = Dst(dst, stride, 0, 0) = Avg3(a, x, I);
|
||||
Dst(dst, stride, 3, 2) = Dst(dst, stride, 2, 1) = Dst(dst, stride, 1, 0) = Avg3(b, a, x);
|
||||
Dst(dst, stride, 3, 1) = Dst(dst, stride, 2, 0) = Avg3(c, b, a);
|
||||
Dst(dst, stride, 3, 0) = Avg3(d, c, b);
|
||||
@@ -1376,4 +1381,4 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
Dst(dst, stride, 1, 3) = Avg3(l, k, j);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
229
src/Ryujinx.Graphics.Nvdec.Vp9/Dsp/LoopFilterAuto.cs
Normal file
229
src/Ryujinx.Graphics.Nvdec.Vp9/Dsp/LoopFilterAuto.cs
Normal file
@@ -0,0 +1,229 @@
|
||||
using Ryujinx.Common.Memory;
|
||||
using System;
|
||||
using System.Runtime.Intrinsics.X86;
|
||||
|
||||
namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
{
|
||||
/// <summary>
/// Front-end for the loop filter routines: every method forwards to the
/// <see cref="LoopFilterSse2"/> implementation when <see cref="Sse2.IsSupported"/> is true
/// and to the <see cref="LoopFilterScalar"/> implementation otherwise.
/// </summary>
/// <remarks>
/// The SSE2 routines receive the threshold spans unchanged. The scalar routines receive only
/// the first byte of each span, and the spans are indexed only on that scalar path.
/// </remarks>
internal class LoopFilterAuto
{
    /// <summary>Forwards to the SSE2 or scalar <c>LpfHorizontal4</c>.</summary>
    /// <param name="s">Pixel buffer handed through to the selected implementation.</param>
    /// <param name="pitch">Passed through unchanged (presumably the row stride of <paramref name="s"/>; confirm against callers).</param>
    /// <param name="blimit">Threshold span; only element 0 is read on the scalar path.</param>
    /// <param name="limit">Threshold span; only element 0 is read on the scalar path.</param>
    /// <param name="thresh">Threshold span; only element 0 is read on the scalar path.</param>
    public static void LpfHorizontal4(ArrayPtr<byte> s, int pitch, ReadOnlySpan<byte> blimit,
        ReadOnlySpan<byte> limit, ReadOnlySpan<byte> thresh)
    {
        if (!Sse2.IsSupported)
        {
            // Scalar fallback: takes single byte thresholds.
            byte blimitValue = blimit[0];
            byte limitValue = limit[0];
            byte threshValue = thresh[0];

            LoopFilterScalar.LpfHorizontal4(s, pitch, blimitValue, limitValue, threshValue);

            return;
        }

        LoopFilterSse2.LpfHorizontal4(s, pitch, blimit, limit, thresh);
    }

    /// <summary>Forwards to the SSE2 or scalar <c>LpfHorizontal4Dual</c>.</summary>
    /// <param name="s">Pixel buffer handed through to the selected implementation.</param>
    /// <param name="pitch">Passed through unchanged.</param>
    /// <param name="blimit0">First threshold set; only element 0 is read on the scalar path.</param>
    /// <param name="limit0">First threshold set; only element 0 is read on the scalar path.</param>
    /// <param name="thresh0">First threshold set; only element 0 is read on the scalar path.</param>
    /// <param name="blimit1">Second threshold set; only element 0 is read on the scalar path.</param>
    /// <param name="limit1">Second threshold set; only element 0 is read on the scalar path.</param>
    /// <param name="thresh1">Second threshold set; only element 0 is read on the scalar path.</param>
    public static void LpfHorizontal4Dual(ArrayPtr<byte> s, int pitch, ReadOnlySpan<byte> blimit0,
        ReadOnlySpan<byte> limit0, ReadOnlySpan<byte> thresh0, ReadOnlySpan<byte> blimit1,
        ReadOnlySpan<byte> limit1, ReadOnlySpan<byte> thresh1)
    {
        if (!Sse2.IsSupported)
        {
            // Scalar fallback: takes single byte thresholds for both sets.
            byte firstBlimit = blimit0[0];
            byte firstLimit = limit0[0];
            byte firstThresh = thresh0[0];
            byte secondBlimit = blimit1[0];
            byte secondLimit = limit1[0];
            byte secondThresh = thresh1[0];

            LoopFilterScalar.LpfHorizontal4Dual(s, pitch, firstBlimit, firstLimit, firstThresh, secondBlimit,
                secondLimit, secondThresh);

            return;
        }

        LoopFilterSse2.LpfHorizontal4Dual(s, pitch, blimit0, limit0, thresh0, blimit1, limit1, thresh1);
    }

    /// <summary>Forwards to the SSE2 or scalar <c>LpfHorizontal8</c>.</summary>
    /// <param name="s">Pixel buffer handed through to the selected implementation.</param>
    /// <param name="pitch">Passed through unchanged.</param>
    /// <param name="blimit">Threshold span; only element 0 is read on the scalar path.</param>
    /// <param name="limit">Threshold span; only element 0 is read on the scalar path.</param>
    /// <param name="thresh">Threshold span; only element 0 is read on the scalar path.</param>
    public static void LpfHorizontal8(ArrayPtr<byte> s, int pitch, ReadOnlySpan<byte> blimit,
        ReadOnlySpan<byte> limit, ReadOnlySpan<byte> thresh)
    {
        if (!Sse2.IsSupported)
        {
            byte blimitValue = blimit[0];
            byte limitValue = limit[0];
            byte threshValue = thresh[0];

            LoopFilterScalar.LpfHorizontal8(s, pitch, blimitValue, limitValue, threshValue);

            return;
        }

        LoopFilterSse2.LpfHorizontal8(s, pitch, blimit, limit, thresh);
    }

    /// <summary>Forwards to the SSE2 or scalar <c>LpfHorizontal8Dual</c>.</summary>
    /// <param name="s">Pixel buffer handed through to the selected implementation.</param>
    /// <param name="pitch">Passed through unchanged.</param>
    /// <param name="blimit0">First threshold set; only element 0 is read on the scalar path.</param>
    /// <param name="limit0">First threshold set; only element 0 is read on the scalar path.</param>
    /// <param name="thresh0">First threshold set; only element 0 is read on the scalar path.</param>
    /// <param name="blimit1">Second threshold set; only element 0 is read on the scalar path.</param>
    /// <param name="limit1">Second threshold set; only element 0 is read on the scalar path.</param>
    /// <param name="thresh1">Second threshold set; only element 0 is read on the scalar path.</param>
    public static void LpfHorizontal8Dual(ArrayPtr<byte> s, int pitch, ReadOnlySpan<byte> blimit0,
        ReadOnlySpan<byte> limit0, ReadOnlySpan<byte> thresh0, ReadOnlySpan<byte> blimit1,
        ReadOnlySpan<byte> limit1, ReadOnlySpan<byte> thresh1)
    {
        if (!Sse2.IsSupported)
        {
            byte firstBlimit = blimit0[0];
            byte firstLimit = limit0[0];
            byte firstThresh = thresh0[0];
            byte secondBlimit = blimit1[0];
            byte secondLimit = limit1[0];
            byte secondThresh = thresh1[0];

            LoopFilterScalar.LpfHorizontal8Dual(s, pitch, firstBlimit, firstLimit, firstThresh, secondBlimit,
                secondLimit, secondThresh);

            return;
        }

        LoopFilterSse2.LpfHorizontal8Dual(s, pitch, blimit0, limit0, thresh0, blimit1, limit1, thresh1);
    }

    /// <summary>Forwards to the SSE2 or scalar <c>LpfHorizontal16</c>.</summary>
    /// <param name="s">Pixel buffer handed through to the selected implementation.</param>
    /// <param name="pitch">Passed through unchanged.</param>
    /// <param name="blimit">Threshold span; only element 0 is read on the scalar path.</param>
    /// <param name="limit">Threshold span; only element 0 is read on the scalar path.</param>
    /// <param name="thresh">Threshold span; only element 0 is read on the scalar path.</param>
    public static void LpfHorizontal16(ArrayPtr<byte> s, int pitch, ReadOnlySpan<byte> blimit,
        ReadOnlySpan<byte> limit, ReadOnlySpan<byte> thresh)
    {
        if (!Sse2.IsSupported)
        {
            byte blimitValue = blimit[0];
            byte limitValue = limit[0];
            byte threshValue = thresh[0];

            LoopFilterScalar.LpfHorizontal16(s, pitch, blimitValue, limitValue, threshValue);

            return;
        }

        LoopFilterSse2.LpfHorizontal16(s, pitch, blimit, limit, thresh);
    }

    /// <summary>Forwards to the SSE2 or scalar <c>LpfHorizontal16Dual</c>.</summary>
    /// <param name="s">Pixel buffer handed through to the selected implementation.</param>
    /// <param name="pitch">Passed through unchanged.</param>
    /// <param name="blimit">Threshold span; only element 0 is read on the scalar path.</param>
    /// <param name="limit">Threshold span; only element 0 is read on the scalar path.</param>
    /// <param name="thresh">Threshold span; only element 0 is read on the scalar path.</param>
    public static void LpfHorizontal16Dual(ArrayPtr<byte> s, int pitch, ReadOnlySpan<byte> blimit,
        ReadOnlySpan<byte> limit, ReadOnlySpan<byte> thresh)
    {
        if (!Sse2.IsSupported)
        {
            byte blimitValue = blimit[0];
            byte limitValue = limit[0];
            byte threshValue = thresh[0];

            LoopFilterScalar.LpfHorizontal16Dual(s, pitch, blimitValue, limitValue, threshValue);

            return;
        }

        LoopFilterSse2.LpfHorizontal16Dual(s, pitch, blimit, limit, thresh);
    }

    /// <summary>Forwards to the SSE2 or scalar <c>LpfVertical4</c>.</summary>
    /// <param name="s">Pixel buffer handed through to the selected implementation.</param>
    /// <param name="pitch">Passed through unchanged.</param>
    /// <param name="blimit">Threshold span; only element 0 is read on the scalar path.</param>
    /// <param name="limit">Threshold span; only element 0 is read on the scalar path.</param>
    /// <param name="thresh">Threshold span; only element 0 is read on the scalar path.</param>
    public static void LpfVertical4(ArrayPtr<byte> s, int pitch, ReadOnlySpan<byte> blimit,
        ReadOnlySpan<byte> limit, ReadOnlySpan<byte> thresh)
    {
        if (!Sse2.IsSupported)
        {
            byte blimitValue = blimit[0];
            byte limitValue = limit[0];
            byte threshValue = thresh[0];

            LoopFilterScalar.LpfVertical4(s, pitch, blimitValue, limitValue, threshValue);

            return;
        }

        LoopFilterSse2.LpfVertical4(s, pitch, blimit, limit, thresh);
    }

    /// <summary>Forwards to the SSE2 or scalar <c>LpfVertical4Dual</c>.</summary>
    /// <param name="s">Pixel buffer handed through to the selected implementation.</param>
    /// <param name="pitch">Passed through unchanged.</param>
    /// <param name="blimit0">First threshold set; only element 0 is read on the scalar path.</param>
    /// <param name="limit0">First threshold set; only element 0 is read on the scalar path.</param>
    /// <param name="thresh0">First threshold set; only element 0 is read on the scalar path.</param>
    /// <param name="blimit1">Second threshold set; only element 0 is read on the scalar path.</param>
    /// <param name="limit1">Second threshold set; only element 0 is read on the scalar path.</param>
    /// <param name="thresh1">Second threshold set; only element 0 is read on the scalar path.</param>
    public static void LpfVertical4Dual(ArrayPtr<byte> s, int pitch, ReadOnlySpan<byte> blimit0,
        ReadOnlySpan<byte> limit0, ReadOnlySpan<byte> thresh0, ReadOnlySpan<byte> blimit1,
        ReadOnlySpan<byte> limit1, ReadOnlySpan<byte> thresh1)
    {
        if (!Sse2.IsSupported)
        {
            byte firstBlimit = blimit0[0];
            byte firstLimit = limit0[0];
            byte firstThresh = thresh0[0];
            byte secondBlimit = blimit1[0];
            byte secondLimit = limit1[0];
            byte secondThresh = thresh1[0];

            LoopFilterScalar.LpfVertical4Dual(s, pitch, firstBlimit, firstLimit, firstThresh, secondBlimit,
                secondLimit, secondThresh);

            return;
        }

        LoopFilterSse2.LpfVertical4Dual(s, pitch, blimit0, limit0, thresh0, blimit1, limit1, thresh1);
    }

    /// <summary>Forwards to the SSE2 or scalar <c>LpfVertical8</c>.</summary>
    /// <param name="s">Pixel buffer handed through to the selected implementation.</param>
    /// <param name="pitch">Passed through unchanged.</param>
    /// <param name="blimit">Threshold span; only element 0 is read on the scalar path.</param>
    /// <param name="limit">Threshold span; only element 0 is read on the scalar path.</param>
    /// <param name="thresh">Threshold span; only element 0 is read on the scalar path.</param>
    public static void LpfVertical8(ArrayPtr<byte> s, int pitch, ReadOnlySpan<byte> blimit,
        ReadOnlySpan<byte> limit, ReadOnlySpan<byte> thresh)
    {
        if (!Sse2.IsSupported)
        {
            byte blimitValue = blimit[0];
            byte limitValue = limit[0];
            byte threshValue = thresh[0];

            LoopFilterScalar.LpfVertical8(s, pitch, blimitValue, limitValue, threshValue);

            return;
        }

        LoopFilterSse2.LpfVertical8(s, pitch, blimit, limit, thresh);
    }

    /// <summary>Forwards to the SSE2 or scalar <c>LpfVertical8Dual</c>.</summary>
    /// <param name="s">Pixel buffer handed through to the selected implementation.</param>
    /// <param name="pitch">Passed through unchanged.</param>
    /// <param name="blimit0">First threshold set; only element 0 is read on the scalar path.</param>
    /// <param name="limit0">First threshold set; only element 0 is read on the scalar path.</param>
    /// <param name="thresh0">First threshold set; only element 0 is read on the scalar path.</param>
    /// <param name="blimit1">Second threshold set; only element 0 is read on the scalar path.</param>
    /// <param name="limit1">Second threshold set; only element 0 is read on the scalar path.</param>
    /// <param name="thresh1">Second threshold set; only element 0 is read on the scalar path.</param>
    public static void LpfVertical8Dual(ArrayPtr<byte> s, int pitch, ReadOnlySpan<byte> blimit0,
        ReadOnlySpan<byte> limit0, ReadOnlySpan<byte> thresh0, ReadOnlySpan<byte> blimit1,
        ReadOnlySpan<byte> limit1, ReadOnlySpan<byte> thresh1)
    {
        if (!Sse2.IsSupported)
        {
            byte firstBlimit = blimit0[0];
            byte firstLimit = limit0[0];
            byte firstThresh = thresh0[0];
            byte secondBlimit = blimit1[0];
            byte secondLimit = limit1[0];
            byte secondThresh = thresh1[0];

            LoopFilterScalar.LpfVertical8Dual(s, pitch, firstBlimit, firstLimit, firstThresh, secondBlimit,
                secondLimit, secondThresh);

            return;
        }

        LoopFilterSse2.LpfVertical8Dual(s, pitch, blimit0, limit0, thresh0, blimit1, limit1, thresh1);
    }

    /// <summary>Forwards to the SSE2 or scalar <c>LpfVertical16</c>.</summary>
    /// <param name="s">Pixel buffer handed through to the selected implementation.</param>
    /// <param name="pitch">Passed through unchanged.</param>
    /// <param name="blimit">Threshold span; only element 0 is read on the scalar path.</param>
    /// <param name="limit">Threshold span; only element 0 is read on the scalar path.</param>
    /// <param name="thresh">Threshold span; only element 0 is read on the scalar path.</param>
    public static void LpfVertical16(ArrayPtr<byte> s, int pitch, ReadOnlySpan<byte> blimit,
        ReadOnlySpan<byte> limit, ReadOnlySpan<byte> thresh)
    {
        if (!Sse2.IsSupported)
        {
            byte blimitValue = blimit[0];
            byte limitValue = limit[0];
            byte threshValue = thresh[0];

            LoopFilterScalar.LpfVertical16(s, pitch, blimitValue, limitValue, threshValue);

            return;
        }

        LoopFilterSse2.LpfVertical16(s, pitch, blimit, limit, thresh);
    }

    /// <summary>Forwards to the SSE2 or scalar <c>LpfVertical16Dual</c>.</summary>
    /// <param name="s">Pixel buffer handed through to the selected implementation.</param>
    /// <param name="pitch">Passed through unchanged.</param>
    /// <param name="blimit">Threshold span; only element 0 is read on the scalar path.</param>
    /// <param name="limit">Threshold span; only element 0 is read on the scalar path.</param>
    /// <param name="thresh">Threshold span; only element 0 is read on the scalar path.</param>
    public static void LpfVertical16Dual(ArrayPtr<byte> s, int pitch, ReadOnlySpan<byte> blimit,
        ReadOnlySpan<byte> limit, ReadOnlySpan<byte> thresh)
    {
        if (!Sse2.IsSupported)
        {
            byte blimitValue = blimit[0];
            byte limitValue = limit[0];
            byte threshValue = thresh[0];

            LoopFilterScalar.LpfVertical16Dual(s, pitch, blimitValue, limitValue, threshValue);

            return;
        }

        LoopFilterSse2.LpfVertical16Dual(s, pitch, blimit, limit, thresh);
    }
}
|
||||
}
|
||||
1093
src/Ryujinx.Graphics.Nvdec.Vp9/Dsp/LoopFilterScalar.cs
Normal file
1093
src/Ryujinx.Graphics.Nvdec.Vp9/Dsp/LoopFilterScalar.cs
Normal file
File diff suppressed because it is too large
Load Diff
1837
src/Ryujinx.Graphics.Nvdec.Vp9/Dsp/LoopFilterSse2.cs
Normal file
1837
src/Ryujinx.Graphics.Nvdec.Vp9/Dsp/LoopFilterSse2.cs
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,4 +1,4 @@
|
||||
using Ryujinx.Graphics.Nvdec.Vp9.Common;
|
||||
using Ryujinx.Graphics.Nvdec.Vp9.Common;
|
||||
using System;
|
||||
using System.Diagnostics;
|
||||
|
||||
@@ -12,10 +12,9 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
{
|
||||
Debug.Assert(den != 0);
|
||||
{
|
||||
int p = (int)(((ulong)num * 256 + (den >> 1)) / den);
|
||||
int p = (int)((((ulong)num * 256) + (den >> 1)) / den);
|
||||
// (p > 255) ? 255 : (p < 1) ? 1 : p;
|
||||
int clippedProb = p | ((255 - p) >> 23) | (p == 0 ? 1 : 0);
|
||||
|
||||
return (byte)clippedProb;
|
||||
}
|
||||
}
|
||||
@@ -23,13 +22,13 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
/* This function assumes prob1 and prob2 are already within [1,255] range. */
|
||||
public static byte WeightedProb(int prob1, int prob2, int factor)
|
||||
{
|
||||
return (byte)BitUtils.RoundPowerOfTwo(prob1 * (256 - factor) + prob2 * factor, 8);
|
||||
return (byte)BitUtils.RoundPowerOfTwo((prob1 * (256 - factor)) + (prob2 * factor), 8);
|
||||
}
|
||||
|
||||
// MODE_MV_MAX_UPDATE_FACTOR (128) * count / MODE_MV_COUNT_SAT;
|
||||
private static readonly uint[] _countToUpdateFactor = {
|
||||
0, 6, 12, 19, 25, 32, 38, 44, 51, 57, 64,
|
||||
70, 76, 83, 89, 96, 102, 108, 115, 121, 128,
|
||||
private static readonly uint[] CountToUpdateFactor =
|
||||
{
|
||||
0, 6, 12, 19, 25, 32, 38, 44, 51, 57, 64, 70, 76, 83, 89, 96, 102, 108, 115, 121, 128
|
||||
};
|
||||
|
||||
private const int ModeMvCountSat = 20;
|
||||
@@ -41,14 +40,11 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
{
|
||||
return preProb;
|
||||
}
|
||||
else
|
||||
{
|
||||
uint count = Math.Min(den, ModeMvCountSat);
|
||||
uint factor = _countToUpdateFactor[(int)count];
|
||||
byte prob = GetProb(ct0, den);
|
||||
|
||||
return WeightedProb(preProb, prob, (int)factor);
|
||||
}
|
||||
uint count = Math.Min(den, ModeMvCountSat);
|
||||
uint factor = CountToUpdateFactor[(int)count];
|
||||
byte prob = GetProb(ct0, den);
|
||||
return WeightedProb(preProb, prob, (int)factor);
|
||||
}
|
||||
|
||||
private static uint TreeMergeProbsImpl(
|
||||
@@ -59,17 +55,17 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
Span<byte> probs)
|
||||
{
|
||||
int l = tree[i];
|
||||
uint leftCount = (l <= 0) ? counts[-l] : TreeMergeProbsImpl((uint)l, tree, preProbs, counts, probs);
|
||||
uint leftCount = l <= 0 ? counts[-l] : TreeMergeProbsImpl((uint)l, tree, preProbs, counts, probs);
|
||||
int r = tree[i + 1];
|
||||
uint rightCount = (r <= 0) ? counts[-r] : TreeMergeProbsImpl((uint)r, tree, preProbs, counts, probs);
|
||||
uint rightCount = r <= 0 ? counts[-r] : TreeMergeProbsImpl((uint)r, tree, preProbs, counts, probs);
|
||||
probs[(int)(i >> 1)] = ModeMvMergeProbs(preProbs[(int)(i >> 1)], leftCount, rightCount);
|
||||
|
||||
return leftCount + rightCount;
|
||||
}
|
||||
|
||||
public static void TreeMergeProbs(sbyte[] tree, ReadOnlySpan<byte> preProbs, ReadOnlySpan<uint> counts, Span<byte> probs)
|
||||
public static void TreeMergeProbs(sbyte[] tree, ReadOnlySpan<byte> preProbs, ReadOnlySpan<uint> counts,
|
||||
Span<byte> probs)
|
||||
{
|
||||
TreeMergeProbsImpl(0, tree, preProbs, counts, probs);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,4 +1,5 @@
|
||||
using Ryujinx.Common.Memory;
|
||||
using Ryujinx.Common.Memory;
|
||||
using Ryujinx.Graphics.Nvdec.Vp9.Types;
|
||||
using System;
|
||||
using System.Buffers.Binary;
|
||||
|
||||
@@ -6,18 +7,18 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
{
|
||||
internal struct Reader
|
||||
{
|
||||
private static readonly byte[] _norm = {
|
||||
0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
private static readonly byte[] Norm =
|
||||
{
|
||||
0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
||||
};
|
||||
|
||||
private const int BdValueSize = sizeof(ulong) * 8;
|
||||
|
||||
// This is meant to be a large, positive constant that can still be efficiently
|
||||
@@ -36,16 +37,13 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
{
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
_buffer = new ArrayPtr<byte>(ref buffer[0], size);
|
||||
Value = 0;
|
||||
Count = -8;
|
||||
Range = 255;
|
||||
Fill();
|
||||
|
||||
return ReadBit() != 0; // Marker bit
|
||||
}
|
||||
_buffer = new ArrayPtr<byte>(ref buffer[0], size);
|
||||
Value = 0;
|
||||
Count = -8;
|
||||
Range = 255;
|
||||
Fill();
|
||||
return ReadBit() != 0; // Marker bit
|
||||
}
|
||||
|
||||
private void Fill()
|
||||
@@ -65,7 +63,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
ulong bigEndianValues = BinaryPrimitives.ReadUInt64BigEndian(buffer);
|
||||
nv = bigEndianValues >> (BdValueSize - bits);
|
||||
count += bits;
|
||||
buffer = buffer[(bits >> 3)..];
|
||||
buffer = buffer.Slice(bits >> 3);
|
||||
value = Value | (nv << (shift & 0x7));
|
||||
}
|
||||
else
|
||||
@@ -84,7 +82,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
{
|
||||
count += 8;
|
||||
value |= (ulong)buffer[0] << shift;
|
||||
buffer = buffer[1..];
|
||||
buffer = buffer.Slice(1);
|
||||
shift -= 8;
|
||||
}
|
||||
}
|
||||
@@ -98,7 +96,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
Count = count;
|
||||
}
|
||||
|
||||
public readonly bool HasError()
|
||||
public bool HasError()
|
||||
{
|
||||
// Check if we have reached the end of the buffer.
|
||||
//
|
||||
@@ -124,7 +122,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
ulong bigsplit;
|
||||
int count;
|
||||
uint range;
|
||||
uint split = (Range * (uint)prob + (256 - (uint)prob)) >> 8;
|
||||
uint split = ((Range * (uint)prob) + (256 - (uint)prob)) >> 8;
|
||||
|
||||
if (Count < 0)
|
||||
{
|
||||
@@ -146,7 +144,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
}
|
||||
|
||||
{
|
||||
int shift = _norm[range];
|
||||
int shift = Norm[range];
|
||||
range <<= shift;
|
||||
value <<= shift;
|
||||
count -= shift;
|
||||
@@ -188,7 +186,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
|
||||
public int ReadBool(int prob, ref ulong value, ref int count, ref uint range)
|
||||
{
|
||||
uint split = (range * (uint)prob + (256 - (uint)prob)) >> 8;
|
||||
uint split = ((range * (uint)prob) + (256 - (uint)prob)) >> 8;
|
||||
ulong bigsplit = (ulong)split << (BdValueSize - 8);
|
||||
|
||||
if (count < 0)
|
||||
@@ -202,19 +200,20 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
|
||||
if (value >= bigsplit)
|
||||
{
|
||||
range -= split;
|
||||
value -= bigsplit;
|
||||
range = range - split;
|
||||
value = value - bigsplit;
|
||||
{
|
||||
int shift = _norm[range];
|
||||
int shift = Norm[range];
|
||||
range <<= shift;
|
||||
value <<= shift;
|
||||
count -= shift;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
range = split;
|
||||
{
|
||||
int shift = _norm[range];
|
||||
int shift = Norm[range];
|
||||
range <<= shift;
|
||||
value <<= shift;
|
||||
count -= shift;
|
||||
@@ -230,7 +229,82 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
Count -= 8;
|
||||
_buffer = _buffer.Slice(-1);
|
||||
}
|
||||
|
||||
return _buffer;
|
||||
}
|
||||
|
||||
/// <summary>
/// Decodes one value by reading <c>ReadLiteral(7)</c> and, when that literal is not
/// below the threshold of 65, one extra bit via <c>ReadBit()</c>.
/// </summary>
/// <returns>
/// The literal itself when it is below 65; otherwise
/// <c>(literal &lt;&lt; 1) - 65 + ReadBit()</c>.
/// </returns>
private int DecodeUniform()
{
    const int Bits = 8;
    const int Threshold = (1 << Bits) - 191; // 65

    int literal = ReadLiteral(Bits - 1);

    if (literal < Threshold)
    {
        // Short form: no extra bit is consumed.
        return literal;
    }

    // Long form: the extra bit is only read on this path.
    return (literal << 1) - Threshold + ReadBit();
}
|
||||
|
||||
/// <summary>
/// Decodes a value whose form is selected by up to three leading escape bits.
/// </summary>
/// <remarks>
/// Bits are consumed strictly in order; the first zero escape bit selects the form:
/// <list type="bullet">
/// <item><description><c>0</c>: <c>ReadLiteral(4)</c></description></item>
/// <item><description><c>1 0</c>: <c>ReadLiteral(4) + 16</c></description></item>
/// <item><description><c>1 1 0</c>: <c>ReadLiteral(5) + 32</c></description></item>
/// <item><description><c>1 1 1</c>: <c>DecodeUniform() + 64</c></description></item>
/// </list>
/// </remarks>
/// <returns>The decoded value.</returns>
public int DecodeTermSubexp()
{
    if (ReadBit() != 0)
    {
        if (ReadBit() != 0)
        {
            // Third escape bit chooses between the 5-bit form and the uniform tail.
            return ReadBit() != 0
                ? DecodeUniform() + 64
                : ReadLiteral(5) + 32;
        }

        return ReadLiteral(4) + 16;
    }

    return ReadLiteral(4);
}
|
||||
|
||||
/// <summary>
/// Reads a transform mode: a 2-bit literal, extended by one more bit when the
/// literal equals <see cref="TxMode.Allow32x32"/>.
/// </summary>
/// <returns>The decoded <see cref="TxMode"/>.</returns>
public TxMode ReadTxMode()
{
    TxMode mode = (TxMode)ReadLiteral(2);

    if (mode != TxMode.Allow32x32)
    {
        return mode;
    }

    // Only Allow32x32 consumes the extra bit, which is added to the enum value.
    mode += ReadBit();

    return mode;
}
|
||||
|
||||
/// <summary>
/// Reads <paramref name="n"/> bools, most significant first, and packs them into an integer.
/// </summary>
/// <param name="probs">Per-bit probabilities; element <c>i</c> is used for the i-th bool read.</param>
/// <param name="n">Number of bools to read.</param>
/// <param name="value">Decoder value state, forwarded by reference to <c>ReadBool</c>.</param>
/// <param name="count">Decoder count state, forwarded by reference to <c>ReadBool</c>.</param>
/// <param name="range">Decoder range state, forwarded by reference to <c>ReadBool</c>.</param>
/// <returns>The accumulated result; 0 when <paramref name="n"/> is not positive.</returns>
public int ReadCoeff(
    ReadOnlySpan<byte> probs,
    int n,
    ref ulong value,
    ref int count,
    ref uint range)
{
    int packed = 0;
    int index = 0;

    while (index < n)
    {
        // Shift the accumulator up and append the newly decoded bool at the bottom.
        packed = (packed << 1) | ReadBool(probs[index], ref value, ref count, ref range);
        index++;
    }

    return packed;
}
|
||||
|
||||
/// <summary>
/// Conditionally updates a probability in place. A flag is read with
/// <c>Read(Entropy.DiffUpdateProb)</c>; when it is non-zero, the result of
/// <c>DecodeTermSubexp()</c> and the current value of <paramref name="p"/> are passed
/// to <c>DSubExp.InvRemapProb</c> and the result is stored back.
/// </summary>
/// <param name="p">Probability to update; left untouched when the flag reads as zero.</param>
public void DiffUpdateProb(ref byte p)
{
    if (Read(Entropy.DiffUpdateProb) == 0)
    {
        return;
    }

    // Decode first, then read the current probability, matching argument order.
    int decoded = DecodeTermSubexp();
    p = (byte)DSubExp.InvRemapProb(decoded, p);
}
|
||||
|
||||
/// <summary>
/// Walks the first <paramref name="n"/> entries of <paramref name="p"/>. For each one a flag is
/// read with <c>Read(EntropyMv.UpdateProb)</c>; when non-zero, the entry is replaced by
/// <c>(ReadLiteral(7) &lt;&lt; 1) | 1</c>.
/// </summary>
/// <param name="p">Probabilities to update in place.</param>
/// <param name="n">Number of leading entries to consider.</param>
public void UpdateMvProbs(Span<byte> p, int n)
{
    int i = 0;

    while (i < n)
    {
        if (Read(EntropyMv.UpdateProb) != 0)
        {
            // The low bit is always set, so a stored value is always odd.
            p[i] = (byte)((ReadLiteral(7) << 1) | 1);
        }

        ++i;
    }
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -13,42 +13,42 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
// for (int i = 1; i < 32; ++i)
|
||||
// Console.WriteLine("public const short CosPi{0}_64 = {1};", i, MathF.Round(16384 * MathF.Cos(i * MathF.PI / 64)));
|
||||
// Note: sin(k * Pi / 64) = cos((32 - k) * Pi / 64)
|
||||
public const short CosPi1_64 = 16364;
|
||||
public const short CosPi2_64 = 16305;
|
||||
public const short CosPi3_64 = 16207;
|
||||
public const short CosPi4_64 = 16069;
|
||||
public const short CosPi5_64 = 15893;
|
||||
public const short CosPi6_64 = 15679;
|
||||
public const short CosPi7_64 = 15426;
|
||||
public const short CosPi8_64 = 15137;
|
||||
public const short CosPi9_64 = 14811;
|
||||
public const short CosPi10_64 = 14449;
|
||||
public const short CosPi11_64 = 14053;
|
||||
public const short CosPi12_64 = 13623;
|
||||
public const short CosPi13_64 = 13160;
|
||||
public const short CosPi14_64 = 12665;
|
||||
public const short CosPi15_64 = 12140;
|
||||
public const short CosPi16_64 = 11585;
|
||||
public const short CosPi17_64 = 11003;
|
||||
public const short CosPi18_64 = 10394;
|
||||
public const short CosPi19_64 = 9760;
|
||||
public const short CosPi20_64 = 9102;
|
||||
public const short CosPi21_64 = 8423;
|
||||
public const short CosPi22_64 = 7723;
|
||||
public const short CosPi23_64 = 7005;
|
||||
public const short CosPi24_64 = 6270;
|
||||
public const short CosPi25_64 = 5520;
|
||||
public const short CosPi26_64 = 4756;
|
||||
public const short CosPi27_64 = 3981;
|
||||
public const short CosPi28_64 = 3196;
|
||||
public const short CosPi29_64 = 2404;
|
||||
public const short CosPi30_64 = 1606;
|
||||
public const short CosPi31_64 = 804;
|
||||
public const short CosPi164 = 16364;
|
||||
public const short CosPi264 = 16305;
|
||||
public const short CosPi364 = 16207;
|
||||
public const short CosPi464 = 16069;
|
||||
public const short CosPi564 = 15893;
|
||||
public const short CosPi664 = 15679;
|
||||
public const short CosPi764 = 15426;
|
||||
public const short CosPi864 = 15137;
|
||||
public const short CosPi964 = 14811;
|
||||
public const short CosPi1064 = 14449;
|
||||
public const short CosPi1164 = 14053;
|
||||
public const short CosPi1264 = 13623;
|
||||
public const short CosPi1364 = 13160;
|
||||
public const short CosPi1464 = 12665;
|
||||
public const short CosPi1564 = 12140;
|
||||
public const short CosPi1664 = 11585;
|
||||
public const short CosPi1764 = 11003;
|
||||
public const short CosPi1864 = 10394;
|
||||
public const short CosPi1964 = 9760;
|
||||
public const short CosPi2064 = 9102;
|
||||
public const short CosPi2164 = 8423;
|
||||
public const short CosPi2264 = 7723;
|
||||
public const short CosPi2364 = 7005;
|
||||
public const short CosPi2464 = 6270;
|
||||
public const short CosPi2564 = 5520;
|
||||
public const short CosPi2664 = 4756;
|
||||
public const short CosPi2764 = 3981;
|
||||
public const short CosPi2864 = 3196;
|
||||
public const short CosPi2964 = 2404;
|
||||
public const short CosPi3064 = 1606;
|
||||
public const short CosPi3164 = 804;
|
||||
|
||||
// 16384 * sqrt(2) * sin(kPi / 9) * 2 / 3
|
||||
public const short SinPi1_9 = 5283;
|
||||
public const short SinPi2_9 = 9929;
|
||||
public const short SinPi3_9 = 13377;
|
||||
public const short SinPi4_9 = 15212;
|
||||
public const short SinPi19 = 5283;
|
||||
public const short SinPi29 = 9929;
|
||||
public const short SinPi39 = 13377;
|
||||
public const short SinPi49 = 15212;
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user