Implement VP9 loop filtering

This commit is contained in:
gdkchan
2023-02-04 19:44:30 -03:00
committed by KeatonTheBot
parent d135385cab
commit b69b432b4c
79 changed files with 11301 additions and 3006 deletions

View File

@@ -1,4 +1,4 @@
using Ryujinx.Common.Memory;
using Ryujinx.Common.Memory;
using Ryujinx.Graphics.Nvdec.Vp9.Common;
using System.Diagnostics;
using System.Runtime.CompilerServices;
@@ -75,17 +75,16 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
Vector128<int> zero = Vector128<int>.Zero;
Vector128<int> const64 = Vector128.Create(64);
ulong x, y;
src -= SubpelTaps / 2 - 1;
src -= (SubpelTaps / 2) - 1;
fixed (Array8<short>* xFilter = xFilters)
{
Vector128<short> vfilter = Sse2.LoadVector128((short*)xFilter + (uint)(x0Q4 & SubpelMask) * 8);
Vector128<short> vfilter = Sse2.LoadVector128((short*)xFilter + ((uint)(x0Q4 & SubpelMask) * 8));
for (y = 0; y < (uint)h; ++y)
for (ulong y = 0; y < (uint)h; ++y)
{
ulong srcOffset = (uint)x0Q4 >> SubpelBits;
for (x = 0; x < (uint)w; x += 4)
for (ulong x = 0; x < (uint)w; x += 4)
{
Vector128<short> vsrc0 = Sse41.ConvertToVector128Int16(&src[srcOffset + x]);
Vector128<short> vsrc1 = Sse41.ConvertToVector128Int16(&src[srcOffset + x + 1]);
@@ -94,8 +93,10 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
Vector128<int> sum0123 = MultiplyAddAdjacent(vsrc0, vsrc1, vsrc2, vsrc3, vfilter, zero);
Sse.StoreScalar((float*)&dst[x], PackUnsignedSaturate(RoundShift(sum0123, const64), zero).AsSingle());
Sse.StoreScalar((float*)&dst[x],
PackUnsignedSaturate(RoundShift(sum0123, const64), zero).AsSingle());
}
src += srcStride;
dst += dstStride;
}
@@ -117,22 +118,20 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
if (Sse41.IsSupported && UseIntrinsics && xStepQ4 == 1 << SubpelBits)
{
ConvolveHorizSse41(src, srcStride, dst, dstStride, xFilters, x0Q4, w, h);
return;
}
int x, y;
src -= SubpelTaps / 2 - 1;
src -= (SubpelTaps / 2) - 1;
for (y = 0; y < h; ++y)
for (int y = 0; y < h; ++y)
{
int xQ4 = x0Q4;
for (x = 0; x < w; ++x)
for (int x = 0; x < w; ++x)
{
byte* srcX = &src[xQ4 >> SubpelBits];
ref Array8<short> xFilter = ref xFilters[xQ4 & SubpelMask];
int k, sum = 0;
for (k = 0; k < SubpelTaps; ++k)
int sum = 0;
for (int k = 0; k < SubpelTaps; ++k)
{
sum += srcX[k] * xFilter[k];
}
@@ -140,6 +139,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
dst[x] = BitUtils.ClipPixel(BitUtils.RoundPowerOfTwo(sum, FilterBits));
xQ4 += xStepQ4;
}
src += srcStride;
dst += dstStride;
}
@@ -156,25 +156,26 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
int w,
int h)
{
int x, y;
src -= SubpelTaps / 2 - 1;
src -= (SubpelTaps / 2) - 1;
for (y = 0; y < h; ++y)
for (int y = 0; y < h; ++y)
{
int xQ4 = x0Q4;
for (x = 0; x < w; ++x)
for (int x = 0; x < w; ++x)
{
byte* srcX = &src[xQ4 >> SubpelBits];
ref Array8<short> xFilter = ref xFilters[xQ4 & SubpelMask];
int k, sum = 0;
for (k = 0; k < SubpelTaps; ++k)
int sum = 0;
for (int k = 0; k < SubpelTaps; ++k)
{
sum += srcX[k] * xFilter[k];
}
dst[x] = (byte)BitUtils.RoundPowerOfTwo(dst[x] + BitUtils.ClipPixel(BitUtils.RoundPowerOfTwo(sum, FilterBits)), 1);
dst[x] = (byte)BitUtils.RoundPowerOfTwo(
dst[x] + BitUtils.ClipPixel(BitUtils.RoundPowerOfTwo(sum, FilterBits)), 1);
xQ4 += xStepQ4;
}
src += srcStride;
dst += dstStride;
}
@@ -203,18 +204,17 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
srcStride * 6,
srcStride * 7);
ulong x, y;
src -= srcStride * (SubpelTaps / 2 - 1);
src -= srcStride * ((SubpelTaps / 2) - 1);
fixed (Array8<short>* yFilter = yFilters)
{
Vector128<short> vfilter = Sse2.LoadVector128((short*)yFilter + (uint)(y0Q4 & SubpelMask) * 8);
Vector128<short> vfilter = Sse2.LoadVector128((short*)yFilter + ((uint)(y0Q4 & SubpelMask) * 8));
ulong srcBaseY = (uint)y0Q4 >> SubpelBits;
for (y = 0; y < (uint)h; ++y)
for (ulong y = 0; y < (uint)h; ++y)
{
ulong srcOffset = (srcBaseY + y) * (uint)srcStride;
for (x = 0; x < (uint)w; x += 4)
for (ulong x = 0; x < (uint)w; x += 4)
{
Vector256<int> vsrc = Avx2.GatherVector256((uint*)&src[srcOffset + x], indices, 1).AsInt32();
@@ -240,8 +240,10 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
Vector128<int> sum0123 = MultiplyAddAdjacent(vsrc0, vsrc1, vsrc2, vsrc3, vfilter, zero);
Sse.StoreScalar((float*)&dst[x], PackUnsignedSaturate(RoundShift(sum0123, const64), zero).AsSingle());
Sse.StoreScalar((float*)&dst[x],
PackUnsignedSaturate(RoundShift(sum0123, const64), zero).AsSingle());
}
dst += dstStride;
}
}
@@ -262,22 +264,20 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
if (Avx2.IsSupported && UseIntrinsics && yStepQ4 == 1 << SubpelBits)
{
ConvolveVertAvx2(src, srcStride, dst, dstStride, yFilters, y0Q4, w, h);
return;
}
int x, y;
src -= srcStride * (SubpelTaps / 2 - 1);
src -= srcStride * ((SubpelTaps / 2) - 1);
for (x = 0; x < w; ++x)
for (int x = 0; x < w; ++x)
{
int yQ4 = y0Q4;
for (y = 0; y < h; ++y)
for (int y = 0; y < h; ++y)
{
byte* srcY = &src[(yQ4 >> SubpelBits) * srcStride];
ref Array8<short> yFilter = ref yFilters[yQ4 & SubpelMask];
int k, sum = 0;
for (k = 0; k < SubpelTaps; ++k)
int sum = 0;
for (int k = 0; k < SubpelTaps; ++k)
{
sum += srcY[k * srcStride] * yFilter[k];
}
@@ -285,6 +285,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
dst[y * dstStride] = BitUtils.ClipPixel(BitUtils.RoundPowerOfTwo(sum, FilterBits));
yQ4 += yStepQ4;
}
++src;
++dst;
}
@@ -301,18 +302,17 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
int w,
int h)
{
int x, y;
src -= srcStride * (SubpelTaps / 2 - 1);
src -= srcStride * ((SubpelTaps / 2) - 1);
for (x = 0; x < w; ++x)
for (int x = 0; x < w; ++x)
{
int yQ4 = y0Q4;
for (y = 0; y < h; ++y)
for (int y = 0; y < h; ++y)
{
byte* srcY = &src[(yQ4 >> SubpelBits) * srcStride];
ref Array8<short> yFilter = ref yFilters[yQ4 & SubpelMask];
int k, sum = 0;
for (k = 0; k < SubpelTaps; ++k)
int sum = 0;
for (int k = 0; k < SubpelTaps; ++k)
{
sum += srcY[k * srcStride] * yFilter[k];
}
@@ -321,6 +321,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
dst[y * dstStride] + BitUtils.ClipPixel(BitUtils.RoundPowerOfTwo(sum, FilterBits)), 1);
yQ4 += yStepQ4;
}
++src;
++dst;
}
@@ -420,15 +421,16 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
// ==> yStepQ4 = 64. Since w and h are at most 16, the temp buffer is still
// big enough.
byte* temp = stackalloc byte[64 * 135];
int intermediateHeight = (((h - 1) * yStepQ4 + y0Q4) >> SubpelBits) + SubpelTaps;
int intermediateHeight = ((((h - 1) * yStepQ4) + y0Q4) >> SubpelBits) + SubpelTaps;
Debug.Assert(w <= 64);
Debug.Assert(h <= 64);
Debug.Assert(yStepQ4 <= 32 || (yStepQ4 <= 64 && h <= 32));
Debug.Assert(xStepQ4 <= 64);
ConvolveHoriz(src - srcStride * (SubpelTaps / 2 - 1), srcStride, temp, 64, filter, x0Q4, xStepQ4, w, intermediateHeight);
ConvolveVert(temp + 64 * (SubpelTaps / 2 - 1), 64, dst, dstStride, filter, y0Q4, yStepQ4, w, h);
ConvolveHoriz(src - (srcStride * ((SubpelTaps / 2) - 1)), srcStride, temp, 64, filter, x0Q4, xStepQ4, w,
intermediateHeight);
ConvolveVert(temp + (64 * ((SubpelTaps / 2) - 1)), 64, dst, dstStride, filter, y0Q4, yStepQ4, w, h);
}
public static unsafe void Convolve8Avg(
@@ -489,11 +491,9 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
int w,
int h)
{
int x, y;
for (y = 0; y < h; ++y)
for (int y = 0; y < h; ++y)
{
for (x = 0; x < w; ++x)
for (int x = 0; x < w; ++x)
{
dst[x] = (byte)BitUtils.RoundPowerOfTwo(dst[x] + src[x], 1);
}
@@ -611,18 +611,17 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
int h,
int bd)
{
int x, y;
src -= SubpelTaps / 2 - 1;
src -= (SubpelTaps / 2) - 1;
for (y = 0; y < h; ++y)
for (int y = 0; y < h; ++y)
{
int xQ4 = x0Q4;
for (x = 0; x < w; ++x)
for (int x = 0; x < w; ++x)
{
ushort* srcX = &src[xQ4 >> SubpelBits];
ref Array8<short> xFilter = ref xFilters[xQ4 & SubpelMask];
int k, sum = 0;
for (k = 0; k < SubpelTaps; ++k)
int sum = 0;
for (int k = 0; k < SubpelTaps; ++k)
{
sum += srcX[k] * xFilter[k];
}
@@ -630,6 +629,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
dst[x] = BitUtils.ClipPixelHighbd(BitUtils.RoundPowerOfTwo(sum, FilterBits), bd);
xQ4 += xStepQ4;
}
src += srcStride;
dst += dstStride;
}
@@ -647,25 +647,26 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
int h,
int bd)
{
int x, y;
src -= SubpelTaps / 2 - 1;
src -= (SubpelTaps / 2) - 1;
for (y = 0; y < h; ++y)
for (int y = 0; y < h; ++y)
{
int xQ4 = x0Q4;
for (x = 0; x < w; ++x)
for (int x = 0; x < w; ++x)
{
ushort* srcX = &src[xQ4 >> SubpelBits];
ref Array8<short> xFilter = ref xFilters[xQ4 & SubpelMask];
int k, sum = 0;
for (k = 0; k < SubpelTaps; ++k)
int sum = 0;
for (int k = 0; k < SubpelTaps; ++k)
{
sum += srcX[k] * xFilter[k];
}
dst[x] = (ushort)BitUtils.RoundPowerOfTwo(dst[x] + BitUtils.ClipPixelHighbd(BitUtils.RoundPowerOfTwo(sum, FilterBits), bd), 1);
dst[x] = (ushort)BitUtils.RoundPowerOfTwo(
dst[x] + BitUtils.ClipPixelHighbd(BitUtils.RoundPowerOfTwo(sum, FilterBits), bd), 1);
xQ4 += xStepQ4;
}
src += srcStride;
dst += dstStride;
}
@@ -683,18 +684,17 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
int h,
int bd)
{
int x, y;
src -= srcStride * (SubpelTaps / 2 - 1);
src -= srcStride * ((SubpelTaps / 2) - 1);
for (x = 0; x < w; ++x)
for (int x = 0; x < w; ++x)
{
int yQ4 = y0Q4;
for (y = 0; y < h; ++y)
for (int y = 0; y < h; ++y)
{
ushort* srcY = &src[(yQ4 >> SubpelBits) * srcStride];
ref Array8<short> yFilter = ref yFilters[yQ4 & SubpelMask];
int k, sum = 0;
for (k = 0; k < SubpelTaps; ++k)
int sum = 0;
for (int k = 0; k < SubpelTaps; ++k)
{
sum += srcY[k * srcStride] * yFilter[k];
}
@@ -702,6 +702,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
dst[y * dstStride] = BitUtils.ClipPixelHighbd(BitUtils.RoundPowerOfTwo(sum, FilterBits), bd);
yQ4 += yStepQ4;
}
++src;
++dst;
}
@@ -719,26 +720,27 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
int h,
int bd)
{
int x, y;
src -= srcStride * (SubpelTaps / 2 - 1);
src -= srcStride * ((SubpelTaps / 2) - 1);
for (x = 0; x < w; ++x)
for (int x = 0; x < w; ++x)
{
int yQ4 = y0Q4;
for (y = 0; y < h; ++y)
for (int y = 0; y < h; ++y)
{
ushort* srcY = &src[(yQ4 >> SubpelBits) * srcStride];
ref Array8<short> yFilter = ref yFilters[yQ4 & SubpelMask];
int k, sum = 0;
for (k = 0; k < SubpelTaps; ++k)
int sum = 0;
for (int k = 0; k < SubpelTaps; ++k)
{
sum += srcY[k * srcStride] * yFilter[k];
}
dst[y * dstStride] = (ushort)BitUtils.RoundPowerOfTwo(
dst[y * dstStride] + BitUtils.ClipPixelHighbd(BitUtils.RoundPowerOfTwo(sum, FilterBits), bd), 1);
dst[y * dstStride] + BitUtils.ClipPixelHighbd(BitUtils.RoundPowerOfTwo(sum, FilterBits), bd),
1);
yQ4 += yStepQ4;
}
++src;
++dst;
}
@@ -771,15 +773,17 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
// --Require an additional SubpelTaps rows for the 8-tap filter tails.
// --((64 - 1) * 32 + 15) >> 4 + 8 = 135.
ushort* temp = stackalloc ushort[64 * 135];
int intermediateHeight = (((h - 1) * yStepQ4 + y0Q4) >> SubpelBits) + SubpelTaps;
int intermediateHeight = ((((h - 1) * yStepQ4) + y0Q4) >> SubpelBits) + SubpelTaps;
Debug.Assert(w <= 64);
Debug.Assert(h <= 64);
Debug.Assert(yStepQ4 <= 32);
Debug.Assert(xStepQ4 <= 32);
HighbdConvolveHoriz(src - srcStride * (SubpelTaps / 2 - 1), srcStride, temp, 64, filter, x0Q4, xStepQ4, w, intermediateHeight, bd);
HighbdConvolveVert(temp + 64 * (SubpelTaps / 2 - 1), 64, dst, dstStride, filter, y0Q4, yStepQ4, w, h, bd);
HighbdConvolveHoriz(src - (srcStride * ((SubpelTaps / 2) - 1)), srcStride, temp, 64, filter, x0Q4, xStepQ4,
w, intermediateHeight, bd);
HighbdConvolveVert(temp + (64 * ((SubpelTaps / 2) - 1)), 64, dst, dstStride, filter, y0Q4, yStepQ4, w, h,
bd);
}
public static unsafe void HighbdConvolve8Horiz(
@@ -928,11 +932,9 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
int h,
int bd)
{
int x, y;
for (y = 0; y < h; ++y)
for (int y = 0; y < h; ++y)
{
for (x = 0; x < w; ++x)
for (int x = 0; x < w; ++x)
{
dst[x] = (ushort)BitUtils.RoundPowerOfTwo(dst[x] + src[x], 1);
}
@@ -942,4 +944,4 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
}
}
}
}
}

View File

@@ -9,4 +9,4 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
public const int SubpelShifts = 1 << SubpelBits;
public const int SubpelTaps = 8;
}
}
}

View File

@@ -1,4 +1,4 @@
using Ryujinx.Graphics.Nvdec.Vp9.Common;
using Ryujinx.Graphics.Nvdec.Vp9.Common;
namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
{
@@ -6,22 +6,22 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
{
private static unsafe ref byte Dst(byte* dst, int stride, int x, int y)
{
return ref dst[x + y * stride];
return ref dst[x + (y * stride)];
}
private static unsafe ref ushort Dst(ushort* dst, int stride, int x, int y)
{
return ref dst[x + y * stride];
return ref dst[x + (y * stride)];
}
private static byte Avg3(byte a, byte b, byte c)
{
return (byte)((a + 2 * b + c + 2) >> 2);
return (byte)((a + (2 * b) + c + 2) >> 2);
}
private static ushort Avg3(ushort a, ushort b, ushort c)
{
return (ushort)((a + 2 * b + c + 2) >> 2);
return (ushort)((a + (2 * b) + c + 2) >> 2);
}
private static byte Avg2(byte a, byte b)
@@ -51,9 +51,8 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
private static unsafe void D207Predictor(byte* dst, int stride, int bs, byte* above, byte* left)
{
int r, c;
// First column
for (r = 0; r < bs - 1; ++r)
for (int r = 0; r < bs - 1; ++r)
{
dst[r * stride] = Avg2(left[r], left[r + 1]);
}
@@ -62,7 +61,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
dst++;
// Second column
for (r = 0; r < bs - 2; ++r)
for (int r = 0; r < bs - 2; ++r)
{
dst[r * stride] = Avg3(left[r], left[r + 1], left[r + 2]);
}
@@ -72,16 +71,16 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
dst++;
// Rest of last row
for (c = 0; c < bs - 2; ++c)
for (int c = 0; c < bs - 2; ++c)
{
dst[(bs - 1) * stride + c] = left[bs - 1];
dst[((bs - 1) * stride) + c] = left[bs - 1];
}
for (r = bs - 2; r >= 0; --r)
for (int r = bs - 2; r >= 0; --r)
{
for (c = 0; c < bs - 2; ++c)
for (int c = 0; c < bs - 2; ++c)
{
dst[r * stride + c] = dst[(r + 1) * stride + c - 2];
dst[(r * stride) + c] = dst[((r + 1) * stride) + c - 2];
}
}
}
@@ -103,19 +102,18 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
private static unsafe void D63Predictor(byte* dst, int stride, int bs, byte* above, byte* left)
{
int r, c;
int size;
for (c = 0; c < bs; ++c)
for (int c = 0; c < bs; ++c)
{
dst[c] = Avg2(above[c], above[c + 1]);
dst[stride + c] = Avg3(above[c], above[c + 1], above[c + 2]);
}
for (r = 2, size = bs - 2; r < bs; r += 2, --size)
for (int r = 2, size = bs - 2; r < bs; r += 2, --size)
{
MemoryUtil.Copy(dst + (r + 0) * stride, dst + (r >> 1), size);
MemoryUtil.Fill(dst + (r + 0) * stride + size, above[bs - 1], bs - size);
MemoryUtil.Copy(dst + (r + 1) * stride, dst + stride + (r >> 1), size);
MemoryUtil.Fill(dst + (r + 1) * stride + size, above[bs - 1], bs - size);
MemoryUtil.Copy(dst + ((r + 0) * stride), dst + (r >> 1), size);
MemoryUtil.Fill(dst + ((r + 0) * stride) + size, above[bs - 1], bs - size);
MemoryUtil.Copy(dst + ((r + 1) * stride), dst + stride + (r >> 1), size);
MemoryUtil.Fill(dst + ((r + 1) * stride) + size, above[bs - 1], bs - size);
}
}
@@ -138,15 +136,15 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
{
byte aboveRight = above[bs - 1];
byte* dstRow0 = dst;
int x, size;
for (x = 0; x < bs - 1; ++x)
for (int x = 0; x < bs - 1; ++x)
{
dst[x] = Avg3(above[x], above[x + 1], above[x + 2]);
}
dst[bs - 1] = aboveRight;
dst += stride;
for (x = 1, size = bs - 2; x < bs; ++x, --size)
for (int x = 1, size = bs - 2; x < bs; ++x, --size)
{
MemoryUtil.Copy(dst, dstRow0 + x, size);
MemoryUtil.Fill(dst + size, aboveRight, x + 1);
@@ -171,10 +169,8 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
private static unsafe void D117Predictor(byte* dst, int stride, int bs, byte* above, byte* left)
{
int r, c;
// First row
for (c = 0; c < bs; c++)
for (int c = 0; c < bs; c++)
{
dst[c] = Avg2(above[c - 1], above[c]);
}
@@ -183,7 +179,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
// Second row
dst[0] = Avg3(left[0], above[-1], above[0]);
for (c = 1; c < bs; c++)
for (int c = 1; c < bs; c++)
{
dst[c] = Avg3(above[c - 2], above[c - 1], above[c]);
}
@@ -192,17 +188,17 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
// The rest of first col
dst[0] = Avg3(above[-1], left[0], left[1]);
for (r = 3; r < bs; ++r)
for (int r = 3; r < bs; ++r)
{
dst[(r - 2) * stride] = Avg3(left[r - 3], left[r - 2], left[r - 1]);
}
// The rest of the block
for (r = 2; r < bs; ++r)
for (int r = 2; r < bs; ++r)
{
for (c = 1; c < bs; c++)
for (int c = 1; c < bs; c++)
{
dst[c] = dst[-2 * stride + c - 1];
dst[c] = dst[(-2 * stride) + c - 1];
}
dst += stride;
@@ -226,26 +222,26 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
private static unsafe void D135Predictor(byte* dst, int stride, int bs, byte* above, byte* left)
{
int i;
byte* border = stackalloc byte[32 + 32 - 1]; // outer border from bottom-left to top-right
// Dst(dst, stride, bs, bs - 2)[0], i.e., border starting at bottom-left
for (i = 0; i < bs - 2; ++i)
for (int i = 0; i < bs - 2; ++i)
{
border[i] = Avg3(left[bs - 3 - i], left[bs - 2 - i], left[bs - 1 - i]);
}
border[bs - 2] = Avg3(above[-1], left[0], left[1]);
border[bs - 1] = Avg3(left[0], above[-1], above[0]);
border[bs - 0] = Avg3(above[-1], above[0], above[1]);
// dst[0][2, size), i.e., remaining top border ascending
for (i = 0; i < bs - 2; ++i)
for (int i = 0; i < bs - 2; ++i)
{
border[bs + 1 + i] = Avg3(above[i], above[i + 1], above[i + 2]);
}
for (i = 0; i < bs; ++i)
for (int i = 0; i < bs; ++i)
{
MemoryUtil.Copy(dst + i * stride, border + bs - 1 - i, bs);
MemoryUtil.Copy(dst + (i * stride), border + bs - 1 - i, bs);
}
}
@@ -266,9 +262,8 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
private static unsafe void D153Predictor(byte* dst, int stride, int bs, byte* above, byte* left)
{
int r, c;
dst[0] = Avg2(above[-1], left[0]);
for (r = 1; r < bs; r++)
for (int r = 1; r < bs; r++)
{
dst[r * stride] = Avg2(left[r - 1], left[r]);
}
@@ -277,23 +272,23 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
dst[0] = Avg3(left[0], above[-1], above[0]);
dst[stride] = Avg3(above[-1], left[0], left[1]);
for (r = 2; r < bs; r++)
for (int r = 2; r < bs; r++)
{
dst[r * stride] = Avg3(left[r - 2], left[r - 1], left[r]);
}
dst++;
for (c = 0; c < bs - 2; c++)
for (int c = 0; c < bs - 2; c++)
{
dst[c] = Avg3(above[c - 1], above[c], above[c + 1]);
}
dst += stride;
for (r = 1; r < bs; ++r)
for (int r = 1; r < bs; ++r)
{
for (c = 0; c < bs - 2; c++)
for (int c = 0; c < bs - 2; c++)
{
dst[c] = dst[-stride + c - 2];
}
@@ -324,9 +319,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
private static unsafe void VPredictor(byte* dst, int stride, int bs, byte* above, byte* left)
{
int r;
for (r = 0; r < bs; r++)
for (int r = 0; r < bs; r++)
{
MemoryUtil.Copy(dst, above, bs);
dst += stride;
@@ -355,43 +348,40 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
private static unsafe void HPredictor(byte* dst, int stride, int bs, byte* above, byte* left)
{
int r;
for (r = 0; r < bs; r++)
for (int r = 0; r < bs; r++)
{
MemoryUtil.Fill(dst, left[r], bs);
dst += stride;
}
}
public static unsafe void TMPredictor4x4(byte* dst, int stride, byte* above, byte* left)
public static unsafe void TmPredictor4x4(byte* dst, int stride, byte* above, byte* left)
{
TMPredictor(dst, stride, 4, above, left);
TmPredictor(dst, stride, 4, above, left);
}
public static unsafe void TMPredictor8x8(byte* dst, int stride, byte* above, byte* left)
public static unsafe void TmPredictor8x8(byte* dst, int stride, byte* above, byte* left)
{
TMPredictor(dst, stride, 8, above, left);
TmPredictor(dst, stride, 8, above, left);
}
public static unsafe void TMPredictor16x16(byte* dst, int stride, byte* above, byte* left)
public static unsafe void TmPredictor16x16(byte* dst, int stride, byte* above, byte* left)
{
TMPredictor(dst, stride, 16, above, left);
TmPredictor(dst, stride, 16, above, left);
}
public static unsafe void TMPredictor32x32(byte* dst, int stride, byte* above, byte* left)
public static unsafe void TmPredictor32x32(byte* dst, int stride, byte* above, byte* left)
{
TMPredictor(dst, stride, 32, above, left);
TmPredictor(dst, stride, 32, above, left);
}
private static unsafe void TMPredictor(byte* dst, int stride, int bs, byte* above, byte* left)
private static unsafe void TmPredictor(byte* dst, int stride, int bs, byte* above, byte* left)
{
int r, c;
int yTopLeft = above[-1];
for (r = 0; r < bs; r++)
for (int r = 0; r < bs; r++)
{
for (c = 0; c < bs; c++)
for (int c = 0; c < bs; c++)
{
dst[c] = BitUtils.ClipPixel(left[r] + above[c] - yTopLeft);
}
@@ -422,9 +412,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
private static unsafe void Dc128Predictor(byte* dst, int stride, int bs, byte* above, byte* left)
{
int r;
for (r = 0; r < bs; r++)
for (int r = 0; r < bs; r++)
{
MemoryUtil.Fill(dst, (byte)128, bs);
dst += stride;
@@ -453,16 +441,16 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
private static unsafe void DcLeftPredictor(byte* dst, int stride, int bs, byte* above, byte* left)
{
int i, r, expectedDc, sum = 0;
int expectedDc, sum = 0;
for (i = 0; i < bs; i++)
for (int i = 0; i < bs; i++)
{
sum += left[i];
}
expectedDc = (sum + (bs >> 1)) / bs;
for (r = 0; r < bs; r++)
for (int r = 0; r < bs; r++)
{
MemoryUtil.Fill(dst, (byte)expectedDc, bs);
dst += stride;
@@ -491,16 +479,16 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
private static unsafe void DcTopPredictor(byte* dst, int stride, int bs, byte* above, byte* left)
{
int i, r, expectedDc, sum = 0;
int expectedDc, sum = 0;
for (i = 0; i < bs; i++)
for (int i = 0; i < bs; i++)
{
sum += above[i];
}
expectedDc = (sum + (bs >> 1)) / bs;
for (r = 0; r < bs; r++)
for (int r = 0; r < bs; r++)
{
MemoryUtil.Fill(dst, (byte)expectedDc, bs);
dst += stride;
@@ -529,10 +517,10 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
private static unsafe void DcPredictor(byte* dst, int stride, int bs, byte* above, byte* left)
{
int i, r, expectedDc, sum = 0;
int expectedDc, sum = 0;
int count = 2 * bs;
for (i = 0; i < bs; i++)
for (int i = 0; i < bs; i++)
{
sum += above[i];
sum += left[i];
@@ -540,7 +528,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
expectedDc = (sum + (count >> 1)) / count;
for (r = 0; r < bs; r++)
for (int r = 0; r < bs; r++)
{
MemoryUtil.Fill(dst, (byte)expectedDc, bs);
dst += stride;
@@ -555,10 +543,10 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
byte k = left[2];
byte l = left[3];
MemoryUtil.Fill(dst + stride * 0, Avg3(h, I, j), 4);
MemoryUtil.Fill(dst + stride * 1, Avg3(I, j, k), 4);
MemoryUtil.Fill(dst + stride * 2, Avg3(j, k, l), 4);
MemoryUtil.Fill(dst + stride * 3, Avg3(k, l, l), 4);
MemoryUtil.Fill(dst + (stride * 0), Avg3(h, I, j), 4);
MemoryUtil.Fill(dst + (stride * 1), Avg3(I, j, k), 4);
MemoryUtil.Fill(dst + (stride * 2), Avg3(j, k, l), 4);
MemoryUtil.Fill(dst + (stride * 3), Avg3(k, l, l), 4);
}
public static unsafe void VePredictor4x4(byte* dst, int stride, byte* above, byte* left)
@@ -574,9 +562,9 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
dst[1] = Avg3(I, j, k);
dst[2] = Avg3(j, k, l);
dst[3] = Avg3(k, l, m);
MemoryUtil.Copy(dst + stride * 1, dst, 4);
MemoryUtil.Copy(dst + stride * 2, dst, 4);
MemoryUtil.Copy(dst + stride * 3, dst, 4);
MemoryUtil.Copy(dst + (stride * 1), dst, 4);
MemoryUtil.Copy(dst + (stride * 2), dst, 4);
MemoryUtil.Copy(dst + (stride * 3), dst, 4);
}
public static unsafe void D207Predictor4x4(byte* dst, int stride, byte* above, byte* left)
@@ -591,7 +579,8 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
Dst(dst, stride, 1, 0) = Avg3(I, j, k);
Dst(dst, stride, 3, 0) = Dst(dst, stride, 1, 1) = Avg3(j, k, l);
Dst(dst, stride, 3, 1) = Dst(dst, stride, 1, 2) = Avg3(k, l, l);
Dst(dst, stride, 3, 2) = Dst(dst, stride, 2, 2) = Dst(dst, stride, 0, 3) = Dst(dst, stride, 1, 3) = Dst(dst, stride, 2, 3) = Dst(dst, stride, 3, 3) = l;
Dst(dst, stride, 3, 2) = Dst(dst, stride, 2, 2) = Dst(dst, stride, 0, 3) =
Dst(dst, stride, 1, 3) = Dst(dst, stride, 2, 3) = Dst(dst, stride, 3, 3) = l;
}
public static unsafe void D63Predictor4x4(byte* dst, int stride, byte* above, byte* left)
@@ -616,7 +605,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
Dst(dst, stride, 3, 3) = Avg3(e, f, g); // Differs from vp8
}
public static unsafe void D63ePredictor4x4(byte* dst, int stride, byte* above, byte* left)
public static unsafe void D63EPredictor4x4(byte* dst, int stride, byte* above, byte* left)
{
byte a = above[0];
byte b = above[1];
@@ -652,13 +641,14 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
Dst(dst, stride, 0, 0) = Avg3(a, b, c);
Dst(dst, stride, 1, 0) = Dst(dst, stride, 0, 1) = Avg3(b, c, d);
Dst(dst, stride, 2, 0) = Dst(dst, stride, 1, 1) = Dst(dst, stride, 0, 2) = Avg3(c, d, e);
Dst(dst, stride, 3, 0) = Dst(dst, stride, 2, 1) = Dst(dst, stride, 1, 2) = Dst(dst, stride, 0, 3) = Avg3(d, e, f);
Dst(dst, stride, 3, 0) =
Dst(dst, stride, 2, 1) = Dst(dst, stride, 1, 2) = Dst(dst, stride, 0, 3) = Avg3(d, e, f);
Dst(dst, stride, 3, 1) = Dst(dst, stride, 2, 2) = Dst(dst, stride, 1, 3) = Avg3(e, f, g);
Dst(dst, stride, 3, 2) = Dst(dst, stride, 2, 3) = Avg3(f, g, h);
Dst(dst, stride, 3, 3) = h; // differs from vp8
}
public static unsafe void D45ePredictor4x4(byte* dst, int stride, byte* above, byte* left)
public static unsafe void D45EPredictor4x4(byte* dst, int stride, byte* above, byte* left)
{
byte a = above[0];
byte b = above[1];
@@ -671,7 +661,8 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
Dst(dst, stride, 0, 0) = Avg3(a, b, c);
Dst(dst, stride, 1, 0) = Dst(dst, stride, 0, 1) = Avg3(b, c, d);
Dst(dst, stride, 2, 0) = Dst(dst, stride, 1, 1) = Dst(dst, stride, 0, 2) = Avg3(c, d, e);
Dst(dst, stride, 3, 0) = Dst(dst, stride, 2, 1) = Dst(dst, stride, 1, 2) = Dst(dst, stride, 0, 3) = Avg3(d, e, f);
Dst(dst, stride, 3, 0) =
Dst(dst, stride, 2, 1) = Dst(dst, stride, 1, 2) = Dst(dst, stride, 0, 3) = Avg3(d, e, f);
Dst(dst, stride, 3, 1) = Dst(dst, stride, 2, 2) = Dst(dst, stride, 1, 3) = Avg3(e, f, g);
Dst(dst, stride, 3, 2) = Dst(dst, stride, 2, 3) = Avg3(f, g, h);
Dst(dst, stride, 3, 3) = Avg3(g, h, h);
@@ -714,7 +705,8 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
Dst(dst, stride, 0, 3) = Avg3(j, k, l);
Dst(dst, stride, 1, 3) = Dst(dst, stride, 0, 2) = Avg3(I, j, k);
Dst(dst, stride, 2, 3) = Dst(dst, stride, 1, 2) = Dst(dst, stride, 0, 1) = Avg3(x, I, j);
Dst(dst, stride, 3, 3) = Dst(dst, stride, 2, 2) = Dst(dst, stride, 1, 1) = Dst(dst, stride, 0, 0) = Avg3(a, x, I);
Dst(dst, stride, 3, 3) =
Dst(dst, stride, 2, 2) = Dst(dst, stride, 1, 1) = Dst(dst, stride, 0, 0) = Avg3(a, x, I);
Dst(dst, stride, 3, 2) = Dst(dst, stride, 2, 1) = Dst(dst, stride, 1, 0) = Avg3(b, a, x);
Dst(dst, stride, 3, 1) = Dst(dst, stride, 2, 0) = Avg3(c, b, a);
Dst(dst, stride, 3, 0) = Avg3(d, c, b);
@@ -758,38 +750,39 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
HighbdD207Predictor(dst, stride, 32, above, left, bd);
}
private static unsafe void HighbdD207Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd)
private static unsafe void HighbdD207Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left,
int bd)
{
int r, c;
// First column.
for (r = 0; r < bs - 1; ++r)
for (int r = 0; r < bs - 1; ++r)
{
dst[r * stride] = Avg2(left[r], left[r + 1]);
}
dst[(bs - 1) * stride] = left[bs - 1];
dst++;
// Second column.
for (r = 0; r < bs - 2; ++r)
for (int r = 0; r < bs - 2; ++r)
{
dst[r * stride] = Avg3(left[r], left[r + 1], left[r + 2]);
}
dst[(bs - 2) * stride] = Avg3(left[bs - 2], left[bs - 1], left[bs - 1]);
dst[(bs - 1) * stride] = left[bs - 1];
dst++;
// Rest of last row.
for (c = 0; c < bs - 2; ++c)
for (int c = 0; c < bs - 2; ++c)
{
dst[(bs - 1) * stride + c] = left[bs - 1];
dst[((bs - 1) * stride) + c] = left[bs - 1];
}
for (r = bs - 2; r >= 0; --r)
for (int r = bs - 2; r >= 0; --r)
{
for (c = 0; c < bs - 2; ++c)
for (int c = 0; c < bs - 2; ++c)
{
dst[r * stride + c] = dst[(r + 1) * stride + c - 2];
dst[(r * stride) + c] = dst[((r + 1) * stride) + c - 2];
}
}
}
@@ -809,21 +802,21 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
HighbdD63Predictor(dst, stride, 32, above, left, bd);
}
private static unsafe void HighbdD63Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd)
private static unsafe void HighbdD63Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left,
int bd)
{
int r, c;
int size;
for (c = 0; c < bs; ++c)
for (int c = 0; c < bs; ++c)
{
dst[c] = Avg2(above[c], above[c + 1]);
dst[stride + c] = Avg3(above[c], above[c + 1], above[c + 2]);
}
for (r = 2, size = bs - 2; r < bs; r += 2, --size)
for (int r = 2, size = bs - 2; r < bs; r += 2, --size)
{
MemoryUtil.Copy(dst + (r + 0) * stride, dst + (r >> 1), size);
MemoryUtil.Fill(dst + (r + 0) * stride + size, above[bs - 1], bs - size);
MemoryUtil.Copy(dst + (r + 1) * stride, dst + stride + (r >> 1), size);
MemoryUtil.Fill(dst + (r + 1) * stride + size, above[bs - 1], bs - size);
MemoryUtil.Copy(dst + ((r + 0) * stride), dst + (r >> 1), size);
MemoryUtil.Fill(dst + ((r + 0) * stride) + size, above[bs - 1], bs - size);
MemoryUtil.Copy(dst + ((r + 1) * stride), dst + stride + (r >> 1), size);
MemoryUtil.Fill(dst + ((r + 1) * stride) + size, above[bs - 1], bs - size);
}
}
@@ -842,19 +835,20 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
HighbdD45Predictor(dst, stride, 32, above, left, bd);
}
private static unsafe void HighbdD45Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd)
private static unsafe void HighbdD45Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left,
int bd)
{
ushort aboveRight = above[bs - 1];
ushort* dstRow0 = dst;
int x, size;
for (x = 0; x < bs - 1; ++x)
for (int x = 0; x < bs - 1; ++x)
{
dst[x] = Avg3(above[x], above[x + 1], above[x + 2]);
}
dst[bs - 1] = aboveRight;
dst += stride;
for (x = 1, size = bs - 2; x < bs; ++x, --size)
for (int x = 1, size = bs - 2; x < bs; ++x, --size)
{
MemoryUtil.Copy(dst, dstRow0 + x, size);
MemoryUtil.Fill(dst + size, aboveRight, x + 1);
@@ -877,12 +871,11 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
HighbdD117Predictor(dst, stride, 32, above, left, bd);
}
private static unsafe void HighbdD117Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd)
private static unsafe void HighbdD117Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left,
int bd)
{
int r, c;
// First row
for (c = 0; c < bs; c++)
for (int c = 0; c < bs; c++)
{
dst[c] = Avg2(above[c - 1], above[c]);
}
@@ -891,7 +884,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
// Second row
dst[0] = Avg3(left[0], above[-1], above[0]);
for (c = 1; c < bs; c++)
for (int c = 1; c < bs; c++)
{
dst[c] = Avg3(above[c - 2], above[c - 1], above[c]);
}
@@ -900,17 +893,17 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
// The rest of first col
dst[0] = Avg3(above[-1], left[0], left[1]);
for (r = 3; r < bs; ++r)
for (int r = 3; r < bs; ++r)
{
dst[(r - 2) * stride] = Avg3(left[r - 3], left[r - 2], left[r - 1]);
}
// The rest of the block
for (r = 2; r < bs; ++r)
for (int r = 2; r < bs; ++r)
{
for (c = 1; c < bs; c++)
for (int c = 1; c < bs; c++)
{
dst[c] = dst[-2 * stride + c - 1];
dst[c] = dst[(-2 * stride) + c - 1];
}
dst += stride;
@@ -932,28 +925,29 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
HighbdD135Predictor(dst, stride, 32, above, left, bd);
}
private static unsafe void HighbdD135Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd)
private static unsafe void HighbdD135Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left,
int bd)
{
int i;
ushort* border = stackalloc ushort[32 + 32 - 1]; // Outer border from bottom-left to top-right
// Dst(dst, stride, bs, bs - 2)[0], i.e., border starting at bottom-left
for (i = 0; i < bs - 2; ++i)
for (int i = 0; i < bs - 2; ++i)
{
border[i] = Avg3(left[bs - 3 - i], left[bs - 2 - i], left[bs - 1 - i]);
}
border[bs - 2] = Avg3(above[-1], left[0], left[1]);
border[bs - 1] = Avg3(left[0], above[-1], above[0]);
border[bs - 0] = Avg3(above[-1], above[0], above[1]);
// dst[0][2, size), i.e., remaining top border ascending
for (i = 0; i < bs - 2; ++i)
for (int i = 0; i < bs - 2; ++i)
{
border[bs + 1 + i] = Avg3(above[i], above[i + 1], above[i + 2]);
}
for (i = 0; i < bs; ++i)
for (int i = 0; i < bs; ++i)
{
MemoryUtil.Copy(dst + i * stride, border + bs - 1 - i, bs);
MemoryUtil.Copy(dst + (i * stride), border + bs - 1 - i, bs);
}
}
@@ -972,11 +966,11 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
HighbdD153Predictor(dst, stride, 32, above, left, bd);
}
private static unsafe void HighbdD153Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd)
private static unsafe void HighbdD153Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left,
int bd)
{
int r, c;
dst[0] = Avg2(above[-1], left[0]);
for (r = 1; r < bs; r++)
for (int r = 1; r < bs; r++)
{
dst[r * stride] = Avg2(left[r - 1], left[r]);
}
@@ -985,23 +979,23 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
dst[0] = Avg3(left[0], above[-1], above[0]);
dst[stride] = Avg3(above[-1], left[0], left[1]);
for (r = 2; r < bs; r++)
for (int r = 2; r < bs; r++)
{
dst[r * stride] = Avg3(left[r - 2], left[r - 1], left[r]);
}
dst++;
for (c = 0; c < bs - 2; c++)
for (int c = 0; c < bs - 2; c++)
{
dst[c] = Avg3(above[c - 1], above[c], above[c + 1]);
}
dst += stride;
for (r = 1; r < bs; ++r)
for (int r = 1; r < bs; ++r)
{
for (c = 0; c < bs - 2; c++)
for (int c = 0; c < bs - 2; c++)
{
dst[c] = dst[-stride + c - 2];
}
@@ -1030,10 +1024,10 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
HighbdVPredictor(dst, stride, 32, above, left, bd);
}
private static unsafe void HighbdVPredictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd)
private static unsafe void HighbdVPredictor(ushort* dst, int stride, int bs, ushort* above, ushort* left,
int bd)
{
int r;
for (r = 0; r < bs; r++)
for (int r = 0; r < bs; r++)
{
MemoryUtil.Copy(dst, above, bs);
dst += stride;
@@ -1060,44 +1054,44 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
HighbdHPredictor(dst, stride, 32, above, left, bd);
}
private static unsafe void HighbdHPredictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd)
private static unsafe void HighbdHPredictor(ushort* dst, int stride, int bs, ushort* above, ushort* left,
int bd)
{
int r;
for (r = 0; r < bs; r++)
for (int r = 0; r < bs; r++)
{
MemoryUtil.Fill(dst, left[r], bs);
dst += stride;
}
}
public static unsafe void HighbdTMPredictor4x4(ushort* dst, int stride, ushort* above, ushort* left, int bd)
public static unsafe void HighbdTmPredictor4x4(ushort* dst, int stride, ushort* above, ushort* left, int bd)
{
HighbdTMPredictor(dst, stride, 4, above, left, bd);
HighbdTmPredictor(dst, stride, 4, above, left, bd);
}
public static unsafe void HighbdTMPredictor8x8(ushort* dst, int stride, ushort* above, ushort* left, int bd)
public static unsafe void HighbdTmPredictor8x8(ushort* dst, int stride, ushort* above, ushort* left, int bd)
{
HighbdTMPredictor(dst, stride, 8, above, left, bd);
HighbdTmPredictor(dst, stride, 8, above, left, bd);
}
public static unsafe void HighbdTMPredictor16x16(ushort* dst, int stride, ushort* above, ushort* left, int bd)
public static unsafe void HighbdTmPredictor16x16(ushort* dst, int stride, ushort* above, ushort* left, int bd)
{
HighbdTMPredictor(dst, stride, 16, above, left, bd);
HighbdTmPredictor(dst, stride, 16, above, left, bd);
}
public static unsafe void HighbdTMPredictor32x32(ushort* dst, int stride, ushort* above, ushort* left, int bd)
public static unsafe void HighbdTmPredictor32x32(ushort* dst, int stride, ushort* above, ushort* left, int bd)
{
HighbdTMPredictor(dst, stride, 32, above, left, bd);
HighbdTmPredictor(dst, stride, 32, above, left, bd);
}
private static unsafe void HighbdTMPredictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd)
private static unsafe void HighbdTmPredictor(ushort* dst, int stride, int bs, ushort* above, ushort* left,
int bd)
{
int r, c;
int yTopLeft = above[-1];
for (r = 0; r < bs; r++)
for (int r = 0; r < bs; r++)
{
for (c = 0; c < bs; c++)
for (int c = 0; c < bs; c++)
{
dst[c] = BitUtils.ClipPixelHighbd(left[r] + above[c] - yTopLeft, bd);
}
@@ -1116,21 +1110,22 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
HighbdDc128Predictor(dst, stride, 8, above, left, bd);
}
public static unsafe void HighbdDc128Predictor16x16(ushort* dst, int stride, ushort* above, ushort* left, int bd)
public static unsafe void HighbdDc128Predictor16x16(ushort* dst, int stride, ushort* above, ushort* left,
int bd)
{
HighbdDc128Predictor(dst, stride, 16, above, left, bd);
}
public static unsafe void HighbdDc128Predictor32x32(ushort* dst, int stride, ushort* above, ushort* left, int bd)
public static unsafe void HighbdDc128Predictor32x32(ushort* dst, int stride, ushort* above, ushort* left,
int bd)
{
HighbdDc128Predictor(dst, stride, 32, above, left, bd);
}
private static unsafe void HighbdDc128Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd)
private static unsafe void HighbdDc128Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left,
int bd)
{
int r;
for (r = 0; r < bs; r++)
for (int r = 0; r < bs; r++)
{
MemoryUtil.Fill(dst, (ushort)(128 << (bd - 8)), bs);
dst += stride;
@@ -1147,28 +1142,31 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
HighbdDcLeftPredictor(dst, stride, 8, above, left, bd);
}
public static unsafe void HighbdDcLeftPredictor16x16(ushort* dst, int stride, ushort* above, ushort* left, int bd)
public static unsafe void HighbdDcLeftPredictor16x16(ushort* dst, int stride, ushort* above, ushort* left,
int bd)
{
HighbdDcLeftPredictor(dst, stride, 16, above, left, bd);
}
public static unsafe void HighbdDcLeftPredictor32x32(ushort* dst, int stride, ushort* above, ushort* left, int bd)
public static unsafe void HighbdDcLeftPredictor32x32(ushort* dst, int stride, ushort* above, ushort* left,
int bd)
{
HighbdDcLeftPredictor(dst, stride, 32, above, left, bd);
}
private static unsafe void HighbdDcLeftPredictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd)
private static unsafe void HighbdDcLeftPredictor(ushort* dst, int stride, int bs, ushort* above, ushort* left,
int bd)
{
int i, r, expectedDc, sum = 0;
int expectedDc, sum = 0;
for (i = 0; i < bs; i++)
for (int i = 0; i < bs; i++)
{
sum += left[i];
}
expectedDc = (sum + (bs >> 1)) / bs;
for (r = 0; r < bs; r++)
for (int r = 0; r < bs; r++)
{
MemoryUtil.Fill(dst, (ushort)expectedDc, bs);
dst += stride;
@@ -1185,28 +1183,31 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
HighbdDcTopPredictor(dst, stride, 8, above, left, bd);
}
public static unsafe void HighbdDcTopPredictor16x16(ushort* dst, int stride, ushort* above, ushort* left, int bd)
public static unsafe void HighbdDcTopPredictor16x16(ushort* dst, int stride, ushort* above, ushort* left,
int bd)
{
HighbdDcTopPredictor(dst, stride, 16, above, left, bd);
}
public static unsafe void HighbdDcTopPredictor32x32(ushort* dst, int stride, ushort* above, ushort* left, int bd)
public static unsafe void HighbdDcTopPredictor32x32(ushort* dst, int stride, ushort* above, ushort* left,
int bd)
{
HighbdDcTopPredictor(dst, stride, 32, above, left, bd);
}
private static unsafe void HighbdDcTopPredictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd)
private static unsafe void HighbdDcTopPredictor(ushort* dst, int stride, int bs, ushort* above, ushort* left,
int bd)
{
int i, r, expectedDc, sum = 0;
int expectedDc, sum = 0;
for (i = 0; i < bs; i++)
for (int i = 0; i < bs; i++)
{
sum += above[i];
}
expectedDc = (sum + (bs >> 1)) / bs;
for (r = 0; r < bs; r++)
for (int r = 0; r < bs; r++)
{
MemoryUtil.Fill(dst, (ushort)expectedDc, bs);
dst += stride;
@@ -1233,12 +1234,13 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
HighbdDcPredictor(dst, stride, 32, above, left, bd);
}
private static unsafe void HighbdDcPredictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd)
private static unsafe void HighbdDcPredictor(ushort* dst, int stride, int bs, ushort* above, ushort* left,
int bd)
{
int i, r, expectedDc, sum = 0;
int expectedDc, sum = 0;
int count = 2 * bs;
for (i = 0; i < bs; i++)
for (int i = 0; i < bs; i++)
{
sum += above[i];
sum += left[i];
@@ -1246,7 +1248,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
expectedDc = (sum + (count >> 1)) / count;
for (r = 0; r < bs; r++)
for (int r = 0; r < bs; r++)
{
MemoryUtil.Fill(dst, (ushort)expectedDc, bs);
dst += stride;
@@ -1265,7 +1267,8 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
Dst(dst, stride, 1, 0) = Avg3(I, j, k);
Dst(dst, stride, 3, 0) = Dst(dst, stride, 1, 1) = Avg3(j, k, l);
Dst(dst, stride, 3, 1) = Dst(dst, stride, 1, 2) = Avg3(k, l, l);
Dst(dst, stride, 3, 2) = Dst(dst, stride, 2, 2) = Dst(dst, stride, 0, 3) = Dst(dst, stride, 1, 3) = Dst(dst, stride, 2, 3) = Dst(dst, stride, 3, 3) = l;
Dst(dst, stride, 3, 2) = Dst(dst, stride, 2, 2) = Dst(dst, stride, 0, 3) =
Dst(dst, stride, 1, 3) = Dst(dst, stride, 2, 3) = Dst(dst, stride, 3, 3) = l;
}
public static unsafe void HighbdD63Predictor4x4(ushort* dst, int stride, ushort* above, ushort* left, int bd)
@@ -1303,7 +1306,8 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
Dst(dst, stride, 0, 0) = Avg3(a, b, c);
Dst(dst, stride, 1, 0) = Dst(dst, stride, 0, 1) = Avg3(b, c, d);
Dst(dst, stride, 2, 0) = Dst(dst, stride, 1, 1) = Dst(dst, stride, 0, 2) = Avg3(c, d, e);
Dst(dst, stride, 3, 0) = Dst(dst, stride, 2, 1) = Dst(dst, stride, 1, 2) = Dst(dst, stride, 0, 3) = Avg3(d, e, f);
Dst(dst, stride, 3, 0) =
Dst(dst, stride, 2, 1) = Dst(dst, stride, 1, 2) = Dst(dst, stride, 0, 3) = Avg3(d, e, f);
Dst(dst, stride, 3, 1) = Dst(dst, stride, 2, 2) = Dst(dst, stride, 1, 3) = Avg3(e, f, g);
Dst(dst, stride, 3, 2) = Dst(dst, stride, 2, 3) = Avg3(f, g, h);
Dst(dst, stride, 3, 3) = h; // Differs from vp8
@@ -1346,7 +1350,8 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
Dst(dst, stride, 0, 3) = Avg3(j, k, l);
Dst(dst, stride, 1, 3) = Dst(dst, stride, 0, 2) = Avg3(I, j, k);
Dst(dst, stride, 2, 3) = Dst(dst, stride, 1, 2) = Dst(dst, stride, 0, 1) = Avg3(x, I, j);
Dst(dst, stride, 3, 3) = Dst(dst, stride, 2, 2) = Dst(dst, stride, 1, 1) = Dst(dst, stride, 0, 0) = Avg3(a, x, I);
Dst(dst, stride, 3, 3) =
Dst(dst, stride, 2, 2) = Dst(dst, stride, 1, 1) = Dst(dst, stride, 0, 0) = Avg3(a, x, I);
Dst(dst, stride, 3, 2) = Dst(dst, stride, 2, 1) = Dst(dst, stride, 1, 0) = Avg3(b, a, x);
Dst(dst, stride, 3, 1) = Dst(dst, stride, 2, 0) = Avg3(c, b, a);
Dst(dst, stride, 3, 0) = Avg3(d, c, b);
@@ -1376,4 +1381,4 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
Dst(dst, stride, 1, 3) = Avg3(l, k, j);
}
}
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,229 @@
using Ryujinx.Common.Memory;
using System;
using System.Runtime.Intrinsics.X86;
namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
{
internal class LoopFilterAuto
{
public static void LpfHorizontal4(
ArrayPtr<byte> s,
int pitch,
ReadOnlySpan<byte> blimit,
ReadOnlySpan<byte> limit,
ReadOnlySpan<byte> thresh)
{
if (Sse2.IsSupported)
{
LoopFilterSse2.LpfHorizontal4(s, pitch, blimit, limit, thresh);
}
else
{
LoopFilterScalar.LpfHorizontal4(s, pitch, blimit[0], limit[0], thresh[0]);
}
}
public static void LpfHorizontal4Dual(
ArrayPtr<byte> s,
int pitch,
ReadOnlySpan<byte> blimit0,
ReadOnlySpan<byte> limit0,
ReadOnlySpan<byte> thresh0,
ReadOnlySpan<byte> blimit1,
ReadOnlySpan<byte> limit1,
ReadOnlySpan<byte> thresh1)
{
if (Sse2.IsSupported)
{
LoopFilterSse2.LpfHorizontal4Dual(s, pitch, blimit0, limit0, thresh0, blimit1, limit1, thresh1);
}
else
{
LoopFilterScalar.LpfHorizontal4Dual(s, pitch, blimit0[0], limit0[0], thresh0[0], blimit1[0], limit1[0],
thresh1[0]);
}
}
public static void LpfHorizontal8(
ArrayPtr<byte> s,
int pitch,
ReadOnlySpan<byte> blimit,
ReadOnlySpan<byte> limit,
ReadOnlySpan<byte> thresh)
{
if (Sse2.IsSupported)
{
LoopFilterSse2.LpfHorizontal8(s, pitch, blimit, limit, thresh);
}
else
{
LoopFilterScalar.LpfHorizontal8(s, pitch, blimit[0], limit[0], thresh[0]);
}
}
public static void LpfHorizontal8Dual(
ArrayPtr<byte> s,
int pitch,
ReadOnlySpan<byte> blimit0,
ReadOnlySpan<byte> limit0,
ReadOnlySpan<byte> thresh0,
ReadOnlySpan<byte> blimit1,
ReadOnlySpan<byte> limit1,
ReadOnlySpan<byte> thresh1)
{
if (Sse2.IsSupported)
{
LoopFilterSse2.LpfHorizontal8Dual(s, pitch, blimit0, limit0, thresh0, blimit1, limit1, thresh1);
}
else
{
LoopFilterScalar.LpfHorizontal8Dual(s, pitch, blimit0[0], limit0[0], thresh0[0], blimit1[0], limit1[0],
thresh1[0]);
}
}
public static void LpfHorizontal16(
ArrayPtr<byte> s,
int pitch,
ReadOnlySpan<byte> blimit,
ReadOnlySpan<byte> limit,
ReadOnlySpan<byte> thresh)
{
if (Sse2.IsSupported)
{
LoopFilterSse2.LpfHorizontal16(s, pitch, blimit, limit, thresh);
}
else
{
LoopFilterScalar.LpfHorizontal16(s, pitch, blimit[0], limit[0], thresh[0]);
}
}
public static void LpfHorizontal16Dual(
ArrayPtr<byte> s,
int pitch,
ReadOnlySpan<byte> blimit,
ReadOnlySpan<byte> limit,
ReadOnlySpan<byte> thresh)
{
if (Sse2.IsSupported)
{
LoopFilterSse2.LpfHorizontal16Dual(s, pitch, blimit, limit, thresh);
}
else
{
LoopFilterScalar.LpfHorizontal16Dual(s, pitch, blimit[0], limit[0], thresh[0]);
}
}
public static void LpfVertical4(
ArrayPtr<byte> s,
int pitch,
ReadOnlySpan<byte> blimit,
ReadOnlySpan<byte> limit,
ReadOnlySpan<byte> thresh)
{
if (Sse2.IsSupported)
{
LoopFilterSse2.LpfVertical4(s, pitch, blimit, limit, thresh);
}
else
{
LoopFilterScalar.LpfVertical4(s, pitch, blimit[0], limit[0], thresh[0]);
}
}
public static void LpfVertical4Dual(
ArrayPtr<byte> s,
int pitch,
ReadOnlySpan<byte> blimit0,
ReadOnlySpan<byte> limit0,
ReadOnlySpan<byte> thresh0,
ReadOnlySpan<byte> blimit1,
ReadOnlySpan<byte> limit1,
ReadOnlySpan<byte> thresh1)
{
if (Sse2.IsSupported)
{
LoopFilterSse2.LpfVertical4Dual(s, pitch, blimit0, limit0, thresh0, blimit1, limit1, thresh1);
}
else
{
LoopFilterScalar.LpfVertical4Dual(s, pitch, blimit0[0], limit0[0], thresh0[0], blimit1[0], limit1[0],
thresh1[0]);
}
}
public static void LpfVertical8(
ArrayPtr<byte> s,
int pitch,
ReadOnlySpan<byte> blimit,
ReadOnlySpan<byte> limit,
ReadOnlySpan<byte> thresh)
{
if (Sse2.IsSupported)
{
LoopFilterSse2.LpfVertical8(s, pitch, blimit, limit, thresh);
}
else
{
LoopFilterScalar.LpfVertical8(s, pitch, blimit[0], limit[0], thresh[0]);
}
}
public static void LpfVertical8Dual(
ArrayPtr<byte> s,
int pitch,
ReadOnlySpan<byte> blimit0,
ReadOnlySpan<byte> limit0,
ReadOnlySpan<byte> thresh0,
ReadOnlySpan<byte> blimit1,
ReadOnlySpan<byte> limit1,
ReadOnlySpan<byte> thresh1)
{
if (Sse2.IsSupported)
{
LoopFilterSse2.LpfVertical8Dual(s, pitch, blimit0, limit0, thresh0, blimit1, limit1, thresh1);
}
else
{
LoopFilterScalar.LpfVertical8Dual(s, pitch, blimit0[0], limit0[0], thresh0[0], blimit1[0], limit1[0],
thresh1[0]);
}
}
public static void LpfVertical16(
ArrayPtr<byte> s,
int pitch,
ReadOnlySpan<byte> blimit,
ReadOnlySpan<byte> limit,
ReadOnlySpan<byte> thresh)
{
if (Sse2.IsSupported)
{
LoopFilterSse2.LpfVertical16(s, pitch, blimit, limit, thresh);
}
else
{
LoopFilterScalar.LpfVertical16(s, pitch, blimit[0], limit[0], thresh[0]);
}
}
public static void LpfVertical16Dual(
ArrayPtr<byte> s,
int pitch,
ReadOnlySpan<byte> blimit,
ReadOnlySpan<byte> limit,
ReadOnlySpan<byte> thresh)
{
if (Sse2.IsSupported)
{
LoopFilterSse2.LpfVertical16Dual(s, pitch, blimit, limit, thresh);
}
else
{
LoopFilterScalar.LpfVertical16Dual(s, pitch, blimit[0], limit[0], thresh[0]);
}
}
}
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,4 +1,4 @@
using Ryujinx.Graphics.Nvdec.Vp9.Common;
using Ryujinx.Graphics.Nvdec.Vp9.Common;
using System;
using System.Diagnostics;
@@ -12,10 +12,9 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
{
Debug.Assert(den != 0);
{
int p = (int)(((ulong)num * 256 + (den >> 1)) / den);
int p = (int)((((ulong)num * 256) + (den >> 1)) / den);
// (p > 255) ? 255 : (p < 1) ? 1 : p;
int clippedProb = p | ((255 - p) >> 23) | (p == 0 ? 1 : 0);
return (byte)clippedProb;
}
}
@@ -23,13 +22,13 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
/* This function assumes prob1 and prob2 are already within [1,255] range. */
public static byte WeightedProb(int prob1, int prob2, int factor)
{
return (byte)BitUtils.RoundPowerOfTwo(prob1 * (256 - factor) + prob2 * factor, 8);
return (byte)BitUtils.RoundPowerOfTwo((prob1 * (256 - factor)) + (prob2 * factor), 8);
}
// MODE_MV_MAX_UPDATE_FACTOR (128) * count / MODE_MV_COUNT_SAT;
private static readonly uint[] _countToUpdateFactor = {
0, 6, 12, 19, 25, 32, 38, 44, 51, 57, 64,
70, 76, 83, 89, 96, 102, 108, 115, 121, 128,
private static readonly uint[] CountToUpdateFactor =
{
0, 6, 12, 19, 25, 32, 38, 44, 51, 57, 64, 70, 76, 83, 89, 96, 102, 108, 115, 121, 128
};
private const int ModeMvCountSat = 20;
@@ -41,14 +40,11 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
{
return preProb;
}
else
{
uint count = Math.Min(den, ModeMvCountSat);
uint factor = _countToUpdateFactor[(int)count];
byte prob = GetProb(ct0, den);
return WeightedProb(preProb, prob, (int)factor);
}
uint count = Math.Min(den, ModeMvCountSat);
uint factor = CountToUpdateFactor[(int)count];
byte prob = GetProb(ct0, den);
return WeightedProb(preProb, prob, (int)factor);
}
private static uint TreeMergeProbsImpl(
@@ -59,17 +55,17 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
Span<byte> probs)
{
int l = tree[i];
uint leftCount = (l <= 0) ? counts[-l] : TreeMergeProbsImpl((uint)l, tree, preProbs, counts, probs);
uint leftCount = l <= 0 ? counts[-l] : TreeMergeProbsImpl((uint)l, tree, preProbs, counts, probs);
int r = tree[i + 1];
uint rightCount = (r <= 0) ? counts[-r] : TreeMergeProbsImpl((uint)r, tree, preProbs, counts, probs);
uint rightCount = r <= 0 ? counts[-r] : TreeMergeProbsImpl((uint)r, tree, preProbs, counts, probs);
probs[(int)(i >> 1)] = ModeMvMergeProbs(preProbs[(int)(i >> 1)], leftCount, rightCount);
return leftCount + rightCount;
}
public static void TreeMergeProbs(sbyte[] tree, ReadOnlySpan<byte> preProbs, ReadOnlySpan<uint> counts, Span<byte> probs)
public static void TreeMergeProbs(sbyte[] tree, ReadOnlySpan<byte> preProbs, ReadOnlySpan<uint> counts,
Span<byte> probs)
{
TreeMergeProbsImpl(0, tree, preProbs, counts, probs);
}
}
}
}

View File

@@ -1,4 +1,5 @@
using Ryujinx.Common.Memory;
using Ryujinx.Common.Memory;
using Ryujinx.Graphics.Nvdec.Vp9.Types;
using System;
using System.Buffers.Binary;
@@ -6,18 +7,18 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
{
internal struct Reader
{
private static readonly byte[] _norm = {
0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
private static readonly byte[] Norm =
{
0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
private const int BdValueSize = sizeof(ulong) * 8;
// This is meant to be a large, positive constant that can still be efficiently
@@ -36,16 +37,13 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
{
return true;
}
else
{
_buffer = new ArrayPtr<byte>(ref buffer[0], size);
Value = 0;
Count = -8;
Range = 255;
Fill();
return ReadBit() != 0; // Marker bit
}
_buffer = new ArrayPtr<byte>(ref buffer[0], size);
Value = 0;
Count = -8;
Range = 255;
Fill();
return ReadBit() != 0; // Marker bit
}
private void Fill()
@@ -65,7 +63,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
ulong bigEndianValues = BinaryPrimitives.ReadUInt64BigEndian(buffer);
nv = bigEndianValues >> (BdValueSize - bits);
count += bits;
buffer = buffer[(bits >> 3)..];
buffer = buffer.Slice(bits >> 3);
value = Value | (nv << (shift & 0x7));
}
else
@@ -84,7 +82,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
{
count += 8;
value |= (ulong)buffer[0] << shift;
buffer = buffer[1..];
buffer = buffer.Slice(1);
shift -= 8;
}
}
@@ -98,7 +96,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
Count = count;
}
public readonly bool HasError()
public bool HasError()
{
// Check if we have reached the end of the buffer.
//
@@ -124,7 +122,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
ulong bigsplit;
int count;
uint range;
uint split = (Range * (uint)prob + (256 - (uint)prob)) >> 8;
uint split = ((Range * (uint)prob) + (256 - (uint)prob)) >> 8;
if (Count < 0)
{
@@ -146,7 +144,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
}
{
int shift = _norm[range];
int shift = Norm[range];
range <<= shift;
value <<= shift;
count -= shift;
@@ -188,7 +186,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
public int ReadBool(int prob, ref ulong value, ref int count, ref uint range)
{
uint split = (range * (uint)prob + (256 - (uint)prob)) >> 8;
uint split = ((range * (uint)prob) + (256 - (uint)prob)) >> 8;
ulong bigsplit = (ulong)split << (BdValueSize - 8);
if (count < 0)
@@ -202,19 +200,20 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
if (value >= bigsplit)
{
range -= split;
value -= bigsplit;
range = range - split;
value = value - bigsplit;
{
int shift = _norm[range];
int shift = Norm[range];
range <<= shift;
value <<= shift;
count -= shift;
}
return 1;
}
range = split;
{
int shift = _norm[range];
int shift = Norm[range];
range <<= shift;
value <<= shift;
count -= shift;
@@ -230,7 +229,82 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
Count -= 8;
_buffer = _buffer.Slice(-1);
}
return _buffer;
}
private int DecodeUniform()
{
const int l = 8;
const int m = (1 << l) - 191;
int v = ReadLiteral(l - 1);
return v < m ? v : (v << 1) - m + ReadBit();
}
public int DecodeTermSubexp()
{
if (ReadBit() == 0)
{
return ReadLiteral(4);
}
if (ReadBit() == 0)
{
return ReadLiteral(4) + 16;
}
if (ReadBit() == 0)
{
return ReadLiteral(5) + 32;
}
return DecodeUniform() + 64;
}
public TxMode ReadTxMode()
{
TxMode txMode = (TxMode)ReadLiteral(2);
if (txMode == TxMode.Allow32x32)
{
txMode += ReadBit();
}
return txMode;
}
public int ReadCoeff(
ReadOnlySpan<byte> probs,
int n,
ref ulong value,
ref int count,
ref uint range)
{
int val = 0;
for (int i = 0; i < n; ++i)
{
val = (val << 1) | ReadBool(probs[i], ref value, ref count, ref range);
}
return val;
}
public void DiffUpdateProb(ref byte p)
{
if (Read(Entropy.DiffUpdateProb) != 0)
{
p = (byte)DSubExp.InvRemapProb(DecodeTermSubexp(), p);
}
}
public void UpdateMvProbs(Span<byte> p, int n)
{
for (int i = 0; i < n; ++i)
{
if (Read(EntropyMv.UpdateProb) != 0)
{
p[i] = (byte)((ReadLiteral(7) << 1) | 1);
}
}
}
}
}
}

View File

@@ -13,42 +13,42 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
// for (int i = 1; i < 32; ++i)
// Console.WriteLine("public const short CosPi{0}_64 = {1};", i, MathF.Round(16384 * MathF.Cos(i * MathF.PI / 64)));
// Note: sin(k * Pi / 64) = cos((32 - k) * Pi / 64)
public const short CosPi1_64 = 16364;
public const short CosPi2_64 = 16305;
public const short CosPi3_64 = 16207;
public const short CosPi4_64 = 16069;
public const short CosPi5_64 = 15893;
public const short CosPi6_64 = 15679;
public const short CosPi7_64 = 15426;
public const short CosPi8_64 = 15137;
public const short CosPi9_64 = 14811;
public const short CosPi10_64 = 14449;
public const short CosPi11_64 = 14053;
public const short CosPi12_64 = 13623;
public const short CosPi13_64 = 13160;
public const short CosPi14_64 = 12665;
public const short CosPi15_64 = 12140;
public const short CosPi16_64 = 11585;
public const short CosPi17_64 = 11003;
public const short CosPi18_64 = 10394;
public const short CosPi19_64 = 9760;
public const short CosPi20_64 = 9102;
public const short CosPi21_64 = 8423;
public const short CosPi22_64 = 7723;
public const short CosPi23_64 = 7005;
public const short CosPi24_64 = 6270;
public const short CosPi25_64 = 5520;
public const short CosPi26_64 = 4756;
public const short CosPi27_64 = 3981;
public const short CosPi28_64 = 3196;
public const short CosPi29_64 = 2404;
public const short CosPi30_64 = 1606;
public const short CosPi31_64 = 804;
public const short CosPi164 = 16364;
public const short CosPi264 = 16305;
public const short CosPi364 = 16207;
public const short CosPi464 = 16069;
public const short CosPi564 = 15893;
public const short CosPi664 = 15679;
public const short CosPi764 = 15426;
public const short CosPi864 = 15137;
public const short CosPi964 = 14811;
public const short CosPi1064 = 14449;
public const short CosPi1164 = 14053;
public const short CosPi1264 = 13623;
public const short CosPi1364 = 13160;
public const short CosPi1464 = 12665;
public const short CosPi1564 = 12140;
public const short CosPi1664 = 11585;
public const short CosPi1764 = 11003;
public const short CosPi1864 = 10394;
public const short CosPi1964 = 9760;
public const short CosPi2064 = 9102;
public const short CosPi2164 = 8423;
public const short CosPi2264 = 7723;
public const short CosPi2364 = 7005;
public const short CosPi2464 = 6270;
public const short CosPi2564 = 5520;
public const short CosPi2664 = 4756;
public const short CosPi2764 = 3981;
public const short CosPi2864 = 3196;
public const short CosPi2964 = 2404;
public const short CosPi3064 = 1606;
public const short CosPi3164 = 804;
// 16384 * sqrt(2) * sin(kPi / 9) * 2 / 3
public const short SinPi1_9 = 5283;
public const short SinPi2_9 = 9929;
public const short SinPi3_9 = 13377;
public const short SinPi4_9 = 15212;
public const short SinPi19 = 5283;
public const short SinPi29 = 9929;
public const short SinPi39 = 13377;
public const short SinPi49 = 15212;
}
}
}