Implement VP9 loop filtering (#550)
Unmerged PR from OG Ryujinx (#4367).

From @gdkchan:

> The main goal of this change is porting the loop filtering from libvpx, which should fix the block artifacts on some VP9 videos on games using NVDEC to decode them. In addition to that, there are two other changes:
>
> - The remaining decoder code required to decode a VP9 video (with headers included) has been added. That was done because it's much better to test the decoder standalone with a video file. I decided to keep that code on the emulator, even if some of it is unused, since it makes standalone testing easier in the future too, and we can include unit tests with video files.
> - Large refactoring of both new and existing code to conform with our conding [sic] styles, done by @TSRBerry (thanks!) Some of it has been automated.
>
> Since we had no loop filtering before, this change will make video decoding slower. That may cause frame drop etc if the decoder is not fast enough in some games. I plan to optimize the decoder more in the future to make up for that, but if possible I'd prefer to not do it as part of this PR, but if the perf loss is too severe I might consider.
>
> This will need to be tested on games that had the block artifacts, it would be nice to confirm if they match hardware now, and get some before/after screenshots etc.

Comment from @Bjorn29512:

> Significantly improves the block artifacts in FE: Engage.
>
> Before:
>
> After:

---------

Co-authored-by: gdkchan <gab.dark.100@gmail.com>
Co-authored-by: TSR Berry <20988865+TSRBerry@users.noreply.github.com>
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
using Ryujinx.Common.Memory;
|
||||
using Ryujinx.Common.Memory;
|
||||
using Ryujinx.Graphics.Nvdec.Vp9.Common;
|
||||
using System.Diagnostics;
|
||||
using System.Runtime.CompilerServices;
|
||||
@@ -75,17 +75,16 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
Vector128<int> zero = Vector128<int>.Zero;
|
||||
Vector128<int> const64 = Vector128.Create(64);
|
||||
|
||||
ulong x, y;
|
||||
src -= SubpelTaps / 2 - 1;
|
||||
src -= (SubpelTaps / 2) - 1;
|
||||
|
||||
fixed (Array8<short>* xFilter = xFilters)
|
||||
{
|
||||
Vector128<short> vfilter = Sse2.LoadVector128((short*)xFilter + (uint)(x0Q4 & SubpelMask) * 8);
|
||||
Vector128<short> vfilter = Sse2.LoadVector128((short*)xFilter + ((uint)(x0Q4 & SubpelMask) * 8));
|
||||
|
||||
for (y = 0; y < (uint)h; ++y)
|
||||
for (ulong y = 0; y < (uint)h; ++y)
|
||||
{
|
||||
ulong srcOffset = (uint)x0Q4 >> SubpelBits;
|
||||
for (x = 0; x < (uint)w; x += 4)
|
||||
for (ulong x = 0; x < (uint)w; x += 4)
|
||||
{
|
||||
Vector128<short> vsrc0 = Sse41.ConvertToVector128Int16(&src[srcOffset + x]);
|
||||
Vector128<short> vsrc1 = Sse41.ConvertToVector128Int16(&src[srcOffset + x + 1]);
|
||||
@@ -94,8 +93,10 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
|
||||
Vector128<int> sum0123 = MultiplyAddAdjacent(vsrc0, vsrc1, vsrc2, vsrc3, vfilter, zero);
|
||||
|
||||
Sse.StoreScalar((float*)&dst[x], PackUnsignedSaturate(RoundShift(sum0123, const64), zero).AsSingle());
|
||||
Sse.StoreScalar((float*)&dst[x],
|
||||
PackUnsignedSaturate(RoundShift(sum0123, const64), zero).AsSingle());
|
||||
}
|
||||
|
||||
src += srcStride;
|
||||
dst += dstStride;
|
||||
}
|
||||
@@ -117,22 +118,20 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
if (Sse41.IsSupported && UseIntrinsics && xStepQ4 == 1 << SubpelBits)
|
||||
{
|
||||
ConvolveHorizSse41(src, srcStride, dst, dstStride, xFilters, x0Q4, w, h);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
int x, y;
|
||||
src -= SubpelTaps / 2 - 1;
|
||||
src -= (SubpelTaps / 2) - 1;
|
||||
|
||||
for (y = 0; y < h; ++y)
|
||||
for (int y = 0; y < h; ++y)
|
||||
{
|
||||
int xQ4 = x0Q4;
|
||||
for (x = 0; x < w; ++x)
|
||||
for (int x = 0; x < w; ++x)
|
||||
{
|
||||
byte* srcX = &src[xQ4 >> SubpelBits];
|
||||
ref Array8<short> xFilter = ref xFilters[xQ4 & SubpelMask];
|
||||
int k, sum = 0;
|
||||
for (k = 0; k < SubpelTaps; ++k)
|
||||
int sum = 0;
|
||||
for (int k = 0; k < SubpelTaps; ++k)
|
||||
{
|
||||
sum += srcX[k] * xFilter[k];
|
||||
}
|
||||
@@ -140,6 +139,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
dst[x] = BitUtils.ClipPixel(BitUtils.RoundPowerOfTwo(sum, FilterBits));
|
||||
xQ4 += xStepQ4;
|
||||
}
|
||||
|
||||
src += srcStride;
|
||||
dst += dstStride;
|
||||
}
|
||||
@@ -156,25 +156,26 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
int w,
|
||||
int h)
|
||||
{
|
||||
int x, y;
|
||||
src -= SubpelTaps / 2 - 1;
|
||||
src -= (SubpelTaps / 2) - 1;
|
||||
|
||||
for (y = 0; y < h; ++y)
|
||||
for (int y = 0; y < h; ++y)
|
||||
{
|
||||
int xQ4 = x0Q4;
|
||||
for (x = 0; x < w; ++x)
|
||||
for (int x = 0; x < w; ++x)
|
||||
{
|
||||
byte* srcX = &src[xQ4 >> SubpelBits];
|
||||
ref Array8<short> xFilter = ref xFilters[xQ4 & SubpelMask];
|
||||
int k, sum = 0;
|
||||
for (k = 0; k < SubpelTaps; ++k)
|
||||
int sum = 0;
|
||||
for (int k = 0; k < SubpelTaps; ++k)
|
||||
{
|
||||
sum += srcX[k] * xFilter[k];
|
||||
}
|
||||
|
||||
dst[x] = (byte)BitUtils.RoundPowerOfTwo(dst[x] + BitUtils.ClipPixel(BitUtils.RoundPowerOfTwo(sum, FilterBits)), 1);
|
||||
dst[x] = (byte)BitUtils.RoundPowerOfTwo(
|
||||
dst[x] + BitUtils.ClipPixel(BitUtils.RoundPowerOfTwo(sum, FilterBits)), 1);
|
||||
xQ4 += xStepQ4;
|
||||
}
|
||||
|
||||
src += srcStride;
|
||||
dst += dstStride;
|
||||
}
|
||||
@@ -203,18 +204,17 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
srcStride * 6,
|
||||
srcStride * 7);
|
||||
|
||||
ulong x, y;
|
||||
src -= srcStride * (SubpelTaps / 2 - 1);
|
||||
src -= srcStride * ((SubpelTaps / 2) - 1);
|
||||
|
||||
fixed (Array8<short>* yFilter = yFilters)
|
||||
{
|
||||
Vector128<short> vfilter = Sse2.LoadVector128((short*)yFilter + (uint)(y0Q4 & SubpelMask) * 8);
|
||||
Vector128<short> vfilter = Sse2.LoadVector128((short*)yFilter + ((uint)(y0Q4 & SubpelMask) * 8));
|
||||
|
||||
ulong srcBaseY = (uint)y0Q4 >> SubpelBits;
|
||||
for (y = 0; y < (uint)h; ++y)
|
||||
for (ulong y = 0; y < (uint)h; ++y)
|
||||
{
|
||||
ulong srcOffset = (srcBaseY + y) * (uint)srcStride;
|
||||
for (x = 0; x < (uint)w; x += 4)
|
||||
for (ulong x = 0; x < (uint)w; x += 4)
|
||||
{
|
||||
Vector256<int> vsrc = Avx2.GatherVector256((uint*)&src[srcOffset + x], indices, 1).AsInt32();
|
||||
|
||||
@@ -240,8 +240,10 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
|
||||
Vector128<int> sum0123 = MultiplyAddAdjacent(vsrc0, vsrc1, vsrc2, vsrc3, vfilter, zero);
|
||||
|
||||
Sse.StoreScalar((float*)&dst[x], PackUnsignedSaturate(RoundShift(sum0123, const64), zero).AsSingle());
|
||||
Sse.StoreScalar((float*)&dst[x],
|
||||
PackUnsignedSaturate(RoundShift(sum0123, const64), zero).AsSingle());
|
||||
}
|
||||
|
||||
dst += dstStride;
|
||||
}
|
||||
}
|
||||
@@ -262,22 +264,20 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
if (Avx2.IsSupported && UseIntrinsics && yStepQ4 == 1 << SubpelBits)
|
||||
{
|
||||
ConvolveVertAvx2(src, srcStride, dst, dstStride, yFilters, y0Q4, w, h);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
int x, y;
|
||||
src -= srcStride * (SubpelTaps / 2 - 1);
|
||||
src -= srcStride * ((SubpelTaps / 2) - 1);
|
||||
|
||||
for (x = 0; x < w; ++x)
|
||||
for (int x = 0; x < w; ++x)
|
||||
{
|
||||
int yQ4 = y0Q4;
|
||||
for (y = 0; y < h; ++y)
|
||||
for (int y = 0; y < h; ++y)
|
||||
{
|
||||
byte* srcY = &src[(yQ4 >> SubpelBits) * srcStride];
|
||||
ref Array8<short> yFilter = ref yFilters[yQ4 & SubpelMask];
|
||||
int k, sum = 0;
|
||||
for (k = 0; k < SubpelTaps; ++k)
|
||||
int sum = 0;
|
||||
for (int k = 0; k < SubpelTaps; ++k)
|
||||
{
|
||||
sum += srcY[k * srcStride] * yFilter[k];
|
||||
}
|
||||
@@ -285,6 +285,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
dst[y * dstStride] = BitUtils.ClipPixel(BitUtils.RoundPowerOfTwo(sum, FilterBits));
|
||||
yQ4 += yStepQ4;
|
||||
}
|
||||
|
||||
++src;
|
||||
++dst;
|
||||
}
|
||||
@@ -301,18 +302,17 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
int w,
|
||||
int h)
|
||||
{
|
||||
int x, y;
|
||||
src -= srcStride * (SubpelTaps / 2 - 1);
|
||||
src -= srcStride * ((SubpelTaps / 2) - 1);
|
||||
|
||||
for (x = 0; x < w; ++x)
|
||||
for (int x = 0; x < w; ++x)
|
||||
{
|
||||
int yQ4 = y0Q4;
|
||||
for (y = 0; y < h; ++y)
|
||||
for (int y = 0; y < h; ++y)
|
||||
{
|
||||
byte* srcY = &src[(yQ4 >> SubpelBits) * srcStride];
|
||||
ref Array8<short> yFilter = ref yFilters[yQ4 & SubpelMask];
|
||||
int k, sum = 0;
|
||||
for (k = 0; k < SubpelTaps; ++k)
|
||||
int sum = 0;
|
||||
for (int k = 0; k < SubpelTaps; ++k)
|
||||
{
|
||||
sum += srcY[k * srcStride] * yFilter[k];
|
||||
}
|
||||
@@ -321,6 +321,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
dst[y * dstStride] + BitUtils.ClipPixel(BitUtils.RoundPowerOfTwo(sum, FilterBits)), 1);
|
||||
yQ4 += yStepQ4;
|
||||
}
|
||||
|
||||
++src;
|
||||
++dst;
|
||||
}
|
||||
@@ -420,15 +421,16 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
// ==> yStepQ4 = 64. Since w and h are at most 16, the temp buffer is still
|
||||
// big enough.
|
||||
byte* temp = stackalloc byte[64 * 135];
|
||||
int intermediateHeight = (((h - 1) * yStepQ4 + y0Q4) >> SubpelBits) + SubpelTaps;
|
||||
int intermediateHeight = ((((h - 1) * yStepQ4) + y0Q4) >> SubpelBits) + SubpelTaps;
|
||||
|
||||
Debug.Assert(w <= 64);
|
||||
Debug.Assert(h <= 64);
|
||||
Debug.Assert(yStepQ4 <= 32 || (yStepQ4 <= 64 && h <= 32));
|
||||
Debug.Assert(xStepQ4 <= 64);
|
||||
|
||||
ConvolveHoriz(src - srcStride * (SubpelTaps / 2 - 1), srcStride, temp, 64, filter, x0Q4, xStepQ4, w, intermediateHeight);
|
||||
ConvolveVert(temp + 64 * (SubpelTaps / 2 - 1), 64, dst, dstStride, filter, y0Q4, yStepQ4, w, h);
|
||||
ConvolveHoriz(src - (srcStride * ((SubpelTaps / 2) - 1)), srcStride, temp, 64, filter, x0Q4, xStepQ4, w,
|
||||
intermediateHeight);
|
||||
ConvolveVert(temp + (64 * ((SubpelTaps / 2) - 1)), 64, dst, dstStride, filter, y0Q4, yStepQ4, w, h);
|
||||
}
|
||||
|
||||
public static unsafe void Convolve8Avg(
|
||||
@@ -489,11 +491,9 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
int w,
|
||||
int h)
|
||||
{
|
||||
int x, y;
|
||||
|
||||
for (y = 0; y < h; ++y)
|
||||
for (int y = 0; y < h; ++y)
|
||||
{
|
||||
for (x = 0; x < w; ++x)
|
||||
for (int x = 0; x < w; ++x)
|
||||
{
|
||||
dst[x] = (byte)BitUtils.RoundPowerOfTwo(dst[x] + src[x], 1);
|
||||
}
|
||||
@@ -611,18 +611,17 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
int h,
|
||||
int bd)
|
||||
{
|
||||
int x, y;
|
||||
src -= SubpelTaps / 2 - 1;
|
||||
src -= (SubpelTaps / 2) - 1;
|
||||
|
||||
for (y = 0; y < h; ++y)
|
||||
for (int y = 0; y < h; ++y)
|
||||
{
|
||||
int xQ4 = x0Q4;
|
||||
for (x = 0; x < w; ++x)
|
||||
for (int x = 0; x < w; ++x)
|
||||
{
|
||||
ushort* srcX = &src[xQ4 >> SubpelBits];
|
||||
ref Array8<short> xFilter = ref xFilters[xQ4 & SubpelMask];
|
||||
int k, sum = 0;
|
||||
for (k = 0; k < SubpelTaps; ++k)
|
||||
int sum = 0;
|
||||
for (int k = 0; k < SubpelTaps; ++k)
|
||||
{
|
||||
sum += srcX[k] * xFilter[k];
|
||||
}
|
||||
@@ -630,6 +629,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
dst[x] = BitUtils.ClipPixelHighbd(BitUtils.RoundPowerOfTwo(sum, FilterBits), bd);
|
||||
xQ4 += xStepQ4;
|
||||
}
|
||||
|
||||
src += srcStride;
|
||||
dst += dstStride;
|
||||
}
|
||||
@@ -647,25 +647,26 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
int h,
|
||||
int bd)
|
||||
{
|
||||
int x, y;
|
||||
src -= SubpelTaps / 2 - 1;
|
||||
src -= (SubpelTaps / 2) - 1;
|
||||
|
||||
for (y = 0; y < h; ++y)
|
||||
for (int y = 0; y < h; ++y)
|
||||
{
|
||||
int xQ4 = x0Q4;
|
||||
for (x = 0; x < w; ++x)
|
||||
for (int x = 0; x < w; ++x)
|
||||
{
|
||||
ushort* srcX = &src[xQ4 >> SubpelBits];
|
||||
ref Array8<short> xFilter = ref xFilters[xQ4 & SubpelMask];
|
||||
int k, sum = 0;
|
||||
for (k = 0; k < SubpelTaps; ++k)
|
||||
int sum = 0;
|
||||
for (int k = 0; k < SubpelTaps; ++k)
|
||||
{
|
||||
sum += srcX[k] * xFilter[k];
|
||||
}
|
||||
|
||||
dst[x] = (ushort)BitUtils.RoundPowerOfTwo(dst[x] + BitUtils.ClipPixelHighbd(BitUtils.RoundPowerOfTwo(sum, FilterBits), bd), 1);
|
||||
dst[x] = (ushort)BitUtils.RoundPowerOfTwo(
|
||||
dst[x] + BitUtils.ClipPixelHighbd(BitUtils.RoundPowerOfTwo(sum, FilterBits), bd), 1);
|
||||
xQ4 += xStepQ4;
|
||||
}
|
||||
|
||||
src += srcStride;
|
||||
dst += dstStride;
|
||||
}
|
||||
@@ -683,18 +684,17 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
int h,
|
||||
int bd)
|
||||
{
|
||||
int x, y;
|
||||
src -= srcStride * (SubpelTaps / 2 - 1);
|
||||
src -= srcStride * ((SubpelTaps / 2) - 1);
|
||||
|
||||
for (x = 0; x < w; ++x)
|
||||
for (int x = 0; x < w; ++x)
|
||||
{
|
||||
int yQ4 = y0Q4;
|
||||
for (y = 0; y < h; ++y)
|
||||
for (int y = 0; y < h; ++y)
|
||||
{
|
||||
ushort* srcY = &src[(yQ4 >> SubpelBits) * srcStride];
|
||||
ref Array8<short> yFilter = ref yFilters[yQ4 & SubpelMask];
|
||||
int k, sum = 0;
|
||||
for (k = 0; k < SubpelTaps; ++k)
|
||||
int sum = 0;
|
||||
for (int k = 0; k < SubpelTaps; ++k)
|
||||
{
|
||||
sum += srcY[k * srcStride] * yFilter[k];
|
||||
}
|
||||
@@ -702,6 +702,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
dst[y * dstStride] = BitUtils.ClipPixelHighbd(BitUtils.RoundPowerOfTwo(sum, FilterBits), bd);
|
||||
yQ4 += yStepQ4;
|
||||
}
|
||||
|
||||
++src;
|
||||
++dst;
|
||||
}
|
||||
@@ -719,26 +720,27 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
int h,
|
||||
int bd)
|
||||
{
|
||||
int x, y;
|
||||
src -= srcStride * (SubpelTaps / 2 - 1);
|
||||
src -= srcStride * ((SubpelTaps / 2) - 1);
|
||||
|
||||
for (x = 0; x < w; ++x)
|
||||
for (int x = 0; x < w; ++x)
|
||||
{
|
||||
int yQ4 = y0Q4;
|
||||
for (y = 0; y < h; ++y)
|
||||
for (int y = 0; y < h; ++y)
|
||||
{
|
||||
ushort* srcY = &src[(yQ4 >> SubpelBits) * srcStride];
|
||||
ref Array8<short> yFilter = ref yFilters[yQ4 & SubpelMask];
|
||||
int k, sum = 0;
|
||||
for (k = 0; k < SubpelTaps; ++k)
|
||||
int sum = 0;
|
||||
for (int k = 0; k < SubpelTaps; ++k)
|
||||
{
|
||||
sum += srcY[k * srcStride] * yFilter[k];
|
||||
}
|
||||
|
||||
dst[y * dstStride] = (ushort)BitUtils.RoundPowerOfTwo(
|
||||
dst[y * dstStride] + BitUtils.ClipPixelHighbd(BitUtils.RoundPowerOfTwo(sum, FilterBits), bd), 1);
|
||||
dst[y * dstStride] + BitUtils.ClipPixelHighbd(BitUtils.RoundPowerOfTwo(sum, FilterBits), bd),
|
||||
1);
|
||||
yQ4 += yStepQ4;
|
||||
}
|
||||
|
||||
++src;
|
||||
++dst;
|
||||
}
|
||||
@@ -771,15 +773,17 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
// --Require an additional SubpelTaps rows for the 8-tap filter tails.
|
||||
// --((64 - 1) * 32 + 15) >> 4 + 8 = 135.
|
||||
ushort* temp = stackalloc ushort[64 * 135];
|
||||
int intermediateHeight = (((h - 1) * yStepQ4 + y0Q4) >> SubpelBits) + SubpelTaps;
|
||||
int intermediateHeight = ((((h - 1) * yStepQ4) + y0Q4) >> SubpelBits) + SubpelTaps;
|
||||
|
||||
Debug.Assert(w <= 64);
|
||||
Debug.Assert(h <= 64);
|
||||
Debug.Assert(yStepQ4 <= 32);
|
||||
Debug.Assert(xStepQ4 <= 32);
|
||||
|
||||
HighbdConvolveHoriz(src - srcStride * (SubpelTaps / 2 - 1), srcStride, temp, 64, filter, x0Q4, xStepQ4, w, intermediateHeight, bd);
|
||||
HighbdConvolveVert(temp + 64 * (SubpelTaps / 2 - 1), 64, dst, dstStride, filter, y0Q4, yStepQ4, w, h, bd);
|
||||
HighbdConvolveHoriz(src - (srcStride * ((SubpelTaps / 2) - 1)), srcStride, temp, 64, filter, x0Q4, xStepQ4,
|
||||
w, intermediateHeight, bd);
|
||||
HighbdConvolveVert(temp + (64 * ((SubpelTaps / 2) - 1)), 64, dst, dstStride, filter, y0Q4, yStepQ4, w, h,
|
||||
bd);
|
||||
}
|
||||
|
||||
public static unsafe void HighbdConvolve8Horiz(
|
||||
@@ -928,11 +932,9 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
int h,
|
||||
int bd)
|
||||
{
|
||||
int x, y;
|
||||
|
||||
for (y = 0; y < h; ++y)
|
||||
for (int y = 0; y < h; ++y)
|
||||
{
|
||||
for (x = 0; x < w; ++x)
|
||||
for (int x = 0; x < w; ++x)
|
||||
{
|
||||
dst[x] = (ushort)BitUtils.RoundPowerOfTwo(dst[x] + src[x], 1);
|
||||
}
|
||||
@@ -942,4 +944,4 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -9,4 +9,4 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
public const int SubpelShifts = 1 << SubpelBits;
|
||||
public const int SubpelTaps = 8;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
using Ryujinx.Graphics.Nvdec.Vp9.Common;
|
||||
using Ryujinx.Graphics.Nvdec.Vp9.Common;
|
||||
|
||||
namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
{
|
||||
@@ -6,22 +6,22 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
{
|
||||
private static unsafe ref byte Dst(byte* dst, int stride, int x, int y)
|
||||
{
|
||||
return ref dst[x + y * stride];
|
||||
return ref dst[x + (y * stride)];
|
||||
}
|
||||
|
||||
private static unsafe ref ushort Dst(ushort* dst, int stride, int x, int y)
|
||||
{
|
||||
return ref dst[x + y * stride];
|
||||
return ref dst[x + (y * stride)];
|
||||
}
|
||||
|
||||
private static byte Avg3(byte a, byte b, byte c)
|
||||
{
|
||||
return (byte)((a + 2 * b + c + 2) >> 2);
|
||||
return (byte)((a + (2 * b) + c + 2) >> 2);
|
||||
}
|
||||
|
||||
private static ushort Avg3(ushort a, ushort b, ushort c)
|
||||
{
|
||||
return (ushort)((a + 2 * b + c + 2) >> 2);
|
||||
return (ushort)((a + (2 * b) + c + 2) >> 2);
|
||||
}
|
||||
|
||||
private static byte Avg2(byte a, byte b)
|
||||
@@ -51,9 +51,8 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
|
||||
private static unsafe void D207Predictor(byte* dst, int stride, int bs, byte* above, byte* left)
|
||||
{
|
||||
int r, c;
|
||||
// First column
|
||||
for (r = 0; r < bs - 1; ++r)
|
||||
for (int r = 0; r < bs - 1; ++r)
|
||||
{
|
||||
dst[r * stride] = Avg2(left[r], left[r + 1]);
|
||||
}
|
||||
@@ -62,7 +61,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
dst++;
|
||||
|
||||
// Second column
|
||||
for (r = 0; r < bs - 2; ++r)
|
||||
for (int r = 0; r < bs - 2; ++r)
|
||||
{
|
||||
dst[r * stride] = Avg3(left[r], left[r + 1], left[r + 2]);
|
||||
}
|
||||
@@ -72,16 +71,16 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
dst++;
|
||||
|
||||
// Rest of last row
|
||||
for (c = 0; c < bs - 2; ++c)
|
||||
for (int c = 0; c < bs - 2; ++c)
|
||||
{
|
||||
dst[(bs - 1) * stride + c] = left[bs - 1];
|
||||
dst[((bs - 1) * stride) + c] = left[bs - 1];
|
||||
}
|
||||
|
||||
for (r = bs - 2; r >= 0; --r)
|
||||
for (int r = bs - 2; r >= 0; --r)
|
||||
{
|
||||
for (c = 0; c < bs - 2; ++c)
|
||||
for (int c = 0; c < bs - 2; ++c)
|
||||
{
|
||||
dst[r * stride + c] = dst[(r + 1) * stride + c - 2];
|
||||
dst[(r * stride) + c] = dst[((r + 1) * stride) + c - 2];
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -103,19 +102,18 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
|
||||
private static unsafe void D63Predictor(byte* dst, int stride, int bs, byte* above, byte* left)
|
||||
{
|
||||
int r, c;
|
||||
int size;
|
||||
for (c = 0; c < bs; ++c)
|
||||
for (int c = 0; c < bs; ++c)
|
||||
{
|
||||
dst[c] = Avg2(above[c], above[c + 1]);
|
||||
dst[stride + c] = Avg3(above[c], above[c + 1], above[c + 2]);
|
||||
}
|
||||
for (r = 2, size = bs - 2; r < bs; r += 2, --size)
|
||||
|
||||
for (int r = 2, size = bs - 2; r < bs; r += 2, --size)
|
||||
{
|
||||
MemoryUtil.Copy(dst + (r + 0) * stride, dst + (r >> 1), size);
|
||||
MemoryUtil.Fill(dst + (r + 0) * stride + size, above[bs - 1], bs - size);
|
||||
MemoryUtil.Copy(dst + (r + 1) * stride, dst + stride + (r >> 1), size);
|
||||
MemoryUtil.Fill(dst + (r + 1) * stride + size, above[bs - 1], bs - size);
|
||||
MemoryUtil.Copy(dst + ((r + 0) * stride), dst + (r >> 1), size);
|
||||
MemoryUtil.Fill(dst + ((r + 0) * stride) + size, above[bs - 1], bs - size);
|
||||
MemoryUtil.Copy(dst + ((r + 1) * stride), dst + stride + (r >> 1), size);
|
||||
MemoryUtil.Fill(dst + ((r + 1) * stride) + size, above[bs - 1], bs - size);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -138,15 +136,15 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
{
|
||||
byte aboveRight = above[bs - 1];
|
||||
byte* dstRow0 = dst;
|
||||
int x, size;
|
||||
|
||||
for (x = 0; x < bs - 1; ++x)
|
||||
for (int x = 0; x < bs - 1; ++x)
|
||||
{
|
||||
dst[x] = Avg3(above[x], above[x + 1], above[x + 2]);
|
||||
}
|
||||
|
||||
dst[bs - 1] = aboveRight;
|
||||
dst += stride;
|
||||
for (x = 1, size = bs - 2; x < bs; ++x, --size)
|
||||
for (int x = 1, size = bs - 2; x < bs; ++x, --size)
|
||||
{
|
||||
MemoryUtil.Copy(dst, dstRow0 + x, size);
|
||||
MemoryUtil.Fill(dst + size, aboveRight, x + 1);
|
||||
@@ -171,10 +169,8 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
|
||||
private static unsafe void D117Predictor(byte* dst, int stride, int bs, byte* above, byte* left)
|
||||
{
|
||||
int r, c;
|
||||
|
||||
// First row
|
||||
for (c = 0; c < bs; c++)
|
||||
for (int c = 0; c < bs; c++)
|
||||
{
|
||||
dst[c] = Avg2(above[c - 1], above[c]);
|
||||
}
|
||||
@@ -183,7 +179,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
|
||||
// Second row
|
||||
dst[0] = Avg3(left[0], above[-1], above[0]);
|
||||
for (c = 1; c < bs; c++)
|
||||
for (int c = 1; c < bs; c++)
|
||||
{
|
||||
dst[c] = Avg3(above[c - 2], above[c - 1], above[c]);
|
||||
}
|
||||
@@ -192,17 +188,17 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
|
||||
// The rest of first col
|
||||
dst[0] = Avg3(above[-1], left[0], left[1]);
|
||||
for (r = 3; r < bs; ++r)
|
||||
for (int r = 3; r < bs; ++r)
|
||||
{
|
||||
dst[(r - 2) * stride] = Avg3(left[r - 3], left[r - 2], left[r - 1]);
|
||||
}
|
||||
|
||||
// The rest of the block
|
||||
for (r = 2; r < bs; ++r)
|
||||
for (int r = 2; r < bs; ++r)
|
||||
{
|
||||
for (c = 1; c < bs; c++)
|
||||
for (int c = 1; c < bs; c++)
|
||||
{
|
||||
dst[c] = dst[-2 * stride + c - 1];
|
||||
dst[c] = dst[(-2 * stride) + c - 1];
|
||||
}
|
||||
|
||||
dst += stride;
|
||||
@@ -226,26 +222,26 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
|
||||
private static unsafe void D135Predictor(byte* dst, int stride, int bs, byte* above, byte* left)
|
||||
{
|
||||
int i;
|
||||
byte* border = stackalloc byte[32 + 32 - 1]; // outer border from bottom-left to top-right
|
||||
|
||||
// Dst(dst, stride, bs, bs - 2)[0], i.e., border starting at bottom-left
|
||||
for (i = 0; i < bs - 2; ++i)
|
||||
for (int i = 0; i < bs - 2; ++i)
|
||||
{
|
||||
border[i] = Avg3(left[bs - 3 - i], left[bs - 2 - i], left[bs - 1 - i]);
|
||||
}
|
||||
|
||||
border[bs - 2] = Avg3(above[-1], left[0], left[1]);
|
||||
border[bs - 1] = Avg3(left[0], above[-1], above[0]);
|
||||
border[bs - 0] = Avg3(above[-1], above[0], above[1]);
|
||||
// dst[0][2, size), i.e., remaining top border ascending
|
||||
for (i = 0; i < bs - 2; ++i)
|
||||
for (int i = 0; i < bs - 2; ++i)
|
||||
{
|
||||
border[bs + 1 + i] = Avg3(above[i], above[i + 1], above[i + 2]);
|
||||
}
|
||||
|
||||
for (i = 0; i < bs; ++i)
|
||||
for (int i = 0; i < bs; ++i)
|
||||
{
|
||||
MemoryUtil.Copy(dst + i * stride, border + bs - 1 - i, bs);
|
||||
MemoryUtil.Copy(dst + (i * stride), border + bs - 1 - i, bs);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -266,9 +262,8 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
|
||||
private static unsafe void D153Predictor(byte* dst, int stride, int bs, byte* above, byte* left)
|
||||
{
|
||||
int r, c;
|
||||
dst[0] = Avg2(above[-1], left[0]);
|
||||
for (r = 1; r < bs; r++)
|
||||
for (int r = 1; r < bs; r++)
|
||||
{
|
||||
dst[r * stride] = Avg2(left[r - 1], left[r]);
|
||||
}
|
||||
@@ -277,23 +272,23 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
|
||||
dst[0] = Avg3(left[0], above[-1], above[0]);
|
||||
dst[stride] = Avg3(above[-1], left[0], left[1]);
|
||||
for (r = 2; r < bs; r++)
|
||||
for (int r = 2; r < bs; r++)
|
||||
{
|
||||
dst[r * stride] = Avg3(left[r - 2], left[r - 1], left[r]);
|
||||
}
|
||||
|
||||
dst++;
|
||||
|
||||
for (c = 0; c < bs - 2; c++)
|
||||
for (int c = 0; c < bs - 2; c++)
|
||||
{
|
||||
dst[c] = Avg3(above[c - 1], above[c], above[c + 1]);
|
||||
}
|
||||
|
||||
dst += stride;
|
||||
|
||||
for (r = 1; r < bs; ++r)
|
||||
for (int r = 1; r < bs; ++r)
|
||||
{
|
||||
for (c = 0; c < bs - 2; c++)
|
||||
for (int c = 0; c < bs - 2; c++)
|
||||
{
|
||||
dst[c] = dst[-stride + c - 2];
|
||||
}
|
||||
@@ -324,9 +319,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
|
||||
private static unsafe void VPredictor(byte* dst, int stride, int bs, byte* above, byte* left)
|
||||
{
|
||||
int r;
|
||||
|
||||
for (r = 0; r < bs; r++)
|
||||
for (int r = 0; r < bs; r++)
|
||||
{
|
||||
MemoryUtil.Copy(dst, above, bs);
|
||||
dst += stride;
|
||||
@@ -355,43 +348,40 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
|
||||
private static unsafe void HPredictor(byte* dst, int stride, int bs, byte* above, byte* left)
|
||||
{
|
||||
int r;
|
||||
|
||||
for (r = 0; r < bs; r++)
|
||||
for (int r = 0; r < bs; r++)
|
||||
{
|
||||
MemoryUtil.Fill(dst, left[r], bs);
|
||||
dst += stride;
|
||||
}
|
||||
}
|
||||
|
||||
public static unsafe void TMPredictor4x4(byte* dst, int stride, byte* above, byte* left)
|
||||
public static unsafe void TmPredictor4x4(byte* dst, int stride, byte* above, byte* left)
|
||||
{
|
||||
TMPredictor(dst, stride, 4, above, left);
|
||||
TmPredictor(dst, stride, 4, above, left);
|
||||
}
|
||||
|
||||
public static unsafe void TMPredictor8x8(byte* dst, int stride, byte* above, byte* left)
|
||||
public static unsafe void TmPredictor8x8(byte* dst, int stride, byte* above, byte* left)
|
||||
{
|
||||
TMPredictor(dst, stride, 8, above, left);
|
||||
TmPredictor(dst, stride, 8, above, left);
|
||||
}
|
||||
|
||||
public static unsafe void TMPredictor16x16(byte* dst, int stride, byte* above, byte* left)
|
||||
public static unsafe void TmPredictor16x16(byte* dst, int stride, byte* above, byte* left)
|
||||
{
|
||||
TMPredictor(dst, stride, 16, above, left);
|
||||
TmPredictor(dst, stride, 16, above, left);
|
||||
}
|
||||
|
||||
public static unsafe void TMPredictor32x32(byte* dst, int stride, byte* above, byte* left)
|
||||
public static unsafe void TmPredictor32x32(byte* dst, int stride, byte* above, byte* left)
|
||||
{
|
||||
TMPredictor(dst, stride, 32, above, left);
|
||||
TmPredictor(dst, stride, 32, above, left);
|
||||
}
|
||||
|
||||
private static unsafe void TMPredictor(byte* dst, int stride, int bs, byte* above, byte* left)
|
||||
private static unsafe void TmPredictor(byte* dst, int stride, int bs, byte* above, byte* left)
|
||||
{
|
||||
int r, c;
|
||||
int yTopLeft = above[-1];
|
||||
|
||||
for (r = 0; r < bs; r++)
|
||||
for (int r = 0; r < bs; r++)
|
||||
{
|
||||
for (c = 0; c < bs; c++)
|
||||
for (int c = 0; c < bs; c++)
|
||||
{
|
||||
dst[c] = BitUtils.ClipPixel(left[r] + above[c] - yTopLeft);
|
||||
}
|
||||
@@ -422,9 +412,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
|
||||
private static unsafe void Dc128Predictor(byte* dst, int stride, int bs, byte* above, byte* left)
|
||||
{
|
||||
int r;
|
||||
|
||||
for (r = 0; r < bs; r++)
|
||||
for (int r = 0; r < bs; r++)
|
||||
{
|
||||
MemoryUtil.Fill(dst, (byte)128, bs);
|
||||
dst += stride;
|
||||
@@ -453,16 +441,16 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
|
||||
private static unsafe void DcLeftPredictor(byte* dst, int stride, int bs, byte* above, byte* left)
|
||||
{
|
||||
int i, r, expectedDc, sum = 0;
|
||||
int expectedDc, sum = 0;
|
||||
|
||||
for (i = 0; i < bs; i++)
|
||||
for (int i = 0; i < bs; i++)
|
||||
{
|
||||
sum += left[i];
|
||||
}
|
||||
|
||||
expectedDc = (sum + (bs >> 1)) / bs;
|
||||
|
||||
for (r = 0; r < bs; r++)
|
||||
for (int r = 0; r < bs; r++)
|
||||
{
|
||||
MemoryUtil.Fill(dst, (byte)expectedDc, bs);
|
||||
dst += stride;
|
||||
@@ -491,16 +479,16 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
|
||||
private static unsafe void DcTopPredictor(byte* dst, int stride, int bs, byte* above, byte* left)
|
||||
{
|
||||
int i, r, expectedDc, sum = 0;
|
||||
int expectedDc, sum = 0;
|
||||
|
||||
for (i = 0; i < bs; i++)
|
||||
for (int i = 0; i < bs; i++)
|
||||
{
|
||||
sum += above[i];
|
||||
}
|
||||
|
||||
expectedDc = (sum + (bs >> 1)) / bs;
|
||||
|
||||
for (r = 0; r < bs; r++)
|
||||
for (int r = 0; r < bs; r++)
|
||||
{
|
||||
MemoryUtil.Fill(dst, (byte)expectedDc, bs);
|
||||
dst += stride;
|
||||
@@ -529,10 +517,10 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
|
||||
private static unsafe void DcPredictor(byte* dst, int stride, int bs, byte* above, byte* left)
|
||||
{
|
||||
int i, r, expectedDc, sum = 0;
|
||||
int expectedDc, sum = 0;
|
||||
int count = 2 * bs;
|
||||
|
||||
for (i = 0; i < bs; i++)
|
||||
for (int i = 0; i < bs; i++)
|
||||
{
|
||||
sum += above[i];
|
||||
sum += left[i];
|
||||
@@ -540,7 +528,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
|
||||
expectedDc = (sum + (count >> 1)) / count;
|
||||
|
||||
for (r = 0; r < bs; r++)
|
||||
for (int r = 0; r < bs; r++)
|
||||
{
|
||||
MemoryUtil.Fill(dst, (byte)expectedDc, bs);
|
||||
dst += stride;
|
||||
@@ -555,10 +543,10 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
byte k = left[2];
|
||||
byte l = left[3];
|
||||
|
||||
MemoryUtil.Fill(dst + stride * 0, Avg3(h, I, j), 4);
|
||||
MemoryUtil.Fill(dst + stride * 1, Avg3(I, j, k), 4);
|
||||
MemoryUtil.Fill(dst + stride * 2, Avg3(j, k, l), 4);
|
||||
MemoryUtil.Fill(dst + stride * 3, Avg3(k, l, l), 4);
|
||||
MemoryUtil.Fill(dst + (stride * 0), Avg3(h, I, j), 4);
|
||||
MemoryUtil.Fill(dst + (stride * 1), Avg3(I, j, k), 4);
|
||||
MemoryUtil.Fill(dst + (stride * 2), Avg3(j, k, l), 4);
|
||||
MemoryUtil.Fill(dst + (stride * 3), Avg3(k, l, l), 4);
|
||||
}
|
||||
|
||||
public static unsafe void VePredictor4x4(byte* dst, int stride, byte* above, byte* left)
|
||||
@@ -574,9 +562,9 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
dst[1] = Avg3(I, j, k);
|
||||
dst[2] = Avg3(j, k, l);
|
||||
dst[3] = Avg3(k, l, m);
|
||||
MemoryUtil.Copy(dst + stride * 1, dst, 4);
|
||||
MemoryUtil.Copy(dst + stride * 2, dst, 4);
|
||||
MemoryUtil.Copy(dst + stride * 3, dst, 4);
|
||||
MemoryUtil.Copy(dst + (stride * 1), dst, 4);
|
||||
MemoryUtil.Copy(dst + (stride * 2), dst, 4);
|
||||
MemoryUtil.Copy(dst + (stride * 3), dst, 4);
|
||||
}
|
||||
|
||||
public static unsafe void D207Predictor4x4(byte* dst, int stride, byte* above, byte* left)
|
||||
@@ -591,7 +579,8 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
Dst(dst, stride, 1, 0) = Avg3(I, j, k);
|
||||
Dst(dst, stride, 3, 0) = Dst(dst, stride, 1, 1) = Avg3(j, k, l);
|
||||
Dst(dst, stride, 3, 1) = Dst(dst, stride, 1, 2) = Avg3(k, l, l);
|
||||
Dst(dst, stride, 3, 2) = Dst(dst, stride, 2, 2) = Dst(dst, stride, 0, 3) = Dst(dst, stride, 1, 3) = Dst(dst, stride, 2, 3) = Dst(dst, stride, 3, 3) = l;
|
||||
Dst(dst, stride, 3, 2) = Dst(dst, stride, 2, 2) = Dst(dst, stride, 0, 3) =
|
||||
Dst(dst, stride, 1, 3) = Dst(dst, stride, 2, 3) = Dst(dst, stride, 3, 3) = l;
|
||||
}
|
||||
|
||||
public static unsafe void D63Predictor4x4(byte* dst, int stride, byte* above, byte* left)
|
||||
@@ -616,7 +605,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
Dst(dst, stride, 3, 3) = Avg3(e, f, g); // Differs from vp8
|
||||
}
|
||||
|
||||
public static unsafe void D63ePredictor4x4(byte* dst, int stride, byte* above, byte* left)
|
||||
public static unsafe void D63EPredictor4x4(byte* dst, int stride, byte* above, byte* left)
|
||||
{
|
||||
byte a = above[0];
|
||||
byte b = above[1];
|
||||
@@ -652,13 +641,14 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
Dst(dst, stride, 0, 0) = Avg3(a, b, c);
|
||||
Dst(dst, stride, 1, 0) = Dst(dst, stride, 0, 1) = Avg3(b, c, d);
|
||||
Dst(dst, stride, 2, 0) = Dst(dst, stride, 1, 1) = Dst(dst, stride, 0, 2) = Avg3(c, d, e);
|
||||
Dst(dst, stride, 3, 0) = Dst(dst, stride, 2, 1) = Dst(dst, stride, 1, 2) = Dst(dst, stride, 0, 3) = Avg3(d, e, f);
|
||||
Dst(dst, stride, 3, 0) =
|
||||
Dst(dst, stride, 2, 1) = Dst(dst, stride, 1, 2) = Dst(dst, stride, 0, 3) = Avg3(d, e, f);
|
||||
Dst(dst, stride, 3, 1) = Dst(dst, stride, 2, 2) = Dst(dst, stride, 1, 3) = Avg3(e, f, g);
|
||||
Dst(dst, stride, 3, 2) = Dst(dst, stride, 2, 3) = Avg3(f, g, h);
|
||||
Dst(dst, stride, 3, 3) = h; // differs from vp8
|
||||
}
|
||||
|
||||
public static unsafe void D45ePredictor4x4(byte* dst, int stride, byte* above, byte* left)
|
||||
public static unsafe void D45EPredictor4x4(byte* dst, int stride, byte* above, byte* left)
|
||||
{
|
||||
byte a = above[0];
|
||||
byte b = above[1];
|
||||
@@ -671,7 +661,8 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
Dst(dst, stride, 0, 0) = Avg3(a, b, c);
|
||||
Dst(dst, stride, 1, 0) = Dst(dst, stride, 0, 1) = Avg3(b, c, d);
|
||||
Dst(dst, stride, 2, 0) = Dst(dst, stride, 1, 1) = Dst(dst, stride, 0, 2) = Avg3(c, d, e);
|
||||
Dst(dst, stride, 3, 0) = Dst(dst, stride, 2, 1) = Dst(dst, stride, 1, 2) = Dst(dst, stride, 0, 3) = Avg3(d, e, f);
|
||||
Dst(dst, stride, 3, 0) =
|
||||
Dst(dst, stride, 2, 1) = Dst(dst, stride, 1, 2) = Dst(dst, stride, 0, 3) = Avg3(d, e, f);
|
||||
Dst(dst, stride, 3, 1) = Dst(dst, stride, 2, 2) = Dst(dst, stride, 1, 3) = Avg3(e, f, g);
|
||||
Dst(dst, stride, 3, 2) = Dst(dst, stride, 2, 3) = Avg3(f, g, h);
|
||||
Dst(dst, stride, 3, 3) = Avg3(g, h, h);
|
||||
@@ -714,7 +705,8 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
Dst(dst, stride, 0, 3) = Avg3(j, k, l);
|
||||
Dst(dst, stride, 1, 3) = Dst(dst, stride, 0, 2) = Avg3(I, j, k);
|
||||
Dst(dst, stride, 2, 3) = Dst(dst, stride, 1, 2) = Dst(dst, stride, 0, 1) = Avg3(x, I, j);
|
||||
Dst(dst, stride, 3, 3) = Dst(dst, stride, 2, 2) = Dst(dst, stride, 1, 1) = Dst(dst, stride, 0, 0) = Avg3(a, x, I);
|
||||
Dst(dst, stride, 3, 3) =
|
||||
Dst(dst, stride, 2, 2) = Dst(dst, stride, 1, 1) = Dst(dst, stride, 0, 0) = Avg3(a, x, I);
|
||||
Dst(dst, stride, 3, 2) = Dst(dst, stride, 2, 1) = Dst(dst, stride, 1, 0) = Avg3(b, a, x);
|
||||
Dst(dst, stride, 3, 1) = Dst(dst, stride, 2, 0) = Avg3(c, b, a);
|
||||
Dst(dst, stride, 3, 0) = Avg3(d, c, b);
|
||||
@@ -758,38 +750,39 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
HighbdD207Predictor(dst, stride, 32, above, left, bd);
|
||||
}
|
||||
|
||||
/// <summary>
/// Fills a <paramref name="bs"/> x <paramref name="bs"/> block of 16-bit samples from the left neighbour column.
/// </summary>
/// <param name="dst">Pointer to the top-left sample of the destination block.</param>
/// <param name="stride">Distance, in samples, between consecutive destination rows.</param>
/// <param name="bs">Block size (width and height) in samples.</param>
/// <param name="above">Row above the block; not read by this predictor.</param>
/// <param name="left">Column to the left of the block; entries 0..bs-1 are read.</param>
/// <param name="bd">Bit depth; not used by this predictor.</param>
private static unsafe void HighbdD207Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left,
    int bd)
{
    // First column.
    for (int r = 0; r < bs - 1; ++r)
    {
        dst[r * stride] = Avg2(left[r], left[r + 1]);
    }

    dst[(bs - 1) * stride] = left[bs - 1];
    dst++;

    // Second column.
    for (int r = 0; r < bs - 2; ++r)
    {
        dst[r * stride] = Avg3(left[r], left[r + 1], left[r + 2]);
    }

    dst[(bs - 2) * stride] = Avg3(left[bs - 2], left[bs - 1], left[bs - 1]);
    dst[(bs - 1) * stride] = left[bs - 1];
    dst++;

    // Rest of last row.
    for (int c = 0; c < bs - 2; ++c)
    {
        dst[((bs - 1) * stride) + c] = left[bs - 1];
    }

    // Remaining rows, bottom-up: each sample copies the one a row below and two columns to the left
    // (dst has already been advanced past the first two columns, so "c - 2" stays inside the block).
    for (int r = bs - 2; r >= 0; --r)
    {
        for (int c = 0; c < bs - 2; ++c)
        {
            dst[(r * stride) + c] = dst[((r + 1) * stride) + c - 2];
        }
    }
}
|
||||
@@ -809,21 +802,21 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
HighbdD63Predictor(dst, stride, 32, above, left, bd);
|
||||
}
|
||||
|
||||
/// <summary>
/// Fills a <paramref name="bs"/> x <paramref name="bs"/> block of 16-bit samples from the row above.
/// </summary>
/// <param name="dst">Pointer to the top-left sample of the destination block.</param>
/// <param name="stride">Distance, in samples, between consecutive destination rows.</param>
/// <param name="bs">Block size (width and height) in samples.</param>
/// <param name="above">Row above the block; entries 0..bs+1 are read.</param>
/// <param name="left">Column to the left of the block; not read by this predictor.</param>
/// <param name="bd">Bit depth; not used by this predictor.</param>
private static unsafe void HighbdD63Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left,
    int bd)
{
    // Rows 0 and 1 are computed directly: 2-tap and 3-tap averages of the above row.
    for (int c = 0; c < bs; ++c)
    {
        dst[c] = Avg2(above[c], above[c + 1]);
        dst[stride + c] = Avg3(above[c], above[c + 1], above[c + 2]);
    }

    // Every later pair of rows is rows 0/1 shifted left by (r >> 1) samples,
    // with the vacated tail filled with above[bs - 1].
    for (int r = 2, size = bs - 2; r < bs; r += 2, --size)
    {
        MemoryUtil.Copy(dst + ((r + 0) * stride), dst + (r >> 1), size);
        MemoryUtil.Fill(dst + ((r + 0) * stride) + size, above[bs - 1], bs - size);
        MemoryUtil.Copy(dst + ((r + 1) * stride), dst + stride + (r >> 1), size);
        MemoryUtil.Fill(dst + ((r + 1) * stride) + size, above[bs - 1], bs - size);
    }
}
|
||||
|
||||
@@ -842,19 +835,20 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
HighbdD45Predictor(dst, stride, 32, above, left, bd);
|
||||
}
|
||||
|
||||
private static unsafe void HighbdD45Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd)
|
||||
private static unsafe void HighbdD45Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left,
|
||||
int bd)
|
||||
{
|
||||
ushort aboveRight = above[bs - 1];
|
||||
ushort* dstRow0 = dst;
|
||||
int x, size;
|
||||
|
||||
for (x = 0; x < bs - 1; ++x)
|
||||
for (int x = 0; x < bs - 1; ++x)
|
||||
{
|
||||
dst[x] = Avg3(above[x], above[x + 1], above[x + 2]);
|
||||
}
|
||||
|
||||
dst[bs - 1] = aboveRight;
|
||||
dst += stride;
|
||||
for (x = 1, size = bs - 2; x < bs; ++x, --size)
|
||||
for (int x = 1, size = bs - 2; x < bs; ++x, --size)
|
||||
{
|
||||
MemoryUtil.Copy(dst, dstRow0 + x, size);
|
||||
MemoryUtil.Fill(dst + size, aboveRight, x + 1);
|
||||
@@ -877,12 +871,11 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
HighbdD117Predictor(dst, stride, 32, above, left, bd);
|
||||
}
|
||||
|
||||
private static unsafe void HighbdD117Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd)
|
||||
private static unsafe void HighbdD117Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left,
|
||||
int bd)
|
||||
{
|
||||
int r, c;
|
||||
|
||||
// First row
|
||||
for (c = 0; c < bs; c++)
|
||||
for (int c = 0; c < bs; c++)
|
||||
{
|
||||
dst[c] = Avg2(above[c - 1], above[c]);
|
||||
}
|
||||
@@ -891,7 +884,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
|
||||
// Second row
|
||||
dst[0] = Avg3(left[0], above[-1], above[0]);
|
||||
for (c = 1; c < bs; c++)
|
||||
for (int c = 1; c < bs; c++)
|
||||
{
|
||||
dst[c] = Avg3(above[c - 2], above[c - 1], above[c]);
|
||||
}
|
||||
@@ -900,17 +893,17 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
|
||||
// The rest of first col
|
||||
dst[0] = Avg3(above[-1], left[0], left[1]);
|
||||
for (r = 3; r < bs; ++r)
|
||||
for (int r = 3; r < bs; ++r)
|
||||
{
|
||||
dst[(r - 2) * stride] = Avg3(left[r - 3], left[r - 2], left[r - 1]);
|
||||
}
|
||||
|
||||
// The rest of the block
|
||||
for (r = 2; r < bs; ++r)
|
||||
for (int r = 2; r < bs; ++r)
|
||||
{
|
||||
for (c = 1; c < bs; c++)
|
||||
for (int c = 1; c < bs; c++)
|
||||
{
|
||||
dst[c] = dst[-2 * stride + c - 1];
|
||||
dst[c] = dst[(-2 * stride) + c - 1];
|
||||
}
|
||||
|
||||
dst += stride;
|
||||
@@ -932,28 +925,29 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
HighbdD135Predictor(dst, stride, 32, above, left, bd);
|
||||
}
|
||||
|
||||
/// <summary>
/// Fills a <paramref name="bs"/> x <paramref name="bs"/> block of 16-bit samples from a filtered border
/// built out of the left column, the top-left sample and the row above.
/// </summary>
/// <param name="dst">Pointer to the top-left sample of the destination block.</param>
/// <param name="stride">Distance, in samples, between consecutive destination rows.</param>
/// <param name="bs">Block size in samples; the scratch border holds 63 entries, i.e. enough for bs up to 32.</param>
/// <param name="above">Row above the block; entries -1..bs-1 are read.</param>
/// <param name="left">Column to the left of the block; entries 0..bs-1 are read.</param>
/// <param name="bd">Bit depth; not used by this predictor.</param>
private static unsafe void HighbdD135Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left,
    int bd)
{
    ushort* border = stackalloc ushort[32 + 32 - 1]; // Outer border from bottom-left to top-right

    // Dst(dst, stride, bs, bs - 2)[0], i.e., border starting at bottom-left
    for (int i = 0; i < bs - 2; ++i)
    {
        border[i] = Avg3(left[bs - 3 - i], left[bs - 2 - i], left[bs - 1 - i]);
    }

    border[bs - 2] = Avg3(above[-1], left[0], left[1]);
    border[bs - 1] = Avg3(left[0], above[-1], above[0]);
    border[bs - 0] = Avg3(above[-1], above[0], above[1]);

    // dst[0][2, size), i.e., remaining top border ascending
    for (int i = 0; i < bs - 2; ++i)
    {
        border[bs + 1 + i] = Avg3(above[i], above[i + 1], above[i + 2]);
    }

    // Row i is a bs-sample window of the border, starting one entry earlier for each row further down.
    for (int i = 0; i < bs; ++i)
    {
        MemoryUtil.Copy(dst + (i * stride), border + bs - 1 - i, bs);
    }
}
|
||||
|
||||
@@ -972,11 +966,11 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
HighbdD153Predictor(dst, stride, 32, above, left, bd);
|
||||
}
|
||||
|
||||
private static unsafe void HighbdD153Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd)
|
||||
private static unsafe void HighbdD153Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left,
|
||||
int bd)
|
||||
{
|
||||
int r, c;
|
||||
dst[0] = Avg2(above[-1], left[0]);
|
||||
for (r = 1; r < bs; r++)
|
||||
for (int r = 1; r < bs; r++)
|
||||
{
|
||||
dst[r * stride] = Avg2(left[r - 1], left[r]);
|
||||
}
|
||||
@@ -985,23 +979,23 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
|
||||
dst[0] = Avg3(left[0], above[-1], above[0]);
|
||||
dst[stride] = Avg3(above[-1], left[0], left[1]);
|
||||
for (r = 2; r < bs; r++)
|
||||
for (int r = 2; r < bs; r++)
|
||||
{
|
||||
dst[r * stride] = Avg3(left[r - 2], left[r - 1], left[r]);
|
||||
}
|
||||
|
||||
dst++;
|
||||
|
||||
for (c = 0; c < bs - 2; c++)
|
||||
for (int c = 0; c < bs - 2; c++)
|
||||
{
|
||||
dst[c] = Avg3(above[c - 1], above[c], above[c + 1]);
|
||||
}
|
||||
|
||||
dst += stride;
|
||||
|
||||
for (r = 1; r < bs; ++r)
|
||||
for (int r = 1; r < bs; ++r)
|
||||
{
|
||||
for (c = 0; c < bs - 2; c++)
|
||||
for (int c = 0; c < bs - 2; c++)
|
||||
{
|
||||
dst[c] = dst[-stride + c - 2];
|
||||
}
|
||||
@@ -1030,10 +1024,10 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
HighbdVPredictor(dst, stride, 32, above, left, bd);
|
||||
}
|
||||
|
||||
private static unsafe void HighbdVPredictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd)
|
||||
private static unsafe void HighbdVPredictor(ushort* dst, int stride, int bs, ushort* above, ushort* left,
|
||||
int bd)
|
||||
{
|
||||
int r;
|
||||
for (r = 0; r < bs; r++)
|
||||
for (int r = 0; r < bs; r++)
|
||||
{
|
||||
MemoryUtil.Copy(dst, above, bs);
|
||||
dst += stride;
|
||||
@@ -1060,44 +1054,44 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
HighbdHPredictor(dst, stride, 32, above, left, bd);
|
||||
}
|
||||
|
||||
/// <summary>
/// Fills each row of a <paramref name="bs"/> x <paramref name="bs"/> block with the left neighbour of that row.
/// </summary>
/// <param name="dst">Pointer to the top-left sample of the destination block.</param>
/// <param name="stride">Distance, in samples, between consecutive destination rows.</param>
/// <param name="bs">Block size (width and height) in samples.</param>
/// <param name="above">Row above the block; not read by this predictor.</param>
/// <param name="left">Column to the left of the block; entries 0..bs-1 are read.</param>
/// <param name="bd">Bit depth; not used by this predictor.</param>
private static unsafe void HighbdHPredictor(ushort* dst, int stride, int bs, ushort* above, ushort* left,
    int bd)
{
    for (int r = 0; r < bs; r++)
    {
        MemoryUtil.Fill(dst, left[r], bs);
        dst += stride;
    }
}
|
||||
|
||||
/// <summary>
/// 4x4 entry point: forwards to <c>HighbdTmPredictor</c> with a block size of 4.
/// </summary>
public static unsafe void HighbdTmPredictor4x4(ushort* dst, int stride, ushort* above, ushort* left, int bd)
{
    HighbdTmPredictor(dst, stride, 4, above, left, bd);
}
|
||||
|
||||
/// <summary>
/// 8x8 entry point: forwards to <c>HighbdTmPredictor</c> with a block size of 8.
/// </summary>
public static unsafe void HighbdTmPredictor8x8(ushort* dst, int stride, ushort* above, ushort* left, int bd)
{
    HighbdTmPredictor(dst, stride, 8, above, left, bd);
}
|
||||
|
||||
/// <summary>
/// 16x16 entry point: forwards to <c>HighbdTmPredictor</c> with a block size of 16.
/// </summary>
public static unsafe void HighbdTmPredictor16x16(ushort* dst, int stride, ushort* above, ushort* left, int bd)
{
    HighbdTmPredictor(dst, stride, 16, above, left, bd);
}
|
||||
|
||||
/// <summary>
/// 32x32 entry point: forwards to <c>HighbdTmPredictor</c> with a block size of 32.
/// </summary>
public static unsafe void HighbdTmPredictor32x32(ushort* dst, int stride, ushort* above, ushort* left, int bd)
{
    HighbdTmPredictor(dst, stride, 32, above, left, bd);
}
|
||||
|
||||
private static unsafe void HighbdTMPredictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd)
|
||||
private static unsafe void HighbdTmPredictor(ushort* dst, int stride, int bs, ushort* above, ushort* left,
|
||||
int bd)
|
||||
{
|
||||
int r, c;
|
||||
int yTopLeft = above[-1];
|
||||
|
||||
for (r = 0; r < bs; r++)
|
||||
for (int r = 0; r < bs; r++)
|
||||
{
|
||||
for (c = 0; c < bs; c++)
|
||||
for (int c = 0; c < bs; c++)
|
||||
{
|
||||
dst[c] = BitUtils.ClipPixelHighbd(left[r] + above[c] - yTopLeft, bd);
|
||||
}
|
||||
@@ -1116,21 +1110,22 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
HighbdDc128Predictor(dst, stride, 8, above, left, bd);
|
||||
}
|
||||
|
||||
/// <summary>
/// 16x16 entry point: forwards to <c>HighbdDc128Predictor</c> with a block size of 16.
/// </summary>
public static unsafe void HighbdDc128Predictor16x16(ushort* dst, int stride, ushort* above, ushort* left,
    int bd)
{
    HighbdDc128Predictor(dst, stride, 16, above, left, bd);
}
|
||||
|
||||
/// <summary>
/// 32x32 entry point: forwards to <c>HighbdDc128Predictor</c> with a block size of 32.
/// </summary>
public static unsafe void HighbdDc128Predictor32x32(ushort* dst, int stride, ushort* above, ushort* left,
    int bd)
{
    HighbdDc128Predictor(dst, stride, 32, above, left, bd);
}
|
||||
|
||||
private static unsafe void HighbdDc128Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd)
|
||||
private static unsafe void HighbdDc128Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left,
|
||||
int bd)
|
||||
{
|
||||
int r;
|
||||
|
||||
for (r = 0; r < bs; r++)
|
||||
for (int r = 0; r < bs; r++)
|
||||
{
|
||||
MemoryUtil.Fill(dst, (ushort)(128 << (bd - 8)), bs);
|
||||
dst += stride;
|
||||
@@ -1147,28 +1142,31 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
HighbdDcLeftPredictor(dst, stride, 8, above, left, bd);
|
||||
}
|
||||
|
||||
/// <summary>
/// 16x16 entry point: forwards to <c>HighbdDcLeftPredictor</c> with a block size of 16.
/// </summary>
public static unsafe void HighbdDcLeftPredictor16x16(ushort* dst, int stride, ushort* above, ushort* left,
    int bd)
{
    HighbdDcLeftPredictor(dst, stride, 16, above, left, bd);
}
|
||||
|
||||
/// <summary>
/// 32x32 entry point: forwards to <c>HighbdDcLeftPredictor</c> with a block size of 32.
/// </summary>
public static unsafe void HighbdDcLeftPredictor32x32(ushort* dst, int stride, ushort* above, ushort* left,
    int bd)
{
    HighbdDcLeftPredictor(dst, stride, 32, above, left, bd);
}
|
||||
|
||||
private static unsafe void HighbdDcLeftPredictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd)
|
||||
private static unsafe void HighbdDcLeftPredictor(ushort* dst, int stride, int bs, ushort* above, ushort* left,
|
||||
int bd)
|
||||
{
|
||||
int i, r, expectedDc, sum = 0;
|
||||
int expectedDc, sum = 0;
|
||||
|
||||
for (i = 0; i < bs; i++)
|
||||
for (int i = 0; i < bs; i++)
|
||||
{
|
||||
sum += left[i];
|
||||
}
|
||||
|
||||
expectedDc = (sum + (bs >> 1)) / bs;
|
||||
|
||||
for (r = 0; r < bs; r++)
|
||||
for (int r = 0; r < bs; r++)
|
||||
{
|
||||
MemoryUtil.Fill(dst, (ushort)expectedDc, bs);
|
||||
dst += stride;
|
||||
@@ -1185,28 +1183,31 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
HighbdDcTopPredictor(dst, stride, 8, above, left, bd);
|
||||
}
|
||||
|
||||
/// <summary>
/// 16x16 entry point: forwards to <c>HighbdDcTopPredictor</c> with a block size of 16.
/// </summary>
public static unsafe void HighbdDcTopPredictor16x16(ushort* dst, int stride, ushort* above, ushort* left,
    int bd)
{
    HighbdDcTopPredictor(dst, stride, 16, above, left, bd);
}
|
||||
|
||||
/// <summary>
/// 32x32 entry point: forwards to <c>HighbdDcTopPredictor</c> with a block size of 32.
/// </summary>
public static unsafe void HighbdDcTopPredictor32x32(ushort* dst, int stride, ushort* above, ushort* left,
    int bd)
{
    HighbdDcTopPredictor(dst, stride, 32, above, left, bd);
}
|
||||
|
||||
private static unsafe void HighbdDcTopPredictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd)
|
||||
private static unsafe void HighbdDcTopPredictor(ushort* dst, int stride, int bs, ushort* above, ushort* left,
|
||||
int bd)
|
||||
{
|
||||
int i, r, expectedDc, sum = 0;
|
||||
int expectedDc, sum = 0;
|
||||
|
||||
for (i = 0; i < bs; i++)
|
||||
for (int i = 0; i < bs; i++)
|
||||
{
|
||||
sum += above[i];
|
||||
}
|
||||
|
||||
expectedDc = (sum + (bs >> 1)) / bs;
|
||||
|
||||
for (r = 0; r < bs; r++)
|
||||
for (int r = 0; r < bs; r++)
|
||||
{
|
||||
MemoryUtil.Fill(dst, (ushort)expectedDc, bs);
|
||||
dst += stride;
|
||||
@@ -1233,12 +1234,13 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
HighbdDcPredictor(dst, stride, 32, above, left, bd);
|
||||
}
|
||||
|
||||
private static unsafe void HighbdDcPredictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd)
|
||||
private static unsafe void HighbdDcPredictor(ushort* dst, int stride, int bs, ushort* above, ushort* left,
|
||||
int bd)
|
||||
{
|
||||
int i, r, expectedDc, sum = 0;
|
||||
int expectedDc, sum = 0;
|
||||
int count = 2 * bs;
|
||||
|
||||
for (i = 0; i < bs; i++)
|
||||
for (int i = 0; i < bs; i++)
|
||||
{
|
||||
sum += above[i];
|
||||
sum += left[i];
|
||||
@@ -1246,7 +1248,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
|
||||
expectedDc = (sum + (count >> 1)) / count;
|
||||
|
||||
for (r = 0; r < bs; r++)
|
||||
for (int r = 0; r < bs; r++)
|
||||
{
|
||||
MemoryUtil.Fill(dst, (ushort)expectedDc, bs);
|
||||
dst += stride;
|
||||
@@ -1265,7 +1267,8 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
Dst(dst, stride, 1, 0) = Avg3(I, j, k);
|
||||
Dst(dst, stride, 3, 0) = Dst(dst, stride, 1, 1) = Avg3(j, k, l);
|
||||
Dst(dst, stride, 3, 1) = Dst(dst, stride, 1, 2) = Avg3(k, l, l);
|
||||
Dst(dst, stride, 3, 2) = Dst(dst, stride, 2, 2) = Dst(dst, stride, 0, 3) = Dst(dst, stride, 1, 3) = Dst(dst, stride, 2, 3) = Dst(dst, stride, 3, 3) = l;
|
||||
Dst(dst, stride, 3, 2) = Dst(dst, stride, 2, 2) = Dst(dst, stride, 0, 3) =
|
||||
Dst(dst, stride, 1, 3) = Dst(dst, stride, 2, 3) = Dst(dst, stride, 3, 3) = l;
|
||||
}
|
||||
|
||||
public static unsafe void HighbdD63Predictor4x4(ushort* dst, int stride, ushort* above, ushort* left, int bd)
|
||||
@@ -1303,7 +1306,8 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
Dst(dst, stride, 0, 0) = Avg3(a, b, c);
|
||||
Dst(dst, stride, 1, 0) = Dst(dst, stride, 0, 1) = Avg3(b, c, d);
|
||||
Dst(dst, stride, 2, 0) = Dst(dst, stride, 1, 1) = Dst(dst, stride, 0, 2) = Avg3(c, d, e);
|
||||
Dst(dst, stride, 3, 0) = Dst(dst, stride, 2, 1) = Dst(dst, stride, 1, 2) = Dst(dst, stride, 0, 3) = Avg3(d, e, f);
|
||||
Dst(dst, stride, 3, 0) =
|
||||
Dst(dst, stride, 2, 1) = Dst(dst, stride, 1, 2) = Dst(dst, stride, 0, 3) = Avg3(d, e, f);
|
||||
Dst(dst, stride, 3, 1) = Dst(dst, stride, 2, 2) = Dst(dst, stride, 1, 3) = Avg3(e, f, g);
|
||||
Dst(dst, stride, 3, 2) = Dst(dst, stride, 2, 3) = Avg3(f, g, h);
|
||||
Dst(dst, stride, 3, 3) = h; // Differs from vp8
|
||||
@@ -1346,7 +1350,8 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
Dst(dst, stride, 0, 3) = Avg3(j, k, l);
|
||||
Dst(dst, stride, 1, 3) = Dst(dst, stride, 0, 2) = Avg3(I, j, k);
|
||||
Dst(dst, stride, 2, 3) = Dst(dst, stride, 1, 2) = Dst(dst, stride, 0, 1) = Avg3(x, I, j);
|
||||
Dst(dst, stride, 3, 3) = Dst(dst, stride, 2, 2) = Dst(dst, stride, 1, 1) = Dst(dst, stride, 0, 0) = Avg3(a, x, I);
|
||||
Dst(dst, stride, 3, 3) =
|
||||
Dst(dst, stride, 2, 2) = Dst(dst, stride, 1, 1) = Dst(dst, stride, 0, 0) = Avg3(a, x, I);
|
||||
Dst(dst, stride, 3, 2) = Dst(dst, stride, 2, 1) = Dst(dst, stride, 1, 0) = Avg3(b, a, x);
|
||||
Dst(dst, stride, 3, 1) = Dst(dst, stride, 2, 0) = Avg3(c, b, a);
|
||||
Dst(dst, stride, 3, 0) = Avg3(d, c, b);
|
||||
@@ -1376,4 +1381,4 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
Dst(dst, stride, 1, 3) = Avg3(l, k, j);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
229
src/Ryujinx.Graphics.Nvdec.Vp9/Dsp/LoopFilterAuto.cs
Normal file
229
src/Ryujinx.Graphics.Nvdec.Vp9/Dsp/LoopFilterAuto.cs
Normal file
@@ -0,0 +1,229 @@
|
||||
using Ryujinx.Common.Memory;
|
||||
using System;
|
||||
using System.Runtime.Intrinsics.X86;
|
||||
|
||||
namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
{
|
||||
/// <summary>
/// Runtime dispatcher for the loop filter entry points.
/// Each method forwards to <c>LoopFilterSse2</c> when <see cref="Sse2.IsSupported"/> is true and to
/// <c>LoopFilterScalar</c> otherwise. The SSE2 path receives the threshold spans unchanged; the scalar
/// path receives only the first byte of each span.
/// </summary>
internal class LoopFilterAuto
{
    /// <summary>Dispatches <c>LpfHorizontal4</c> to the SSE2 or scalar implementation.</summary>
    public static void LpfHorizontal4(
        ArrayPtr<byte> s,
        int pitch,
        ReadOnlySpan<byte> blimit,
        ReadOnlySpan<byte> limit,
        ReadOnlySpan<byte> thresh)
    {
        if (!Sse2.IsSupported)
        {
            LoopFilterScalar.LpfHorizontal4(s, pitch, blimit[0], limit[0], thresh[0]);

            return;
        }

        LoopFilterSse2.LpfHorizontal4(s, pitch, blimit, limit, thresh);
    }

    /// <summary>Dispatches <c>LpfHorizontal4Dual</c> to the SSE2 or scalar implementation.</summary>
    public static void LpfHorizontal4Dual(
        ArrayPtr<byte> s,
        int pitch,
        ReadOnlySpan<byte> blimit0,
        ReadOnlySpan<byte> limit0,
        ReadOnlySpan<byte> thresh0,
        ReadOnlySpan<byte> blimit1,
        ReadOnlySpan<byte> limit1,
        ReadOnlySpan<byte> thresh1)
    {
        if (!Sse2.IsSupported)
        {
            LoopFilterScalar.LpfHorizontal4Dual(
                s, pitch, blimit0[0], limit0[0], thresh0[0], blimit1[0], limit1[0], thresh1[0]);

            return;
        }

        LoopFilterSse2.LpfHorizontal4Dual(s, pitch, blimit0, limit0, thresh0, blimit1, limit1, thresh1);
    }

    /// <summary>Dispatches <c>LpfHorizontal8</c> to the SSE2 or scalar implementation.</summary>
    public static void LpfHorizontal8(
        ArrayPtr<byte> s,
        int pitch,
        ReadOnlySpan<byte> blimit,
        ReadOnlySpan<byte> limit,
        ReadOnlySpan<byte> thresh)
    {
        if (!Sse2.IsSupported)
        {
            LoopFilterScalar.LpfHorizontal8(s, pitch, blimit[0], limit[0], thresh[0]);

            return;
        }

        LoopFilterSse2.LpfHorizontal8(s, pitch, blimit, limit, thresh);
    }

    /// <summary>Dispatches <c>LpfHorizontal8Dual</c> to the SSE2 or scalar implementation.</summary>
    public static void LpfHorizontal8Dual(
        ArrayPtr<byte> s,
        int pitch,
        ReadOnlySpan<byte> blimit0,
        ReadOnlySpan<byte> limit0,
        ReadOnlySpan<byte> thresh0,
        ReadOnlySpan<byte> blimit1,
        ReadOnlySpan<byte> limit1,
        ReadOnlySpan<byte> thresh1)
    {
        if (!Sse2.IsSupported)
        {
            LoopFilterScalar.LpfHorizontal8Dual(
                s, pitch, blimit0[0], limit0[0], thresh0[0], blimit1[0], limit1[0], thresh1[0]);

            return;
        }

        LoopFilterSse2.LpfHorizontal8Dual(s, pitch, blimit0, limit0, thresh0, blimit1, limit1, thresh1);
    }

    /// <summary>Dispatches <c>LpfHorizontal16</c> to the SSE2 or scalar implementation.</summary>
    public static void LpfHorizontal16(
        ArrayPtr<byte> s,
        int pitch,
        ReadOnlySpan<byte> blimit,
        ReadOnlySpan<byte> limit,
        ReadOnlySpan<byte> thresh)
    {
        if (!Sse2.IsSupported)
        {
            LoopFilterScalar.LpfHorizontal16(s, pitch, blimit[0], limit[0], thresh[0]);

            return;
        }

        LoopFilterSse2.LpfHorizontal16(s, pitch, blimit, limit, thresh);
    }

    /// <summary>Dispatches <c>LpfHorizontal16Dual</c> to the SSE2 or scalar implementation.</summary>
    public static void LpfHorizontal16Dual(
        ArrayPtr<byte> s,
        int pitch,
        ReadOnlySpan<byte> blimit,
        ReadOnlySpan<byte> limit,
        ReadOnlySpan<byte> thresh)
    {
        if (!Sse2.IsSupported)
        {
            LoopFilterScalar.LpfHorizontal16Dual(s, pitch, blimit[0], limit[0], thresh[0]);

            return;
        }

        LoopFilterSse2.LpfHorizontal16Dual(s, pitch, blimit, limit, thresh);
    }

    /// <summary>Dispatches <c>LpfVertical4</c> to the SSE2 or scalar implementation.</summary>
    public static void LpfVertical4(
        ArrayPtr<byte> s,
        int pitch,
        ReadOnlySpan<byte> blimit,
        ReadOnlySpan<byte> limit,
        ReadOnlySpan<byte> thresh)
    {
        if (!Sse2.IsSupported)
        {
            LoopFilterScalar.LpfVertical4(s, pitch, blimit[0], limit[0], thresh[0]);

            return;
        }

        LoopFilterSse2.LpfVertical4(s, pitch, blimit, limit, thresh);
    }

    /// <summary>Dispatches <c>LpfVertical4Dual</c> to the SSE2 or scalar implementation.</summary>
    public static void LpfVertical4Dual(
        ArrayPtr<byte> s,
        int pitch,
        ReadOnlySpan<byte> blimit0,
        ReadOnlySpan<byte> limit0,
        ReadOnlySpan<byte> thresh0,
        ReadOnlySpan<byte> blimit1,
        ReadOnlySpan<byte> limit1,
        ReadOnlySpan<byte> thresh1)
    {
        if (!Sse2.IsSupported)
        {
            LoopFilterScalar.LpfVertical4Dual(
                s, pitch, blimit0[0], limit0[0], thresh0[0], blimit1[0], limit1[0], thresh1[0]);

            return;
        }

        LoopFilterSse2.LpfVertical4Dual(s, pitch, blimit0, limit0, thresh0, blimit1, limit1, thresh1);
    }

    /// <summary>Dispatches <c>LpfVertical8</c> to the SSE2 or scalar implementation.</summary>
    public static void LpfVertical8(
        ArrayPtr<byte> s,
        int pitch,
        ReadOnlySpan<byte> blimit,
        ReadOnlySpan<byte> limit,
        ReadOnlySpan<byte> thresh)
    {
        if (!Sse2.IsSupported)
        {
            LoopFilterScalar.LpfVertical8(s, pitch, blimit[0], limit[0], thresh[0]);

            return;
        }

        LoopFilterSse2.LpfVertical8(s, pitch, blimit, limit, thresh);
    }

    /// <summary>Dispatches <c>LpfVertical8Dual</c> to the SSE2 or scalar implementation.</summary>
    public static void LpfVertical8Dual(
        ArrayPtr<byte> s,
        int pitch,
        ReadOnlySpan<byte> blimit0,
        ReadOnlySpan<byte> limit0,
        ReadOnlySpan<byte> thresh0,
        ReadOnlySpan<byte> blimit1,
        ReadOnlySpan<byte> limit1,
        ReadOnlySpan<byte> thresh1)
    {
        if (!Sse2.IsSupported)
        {
            LoopFilterScalar.LpfVertical8Dual(
                s, pitch, blimit0[0], limit0[0], thresh0[0], blimit1[0], limit1[0], thresh1[0]);

            return;
        }

        LoopFilterSse2.LpfVertical8Dual(s, pitch, blimit0, limit0, thresh0, blimit1, limit1, thresh1);
    }

    /// <summary>Dispatches <c>LpfVertical16</c> to the SSE2 or scalar implementation.</summary>
    public static void LpfVertical16(
        ArrayPtr<byte> s,
        int pitch,
        ReadOnlySpan<byte> blimit,
        ReadOnlySpan<byte> limit,
        ReadOnlySpan<byte> thresh)
    {
        if (!Sse2.IsSupported)
        {
            LoopFilterScalar.LpfVertical16(s, pitch, blimit[0], limit[0], thresh[0]);

            return;
        }

        LoopFilterSse2.LpfVertical16(s, pitch, blimit, limit, thresh);
    }

    /// <summary>Dispatches <c>LpfVertical16Dual</c> to the SSE2 or scalar implementation.</summary>
    public static void LpfVertical16Dual(
        ArrayPtr<byte> s,
        int pitch,
        ReadOnlySpan<byte> blimit,
        ReadOnlySpan<byte> limit,
        ReadOnlySpan<byte> thresh)
    {
        if (!Sse2.IsSupported)
        {
            LoopFilterScalar.LpfVertical16Dual(s, pitch, blimit[0], limit[0], thresh[0]);

            return;
        }

        LoopFilterSse2.LpfVertical16Dual(s, pitch, blimit, limit, thresh);
    }
}
|
||||
}
|
||||
1093
src/Ryujinx.Graphics.Nvdec.Vp9/Dsp/LoopFilterScalar.cs
Normal file
1093
src/Ryujinx.Graphics.Nvdec.Vp9/Dsp/LoopFilterScalar.cs
Normal file
File diff suppressed because it is too large
Load Diff
1837
src/Ryujinx.Graphics.Nvdec.Vp9/Dsp/LoopFilterSse2.cs
Normal file
1837
src/Ryujinx.Graphics.Nvdec.Vp9/Dsp/LoopFilterSse2.cs
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,4 +1,4 @@
|
||||
using Ryujinx.Graphics.Nvdec.Vp9.Common;
|
||||
using Ryujinx.Graphics.Nvdec.Vp9.Common;
|
||||
using System;
|
||||
using System.Diagnostics;
|
||||
|
||||
@@ -12,10 +12,9 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
{
|
||||
Debug.Assert(den != 0);
|
||||
{
|
||||
int p = (int)(((ulong)num * 256 + (den >> 1)) / den);
|
||||
int p = (int)((((ulong)num * 256) + (den >> 1)) / den);
|
||||
// (p > 255) ? 255 : (p < 1) ? 1 : p;
|
||||
int clippedProb = p | ((255 - p) >> 23) | (p == 0 ? 1 : 0);
|
||||
|
||||
return (byte)clippedProb;
|
||||
}
|
||||
}
|
||||
@@ -23,13 +22,13 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
/// <summary>
/// Blends two probabilities: <paramref name="prob1"/> is weighted by (256 - <paramref name="factor"/>) and
/// <paramref name="prob2"/> by <paramref name="factor"/>; the sum is passed to
/// <c>BitUtils.RoundPowerOfTwo</c> with 8 bits (presumably a rounded division by 256 - confirm against BitUtils).
/// </summary>
/// <remarks>This function assumes prob1 and prob2 are already within [1,255] range.</remarks>
/// <param name="prob1">First probability.</param>
/// <param name="prob2">Second probability.</param>
/// <param name="factor">Weight given to <paramref name="prob2"/>, out of 256.</param>
/// <returns>The blended value truncated to a byte.</returns>
public static byte WeightedProb(int prob1, int prob2, int factor)
{
    return (byte)BitUtils.RoundPowerOfTwo((prob1 * (256 - factor)) + (prob2 * factor), 8);
}
|
||||
|
||||
// MODE_MV_MAX_UPDATE_FACTOR (128) * count / MODE_MV_COUNT_SAT;
// Indexed by a count in the range 0..20, hence 21 entries.
private static readonly uint[] CountToUpdateFactor =
{
    0, 6, 12, 19, 25, 32, 38, 44, 51, 57, 64, 70, 76, 83, 89, 96, 102, 108, 115, 121, 128
};
|
||||
|
||||
private const int ModeMvCountSat = 20;
|
||||
@@ -41,14 +40,11 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
{
|
||||
return preProb;
|
||||
}
|
||||
else
|
||||
{
|
||||
uint count = Math.Min(den, ModeMvCountSat);
|
||||
uint factor = _countToUpdateFactor[(int)count];
|
||||
byte prob = GetProb(ct0, den);
|
||||
|
||||
return WeightedProb(preProb, prob, (int)factor);
|
||||
}
|
||||
uint count = Math.Min(den, ModeMvCountSat);
|
||||
uint factor = CountToUpdateFactor[(int)count];
|
||||
byte prob = GetProb(ct0, den);
|
||||
return WeightedProb(preProb, prob, (int)factor);
|
||||
}
|
||||
|
||||
private static uint TreeMergeProbsImpl(
|
||||
@@ -59,17 +55,17 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
Span<byte> probs)
|
||||
{
|
||||
int l = tree[i];
|
||||
uint leftCount = (l <= 0) ? counts[-l] : TreeMergeProbsImpl((uint)l, tree, preProbs, counts, probs);
|
||||
uint leftCount = l <= 0 ? counts[-l] : TreeMergeProbsImpl((uint)l, tree, preProbs, counts, probs);
|
||||
int r = tree[i + 1];
|
||||
uint rightCount = (r <= 0) ? counts[-r] : TreeMergeProbsImpl((uint)r, tree, preProbs, counts, probs);
|
||||
uint rightCount = r <= 0 ? counts[-r] : TreeMergeProbsImpl((uint)r, tree, preProbs, counts, probs);
|
||||
probs[(int)(i >> 1)] = ModeMvMergeProbs(preProbs[(int)(i >> 1)], leftCount, rightCount);
|
||||
|
||||
return leftCount + rightCount;
|
||||
}
|
||||
|
||||
/// <summary>
/// Entry point for merging tree probabilities: runs <c>TreeMergeProbsImpl</c> starting at tree index 0
/// and discards the count it returns.
/// </summary>
/// <param name="tree">Tree table passed through to the implementation.</param>
/// <param name="preProbs">Previous probabilities, read by the implementation.</param>
/// <param name="counts">Counts, read by the implementation.</param>
/// <param name="probs">Destination for the merged probabilities.</param>
public static void TreeMergeProbs(sbyte[] tree, ReadOnlySpan<byte> preProbs, ReadOnlySpan<uint> counts,
    Span<byte> probs)
{
    TreeMergeProbsImpl(0, tree, preProbs, counts, probs);
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,4 +1,5 @@
|
||||
using Ryujinx.Common.Memory;
|
||||
using Ryujinx.Common.Memory;
|
||||
using Ryujinx.Graphics.Nvdec.Vp9.Types;
|
||||
using System;
|
||||
using System.Buffers.Binary;
|
||||
|
||||
@@ -6,18 +7,18 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
{
|
||||
internal struct Reader
|
||||
{
|
||||
// 256-entry lookup table, laid out 16 values per row.
// Entry 0 is 0, entry 1 is 7, and the value drops by one each time the index doubles:
// 2..3 -> 6, 4..7 -> 5, 8..15 -> 4, 16..31 -> 3, 32..63 -> 2, 64..127 -> 1, 128..255 -> 0.
private static readonly byte[] Norm =
{
    0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
|
||||
|
||||
private const int BdValueSize = sizeof(ulong) * 8;
|
||||
|
||||
// This is meant to be a large, positive constant that can still be efficiently
|
||||
@@ -36,16 +37,13 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
{
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
_buffer = new ArrayPtr<byte>(ref buffer[0], size);
|
||||
Value = 0;
|
||||
Count = -8;
|
||||
Range = 255;
|
||||
Fill();
|
||||
|
||||
return ReadBit() != 0; // Marker bit
|
||||
}
|
||||
_buffer = new ArrayPtr<byte>(ref buffer[0], size);
|
||||
Value = 0;
|
||||
Count = -8;
|
||||
Range = 255;
|
||||
Fill();
|
||||
return ReadBit() != 0; // Marker bit
|
||||
}
|
||||
|
||||
private void Fill()
|
||||
@@ -65,7 +63,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
ulong bigEndianValues = BinaryPrimitives.ReadUInt64BigEndian(buffer);
|
||||
nv = bigEndianValues >> (BdValueSize - bits);
|
||||
count += bits;
|
||||
buffer = buffer[(bits >> 3)..];
|
||||
buffer = buffer.Slice(bits >> 3);
|
||||
value = Value | (nv << (shift & 0x7));
|
||||
}
|
||||
else
|
||||
@@ -84,7 +82,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
{
|
||||
count += 8;
|
||||
value |= (ulong)buffer[0] << shift;
|
||||
buffer = buffer[1..];
|
||||
buffer = buffer.Slice(1);
|
||||
shift -= 8;
|
||||
}
|
||||
}
|
||||
@@ -98,7 +96,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
Count = count;
|
||||
}
|
||||
|
||||
public readonly bool HasError()
|
||||
public bool HasError()
|
||||
{
|
||||
// Check if we have reached the end of the buffer.
|
||||
//
|
||||
@@ -124,7 +122,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
ulong bigsplit;
|
||||
int count;
|
||||
uint range;
|
||||
uint split = (Range * (uint)prob + (256 - (uint)prob)) >> 8;
|
||||
uint split = ((Range * (uint)prob) + (256 - (uint)prob)) >> 8;
|
||||
|
||||
if (Count < 0)
|
||||
{
|
||||
@@ -146,7 +144,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
}
|
||||
|
||||
{
|
||||
int shift = _norm[range];
|
||||
int shift = Norm[range];
|
||||
range <<= shift;
|
||||
value <<= shift;
|
||||
count -= shift;
|
||||
@@ -188,7 +186,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
|
||||
public int ReadBool(int prob, ref ulong value, ref int count, ref uint range)
|
||||
{
|
||||
uint split = (range * (uint)prob + (256 - (uint)prob)) >> 8;
|
||||
uint split = ((range * (uint)prob) + (256 - (uint)prob)) >> 8;
|
||||
ulong bigsplit = (ulong)split << (BdValueSize - 8);
|
||||
|
||||
if (count < 0)
|
||||
@@ -202,19 +200,20 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
|
||||
if (value >= bigsplit)
|
||||
{
|
||||
range -= split;
|
||||
value -= bigsplit;
|
||||
range = range - split;
|
||||
value = value - bigsplit;
|
||||
{
|
||||
int shift = _norm[range];
|
||||
int shift = Norm[range];
|
||||
range <<= shift;
|
||||
value <<= shift;
|
||||
count -= shift;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
range = split;
|
||||
{
|
||||
int shift = _norm[range];
|
||||
int shift = Norm[range];
|
||||
range <<= shift;
|
||||
value <<= shift;
|
||||
count -= shift;
|
||||
@@ -230,7 +229,82 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
Count -= 8;
|
||||
_buffer = _buffer.Slice(-1);
|
||||
}
|
||||
|
||||
return _buffer;
|
||||
}
|
||||
|
||||
/// <summary>
/// Decodes a value by first reading a 7-bit literal. Literals below the cutoff
/// (65) are returned unchanged; otherwise one more bit is read and the result is
/// <c>(literal &lt;&lt; 1) - cutoff + bit</c>.
/// </summary>
/// <returns>The decoded value.</returns>
private int DecodeUniform()
{
    const int bitCount = 8;
    const int cutoff = (1 << bitCount) - 191;

    int literal = ReadLiteral(bitCount - 1);

    if (literal < cutoff)
    {
        return literal;
    }

    // The extra bit is only consumed for literals at or above the cutoff.
    return (literal << 1) - cutoff + ReadBit();
}
|
||||
|
||||
/// <summary>
/// Decodes a value using up to three selector bits. The first selector bit that
/// reads as zero picks the encoding:
/// <list type="bullet">
/// <item>1st bit zero: <c>ReadLiteral(4)</c></item>
/// <item>2nd bit zero: <c>ReadLiteral(4) + 16</c></item>
/// <item>3rd bit zero: <c>ReadLiteral(5) + 32</c></item>
/// <item>all three non-zero: <c>DecodeUniform() + 64</c></item>
/// </list>
/// </summary>
/// <returns>The decoded value.</returns>
public int DecodeTermSubexp()
{
    // Each condition is evaluated lazily and in order, so a selector bit is
    // only read when every previous selector bit was non-zero.
    return ReadBit() == 0 ? ReadLiteral(4)
        : ReadBit() == 0 ? ReadLiteral(4) + 16
        : ReadBit() == 0 ? ReadLiteral(5) + 32
        : DecodeUniform() + 64;
}
|
||||
|
||||
/// <summary>
/// Reads a <see cref="TxMode"/> from a 2-bit literal. When the literal equals
/// <see cref="TxMode.Allow32x32"/>, one additional bit is read and added to it.
/// </summary>
/// <returns>The transform mode that was read.</returns>
public TxMode ReadTxMode()
{
    TxMode mode = (TxMode)ReadLiteral(2);

    if (mode != TxMode.Allow32x32)
    {
        return mode;
    }

    // Only this mode carries an extra bit.
    mode += ReadBit();

    return mode;
}
|
||||
|
||||
/// <summary>
/// Reads <paramref name="n"/> booleans, one per entry of <paramref name="probs"/>,
/// and packs them into an integer with the first bit read ending up most significant.
/// </summary>
/// <param name="probs">Probability passed to <c>ReadBool</c> for each bit, indexed 0..n-1.</param>
/// <param name="n">Number of bits to read.</param>
/// <param name="value">Decoder value state, forwarded by reference to <c>ReadBool</c>.</param>
/// <param name="count">Decoder count state, forwarded by reference to <c>ReadBool</c>.</param>
/// <param name="range">Decoder range state, forwarded by reference to <c>ReadBool</c>.</param>
/// <returns>The packed bits; 0 when <paramref name="n"/> is not positive.</returns>
public int ReadCoeff(
    ReadOnlySpan<byte> probs,
    int n,
    ref ulong value,
    ref int count,
    ref uint range)
{
    int packed = 0;
    int index = 0;

    while (index < n)
    {
        int bit = ReadBool(probs[index], ref value, ref count, ref range);
        packed = (packed << 1) | bit;
        index++;
    }

    return packed;
}
|
||||
|
||||
/// <summary>
/// Conditionally updates a probability in place. A flag is read with
/// <c>Read(Entropy.DiffUpdateProb)</c>; when it is non-zero, <paramref name="p"/> is
/// replaced by <c>DSubExp.InvRemapProb(DecodeTermSubexp(), p)</c> truncated to a byte.
/// </summary>
/// <param name="p">Probability to update; left untouched when the flag reads as zero.</param>
public void DiffUpdateProb(ref byte p)
{
    if (Read(Entropy.DiffUpdateProb) == 0)
    {
        return;
    }

    int delta = DecodeTermSubexp();
    p = (byte)DSubExp.InvRemapProb(delta, p);
}
|
||||
|
||||
/// <summary>
/// Walks the first <paramref name="n"/> entries of <paramref name="p"/>. For each one a
/// flag is read with <c>Read(EntropyMv.UpdateProb)</c>; when it is non-zero the entry is
/// overwritten with <c>(ReadLiteral(7) &lt;&lt; 1) | 1</c>.
/// </summary>
/// <param name="p">Probabilities to update in place.</param>
/// <param name="n">Number of entries to visit.</param>
public void UpdateMvProbs(Span<byte> p, int n)
{
    for (int index = 0; index < n; index++)
    {
        if (Read(EntropyMv.UpdateProb) == 0)
        {
            continue;
        }

        // The low bit is forced to 1, so a stored value is always odd.
        p[index] = (byte)((ReadLiteral(7) << 1) | 1);
    }
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -13,42 +13,42 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
|
||||
// for (int i = 1; i < 32; ++i)
|
||||
// Console.WriteLine("public const short CosPi{0}_64 = {1};", i, MathF.Round(16384 * MathF.Cos(i * MathF.PI / 64)));
|
||||
// Note: sin(k * Pi / 64) = cos((32 - k) * Pi / 64)
|
||||
public const short CosPi1_64 = 16364;
|
||||
public const short CosPi2_64 = 16305;
|
||||
public const short CosPi3_64 = 16207;
|
||||
public const short CosPi4_64 = 16069;
|
||||
public const short CosPi5_64 = 15893;
|
||||
public const short CosPi6_64 = 15679;
|
||||
public const short CosPi7_64 = 15426;
|
||||
public const short CosPi8_64 = 15137;
|
||||
public const short CosPi9_64 = 14811;
|
||||
public const short CosPi10_64 = 14449;
|
||||
public const short CosPi11_64 = 14053;
|
||||
public const short CosPi12_64 = 13623;
|
||||
public const short CosPi13_64 = 13160;
|
||||
public const short CosPi14_64 = 12665;
|
||||
public const short CosPi15_64 = 12140;
|
||||
public const short CosPi16_64 = 11585;
|
||||
public const short CosPi17_64 = 11003;
|
||||
public const short CosPi18_64 = 10394;
|
||||
public const short CosPi19_64 = 9760;
|
||||
public const short CosPi20_64 = 9102;
|
||||
public const short CosPi21_64 = 8423;
|
||||
public const short CosPi22_64 = 7723;
|
||||
public const short CosPi23_64 = 7005;
|
||||
public const short CosPi24_64 = 6270;
|
||||
public const short CosPi25_64 = 5520;
|
||||
public const short CosPi26_64 = 4756;
|
||||
public const short CosPi27_64 = 3981;
|
||||
public const short CosPi28_64 = 3196;
|
||||
public const short CosPi29_64 = 2404;
|
||||
public const short CosPi30_64 = 1606;
|
||||
public const short CosPi31_64 = 804;
|
||||
public const short CosPi164 = 16364;
|
||||
public const short CosPi264 = 16305;
|
||||
public const short CosPi364 = 16207;
|
||||
public const short CosPi464 = 16069;
|
||||
public const short CosPi564 = 15893;
|
||||
public const short CosPi664 = 15679;
|
||||
public const short CosPi764 = 15426;
|
||||
public const short CosPi864 = 15137;
|
||||
public const short CosPi964 = 14811;
|
||||
public const short CosPi1064 = 14449;
|
||||
public const short CosPi1164 = 14053;
|
||||
public const short CosPi1264 = 13623;
|
||||
public const short CosPi1364 = 13160;
|
||||
public const short CosPi1464 = 12665;
|
||||
public const short CosPi1564 = 12140;
|
||||
public const short CosPi1664 = 11585;
|
||||
public const short CosPi1764 = 11003;
|
||||
public const short CosPi1864 = 10394;
|
||||
public const short CosPi1964 = 9760;
|
||||
public const short CosPi2064 = 9102;
|
||||
public const short CosPi2164 = 8423;
|
||||
public const short CosPi2264 = 7723;
|
||||
public const short CosPi2364 = 7005;
|
||||
public const short CosPi2464 = 6270;
|
||||
public const short CosPi2564 = 5520;
|
||||
public const short CosPi2664 = 4756;
|
||||
public const short CosPi2764 = 3981;
|
||||
public const short CosPi2864 = 3196;
|
||||
public const short CosPi2964 = 2404;
|
||||
public const short CosPi3064 = 1606;
|
||||
public const short CosPi3164 = 804;
|
||||
|
||||
// 16384 * sqrt(2) * sin(kPi / 9) * 2 / 3
|
||||
public const short SinPi1_9 = 5283;
|
||||
public const short SinPi2_9 = 9929;
|
||||
public const short SinPi3_9 = 13377;
|
||||
public const short SinPi4_9 = 15212;
|
||||
public const short SinPi19 = 5283;
|
||||
public const short SinPi29 = 9929;
|
||||
public const short SinPi39 = 13377;
|
||||
public const short SinPi49 = 15212;
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user