Implement VP9 loop filtering (#550)

Unmerged PR from OG Ryujinx (#4367). From @gdkchan:

> The main goal of this change is porting the loop filtering from
libvpx, which should fix the block artifacts on some VP9 videos on games
using NVDEC to decode them. In addition to that, there are two other
changes:
> 
> - The remaining decoder code required to decode a VP9 video (with
headers included) has been added. That was done because it's much better
to test the decoder standalone with a video file. I decided to keep that
code in the emulator, even if some of it is unused, since it makes
standalone testing easier in the future too, and we can include unit
tests with video files.
> - Large refactoring of both new and existing code to conform with our
coding styles, done by @TSRBerry (thanks!). Some of it has been
automated.
> 
> Since we had no loop filtering before, this change will make video
decoding slower. That may cause frame drops, etc., if the decoder is not
fast enough in some games. I plan to optimize the decoder further in the
future to make up for that. If possible, I'd prefer not to do it as
part of this PR, but if the perf loss is too severe I might consider it.
> 
> This will need to be tested on games that had the block artifacts; it
would be nice to confirm whether they match hardware now, and to get some
before/after screenshots, etc.

Comment from @Bjorn29512:

> Significantly improves the block artifacts in FE: Engage.
> 
> Before:
>
![](https://user-images.githubusercontent.com/110204265/216882414-ec88dbda-7544-4490-8a47-37f074056ae3.png)
> 
> After:
>
![](https://user-images.githubusercontent.com/110204265/216882478-4e81fead-1033-4877-b282-f9cac6d6aa3b.png)

---------

Co-authored-by: gdkchan <gab.dark.100@gmail.com>
Co-authored-by: TSR Berry <20988865+TSRBerry@users.noreply.github.com>
This commit is contained in:
Keaton
2025-02-18 20:59:36 -06:00
committed by GitHub
parent 920933bc9f
commit f91cd05260
79 changed files with 11343 additions and 3036 deletions

View File

@@ -1,4 +1,4 @@
using Ryujinx.Graphics.Nvdec.Vp9.Common;
using Ryujinx.Graphics.Nvdec.Vp9.Common;
namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
{
@@ -6,22 +6,22 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
{
private static unsafe ref byte Dst(byte* dst, int stride, int x, int y)
{
return ref dst[x + y * stride];
return ref dst[x + (y * stride)];
}
private static unsafe ref ushort Dst(ushort* dst, int stride, int x, int y)
{
return ref dst[x + y * stride];
return ref dst[x + (y * stride)];
}
private static byte Avg3(byte a, byte b, byte c)
{
return (byte)((a + 2 * b + c + 2) >> 2);
return (byte)((a + (2 * b) + c + 2) >> 2);
}
private static ushort Avg3(ushort a, ushort b, ushort c)
{
return (ushort)((a + 2 * b + c + 2) >> 2);
return (ushort)((a + (2 * b) + c + 2) >> 2);
}
private static byte Avg2(byte a, byte b)
@@ -51,9 +51,8 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
private static unsafe void D207Predictor(byte* dst, int stride, int bs, byte* above, byte* left)
{
int r, c;
// First column
for (r = 0; r < bs - 1; ++r)
for (int r = 0; r < bs - 1; ++r)
{
dst[r * stride] = Avg2(left[r], left[r + 1]);
}
@@ -62,7 +61,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
dst++;
// Second column
for (r = 0; r < bs - 2; ++r)
for (int r = 0; r < bs - 2; ++r)
{
dst[r * stride] = Avg3(left[r], left[r + 1], left[r + 2]);
}
@@ -72,16 +71,16 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
dst++;
// Rest of last row
for (c = 0; c < bs - 2; ++c)
for (int c = 0; c < bs - 2; ++c)
{
dst[(bs - 1) * stride + c] = left[bs - 1];
dst[((bs - 1) * stride) + c] = left[bs - 1];
}
for (r = bs - 2; r >= 0; --r)
for (int r = bs - 2; r >= 0; --r)
{
for (c = 0; c < bs - 2; ++c)
for (int c = 0; c < bs - 2; ++c)
{
dst[r * stride + c] = dst[(r + 1) * stride + c - 2];
dst[(r * stride) + c] = dst[((r + 1) * stride) + c - 2];
}
}
}
@@ -103,19 +102,18 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
private static unsafe void D63Predictor(byte* dst, int stride, int bs, byte* above, byte* left)
{
int r, c;
int size;
for (c = 0; c < bs; ++c)
for (int c = 0; c < bs; ++c)
{
dst[c] = Avg2(above[c], above[c + 1]);
dst[stride + c] = Avg3(above[c], above[c + 1], above[c + 2]);
}
for (r = 2, size = bs - 2; r < bs; r += 2, --size)
for (int r = 2, size = bs - 2; r < bs; r += 2, --size)
{
MemoryUtil.Copy(dst + (r + 0) * stride, dst + (r >> 1), size);
MemoryUtil.Fill(dst + (r + 0) * stride + size, above[bs - 1], bs - size);
MemoryUtil.Copy(dst + (r + 1) * stride, dst + stride + (r >> 1), size);
MemoryUtil.Fill(dst + (r + 1) * stride + size, above[bs - 1], bs - size);
MemoryUtil.Copy(dst + ((r + 0) * stride), dst + (r >> 1), size);
MemoryUtil.Fill(dst + ((r + 0) * stride) + size, above[bs - 1], bs - size);
MemoryUtil.Copy(dst + ((r + 1) * stride), dst + stride + (r >> 1), size);
MemoryUtil.Fill(dst + ((r + 1) * stride) + size, above[bs - 1], bs - size);
}
}
@@ -138,15 +136,15 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
{
byte aboveRight = above[bs - 1];
byte* dstRow0 = dst;
int x, size;
for (x = 0; x < bs - 1; ++x)
for (int x = 0; x < bs - 1; ++x)
{
dst[x] = Avg3(above[x], above[x + 1], above[x + 2]);
}
dst[bs - 1] = aboveRight;
dst += stride;
for (x = 1, size = bs - 2; x < bs; ++x, --size)
for (int x = 1, size = bs - 2; x < bs; ++x, --size)
{
MemoryUtil.Copy(dst, dstRow0 + x, size);
MemoryUtil.Fill(dst + size, aboveRight, x + 1);
@@ -171,10 +169,8 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
private static unsafe void D117Predictor(byte* dst, int stride, int bs, byte* above, byte* left)
{
int r, c;
// First row
for (c = 0; c < bs; c++)
for (int c = 0; c < bs; c++)
{
dst[c] = Avg2(above[c - 1], above[c]);
}
@@ -183,7 +179,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
// Second row
dst[0] = Avg3(left[0], above[-1], above[0]);
for (c = 1; c < bs; c++)
for (int c = 1; c < bs; c++)
{
dst[c] = Avg3(above[c - 2], above[c - 1], above[c]);
}
@@ -192,17 +188,17 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
// The rest of first col
dst[0] = Avg3(above[-1], left[0], left[1]);
for (r = 3; r < bs; ++r)
for (int r = 3; r < bs; ++r)
{
dst[(r - 2) * stride] = Avg3(left[r - 3], left[r - 2], left[r - 1]);
}
// The rest of the block
for (r = 2; r < bs; ++r)
for (int r = 2; r < bs; ++r)
{
for (c = 1; c < bs; c++)
for (int c = 1; c < bs; c++)
{
dst[c] = dst[-2 * stride + c - 1];
dst[c] = dst[(-2 * stride) + c - 1];
}
dst += stride;
@@ -226,26 +222,26 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
private static unsafe void D135Predictor(byte* dst, int stride, int bs, byte* above, byte* left)
{
int i;
byte* border = stackalloc byte[32 + 32 - 1]; // outer border from bottom-left to top-right
// Dst(dst, stride, bs, bs - 2)[0], i.e., border starting at bottom-left
for (i = 0; i < bs - 2; ++i)
for (int i = 0; i < bs - 2; ++i)
{
border[i] = Avg3(left[bs - 3 - i], left[bs - 2 - i], left[bs - 1 - i]);
}
border[bs - 2] = Avg3(above[-1], left[0], left[1]);
border[bs - 1] = Avg3(left[0], above[-1], above[0]);
border[bs - 0] = Avg3(above[-1], above[0], above[1]);
// dst[0][2, size), i.e., remaining top border ascending
for (i = 0; i < bs - 2; ++i)
for (int i = 0; i < bs - 2; ++i)
{
border[bs + 1 + i] = Avg3(above[i], above[i + 1], above[i + 2]);
}
for (i = 0; i < bs; ++i)
for (int i = 0; i < bs; ++i)
{
MemoryUtil.Copy(dst + i * stride, border + bs - 1 - i, bs);
MemoryUtil.Copy(dst + (i * stride), border + bs - 1 - i, bs);
}
}
@@ -266,9 +262,8 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
private static unsafe void D153Predictor(byte* dst, int stride, int bs, byte* above, byte* left)
{
int r, c;
dst[0] = Avg2(above[-1], left[0]);
for (r = 1; r < bs; r++)
for (int r = 1; r < bs; r++)
{
dst[r * stride] = Avg2(left[r - 1], left[r]);
}
@@ -277,23 +272,23 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
dst[0] = Avg3(left[0], above[-1], above[0]);
dst[stride] = Avg3(above[-1], left[0], left[1]);
for (r = 2; r < bs; r++)
for (int r = 2; r < bs; r++)
{
dst[r * stride] = Avg3(left[r - 2], left[r - 1], left[r]);
}
dst++;
for (c = 0; c < bs - 2; c++)
for (int c = 0; c < bs - 2; c++)
{
dst[c] = Avg3(above[c - 1], above[c], above[c + 1]);
}
dst += stride;
for (r = 1; r < bs; ++r)
for (int r = 1; r < bs; ++r)
{
for (c = 0; c < bs - 2; c++)
for (int c = 0; c < bs - 2; c++)
{
dst[c] = dst[-stride + c - 2];
}
@@ -324,9 +319,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
private static unsafe void VPredictor(byte* dst, int stride, int bs, byte* above, byte* left)
{
int r;
for (r = 0; r < bs; r++)
for (int r = 0; r < bs; r++)
{
MemoryUtil.Copy(dst, above, bs);
dst += stride;
@@ -355,43 +348,40 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
private static unsafe void HPredictor(byte* dst, int stride, int bs, byte* above, byte* left)
{
int r;
for (r = 0; r < bs; r++)
for (int r = 0; r < bs; r++)
{
MemoryUtil.Fill(dst, left[r], bs);
dst += stride;
}
}
public static unsafe void TMPredictor4x4(byte* dst, int stride, byte* above, byte* left)
public static unsafe void TmPredictor4x4(byte* dst, int stride, byte* above, byte* left)
{
TMPredictor(dst, stride, 4, above, left);
TmPredictor(dst, stride, 4, above, left);
}
public static unsafe void TMPredictor8x8(byte* dst, int stride, byte* above, byte* left)
public static unsafe void TmPredictor8x8(byte* dst, int stride, byte* above, byte* left)
{
TMPredictor(dst, stride, 8, above, left);
TmPredictor(dst, stride, 8, above, left);
}
public static unsafe void TMPredictor16x16(byte* dst, int stride, byte* above, byte* left)
public static unsafe void TmPredictor16x16(byte* dst, int stride, byte* above, byte* left)
{
TMPredictor(dst, stride, 16, above, left);
TmPredictor(dst, stride, 16, above, left);
}
public static unsafe void TMPredictor32x32(byte* dst, int stride, byte* above, byte* left)
public static unsafe void TmPredictor32x32(byte* dst, int stride, byte* above, byte* left)
{
TMPredictor(dst, stride, 32, above, left);
TmPredictor(dst, stride, 32, above, left);
}
private static unsafe void TMPredictor(byte* dst, int stride, int bs, byte* above, byte* left)
private static unsafe void TmPredictor(byte* dst, int stride, int bs, byte* above, byte* left)
{
int r, c;
int yTopLeft = above[-1];
for (r = 0; r < bs; r++)
for (int r = 0; r < bs; r++)
{
for (c = 0; c < bs; c++)
for (int c = 0; c < bs; c++)
{
dst[c] = BitUtils.ClipPixel(left[r] + above[c] - yTopLeft);
}
@@ -422,9 +412,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
private static unsafe void Dc128Predictor(byte* dst, int stride, int bs, byte* above, byte* left)
{
int r;
for (r = 0; r < bs; r++)
for (int r = 0; r < bs; r++)
{
MemoryUtil.Fill(dst, (byte)128, bs);
dst += stride;
@@ -453,16 +441,16 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
private static unsafe void DcLeftPredictor(byte* dst, int stride, int bs, byte* above, byte* left)
{
int i, r, expectedDc, sum = 0;
int expectedDc, sum = 0;
for (i = 0; i < bs; i++)
for (int i = 0; i < bs; i++)
{
sum += left[i];
}
expectedDc = (sum + (bs >> 1)) / bs;
for (r = 0; r < bs; r++)
for (int r = 0; r < bs; r++)
{
MemoryUtil.Fill(dst, (byte)expectedDc, bs);
dst += stride;
@@ -491,16 +479,16 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
private static unsafe void DcTopPredictor(byte* dst, int stride, int bs, byte* above, byte* left)
{
int i, r, expectedDc, sum = 0;
int expectedDc, sum = 0;
for (i = 0; i < bs; i++)
for (int i = 0; i < bs; i++)
{
sum += above[i];
}
expectedDc = (sum + (bs >> 1)) / bs;
for (r = 0; r < bs; r++)
for (int r = 0; r < bs; r++)
{
MemoryUtil.Fill(dst, (byte)expectedDc, bs);
dst += stride;
@@ -529,10 +517,10 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
private static unsafe void DcPredictor(byte* dst, int stride, int bs, byte* above, byte* left)
{
int i, r, expectedDc, sum = 0;
int expectedDc, sum = 0;
int count = 2 * bs;
for (i = 0; i < bs; i++)
for (int i = 0; i < bs; i++)
{
sum += above[i];
sum += left[i];
@@ -540,7 +528,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
expectedDc = (sum + (count >> 1)) / count;
for (r = 0; r < bs; r++)
for (int r = 0; r < bs; r++)
{
MemoryUtil.Fill(dst, (byte)expectedDc, bs);
dst += stride;
@@ -555,10 +543,10 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
byte k = left[2];
byte l = left[3];
MemoryUtil.Fill(dst + stride * 0, Avg3(h, I, j), 4);
MemoryUtil.Fill(dst + stride * 1, Avg3(I, j, k), 4);
MemoryUtil.Fill(dst + stride * 2, Avg3(j, k, l), 4);
MemoryUtil.Fill(dst + stride * 3, Avg3(k, l, l), 4);
MemoryUtil.Fill(dst + (stride * 0), Avg3(h, I, j), 4);
MemoryUtil.Fill(dst + (stride * 1), Avg3(I, j, k), 4);
MemoryUtil.Fill(dst + (stride * 2), Avg3(j, k, l), 4);
MemoryUtil.Fill(dst + (stride * 3), Avg3(k, l, l), 4);
}
public static unsafe void VePredictor4x4(byte* dst, int stride, byte* above, byte* left)
@@ -574,9 +562,9 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
dst[1] = Avg3(I, j, k);
dst[2] = Avg3(j, k, l);
dst[3] = Avg3(k, l, m);
MemoryUtil.Copy(dst + stride * 1, dst, 4);
MemoryUtil.Copy(dst + stride * 2, dst, 4);
MemoryUtil.Copy(dst + stride * 3, dst, 4);
MemoryUtil.Copy(dst + (stride * 1), dst, 4);
MemoryUtil.Copy(dst + (stride * 2), dst, 4);
MemoryUtil.Copy(dst + (stride * 3), dst, 4);
}
public static unsafe void D207Predictor4x4(byte* dst, int stride, byte* above, byte* left)
@@ -591,7 +579,8 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
Dst(dst, stride, 1, 0) = Avg3(I, j, k);
Dst(dst, stride, 3, 0) = Dst(dst, stride, 1, 1) = Avg3(j, k, l);
Dst(dst, stride, 3, 1) = Dst(dst, stride, 1, 2) = Avg3(k, l, l);
Dst(dst, stride, 3, 2) = Dst(dst, stride, 2, 2) = Dst(dst, stride, 0, 3) = Dst(dst, stride, 1, 3) = Dst(dst, stride, 2, 3) = Dst(dst, stride, 3, 3) = l;
Dst(dst, stride, 3, 2) = Dst(dst, stride, 2, 2) = Dst(dst, stride, 0, 3) =
Dst(dst, stride, 1, 3) = Dst(dst, stride, 2, 3) = Dst(dst, stride, 3, 3) = l;
}
public static unsafe void D63Predictor4x4(byte* dst, int stride, byte* above, byte* left)
@@ -616,7 +605,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
Dst(dst, stride, 3, 3) = Avg3(e, f, g); // Differs from vp8
}
public static unsafe void D63ePredictor4x4(byte* dst, int stride, byte* above, byte* left)
public static unsafe void D63EPredictor4x4(byte* dst, int stride, byte* above, byte* left)
{
byte a = above[0];
byte b = above[1];
@@ -652,13 +641,14 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
Dst(dst, stride, 0, 0) = Avg3(a, b, c);
Dst(dst, stride, 1, 0) = Dst(dst, stride, 0, 1) = Avg3(b, c, d);
Dst(dst, stride, 2, 0) = Dst(dst, stride, 1, 1) = Dst(dst, stride, 0, 2) = Avg3(c, d, e);
Dst(dst, stride, 3, 0) = Dst(dst, stride, 2, 1) = Dst(dst, stride, 1, 2) = Dst(dst, stride, 0, 3) = Avg3(d, e, f);
Dst(dst, stride, 3, 0) =
Dst(dst, stride, 2, 1) = Dst(dst, stride, 1, 2) = Dst(dst, stride, 0, 3) = Avg3(d, e, f);
Dst(dst, stride, 3, 1) = Dst(dst, stride, 2, 2) = Dst(dst, stride, 1, 3) = Avg3(e, f, g);
Dst(dst, stride, 3, 2) = Dst(dst, stride, 2, 3) = Avg3(f, g, h);
Dst(dst, stride, 3, 3) = h; // differs from vp8
}
public static unsafe void D45ePredictor4x4(byte* dst, int stride, byte* above, byte* left)
public static unsafe void D45EPredictor4x4(byte* dst, int stride, byte* above, byte* left)
{
byte a = above[0];
byte b = above[1];
@@ -671,7 +661,8 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
Dst(dst, stride, 0, 0) = Avg3(a, b, c);
Dst(dst, stride, 1, 0) = Dst(dst, stride, 0, 1) = Avg3(b, c, d);
Dst(dst, stride, 2, 0) = Dst(dst, stride, 1, 1) = Dst(dst, stride, 0, 2) = Avg3(c, d, e);
Dst(dst, stride, 3, 0) = Dst(dst, stride, 2, 1) = Dst(dst, stride, 1, 2) = Dst(dst, stride, 0, 3) = Avg3(d, e, f);
Dst(dst, stride, 3, 0) =
Dst(dst, stride, 2, 1) = Dst(dst, stride, 1, 2) = Dst(dst, stride, 0, 3) = Avg3(d, e, f);
Dst(dst, stride, 3, 1) = Dst(dst, stride, 2, 2) = Dst(dst, stride, 1, 3) = Avg3(e, f, g);
Dst(dst, stride, 3, 2) = Dst(dst, stride, 2, 3) = Avg3(f, g, h);
Dst(dst, stride, 3, 3) = Avg3(g, h, h);
@@ -714,7 +705,8 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
Dst(dst, stride, 0, 3) = Avg3(j, k, l);
Dst(dst, stride, 1, 3) = Dst(dst, stride, 0, 2) = Avg3(I, j, k);
Dst(dst, stride, 2, 3) = Dst(dst, stride, 1, 2) = Dst(dst, stride, 0, 1) = Avg3(x, I, j);
Dst(dst, stride, 3, 3) = Dst(dst, stride, 2, 2) = Dst(dst, stride, 1, 1) = Dst(dst, stride, 0, 0) = Avg3(a, x, I);
Dst(dst, stride, 3, 3) =
Dst(dst, stride, 2, 2) = Dst(dst, stride, 1, 1) = Dst(dst, stride, 0, 0) = Avg3(a, x, I);
Dst(dst, stride, 3, 2) = Dst(dst, stride, 2, 1) = Dst(dst, stride, 1, 0) = Avg3(b, a, x);
Dst(dst, stride, 3, 1) = Dst(dst, stride, 2, 0) = Avg3(c, b, a);
Dst(dst, stride, 3, 0) = Avg3(d, c, b);
@@ -758,38 +750,39 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
HighbdD207Predictor(dst, stride, 32, above, left, bd);
}
private static unsafe void HighbdD207Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd)
private static unsafe void HighbdD207Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left,
int bd)
{
int r, c;
// First column.
for (r = 0; r < bs - 1; ++r)
for (int r = 0; r < bs - 1; ++r)
{
dst[r * stride] = Avg2(left[r], left[r + 1]);
}
dst[(bs - 1) * stride] = left[bs - 1];
dst++;
// Second column.
for (r = 0; r < bs - 2; ++r)
for (int r = 0; r < bs - 2; ++r)
{
dst[r * stride] = Avg3(left[r], left[r + 1], left[r + 2]);
}
dst[(bs - 2) * stride] = Avg3(left[bs - 2], left[bs - 1], left[bs - 1]);
dst[(bs - 1) * stride] = left[bs - 1];
dst++;
// Rest of last row.
for (c = 0; c < bs - 2; ++c)
for (int c = 0; c < bs - 2; ++c)
{
dst[(bs - 1) * stride + c] = left[bs - 1];
dst[((bs - 1) * stride) + c] = left[bs - 1];
}
for (r = bs - 2; r >= 0; --r)
for (int r = bs - 2; r >= 0; --r)
{
for (c = 0; c < bs - 2; ++c)
for (int c = 0; c < bs - 2; ++c)
{
dst[r * stride + c] = dst[(r + 1) * stride + c - 2];
dst[(r * stride) + c] = dst[((r + 1) * stride) + c - 2];
}
}
}
@@ -809,21 +802,21 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
HighbdD63Predictor(dst, stride, 32, above, left, bd);
}
private static unsafe void HighbdD63Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd)
private static unsafe void HighbdD63Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left,
int bd)
{
int r, c;
int size;
for (c = 0; c < bs; ++c)
for (int c = 0; c < bs; ++c)
{
dst[c] = Avg2(above[c], above[c + 1]);
dst[stride + c] = Avg3(above[c], above[c + 1], above[c + 2]);
}
for (r = 2, size = bs - 2; r < bs; r += 2, --size)
for (int r = 2, size = bs - 2; r < bs; r += 2, --size)
{
MemoryUtil.Copy(dst + (r + 0) * stride, dst + (r >> 1), size);
MemoryUtil.Fill(dst + (r + 0) * stride + size, above[bs - 1], bs - size);
MemoryUtil.Copy(dst + (r + 1) * stride, dst + stride + (r >> 1), size);
MemoryUtil.Fill(dst + (r + 1) * stride + size, above[bs - 1], bs - size);
MemoryUtil.Copy(dst + ((r + 0) * stride), dst + (r >> 1), size);
MemoryUtil.Fill(dst + ((r + 0) * stride) + size, above[bs - 1], bs - size);
MemoryUtil.Copy(dst + ((r + 1) * stride), dst + stride + (r >> 1), size);
MemoryUtil.Fill(dst + ((r + 1) * stride) + size, above[bs - 1], bs - size);
}
}
@@ -842,19 +835,20 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
HighbdD45Predictor(dst, stride, 32, above, left, bd);
}
private static unsafe void HighbdD45Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd)
private static unsafe void HighbdD45Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left,
int bd)
{
ushort aboveRight = above[bs - 1];
ushort* dstRow0 = dst;
int x, size;
for (x = 0; x < bs - 1; ++x)
for (int x = 0; x < bs - 1; ++x)
{
dst[x] = Avg3(above[x], above[x + 1], above[x + 2]);
}
dst[bs - 1] = aboveRight;
dst += stride;
for (x = 1, size = bs - 2; x < bs; ++x, --size)
for (int x = 1, size = bs - 2; x < bs; ++x, --size)
{
MemoryUtil.Copy(dst, dstRow0 + x, size);
MemoryUtil.Fill(dst + size, aboveRight, x + 1);
@@ -877,12 +871,11 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
HighbdD117Predictor(dst, stride, 32, above, left, bd);
}
private static unsafe void HighbdD117Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd)
private static unsafe void HighbdD117Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left,
int bd)
{
int r, c;
// First row
for (c = 0; c < bs; c++)
for (int c = 0; c < bs; c++)
{
dst[c] = Avg2(above[c - 1], above[c]);
}
@@ -891,7 +884,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
// Second row
dst[0] = Avg3(left[0], above[-1], above[0]);
for (c = 1; c < bs; c++)
for (int c = 1; c < bs; c++)
{
dst[c] = Avg3(above[c - 2], above[c - 1], above[c]);
}
@@ -900,17 +893,17 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
// The rest of first col
dst[0] = Avg3(above[-1], left[0], left[1]);
for (r = 3; r < bs; ++r)
for (int r = 3; r < bs; ++r)
{
dst[(r - 2) * stride] = Avg3(left[r - 3], left[r - 2], left[r - 1]);
}
// The rest of the block
for (r = 2; r < bs; ++r)
for (int r = 2; r < bs; ++r)
{
for (c = 1; c < bs; c++)
for (int c = 1; c < bs; c++)
{
dst[c] = dst[-2 * stride + c - 1];
dst[c] = dst[(-2 * stride) + c - 1];
}
dst += stride;
@@ -932,28 +925,29 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
HighbdD135Predictor(dst, stride, 32, above, left, bd);
}
private static unsafe void HighbdD135Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd)
private static unsafe void HighbdD135Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left,
int bd)
{
int i;
ushort* border = stackalloc ushort[32 + 32 - 1]; // Outer border from bottom-left to top-right
// Dst(dst, stride, bs, bs - 2)[0], i.e., border starting at bottom-left
for (i = 0; i < bs - 2; ++i)
for (int i = 0; i < bs - 2; ++i)
{
border[i] = Avg3(left[bs - 3 - i], left[bs - 2 - i], left[bs - 1 - i]);
}
border[bs - 2] = Avg3(above[-1], left[0], left[1]);
border[bs - 1] = Avg3(left[0], above[-1], above[0]);
border[bs - 0] = Avg3(above[-1], above[0], above[1]);
// dst[0][2, size), i.e., remaining top border ascending
for (i = 0; i < bs - 2; ++i)
for (int i = 0; i < bs - 2; ++i)
{
border[bs + 1 + i] = Avg3(above[i], above[i + 1], above[i + 2]);
}
for (i = 0; i < bs; ++i)
for (int i = 0; i < bs; ++i)
{
MemoryUtil.Copy(dst + i * stride, border + bs - 1 - i, bs);
MemoryUtil.Copy(dst + (i * stride), border + bs - 1 - i, bs);
}
}
@@ -972,11 +966,11 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
HighbdD153Predictor(dst, stride, 32, above, left, bd);
}
private static unsafe void HighbdD153Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd)
private static unsafe void HighbdD153Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left,
int bd)
{
int r, c;
dst[0] = Avg2(above[-1], left[0]);
for (r = 1; r < bs; r++)
for (int r = 1; r < bs; r++)
{
dst[r * stride] = Avg2(left[r - 1], left[r]);
}
@@ -985,23 +979,23 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
dst[0] = Avg3(left[0], above[-1], above[0]);
dst[stride] = Avg3(above[-1], left[0], left[1]);
for (r = 2; r < bs; r++)
for (int r = 2; r < bs; r++)
{
dst[r * stride] = Avg3(left[r - 2], left[r - 1], left[r]);
}
dst++;
for (c = 0; c < bs - 2; c++)
for (int c = 0; c < bs - 2; c++)
{
dst[c] = Avg3(above[c - 1], above[c], above[c + 1]);
}
dst += stride;
for (r = 1; r < bs; ++r)
for (int r = 1; r < bs; ++r)
{
for (c = 0; c < bs - 2; c++)
for (int c = 0; c < bs - 2; c++)
{
dst[c] = dst[-stride + c - 2];
}
@@ -1030,10 +1024,10 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
HighbdVPredictor(dst, stride, 32, above, left, bd);
}
private static unsafe void HighbdVPredictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd)
private static unsafe void HighbdVPredictor(ushort* dst, int stride, int bs, ushort* above, ushort* left,
int bd)
{
int r;
for (r = 0; r < bs; r++)
for (int r = 0; r < bs; r++)
{
MemoryUtil.Copy(dst, above, bs);
dst += stride;
@@ -1060,44 +1054,44 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
HighbdHPredictor(dst, stride, 32, above, left, bd);
}
private static unsafe void HighbdHPredictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd)
private static unsafe void HighbdHPredictor(ushort* dst, int stride, int bs, ushort* above, ushort* left,
int bd)
{
int r;
for (r = 0; r < bs; r++)
for (int r = 0; r < bs; r++)
{
MemoryUtil.Fill(dst, left[r], bs);
dst += stride;
}
}
public static unsafe void HighbdTMPredictor4x4(ushort* dst, int stride, ushort* above, ushort* left, int bd)
public static unsafe void HighbdTmPredictor4x4(ushort* dst, int stride, ushort* above, ushort* left, int bd)
{
HighbdTMPredictor(dst, stride, 4, above, left, bd);
HighbdTmPredictor(dst, stride, 4, above, left, bd);
}
public static unsafe void HighbdTMPredictor8x8(ushort* dst, int stride, ushort* above, ushort* left, int bd)
public static unsafe void HighbdTmPredictor8x8(ushort* dst, int stride, ushort* above, ushort* left, int bd)
{
HighbdTMPredictor(dst, stride, 8, above, left, bd);
HighbdTmPredictor(dst, stride, 8, above, left, bd);
}
public static unsafe void HighbdTMPredictor16x16(ushort* dst, int stride, ushort* above, ushort* left, int bd)
public static unsafe void HighbdTmPredictor16x16(ushort* dst, int stride, ushort* above, ushort* left, int bd)
{
HighbdTMPredictor(dst, stride, 16, above, left, bd);
HighbdTmPredictor(dst, stride, 16, above, left, bd);
}
public static unsafe void HighbdTMPredictor32x32(ushort* dst, int stride, ushort* above, ushort* left, int bd)
public static unsafe void HighbdTmPredictor32x32(ushort* dst, int stride, ushort* above, ushort* left, int bd)
{
HighbdTMPredictor(dst, stride, 32, above, left, bd);
HighbdTmPredictor(dst, stride, 32, above, left, bd);
}
private static unsafe void HighbdTMPredictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd)
private static unsafe void HighbdTmPredictor(ushort* dst, int stride, int bs, ushort* above, ushort* left,
int bd)
{
int r, c;
int yTopLeft = above[-1];
for (r = 0; r < bs; r++)
for (int r = 0; r < bs; r++)
{
for (c = 0; c < bs; c++)
for (int c = 0; c < bs; c++)
{
dst[c] = BitUtils.ClipPixelHighbd(left[r] + above[c] - yTopLeft, bd);
}
@@ -1116,21 +1110,22 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
HighbdDc128Predictor(dst, stride, 8, above, left, bd);
}
public static unsafe void HighbdDc128Predictor16x16(ushort* dst, int stride, ushort* above, ushort* left, int bd)
public static unsafe void HighbdDc128Predictor16x16(ushort* dst, int stride, ushort* above, ushort* left,
int bd)
{
HighbdDc128Predictor(dst, stride, 16, above, left, bd);
}
public static unsafe void HighbdDc128Predictor32x32(ushort* dst, int stride, ushort* above, ushort* left, int bd)
public static unsafe void HighbdDc128Predictor32x32(ushort* dst, int stride, ushort* above, ushort* left,
int bd)
{
HighbdDc128Predictor(dst, stride, 32, above, left, bd);
}
private static unsafe void HighbdDc128Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd)
private static unsafe void HighbdDc128Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left,
int bd)
{
int r;
for (r = 0; r < bs; r++)
for (int r = 0; r < bs; r++)
{
MemoryUtil.Fill(dst, (ushort)(128 << (bd - 8)), bs);
dst += stride;
@@ -1147,28 +1142,31 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
HighbdDcLeftPredictor(dst, stride, 8, above, left, bd);
}
public static unsafe void HighbdDcLeftPredictor16x16(ushort* dst, int stride, ushort* above, ushort* left, int bd)
public static unsafe void HighbdDcLeftPredictor16x16(ushort* dst, int stride, ushort* above, ushort* left,
int bd)
{
HighbdDcLeftPredictor(dst, stride, 16, above, left, bd);
}
public static unsafe void HighbdDcLeftPredictor32x32(ushort* dst, int stride, ushort* above, ushort* left, int bd)
public static unsafe void HighbdDcLeftPredictor32x32(ushort* dst, int stride, ushort* above, ushort* left,
int bd)
{
HighbdDcLeftPredictor(dst, stride, 32, above, left, bd);
}
private static unsafe void HighbdDcLeftPredictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd)
private static unsafe void HighbdDcLeftPredictor(ushort* dst, int stride, int bs, ushort* above, ushort* left,
int bd)
{
int i, r, expectedDc, sum = 0;
int expectedDc, sum = 0;
for (i = 0; i < bs; i++)
for (int i = 0; i < bs; i++)
{
sum += left[i];
}
expectedDc = (sum + (bs >> 1)) / bs;
for (r = 0; r < bs; r++)
for (int r = 0; r < bs; r++)
{
MemoryUtil.Fill(dst, (ushort)expectedDc, bs);
dst += stride;
@@ -1185,28 +1183,31 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
HighbdDcTopPredictor(dst, stride, 8, above, left, bd);
}
public static unsafe void HighbdDcTopPredictor16x16(ushort* dst, int stride, ushort* above, ushort* left, int bd)
public static unsafe void HighbdDcTopPredictor16x16(ushort* dst, int stride, ushort* above, ushort* left,
int bd)
{
HighbdDcTopPredictor(dst, stride, 16, above, left, bd);
}
public static unsafe void HighbdDcTopPredictor32x32(ushort* dst, int stride, ushort* above, ushort* left, int bd)
public static unsafe void HighbdDcTopPredictor32x32(ushort* dst, int stride, ushort* above, ushort* left,
int bd)
{
HighbdDcTopPredictor(dst, stride, 32, above, left, bd);
}
private static unsafe void HighbdDcTopPredictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd)
private static unsafe void HighbdDcTopPredictor(ushort* dst, int stride, int bs, ushort* above, ushort* left,
int bd)
{
int i, r, expectedDc, sum = 0;
int expectedDc, sum = 0;
for (i = 0; i < bs; i++)
for (int i = 0; i < bs; i++)
{
sum += above[i];
}
expectedDc = (sum + (bs >> 1)) / bs;
for (r = 0; r < bs; r++)
for (int r = 0; r < bs; r++)
{
MemoryUtil.Fill(dst, (ushort)expectedDc, bs);
dst += stride;
@@ -1233,12 +1234,13 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
HighbdDcPredictor(dst, stride, 32, above, left, bd);
}
private static unsafe void HighbdDcPredictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd)
private static unsafe void HighbdDcPredictor(ushort* dst, int stride, int bs, ushort* above, ushort* left,
int bd)
{
int i, r, expectedDc, sum = 0;
int expectedDc, sum = 0;
int count = 2 * bs;
for (i = 0; i < bs; i++)
for (int i = 0; i < bs; i++)
{
sum += above[i];
sum += left[i];
@@ -1246,7 +1248,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
expectedDc = (sum + (count >> 1)) / count;
for (r = 0; r < bs; r++)
for (int r = 0; r < bs; r++)
{
MemoryUtil.Fill(dst, (ushort)expectedDc, bs);
dst += stride;
@@ -1265,7 +1267,8 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
Dst(dst, stride, 1, 0) = Avg3(I, j, k);
Dst(dst, stride, 3, 0) = Dst(dst, stride, 1, 1) = Avg3(j, k, l);
Dst(dst, stride, 3, 1) = Dst(dst, stride, 1, 2) = Avg3(k, l, l);
Dst(dst, stride, 3, 2) = Dst(dst, stride, 2, 2) = Dst(dst, stride, 0, 3) = Dst(dst, stride, 1, 3) = Dst(dst, stride, 2, 3) = Dst(dst, stride, 3, 3) = l;
Dst(dst, stride, 3, 2) = Dst(dst, stride, 2, 2) = Dst(dst, stride, 0, 3) =
Dst(dst, stride, 1, 3) = Dst(dst, stride, 2, 3) = Dst(dst, stride, 3, 3) = l;
}
public static unsafe void HighbdD63Predictor4x4(ushort* dst, int stride, ushort* above, ushort* left, int bd)
@@ -1303,7 +1306,8 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
Dst(dst, stride, 0, 0) = Avg3(a, b, c);
Dst(dst, stride, 1, 0) = Dst(dst, stride, 0, 1) = Avg3(b, c, d);
Dst(dst, stride, 2, 0) = Dst(dst, stride, 1, 1) = Dst(dst, stride, 0, 2) = Avg3(c, d, e);
Dst(dst, stride, 3, 0) = Dst(dst, stride, 2, 1) = Dst(dst, stride, 1, 2) = Dst(dst, stride, 0, 3) = Avg3(d, e, f);
Dst(dst, stride, 3, 0) =
Dst(dst, stride, 2, 1) = Dst(dst, stride, 1, 2) = Dst(dst, stride, 0, 3) = Avg3(d, e, f);
Dst(dst, stride, 3, 1) = Dst(dst, stride, 2, 2) = Dst(dst, stride, 1, 3) = Avg3(e, f, g);
Dst(dst, stride, 3, 2) = Dst(dst, stride, 2, 3) = Avg3(f, g, h);
Dst(dst, stride, 3, 3) = h; // Differs from vp8
@@ -1346,7 +1350,8 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
Dst(dst, stride, 0, 3) = Avg3(j, k, l);
Dst(dst, stride, 1, 3) = Dst(dst, stride, 0, 2) = Avg3(I, j, k);
Dst(dst, stride, 2, 3) = Dst(dst, stride, 1, 2) = Dst(dst, stride, 0, 1) = Avg3(x, I, j);
Dst(dst, stride, 3, 3) = Dst(dst, stride, 2, 2) = Dst(dst, stride, 1, 1) = Dst(dst, stride, 0, 0) = Avg3(a, x, I);
Dst(dst, stride, 3, 3) =
Dst(dst, stride, 2, 2) = Dst(dst, stride, 1, 1) = Dst(dst, stride, 0, 0) = Avg3(a, x, I);
Dst(dst, stride, 3, 2) = Dst(dst, stride, 2, 1) = Dst(dst, stride, 1, 0) = Avg3(b, a, x);
Dst(dst, stride, 3, 1) = Dst(dst, stride, 2, 0) = Avg3(c, b, a);
Dst(dst, stride, 3, 0) = Avg3(d, c, b);
@@ -1376,4 +1381,4 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
Dst(dst, stride, 1, 3) = Avg3(l, k, j);
}
}
}
}