Implement VP9 loop filtering

This commit is contained in:
gdkchan
2023-02-04 19:44:30 -03:00
committed by KeatonTheBot
parent d135385cab
commit b69b432b4c
79 changed files with 11301 additions and 3006 deletions

View File

@@ -1,4 +1,4 @@
using Ryujinx.Common.Memory;
using Ryujinx.Common.Memory;
using Ryujinx.Graphics.Nvdec.Vp9.Dsp;
using Ryujinx.Graphics.Nvdec.Vp9.Types;
using Ryujinx.Graphics.Video;
@@ -17,26 +17,10 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
private static int GetCoefContext(ReadOnlySpan<short> neighbors, ReadOnlySpan<byte> tokenCache, int c)
{
const int MaxNeighbors = 2;
const int maxNeighbors = 2;
return (1 + tokenCache[neighbors[MaxNeighbors * c + 0]] + tokenCache[neighbors[MaxNeighbors * c + 1]]) >> 1;
}
private static int ReadCoeff(
ref Reader r,
ReadOnlySpan<byte> probs,
int n,
ref ulong value,
ref int count,
ref uint range)
{
int i, val = 0;
for (i = 0; i < n; ++i)
{
val = (val << 1) | r.ReadBool(probs[i], ref value, ref count, ref range);
}
return val;
return (1 + tokenCache[neighbors[(maxNeighbors * c) + 0]] +
tokenCache[neighbors[(maxNeighbors * c) + 1]]) >> 1;
}
private static int DecodeCoefs(
@@ -58,13 +42,15 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
ref Array6<Array6<Array3<byte>>> coefProbs = ref fc.CoefProbs[(int)txSize][(int)type][refr];
Span<byte> tokenCache = stackalloc byte[32 * 32];
ReadOnlySpan<byte> bandTranslate = Luts.GetBandTranslate(txSize);
int dqShift = (txSize == TxSize.Tx32x32) ? 1 : 0;
int dqShift = txSize == TxSize.Tx32x32 ? 1 : 0;
int v;
short dqv = dq[0];
ReadOnlySpan<byte> cat6Prob = (xd.Bd == 12)
? Luts.Vp9Cat6ProbHigh12
: (xd.Bd == 10) ? Luts.Vp9Cat6ProbHigh12[2..] : Luts.Vp9Cat6Prob;
int cat6Bits = (xd.Bd == 12) ? 18 : (xd.Bd == 10) ? 16 : 14;
ReadOnlySpan<byte> cat6Prob = xd.Bd == 12
? Luts.Cat6ProbHigh12
: xd.Bd == 10
? Luts.Cat6ProbHigh12.Slice(2)
: Luts.Cat6Prob;
int cat6Bits = xd.Bd == 12 ? 18 : xd.Bd == 10 ? 16 : 14;
// Keep value, range, and count as locals. The compiler produces better
// results with the locals than using r directly.
ulong value = r.Value;
@@ -75,7 +61,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
{
int val = -1;
band = bandTranslate[0];
bandTranslate = bandTranslate[1..];
bandTranslate = bandTranslate.Slice(1);
ref Array3<byte> prob = ref coefProbs[band][ctx];
if (!xd.Counts.IsNull)
{
@@ -107,18 +93,18 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
r.Value = value;
r.Range = range;
r.Count = count;
return c; // Zero tokens at the end (no eob token)
}
ctx = GetCoefContext(nb, tokenCache, c);
band = bandTranslate[0];
bandTranslate = bandTranslate[1..];
bandTranslate = bandTranslate.Slice(1);
prob = ref coefProbs[band][ctx];
}
if (r.ReadBool(prob[OneContextNode], ref value, ref count, ref range) != 0)
{
ReadOnlySpan<byte> p = Luts.Vp9Pareto8Full[prob[Constants.PivotNode] - 1];
ReadOnlySpan<byte> p = Luts.Pareto8Full[prob[Constants.PivotNode] - 1];
if (!xd.Counts.IsNull)
{
++counts.Coef[(int)txSize][(int)type][refr][band][ctx][Constants.TwoToken];
@@ -133,20 +119,24 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
{
if (r.ReadBool(p[7], ref value, ref count, ref range) != 0)
{
val = Constants.Cat6MinVal + ReadCoeff(ref r, cat6Prob, cat6Bits, ref value, ref count, ref range);
val = Constants.Cat6MinVal + r.ReadCoeff(cat6Prob, cat6Bits, ref value,
ref count, ref range);
}
else
{
val = Constants.Cat5MinVal + ReadCoeff(ref r, Luts.Vp9Cat5Prob, 5, ref value, ref count, ref range);
val = Constants.Cat5MinVal + r.ReadCoeff(Luts.Cat5Prob, 5, ref value,
ref count, ref range);
}
}
else if (r.ReadBool(p[6], ref value, ref count, ref range) != 0)
{
val = Constants.Cat4MinVal + ReadCoeff(ref r, Luts.Vp9Cat4Prob, 4, ref value, ref count, ref range);
val = Constants.Cat4MinVal + r.ReadCoeff(Luts.Cat4Prob, 4, ref value, ref count,
ref range);
}
else
{
val = Constants.Cat3MinVal + ReadCoeff(ref r, Luts.Vp9Cat3Prob, 3, ref value, ref count, ref range);
val = Constants.Cat3MinVal + r.ReadCoeff(Luts.Cat3Prob, 3, ref value, ref count,
ref range);
}
}
else
@@ -154,13 +144,16 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
tokenCache[scan[c]] = 4;
if (r.ReadBool(p[4], ref value, ref count, ref range) != 0)
{
val = Constants.Cat2MinVal + ReadCoeff(ref r, Luts.Vp9Cat2Prob, 2, ref value, ref count, ref range);
val = Constants.Cat2MinVal + r.ReadCoeff(Luts.Cat2Prob, 2, ref value, ref count,
ref range);
}
else
{
val = Constants.Cat1MinVal + ReadCoeff(ref r, Luts.Vp9Cat1Prob, 1, ref value, ref count, ref range);
val = Constants.Cat1MinVal + r.ReadCoeff(Luts.Cat1Prob, 1, ref value, ref count,
ref range);
}
}
// Val may use 18-bits
v = (int)(((long)val * dqv) >> dqShift);
}
@@ -188,7 +181,9 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
tokenCache[scan[c]] = 1;
v = dqv >> dqShift;
}
dqcoeff[scan[c]] = (int)HighbdCheckRange(r.ReadBool(128, ref value, ref count, ref range) != 0 ? -v : v, xd.Bd);
dqcoeff[scan[c]] = (int)HighbdCheckRange(r.ReadBool(128, ref value, ref count, ref range) != 0 ? -v : v,
xd.Bd);
++c;
ctx = GetCoefContext(nb, tokenCache, c);
dqv = dq[1];
@@ -197,11 +192,11 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
r.Value = value;
r.Range = range;
r.Count = count;
return c;
}
private static void GetCtxShift(ref MacroBlockD xd, ref int ctxShiftA, ref int ctxShiftL, int x, int y, uint txSizeInBlocks)
private static void GetCtxShift(ref MacroBlockD xd, ref int ctxShiftA, ref int ctxShiftL, int x, int y,
uint txSizeInBlocks)
{
if (xd.MaxBlocksWide != 0)
{
@@ -210,6 +205,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
ctxShiftA = (int)(txSizeInBlocks - (xd.MaxBlocksWide - x)) * 8;
}
}
if (xd.MaxBlocksHigh != 0)
{
if (txSizeInBlocks + y > xd.MaxBlocksHigh)
@@ -238,8 +234,8 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
ref MacroBlockDPlane pd = ref xd.Plane[plane];
ref Array2<short> dequant = ref pd.SegDequant[segId];
int eob;
Span<sbyte> a = pd.AboveContext.AsSpan()[x..];
Span<sbyte> l = pd.LeftContext.AsSpan()[y..];
Span<sbyte> a = pd.AboveContext.AsSpan().Slice(x);
Span<sbyte> l = pd.LeftContext.AsSpan().Slice(y);
int ctx;
int ctxShiftA = 0;
int ctxShiftL = 0;
@@ -324,4 +320,4 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
return eob;
}
}
}
}