Implement VP9 loop filtering
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
using Ryujinx.Common.Memory;
|
||||
using Ryujinx.Common.Memory;
|
||||
using Ryujinx.Graphics.Nvdec.Vp9.Dsp;
|
||||
using Ryujinx.Graphics.Nvdec.Vp9.Types;
|
||||
using Ryujinx.Graphics.Video;
|
||||
@@ -17,26 +17,10 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
|
||||
private static int GetCoefContext(ReadOnlySpan<short> neighbors, ReadOnlySpan<byte> tokenCache, int c)
|
||||
{
|
||||
const int MaxNeighbors = 2;
|
||||
const int maxNeighbors = 2;
|
||||
|
||||
return (1 + tokenCache[neighbors[MaxNeighbors * c + 0]] + tokenCache[neighbors[MaxNeighbors * c + 1]]) >> 1;
|
||||
}
|
||||
|
||||
private static int ReadCoeff(
|
||||
ref Reader r,
|
||||
ReadOnlySpan<byte> probs,
|
||||
int n,
|
||||
ref ulong value,
|
||||
ref int count,
|
||||
ref uint range)
|
||||
{
|
||||
int i, val = 0;
|
||||
for (i = 0; i < n; ++i)
|
||||
{
|
||||
val = (val << 1) | r.ReadBool(probs[i], ref value, ref count, ref range);
|
||||
}
|
||||
|
||||
return val;
|
||||
return (1 + tokenCache[neighbors[(maxNeighbors * c) + 0]] +
|
||||
tokenCache[neighbors[(maxNeighbors * c) + 1]]) >> 1;
|
||||
}
|
||||
|
||||
private static int DecodeCoefs(
|
||||
@@ -58,13 +42,15 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
ref Array6<Array6<Array3<byte>>> coefProbs = ref fc.CoefProbs[(int)txSize][(int)type][refr];
|
||||
Span<byte> tokenCache = stackalloc byte[32 * 32];
|
||||
ReadOnlySpan<byte> bandTranslate = Luts.GetBandTranslate(txSize);
|
||||
int dqShift = (txSize == TxSize.Tx32x32) ? 1 : 0;
|
||||
int dqShift = txSize == TxSize.Tx32x32 ? 1 : 0;
|
||||
int v;
|
||||
short dqv = dq[0];
|
||||
ReadOnlySpan<byte> cat6Prob = (xd.Bd == 12)
|
||||
? Luts.Vp9Cat6ProbHigh12
|
||||
: (xd.Bd == 10) ? Luts.Vp9Cat6ProbHigh12[2..] : Luts.Vp9Cat6Prob;
|
||||
int cat6Bits = (xd.Bd == 12) ? 18 : (xd.Bd == 10) ? 16 : 14;
|
||||
ReadOnlySpan<byte> cat6Prob = xd.Bd == 12
|
||||
? Luts.Cat6ProbHigh12
|
||||
: xd.Bd == 10
|
||||
? Luts.Cat6ProbHigh12.Slice(2)
|
||||
: Luts.Cat6Prob;
|
||||
int cat6Bits = xd.Bd == 12 ? 18 : xd.Bd == 10 ? 16 : 14;
|
||||
// Keep value, range, and count as locals. The compiler produces better
|
||||
// results with the locals than using r directly.
|
||||
ulong value = r.Value;
|
||||
@@ -75,7 +61,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
{
|
||||
int val = -1;
|
||||
band = bandTranslate[0];
|
||||
bandTranslate = bandTranslate[1..];
|
||||
bandTranslate = bandTranslate.Slice(1);
|
||||
ref Array3<byte> prob = ref coefProbs[band][ctx];
|
||||
if (!xd.Counts.IsNull)
|
||||
{
|
||||
@@ -107,18 +93,18 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
r.Value = value;
|
||||
r.Range = range;
|
||||
r.Count = count;
|
||||
|
||||
return c; // Zero tokens at the end (no eob token)
|
||||
}
|
||||
|
||||
ctx = GetCoefContext(nb, tokenCache, c);
|
||||
band = bandTranslate[0];
|
||||
bandTranslate = bandTranslate[1..];
|
||||
bandTranslate = bandTranslate.Slice(1);
|
||||
prob = ref coefProbs[band][ctx];
|
||||
}
|
||||
|
||||
if (r.ReadBool(prob[OneContextNode], ref value, ref count, ref range) != 0)
|
||||
{
|
||||
ReadOnlySpan<byte> p = Luts.Vp9Pareto8Full[prob[Constants.PivotNode] - 1];
|
||||
ReadOnlySpan<byte> p = Luts.Pareto8Full[prob[Constants.PivotNode] - 1];
|
||||
if (!xd.Counts.IsNull)
|
||||
{
|
||||
++counts.Coef[(int)txSize][(int)type][refr][band][ctx][Constants.TwoToken];
|
||||
@@ -133,20 +119,24 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
{
|
||||
if (r.ReadBool(p[7], ref value, ref count, ref range) != 0)
|
||||
{
|
||||
val = Constants.Cat6MinVal + ReadCoeff(ref r, cat6Prob, cat6Bits, ref value, ref count, ref range);
|
||||
val = Constants.Cat6MinVal + r.ReadCoeff(cat6Prob, cat6Bits, ref value,
|
||||
ref count, ref range);
|
||||
}
|
||||
else
|
||||
{
|
||||
val = Constants.Cat5MinVal + ReadCoeff(ref r, Luts.Vp9Cat5Prob, 5, ref value, ref count, ref range);
|
||||
val = Constants.Cat5MinVal + r.ReadCoeff(Luts.Cat5Prob, 5, ref value,
|
||||
ref count, ref range);
|
||||
}
|
||||
}
|
||||
else if (r.ReadBool(p[6], ref value, ref count, ref range) != 0)
|
||||
{
|
||||
val = Constants.Cat4MinVal + ReadCoeff(ref r, Luts.Vp9Cat4Prob, 4, ref value, ref count, ref range);
|
||||
val = Constants.Cat4MinVal + r.ReadCoeff(Luts.Cat4Prob, 4, ref value, ref count,
|
||||
ref range);
|
||||
}
|
||||
else
|
||||
{
|
||||
val = Constants.Cat3MinVal + ReadCoeff(ref r, Luts.Vp9Cat3Prob, 3, ref value, ref count, ref range);
|
||||
val = Constants.Cat3MinVal + r.ReadCoeff(Luts.Cat3Prob, 3, ref value, ref count,
|
||||
ref range);
|
||||
}
|
||||
}
|
||||
else
|
||||
@@ -154,13 +144,16 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
tokenCache[scan[c]] = 4;
|
||||
if (r.ReadBool(p[4], ref value, ref count, ref range) != 0)
|
||||
{
|
||||
val = Constants.Cat2MinVal + ReadCoeff(ref r, Luts.Vp9Cat2Prob, 2, ref value, ref count, ref range);
|
||||
val = Constants.Cat2MinVal + r.ReadCoeff(Luts.Cat2Prob, 2, ref value, ref count,
|
||||
ref range);
|
||||
}
|
||||
else
|
||||
{
|
||||
val = Constants.Cat1MinVal + ReadCoeff(ref r, Luts.Vp9Cat1Prob, 1, ref value, ref count, ref range);
|
||||
val = Constants.Cat1MinVal + r.ReadCoeff(Luts.Cat1Prob, 1, ref value, ref count,
|
||||
ref range);
|
||||
}
|
||||
}
|
||||
|
||||
// Val may use 18-bits
|
||||
v = (int)(((long)val * dqv) >> dqShift);
|
||||
}
|
||||
@@ -188,7 +181,9 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
tokenCache[scan[c]] = 1;
|
||||
v = dqv >> dqShift;
|
||||
}
|
||||
dqcoeff[scan[c]] = (int)HighbdCheckRange(r.ReadBool(128, ref value, ref count, ref range) != 0 ? -v : v, xd.Bd);
|
||||
|
||||
dqcoeff[scan[c]] = (int)HighbdCheckRange(r.ReadBool(128, ref value, ref count, ref range) != 0 ? -v : v,
|
||||
xd.Bd);
|
||||
++c;
|
||||
ctx = GetCoefContext(nb, tokenCache, c);
|
||||
dqv = dq[1];
|
||||
@@ -197,11 +192,11 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
r.Value = value;
|
||||
r.Range = range;
|
||||
r.Count = count;
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
private static void GetCtxShift(ref MacroBlockD xd, ref int ctxShiftA, ref int ctxShiftL, int x, int y, uint txSizeInBlocks)
|
||||
private static void GetCtxShift(ref MacroBlockD xd, ref int ctxShiftA, ref int ctxShiftL, int x, int y,
|
||||
uint txSizeInBlocks)
|
||||
{
|
||||
if (xd.MaxBlocksWide != 0)
|
||||
{
|
||||
@@ -210,6 +205,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
ctxShiftA = (int)(txSizeInBlocks - (xd.MaxBlocksWide - x)) * 8;
|
||||
}
|
||||
}
|
||||
|
||||
if (xd.MaxBlocksHigh != 0)
|
||||
{
|
||||
if (txSizeInBlocks + y > xd.MaxBlocksHigh)
|
||||
@@ -238,8 +234,8 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
ref MacroBlockDPlane pd = ref xd.Plane[plane];
|
||||
ref Array2<short> dequant = ref pd.SegDequant[segId];
|
||||
int eob;
|
||||
Span<sbyte> a = pd.AboveContext.AsSpan()[x..];
|
||||
Span<sbyte> l = pd.LeftContext.AsSpan()[y..];
|
||||
Span<sbyte> a = pd.AboveContext.AsSpan().Slice(x);
|
||||
Span<sbyte> l = pd.LeftContext.AsSpan().Slice(y);
|
||||
int ctx;
|
||||
int ctxShiftA = 0;
|
||||
int ctxShiftL = 0;
|
||||
@@ -324,4 +320,4 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
|
||||
return eob;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user