Implement VP9 loop filtering (#550)

Unmerged PR from OG Ryujinx (#4367). From @gdkchan:

> The main goal of this change is porting the loop filtering from
libvpx, which should fix the block artifacts on some VP9 videos on games
using NVDEC to decode them. In addition to that, there are two other
changes:
> 
> - The remaining decoder code required to decode a VP9 video (with
headers included) has been added. That was done because it's much better
to test the decoder standalone with a video file. I decided to keep that
code on the emulator, even if some of it is unused, since it makes
standalone testing easier in the future too, and we can include unit
tests with video files.
> - Large refactoring of both new and existing code to conform with our
conding [sic] styles, done by @TSRBerry (thanks!) Some of it has been
automated.
> 
> Since we had no loop filtering before, this change will make video
decoding slower. That may cause frame drop etc if the decoder is not
fast enough in some games. I plan to optimize the decoder more in the
future to make up for that, but if possible I'd prefer to not do it as
part of this PR, but if the perf loss is too severe I might consider.
> 
> This will need to be tested on games that had the block artifacts, it
would be nice to confirm if they match hardware now, and get some
before/after screenshots etc.

Comment from @Bjorn29512:

> Significantly improves the block artifacts in FE: Engage.
> 
> Before:
>
![](https://user-images.githubusercontent.com/110204265/216882414-ec88dbda-7544-4490-8a47-37f074056ae3.png)
> 
> After:
>
![](https://user-images.githubusercontent.com/110204265/216882478-4e81fead-1033-4877-b282-f9cac6d6aa3b.png)

---------

Co-authored-by: gdkchan <gab.dark.100@gmail.com>
Co-authored-by: TSR Berry <20988865+TSRBerry@users.noreply.github.com>
This commit is contained in:
Keaton
2025-02-18 20:59:36 -06:00
committed by GitHub
parent 920933bc9f
commit f91cd05260
79 changed files with 11343 additions and 3036 deletions

View File

@@ -13,42 +13,42 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
// for (int i = 1; i < 32; ++i)
// Console.WriteLine("public const short CosPi{0}_64 = {1};", i, MathF.Round(16384 * MathF.Cos(i * MathF.PI / 64)));
// Note: sin(k * Pi / 64) = cos((32 - k) * Pi / 64)
public const short CosPi1_64 = 16364;
public const short CosPi2_64 = 16305;
public const short CosPi3_64 = 16207;
public const short CosPi4_64 = 16069;
public const short CosPi5_64 = 15893;
public const short CosPi6_64 = 15679;
public const short CosPi7_64 = 15426;
public const short CosPi8_64 = 15137;
public const short CosPi9_64 = 14811;
public const short CosPi10_64 = 14449;
public const short CosPi11_64 = 14053;
public const short CosPi12_64 = 13623;
public const short CosPi13_64 = 13160;
public const short CosPi14_64 = 12665;
public const short CosPi15_64 = 12140;
public const short CosPi16_64 = 11585;
public const short CosPi17_64 = 11003;
public const short CosPi18_64 = 10394;
public const short CosPi19_64 = 9760;
public const short CosPi20_64 = 9102;
public const short CosPi21_64 = 8423;
public const short CosPi22_64 = 7723;
public const short CosPi23_64 = 7005;
public const short CosPi24_64 = 6270;
public const short CosPi25_64 = 5520;
public const short CosPi26_64 = 4756;
public const short CosPi27_64 = 3981;
public const short CosPi28_64 = 3196;
public const short CosPi29_64 = 2404;
public const short CosPi30_64 = 1606;
public const short CosPi31_64 = 804;
public const short CosPi164 = 16364;
public const short CosPi264 = 16305;
public const short CosPi364 = 16207;
public const short CosPi464 = 16069;
public const short CosPi564 = 15893;
public const short CosPi664 = 15679;
public const short CosPi764 = 15426;
public const short CosPi864 = 15137;
public const short CosPi964 = 14811;
public const short CosPi1064 = 14449;
public const short CosPi1164 = 14053;
public const short CosPi1264 = 13623;
public const short CosPi1364 = 13160;
public const short CosPi1464 = 12665;
public const short CosPi1564 = 12140;
public const short CosPi1664 = 11585;
public const short CosPi1764 = 11003;
public const short CosPi1864 = 10394;
public const short CosPi1964 = 9760;
public const short CosPi2064 = 9102;
public const short CosPi2164 = 8423;
public const short CosPi2264 = 7723;
public const short CosPi2364 = 7005;
public const short CosPi2464 = 6270;
public const short CosPi2564 = 5520;
public const short CosPi2664 = 4756;
public const short CosPi2764 = 3981;
public const short CosPi2864 = 3196;
public const short CosPi2964 = 2404;
public const short CosPi3064 = 1606;
public const short CosPi3164 = 804;
// 16384 * sqrt(2) * sin(kPi / 9) * 2 / 3
public const short SinPi1_9 = 5283;
public const short SinPi2_9 = 9929;
public const short SinPi3_9 = 13377;
public const short SinPi4_9 = 15212;
public const short SinPi19 = 5283;
public const short SinPi29 = 9929;
public const short SinPi39 = 13377;
public const short SinPi49 = 15212;
}
}
}