Implement a new JIT for Arm devices (#6057)
* Implement a new JIT for Arm devices
* Auto-format
* Make a lot of Assembler members read-only
* More read-only
* Fix more warnings
* ObjectDisposedException.ThrowIf
* New JIT cache for platforms that enforce W^X, currently unused
* Remove unused using
* Fix assert
* Pass memory manager type around
* Safe memory manager mode support + other improvements
* Actual safe memory manager mode masking support
* PR feedback
This commit is contained in:
22
src/Ryujinx.Cpu/LightningJit/Cache/CacheEntry.cs
Normal file
22
src/Ryujinx.Cpu/LightningJit/Cache/CacheEntry.cs
Normal file
@@ -0,0 +1,22 @@
|
||||
using System;
|
||||
using System.Diagnostics.CodeAnalysis;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.Cache
|
||||
{
|
||||
/// <summary>
/// Immutable (offset, size) pair describing one entry of the JIT cache.
/// Entries are ordered by <see cref="Offset"/> only; <see cref="Size"/> does not take part in comparisons.
/// </summary>
readonly struct CacheEntry : IComparable<CacheEntry>
{
    /// <summary>Offset of the entry.</summary>
    public int Offset { get; }

    /// <summary>Size of the entry.</summary>
    public int Size { get; }

    /// <summary>
    /// Creates a new cache entry.
    /// </summary>
    /// <param name="offset">Offset of the entry</param>
    /// <param name="size">Size of the entry</param>
    public CacheEntry(int offset, int size)
    {
        (Offset, Size) = (offset, size);
    }

    /// <summary>
    /// Compares this entry with another one by offset.
    /// </summary>
    /// <param name="other">Entry to compare against</param>
    /// <returns>Negative, zero or positive when this offset is lower than, equal to or greater than the other offset</returns>
    public int CompareTo([AllowNull] CacheEntry other) => Offset.CompareTo(other.Offset);
}
|
||||
}
|
||||
136
src/Ryujinx.Cpu/LightningJit/Cache/CacheMemoryAllocator.cs
Normal file
136
src/Ryujinx.Cpu/LightningJit/Cache/CacheMemoryAllocator.cs
Normal file
@@ -0,0 +1,136 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
using System.Diagnostics.CodeAnalysis;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.Cache
|
||||
{
|
||||
/// <summary>
/// First-fit allocator that hands out offsets inside a fixed-capacity region.
/// It only tracks the free space, as a list of non-adjacent free blocks sorted by offset;
/// it never touches the memory itself.
/// </summary>
class CacheMemoryAllocator
{
    /// <summary>
    /// A free block, ordered by <see cref="Offset"/> only.
    /// </summary>
    private readonly struct MemoryBlock : IComparable<MemoryBlock>
    {
        public int Offset { get; }
        public int Size { get; }

        public MemoryBlock(int offset, int size)
        {
            Offset = offset;
            Size = size;
        }

        public int CompareTo([AllowNull] MemoryBlock other)
        {
            return Offset.CompareTo(other.Offset);
        }
    }

    // Free blocks, kept sorted by offset. Adjacent blocks are merged on insertion.
    private readonly List<MemoryBlock> _blocks = new();

    /// <summary>
    /// Creates an allocator whose whole range [0, <paramref name="capacity"/>) starts out free.
    /// </summary>
    /// <param name="capacity">Total size managed by the allocator</param>
    public CacheMemoryAllocator(int capacity)
    {
        _blocks.Add(new MemoryBlock(0, capacity));
    }

    /// <summary>
    /// Allocates <paramref name="size"/> units from the first free block that is large enough.
    /// </summary>
    /// <param name="size">Size of the allocation</param>
    /// <returns>Offset of the allocation, or -1 if no free block is large enough</returns>
    public int Allocate(int size)
    {
        for (int i = 0; i < _blocks.Count; i++)
        {
            MemoryBlock block = _blocks[i];

            if (block.Size > size)
            {
                // Take the allocation from the start of the block and keep the remainder free.
                _blocks[i] = new(block.Offset + size, block.Size - size);
                return block.Offset;
            }
            else if (block.Size == size)
            {
                // Exact fit, the free block is consumed entirely.
                _blocks.RemoveAt(i);
                return block.Offset;
            }
        }

        // We don't have enough free memory to perform the allocation.
        return -1;
    }

    /// <summary>
    /// Marks the range [<paramref name="offset"/>, <paramref name="offset"/> + <paramref name="size"/>) as allocated.
    /// The whole range must currently lie inside a single free block.
    /// </summary>
    /// <param name="offset">Start offset of the range to allocate</param>
    /// <param name="size">Size of the range to allocate</param>
    /// <exception cref="ArgumentOutOfRangeException">No free block starts at or before <paramref name="offset"/></exception>
    public void ForceAllocation(int offset, int size)
    {
        int index = _blocks.BinarySearch(new(offset, size));

        if (index < 0)
        {
            // No free block starts exactly at offset. ~index is the first block that starts
            // after offset, so the only block that can contain offset is the one before it.
            index = ~index - 1;
        }

        int endOffset = offset + size;

        MemoryBlock block = _blocks[index];

        Debug.Assert(block.Offset <= offset && block.Offset + block.Size >= endOffset);

        if (offset > block.Offset && endOffset < block.Offset + block.Size)
        {
            // The range is in the middle of the block, split it in two free blocks.
            _blocks[index] = new(block.Offset, offset - block.Offset);
            _blocks.Insert(index + 1, new(endOffset, (block.Offset + block.Size) - endOffset));
        }
        else if (offset > block.Offset)
        {
            // The range covers the tail of the block, keep the head free.
            _blocks[index] = new(block.Offset, offset - block.Offset);
        }
        else if (endOffset < block.Offset + block.Size)
        {
            // The range covers the head of the block, keep the tail free.
            _blocks[index] = new(endOffset, (block.Offset + block.Size) - endOffset);
        }
        else
        {
            // The range covers the whole block.
            _blocks.RemoveAt(index);
        }
    }

    /// <summary>
    /// Returns a previously allocated range to the free list.
    /// </summary>
    /// <param name="offset">Start offset of the range</param>
    /// <param name="size">Size of the range</param>
    public void Free(int offset, int size)
    {
        Insert(new MemoryBlock(offset, size));
    }

    /// <summary>
    /// Inserts a free block at its sorted position, merging it with the next and/or previous
    /// free block when they are exactly adjacent.
    /// </summary>
    /// <param name="block">Free block to insert</param>
    private void Insert(MemoryBlock block)
    {
        int index = _blocks.BinarySearch(block);

        if (index < 0)
        {
            index = ~index;
        }

        if (index < _blocks.Count)
        {
            MemoryBlock next = _blocks[index];

            int endOffs = block.Offset + block.Size;

            if (next.Offset == endOffs)
            {
                // Merge with the free block that starts right where this one ends.
                block = new MemoryBlock(block.Offset, block.Size + next.Size);
                _blocks.RemoveAt(index);
            }
        }

        if (index > 0)
        {
            MemoryBlock prev = _blocks[index - 1];

            if (prev.Offset + prev.Size == block.Offset)
            {
                // Merge with the free block that ends right where this one starts.
                block = new MemoryBlock(block.Offset - prev.Size, block.Size + prev.Size);
                _blocks.RemoveAt(--index);
            }
        }

        _blocks.Insert(index, block);
    }

    /// <summary>
    /// Removes every free block. After this call all allocations fail until memory is freed again.
    /// </summary>
    public void Clear()
    {
        _blocks.Clear();
    }
}
|
||||
}
|
||||
197
src/Ryujinx.Cpu/LightningJit/Cache/JitCache.cs
Normal file
197
src/Ryujinx.Cpu/LightningJit/Cache/JitCache.cs
Normal file
@@ -0,0 +1,197 @@
|
||||
using ARMeilleure.Memory;
|
||||
using Ryujinx.Memory;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
using System.Runtime.InteropServices;
|
||||
using System.Runtime.Versioning;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.Cache
|
||||
{
|
||||
/// <summary>
/// Process-wide cache of JIT generated code. Code is copied into a reserved memory region,
/// made executable, and tracked in a list of entries sorted by offset.
/// </summary>
static partial class JitCache
{
    private static readonly int _pageSize = (int)MemoryBlock.GetPageSize();
    private static readonly int _pageMask = _pageSize - 1;

    private const int CodeAlignment = 4; // Bytes.
    private const int CacheSize = 2047 * 1024 * 1024;

    private static ReservedRegion _jitRegion;
    private static JitCacheInvalidation _jitCacheInvalidator;

    private static CacheMemoryAllocator _cacheAllocator;

    // Mapped functions, kept sorted by offset.
    private static readonly List<CacheEntry> _cacheEntries = new();

    private static readonly object _lock = new();
    private static bool _initialized;

    [SupportedOSPlatform("windows")]
    [LibraryImport("kernel32.dll", SetLastError = true)]
    public static partial IntPtr FlushInstructionCache(IntPtr hProcess, IntPtr lpAddress, UIntPtr dwSize);

    /// <summary>
    /// Reserves the JIT region and creates the allocator. Only the first call has any effect.
    /// </summary>
    /// <param name="allocator">Allocator used to reserve the JIT memory region</param>
    public static void Initialize(IJitMemoryAllocator allocator)
    {
        if (_initialized)
        {
            return;
        }

        lock (_lock)
        {
            // Check again, another thread might have initialized the cache while we waited for the lock.
            if (_initialized)
            {
                return;
            }

            _jitRegion = new ReservedRegion(allocator, CacheSize);

            // Windows and macOS flush the instruction cache through other means (see Map).
            if (!OperatingSystem.IsWindows() && !OperatingSystem.IsMacOS())
            {
                _jitCacheInvalidator = new JitCacheInvalidation(allocator);
            }

            _cacheAllocator = new CacheMemoryAllocator(CacheSize);

            _initialized = true;
        }
    }

    /// <summary>
    /// Copies code into the JIT cache and makes it executable.
    /// </summary>
    /// <param name="code">Machine code to be mapped</param>
    /// <returns>Pointer to the mapped, executable copy of the code</returns>
    /// <exception cref="OutOfMemoryException">The JIT cache has no free block large enough for the code</exception>
    public unsafe static IntPtr Map(ReadOnlySpan<byte> code)
    {
        lock (_lock)
        {
            Debug.Assert(_initialized);

            int funcOffset = Allocate(code.Length);

            IntPtr funcPtr = _jitRegion.Pointer + funcOffset;

            if (OperatingSystem.IsMacOS() && RuntimeInformation.ProcessArchitecture == Architecture.Arm64)
            {
                // On Arm64 macOS the copy is delegated to the native JIT support library.
                fixed (byte* codePtr = code)
                {
                    JitSupportDarwin.Copy(funcPtr, (IntPtr)codePtr, (ulong)code.Length);
                }
            }
            else
            {
                ReprotectAsWritable(funcOffset, code.Length);
                code.CopyTo(new Span<byte>((void*)funcPtr, code.Length));
                ReprotectAsExecutable(funcOffset, code.Length);

                if (OperatingSystem.IsWindows() && RuntimeInformation.ProcessArchitecture == Architecture.Arm64)
                {
                    // Process.GetCurrentProcess() creates a new disposable component on every call,
                    // dispose it so the process handle is not left for the finalizer on each mapping.
                    using Process currentProcess = Process.GetCurrentProcess();

                    FlushInstructionCache(currentProcess.Handle, funcPtr, (UIntPtr)code.Length);
                }
                else
                {
                    // Null on Windows and macOS, where no explicit invalidation is performed here.
                    _jitCacheInvalidator?.Invalidate(funcPtr, (ulong)code.Length);
                }
            }

            Add(funcOffset, code.Length);

            return funcPtr;
        }
    }

    /// <summary>
    /// Releases the cache space of a function previously returned by <see cref="Map"/>.
    /// Pointers that do not match the start of a mapped function are ignored.
    /// </summary>
    /// <param name="pointer">Pointer to the start of the mapped function</param>
    public static void Unmap(IntPtr pointer)
    {
        lock (_lock)
        {
            Debug.Assert(_initialized);

            int funcOffset = (int)(pointer.ToInt64() - _jitRegion.Pointer.ToInt64());

            if (TryFind(funcOffset, out CacheEntry entry, out int entryIndex) && entry.Offset == funcOffset)
            {
                // The allocation was made with the aligned size, so free the aligned size too.
                _cacheAllocator.Free(funcOffset, AlignCodeSize(entry.Size));
                _cacheEntries.RemoveAt(entryIndex);
            }
        }
    }

    /// <summary>
    /// Maps the pages that contain the given range as readable, writable and executable.
    /// </summary>
    /// <param name="offset">Start offset of the range inside the JIT region</param>
    /// <param name="size">Size of the range in bytes</param>
    private static void ReprotectAsWritable(int offset, int size)
    {
        int endOffs = offset + size;

        // Expand the range outwards to page boundaries.
        int regionStart = offset & ~_pageMask;
        int regionEnd = (endOffs + _pageMask) & ~_pageMask;

        _jitRegion.Block.MapAsRwx((ulong)regionStart, (ulong)(regionEnd - regionStart));
    }

    /// <summary>
    /// Maps the pages that contain the given range as readable and executable.
    /// </summary>
    /// <param name="offset">Start offset of the range inside the JIT region</param>
    /// <param name="size">Size of the range in bytes</param>
    private static void ReprotectAsExecutable(int offset, int size)
    {
        int endOffs = offset + size;

        // Expand the range outwards to page boundaries.
        int regionStart = offset & ~_pageMask;
        int regionEnd = (endOffs + _pageMask) & ~_pageMask;

        _jitRegion.Block.MapAsRx((ulong)regionStart, (ulong)(regionEnd - regionStart));
    }

    /// <summary>
    /// Allocates space for code on the JIT region, expanding the region if needed.
    /// </summary>
    /// <param name="codeSize">Size of the code in bytes, rounded up to <see cref="CodeAlignment"/></param>
    /// <returns>Offset of the allocation inside the JIT region</returns>
    /// <exception cref="OutOfMemoryException">The allocator has no free block large enough</exception>
    private static int Allocate(int codeSize)
    {
        codeSize = AlignCodeSize(codeSize);

        int allocOffset = _cacheAllocator.Allocate(codeSize);

        if (allocOffset < 0)
        {
            throw new OutOfMemoryException("JIT Cache exhausted.");
        }

        _jitRegion.ExpandIfNeeded((ulong)allocOffset + (ulong)codeSize);

        return allocOffset;
    }

    /// <summary>
    /// Rounds a code size up to the next multiple of <see cref="CodeAlignment"/>.
    /// </summary>
    /// <param name="codeSize">Size of the code in bytes</param>
    /// <returns>Aligned size</returns>
    /// <exception cref="OverflowException">The addition performed before masking overflows</exception>
    private static int AlignCodeSize(int codeSize)
    {
        return checked(codeSize + (CodeAlignment - 1)) & ~(CodeAlignment - 1);
    }

    /// <summary>
    /// Records a mapped function on the sorted entry list.
    /// </summary>
    /// <param name="offset">Offset of the function inside the JIT region</param>
    /// <param name="size">Unaligned size of the function in bytes</param>
    private static void Add(int offset, int size)
    {
        CacheEntry entry = new(offset, size);

        int index = _cacheEntries.BinarySearch(entry);

        if (index < 0)
        {
            index = ~index;
        }

        _cacheEntries.Insert(index, entry);
    }

    /// <summary>
    /// Finds the entry with the highest offset that is lower than or equal to <paramref name="offset"/>.
    /// The entry is not checked to actually contain the offset, that is up to the caller.
    /// </summary>
    /// <param name="offset">Offset inside the JIT region</param>
    /// <param name="entry">Entry found, or default if none</param>
    /// <param name="entryIndex">Index of the entry on the entry list, or 0 if none</param>
    /// <returns>True if an entry starting at or before the offset exists, false otherwise</returns>
    public static bool TryFind(int offset, out CacheEntry entry, out int entryIndex)
    {
        lock (_lock)
        {
            int index = _cacheEntries.BinarySearch(new CacheEntry(offset, 0));

            if (index < 0)
            {
                // Not an exact match, pick the entry right before the insertion point.
                index = ~index - 1;
            }

            if (index >= 0)
            {
                entry = _cacheEntries[index];
                entryIndex = index;
                return true;
            }
        }

        entry = default;
        entryIndex = 0;
        return false;
    }
}
|
||||
}
|
||||
79
src/Ryujinx.Cpu/LightningJit/Cache/JitCacheInvalidation.cs
Normal file
79
src/Ryujinx.Cpu/LightningJit/Cache/JitCacheInvalidation.cs
Normal file
@@ -0,0 +1,79 @@
|
||||
using ARMeilleure.Memory;
|
||||
using System;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.Cache
|
||||
{
|
||||
/// <summary>
/// Holds a small Arm64 routine that cleans the data cache and invalidates the instruction cache
/// for an address range, and exposes it as a managed call.
/// On other process architectures the instance is inert and <see cref="Invalidate"/> does nothing.
/// </summary>
class JitCacheInvalidation
{
    // Pre-assembled Arm64 machine code, one instruction per element.
    // The routine receives the range start in x0 and the range end in x1.
    private static readonly int[] _invalidationCode =
    {
        unchecked((int)0xd53b0022), // mrs  x2, ctr_el0
        unchecked((int)0xd3504c44), // ubfx x4, x2, #16, #4
        unchecked((int)0x52800083), // mov  w3, #0x4
        unchecked((int)0x12000c45), // and  w5, w2, #0xf
        unchecked((int)0x1ac42064), // lsl  w4, w3, w4
        unchecked((int)0x51000482), // sub  w2, w4, #0x1
        unchecked((int)0x8a220002), // bic  x2, x0, x2
        unchecked((int)0x1ac52063), // lsl  w3, w3, w5
        unchecked((int)0xeb01005f), // cmp  x2, x1
        unchecked((int)0x93407c84), // sxtw x4, w4
        unchecked((int)0x540000a2), // b.cs 3c <do_ic_clear>
        unchecked((int)0xd50b7b22), // dc   cvau, x2
        unchecked((int)0x8b040042), // add  x2, x2, x4
        unchecked((int)0xeb02003f), // cmp  x1, x2
        unchecked((int)0x54ffffa8), // b.hi 2c <dc_clear_loop>
        unchecked((int)0xd5033b9f), // dsb  ish
        unchecked((int)0x51000462), // sub  w2, w3, #0x1
        unchecked((int)0x93407c63), // sxtw x3, w3
        unchecked((int)0x8a220000), // bic  x0, x0, x2
        unchecked((int)0xeb00003f), // cmp  x1, x0
        unchecked((int)0x540000a9), // b.ls 64 <exit>
        unchecked((int)0xd50b7520), // ic   ivau, x0
        unchecked((int)0x8b030000), // add  x0, x0, x3
        unchecked((int)0xeb00003f), // cmp  x1, x0
        unchecked((int)0x54ffffa8), // b.hi 54 <ic_clear_loop>
        unchecked((int)0xd5033b9f), // dsb  ish
        unchecked((int)0xd5033fdf), // isb
        unchecked((int)0xd65f03c0), // ret
    };

    private delegate void InvalidateCache(ulong start, ulong end);

    private readonly InvalidateCache _invalidateCache;
    private readonly ReservedRegion _invalidateCacheCodeRegion;

    private readonly bool _needsInvalidation;

    /// <summary>
    /// Creates the invalidation helper. On Arm64 this copies the invalidation routine into its own
    /// memory region, makes it executable and binds a delegate to it.
    /// </summary>
    /// <param name="allocator">Allocator used to reserve the region that holds the routine</param>
    public JitCacheInvalidation(IJitMemoryAllocator allocator)
    {
        // On macOS and Windows, a different path is used to write to the JIT cache, which does the invalidation.
        if (RuntimeInformation.ProcessArchitecture != Architecture.Arm64)
        {
            // The routine is Arm64 machine code, leave the instance inert everywhere else.
            return;
        }

        ulong codeSize = (ulong)_invalidationCode.Length * sizeof(int);
        ulong granularityMask = (ulong)ReservedRegion.DefaultGranularity - 1;

        // Round the size up to the region granularity.
        codeSize = (codeSize + granularityMask) & ~granularityMask;

        _invalidateCacheCodeRegion = new ReservedRegion(allocator, codeSize);
        _invalidateCacheCodeRegion.ExpandIfNeeded(codeSize);

        IntPtr routinePointer = _invalidateCacheCodeRegion.Pointer;

        Marshal.Copy(_invalidationCode, 0, routinePointer, _invalidationCode.Length);

        _invalidateCacheCodeRegion.Block.MapAsRx(0, codeSize);

        _invalidateCache = Marshal.GetDelegateForFunctionPointer<InvalidateCache>(_invalidateCacheCodeRegion.Pointer);

        _needsInvalidation = true;
    }

    /// <summary>
    /// Runs the invalidation routine over the given range, if one was set up by the constructor.
    /// </summary>
    /// <param name="basePointer">Start address of the range</param>
    /// <param name="size">Size of the range in bytes</param>
    public void Invalidate(IntPtr basePointer, ulong size)
    {
        if (!_needsInvalidation)
        {
            return;
        }

        ulong start = (ulong)basePointer;

        _invalidateCache(start, start + size);
    }
}
|
||||
}
|
||||
16
src/Ryujinx.Cpu/LightningJit/Cache/JitSupportDarwin.cs
Normal file
16
src/Ryujinx.Cpu/LightningJit/Cache/JitSupportDarwin.cs
Normal file
@@ -0,0 +1,16 @@
|
||||
using System;
|
||||
using System.Runtime.InteropServices;
|
||||
using System.Runtime.Versioning;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.Cache
|
||||
{
|
||||
/// <summary>
/// Native imports used by the JIT caches on Apple platforms.
/// </summary>
[SupportedOSPlatform("macos")]
static partial class JitSupportDarwin
{
    /// <summary>
    /// Binds to <c>armeilleure_jit_memcpy</c> from the <c>libarmeilleure-jitsupport</c> native library.
    /// NOTE(review): presumably copies <paramref name="n"/> bytes from <paramref name="src"/> to
    /// <paramref name="dst"/> inside JIT memory; confirm against the native library.
    /// </summary>
    /// <param name="dst">Destination pointer</param>
    /// <param name="src">Source pointer</param>
    /// <param name="n">Byte count passed to the native function</param>
    [LibraryImport("libarmeilleure-jitsupport", EntryPoint = "armeilleure_jit_memcpy")]
    public static partial void Copy(IntPtr dst, IntPtr src, ulong n);

    /// <summary>
    /// Binds to <c>sys_icache_invalidate</c> from libc.
    /// </summary>
    /// <param name="start">Start address of the range</param>
    /// <param name="len">Length of the range</param>
    [LibraryImport("libc", SetLastError = true, EntryPoint = "sys_icache_invalidate")]
    public static partial void SysIcacheInvalidate(IntPtr start, IntPtr len);
}
|
||||
}
|
||||
340
src/Ryujinx.Cpu/LightningJit/Cache/NoWxCache.cs
Normal file
340
src/Ryujinx.Cpu/LightningJit/Cache/NoWxCache.cs
Normal file
@@ -0,0 +1,340 @@
|
||||
using ARMeilleure.Memory;
|
||||
using Ryujinx.Common;
|
||||
using Ryujinx.Memory;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.Cache
|
||||
{
|
||||
/// <summary>
/// JIT cache that never needs memory to be writable and executable at the same time.
/// Code is written to a shared cache that only becomes executable once whole pages are filled
/// (or padded); until then each thread runs its own page-aligned copy from a local cache.
/// </summary>
class NoWxCache : IDisposable
{
    private const int CodeAlignment = 4; // Bytes.
    private const int SharedCacheSize = 2047 * 1024 * 1024;
    private const int LocalCacheSize = 128 * 1024 * 1024;

    // How many calls to the same function we allow until we pad the shared cache to force the function to become available there
    // and allow the guest to take the fast path.
    private const int MinCallsForPad = 8;

    /// <summary>
    /// A reserved memory region paired with the allocator that manages offsets inside it.
    /// </summary>
    private class MemoryCache : IDisposable
    {
        private readonly ReservedRegion _region;
        private readonly CacheMemoryAllocator _cacheAllocator;

        public CacheMemoryAllocator Allocator => _cacheAllocator;
        public IntPtr Pointer => _region.Block.Pointer;

        /// <summary>
        /// Reserves a region of the given size and creates an allocator covering all of it.
        /// </summary>
        /// <param name="allocator">Allocator used to reserve the region</param>
        /// <param name="size">Size of the region in bytes</param>
        public MemoryCache(IJitMemoryAllocator allocator, ulong size)
        {
            _region = new(allocator, size);
            _cacheAllocator = new((int)size);
        }

        /// <summary>
        /// Allocates space for code on the region, expanding the region if needed.
        /// </summary>
        /// <param name="codeSize">Size of the code in bytes, rounded up to <see cref="CodeAlignment"/></param>
        /// <returns>Offset of the allocation inside the region</returns>
        /// <exception cref="OutOfMemoryException">The allocator has no free block large enough</exception>
        public int Allocate(int codeSize)
        {
            codeSize = AlignCodeSize(codeSize);

            int allocOffset = _cacheAllocator.Allocate(codeSize);

            if (allocOffset < 0)
            {
                throw new OutOfMemoryException("JIT Cache exhausted.");
            }

            _region.ExpandIfNeeded((ulong)allocOffset + (ulong)codeSize);

            return allocOffset;
        }

        /// <summary>
        /// Returns a range to the allocator. The memory protection is not changed.
        /// </summary>
        /// <param name="offset">Start offset of the range</param>
        /// <param name="size">Size of the range in bytes</param>
        public void Free(int offset, int size)
        {
            _cacheAllocator.Free(offset, size);
        }

        /// <summary>
        /// Maps a page-aligned range as readable and writable.
        /// </summary>
        /// <param name="offset">Page-aligned start offset of the range</param>
        /// <param name="size">Page-aligned, non-zero size of the range in bytes</param>
        public void ReprotectAsRw(int offset, int size)
        {
            Debug.Assert(offset >= 0 && (offset & (int)(MemoryBlock.GetPageSize() - 1)) == 0);
            Debug.Assert(size > 0 && (size & (int)(MemoryBlock.GetPageSize() - 1)) == 0);

            _region.Block.MapAsRw((ulong)offset, (ulong)size);
        }

        /// <summary>
        /// Maps a page-aligned range as readable and executable, then invalidates the instruction cache for it.
        /// </summary>
        /// <param name="offset">Page-aligned start offset of the range</param>
        /// <param name="size">Page-aligned, non-zero size of the range in bytes</param>
        /// <exception cref="PlatformNotSupportedException">The platform is neither macOS nor iOS</exception>
        public void ReprotectAsRx(int offset, int size)
        {
            Debug.Assert(offset >= 0 && (offset & (int)(MemoryBlock.GetPageSize() - 1)) == 0);
            Debug.Assert(size > 0 && (size & (int)(MemoryBlock.GetPageSize() - 1)) == 0);

            _region.Block.MapAsRx((ulong)offset, (ulong)size);

            if (OperatingSystem.IsMacOS() || OperatingSystem.IsIOS())
            {
                JitSupportDarwin.SysIcacheInvalidate(_region.Block.Pointer + offset, size);
            }
            else
            {
                // Instruction cache invalidation is only implemented for Apple platforms here.
                throw new PlatformNotSupportedException();
            }
        }

        /// <summary>
        /// Rounds a code size up to the next multiple of <see cref="CodeAlignment"/>.
        /// </summary>
        /// <param name="codeSize">Size of the code in bytes</param>
        /// <returns>Aligned size</returns>
        private static int AlignCodeSize(int codeSize)
        {
            return checked(codeSize + (CodeAlignment - 1)) & ~(CodeAlignment - 1);
        }

        protected virtual void Dispose(bool disposing)
        {
            if (disposing)
            {
                _region.Dispose();
                _cacheAllocator.Clear();
            }
        }

        public void Dispose()
        {
            // Do not change this code. Put cleanup code in 'Dispose(bool disposing)' method
            Dispose(disposing: true);
            GC.SuppressFinalize(this);
        }
    }

    private readonly IStackWalker _stackWalker;
    private readonly Translator _translator;
    private readonly MemoryCache _sharedCache;
    private readonly MemoryCache _localCache;
    private readonly PageAlignedRangeList _pendingMap;

    // Guards the shared cache, the local cache allocator and the pending map.
    private readonly object _lock;

    /// <summary>
    /// A function copy living on the local cache, owned by the thread that created it.
    /// </summary>
    class ThreadLocalCacheEntry
    {
        public readonly int Offset;
        public readonly int Size;
        public readonly IntPtr FuncPtr;
        private int _useCount;

        /// <summary>
        /// Creates a new entry with a use count of zero.
        /// </summary>
        /// <param name="offset">Offset of the function inside the local cache</param>
        /// <param name="size">Unaligned size of the function code in bytes</param>
        /// <param name="funcPtr">Pointer to the function inside the local cache</param>
        public ThreadLocalCacheEntry(int offset, int size, IntPtr funcPtr)
        {
            Offset = offset;
            Size = size;
            FuncPtr = funcPtr;
            _useCount = 0;
        }

        /// <summary>
        /// Increments the use count.
        /// </summary>
        /// <returns>The use count after the increment</returns>
        public int IncrementUseCount()
        {
            return ++_useCount;
        }
    }

    // Per-thread map from guest address to the thread's local copy of the function. Created lazily.
    [ThreadStatic]
    private static Dictionary<ulong, ThreadLocalCacheEntry> _threadLocalCache;

    /// <summary>
    /// Creates the cache, reserving the shared and local memory regions.
    /// </summary>
    /// <param name="allocator">Allocator used to reserve both regions</param>
    /// <param name="stackWalker">Stack walker used to find which local functions are still being executed</param>
    /// <param name="translator">Translator where functions are registered once executable on the shared cache</param>
    public NoWxCache(IJitMemoryAllocator allocator, IStackWalker stackWalker, Translator translator)
    {
        _stackWalker = stackWalker;
        _translator = translator;
        _sharedCache = new(allocator, SharedCacheSize);
        _localCache = new(allocator, LocalCacheSize);
        _pendingMap = new(_sharedCache.ReprotectAsRx, RegisterFunction);
        _lock = new();
    }

    /// <summary>
    /// Maps a translated function. If the calling thread already has a local copy, that copy is returned.
    /// Otherwise the code is written to the shared cache (unless already pending or registered there)
    /// and a new executable copy is created on the local cache for the calling thread.
    /// </summary>
    /// <param name="framePointer">Frame pointer of the calling thread, used to walk its call stack</param>
    /// <param name="code">Machine code of the function</param>
    /// <param name="guestAddress">Guest address of the function</param>
    /// <param name="guestSize">Size of the function on the guest</param>
    /// <returns>Pointer to an executable copy of the function</returns>
    /// <exception cref="OutOfMemoryException">One of the caches has no free block large enough</exception>
    public unsafe IntPtr Map(IntPtr framePointer, ReadOnlySpan<byte> code, ulong guestAddress, ulong guestSize)
    {
        if (TryGetThreadLocalFunction(guestAddress, out IntPtr funcPtr))
        {
            return funcPtr;
        }

        lock (_lock)
        {
            if (!_pendingMap.Has(guestAddress) && !_translator.Functions.ContainsKey(guestAddress))
            {
                int funcOffset = _sharedCache.Allocate(code.Length);

                funcPtr = _sharedCache.Pointer + funcOffset;
                code.CopyTo(new Span<byte>((void*)funcPtr, code.Length));

                TranslatedFunction function = new(funcPtr, guestSize);

                // The function is registered by the pending map once its pages are made executable.
                _pendingMap.Add(funcOffset, code.Length, guestAddress, function);
            }

            ClearThreadLocalCache(framePointer);

            return AddThreadLocalFunction(code, guestAddress);
        }
    }

    /// <summary>
    /// Maps code on the shared cache at a page-aligned offset and makes it executable immediately.
    /// </summary>
    /// <param name="code">Machine code to be mapped</param>
    /// <returns>Pointer to the executable code</returns>
    /// <exception cref="OutOfMemoryException">The shared cache has no free block large enough</exception>
    public unsafe IntPtr MapPageAligned(ReadOnlySpan<byte> code)
    {
        lock (_lock)
        {
            // Ensure we will get an aligned offset from the allocator.
            _pendingMap.Pad(_sharedCache.Allocator);

            int sizeAligned = BitUtils.AlignUp(code.Length, (int)MemoryBlock.GetPageSize());
            int funcOffset = _sharedCache.Allocate(sizeAligned);

            Debug.Assert((funcOffset & ((int)MemoryBlock.GetPageSize() - 1)) == 0);

            IntPtr funcPtr = _sharedCache.Pointer + funcOffset;
            code.CopyTo(new Span<byte>((void*)funcPtr, code.Length));

            _sharedCache.ReprotectAsRx(funcOffset, sizeAligned);

            return funcPtr;
        }
    }

    /// <summary>
    /// Looks up the calling thread's local copy of a function and counts the use.
    /// </summary>
    /// <param name="guestAddress">Guest address of the function</param>
    /// <param name="funcPtr">Pointer to the local copy, or <see cref="IntPtr.Zero"/> if there is none</param>
    /// <returns>True if the calling thread has a local copy of the function, false otherwise</returns>
    private bool TryGetThreadLocalFunction(ulong guestAddress, out IntPtr funcPtr)
    {
        if ((_threadLocalCache ??= new()).TryGetValue(guestAddress, out var entry))
        {
            if (entry.IncrementUseCount() >= MinCallsForPad)
            {
                // Function is being called often, let's make it available in the shared cache so that the guest code
                // can take the fast path and stop calling the emulator to get the function from the thread local cache.
                // To do that we pad all "pending" function until they complete a page of memory, allowing us to reprotect them as RX.

                lock (_lock)
                {
                    _pendingMap.Pad(_sharedCache.Allocator);
                }
            }

            funcPtr = entry.FuncPtr;

            return true;
        }

        funcPtr = IntPtr.Zero;

        return false;
    }

    /// <summary>
    /// Deletes the calling thread's local functions that are no longer pending on the shared cache
    /// and are not part of the thread's current call stack.
    /// Mutates the local cache allocator; the only caller in this class, <see cref="Map"/>, holds the lock.
    /// </summary>
    /// <param name="framePointer">Frame pointer of the calling thread</param>
    private void ClearThreadLocalCache(IntPtr framePointer)
    {
        // Try to delete functions that are already on the shared cache
        // and no longer being executed.

        if (_threadLocalCache == null)
        {
            return;
        }

        IEnumerable<ulong> callStack = _stackWalker.GetCallStack(
            framePointer,
            _localCache.Pointer,
            LocalCacheSize,
            _sharedCache.Pointer,
            SharedCacheSize);

        // Removal is deferred, the dictionary can't be modified while it is being enumerated.
        List<(ulong, ThreadLocalCacheEntry)> toDelete = new();

        foreach ((ulong address, ThreadLocalCacheEntry entry) in _threadLocalCache)
        {
            // We only want to delete if the function is already on the shared cache,
            // otherwise we will keep translating the same function over and over again.
            bool canDelete = !_pendingMap.Has(address);
            if (!canDelete)
            {
                continue;
            }

            // We can only delete if the function is not part of the current thread call stack,
            // otherwise we will crash the program when the thread returns to it.
            foreach (ulong funcAddress in callStack)
            {
                if (funcAddress >= (ulong)entry.FuncPtr && funcAddress < (ulong)entry.FuncPtr + (ulong)entry.Size)
                {
                    canDelete = false;
                    break;
                }
            }

            if (canDelete)
            {
                toDelete.Add((address, entry));
            }
        }

        int pageSize = (int)MemoryBlock.GetPageSize();

        foreach ((ulong address, ThreadLocalCacheEntry entry) in toDelete)
        {
            _threadLocalCache.Remove(address);

            // Local functions are allocated with a page-aligned size, free the same amount.
            int sizeAligned = BitUtils.AlignUp(entry.Size, pageSize);

            _localCache.Free(entry.Offset, sizeAligned);
            _localCache.ReprotectAsRw(entry.Offset, sizeAligned);
        }
    }

    /// <summary>
    /// Deletes every local function owned by the calling thread and drops the thread's cache.
    /// Intended to be called when the thread is exiting.
    /// </summary>
    public void ClearEntireThreadLocalCache()
    {
        // Thread is exiting, delete everything.

        if (_threadLocalCache == null)
        {
            return;
        }

        int pageSize = (int)MemoryBlock.GetPageSize();

        // The local cache allocator is shared by all threads and is mutated by Map under the lock,
        // so freeing must hold the same lock. The lock is reentrant, callers may already hold it.
        lock (_lock)
        {
            foreach ((_, ThreadLocalCacheEntry entry) in _threadLocalCache)
            {
                int sizeAligned = BitUtils.AlignUp(entry.Size, pageSize);

                _localCache.Free(entry.Offset, sizeAligned);
                _localCache.ReprotectAsRw(entry.Offset, sizeAligned);
            }
        }

        _threadLocalCache.Clear();
        _threadLocalCache = null;
    }

    /// <summary>
    /// Creates an executable, page-aligned copy of a function on the local cache for the calling thread.
    /// Mutates the local cache allocator; the only caller in this class, <see cref="Map"/>, holds the lock.
    /// </summary>
    /// <param name="code">Machine code of the function</param>
    /// <param name="guestAddress">Guest address of the function</param>
    /// <returns>Pointer to the executable local copy</returns>
    private unsafe IntPtr AddThreadLocalFunction(ReadOnlySpan<byte> code, ulong guestAddress)
    {
        int alignedSize = BitUtils.AlignUp(code.Length, (int)MemoryBlock.GetPageSize());
        int funcOffset = _localCache.Allocate(alignedSize);

        Debug.Assert((funcOffset & (int)(MemoryBlock.GetPageSize() - 1)) == 0);

        IntPtr funcPtr = _localCache.Pointer + funcOffset;
        code.CopyTo(new Span<byte>((void*)funcPtr, code.Length));

        (_threadLocalCache ??= new()).Add(guestAddress, new(funcOffset, code.Length, funcPtr));

        _localCache.ReprotectAsRx(funcOffset, alignedSize);

        return funcPtr;
    }

    /// <summary>
    /// Registers a function on the translator. Passed to the pending map as its function callback.
    /// </summary>
    /// <param name="address">Guest address of the function</param>
    /// <param name="func">Translated function living on the shared cache</param>
    private void RegisterFunction(ulong address, TranslatedFunction func)
    {
        TranslatedFunction oldFunc = _translator.Functions.GetOrAdd(address, func.GuestSize, func);

        Debug.Assert(oldFunc == func);

        _translator.RegisterFunction(address, func);
    }

    protected virtual void Dispose(bool disposing)
    {
        if (disposing)
        {
            _localCache.Dispose();
            _sharedCache.Dispose();
        }
    }

    public void Dispose()
    {
        Dispose(disposing: true);
        GC.SuppressFinalize(this);
    }
}
|
||||
}
|
||||
218
src/Ryujinx.Cpu/LightningJit/Cache/PageAlignedRangeList.cs
Normal file
218
src/Ryujinx.Cpu/LightningJit/Cache/PageAlignedRangeList.cs
Normal file
@@ -0,0 +1,218 @@
|
||||
using Ryujinx.Common;
|
||||
using Ryujinx.Memory;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
using System.Diagnostics.CodeAnalysis;
|
||||
|
||||
namespace Ryujinx.Cpu.LightningJit.Cache
|
||||
{
|
||||
/// <summary>
/// Tracks ranges that are waiting to cover whole pages, together with the functions written on them.
/// Whenever part of a range covers whole pages, the range callback is invoked for those pages and
/// the function callback is invoked for the pending functions found in the resulting gap between ranges.
/// </summary>
class PageAlignedRangeList
{
    /// <summary>
    /// An (offset, size) pair ordered by <see cref="Offset"/> only.
    /// </summary>
    private readonly struct Range : IComparable<Range>
    {
        public int Offset { get; }
        public int Size { get; }

        public Range(int offset, int size)
        {
            (Offset, Size) = (offset, size);
        }

        public int CompareTo([AllowNull] Range other) => Offset.CompareTo(other.Offset);
    }

    private readonly Action<int, int> _alignedRangeAction;
    private readonly Action<ulong, TranslatedFunction> _alignedFunctionAction;
    private readonly List<(Range, ulong, TranslatedFunction)> _pendingFunctions;

    // Pending ranges, sorted by offset, with adjacent ranges merged.
    private readonly List<Range> _ranges;

    /// <summary>
    /// Creates a new list.
    /// </summary>
    /// <param name="alignedRangeAction">Invoked with (offset, size) for each page-aligned span that is completed</param>
    /// <param name="alignedFunctionAction">Invoked with (address, function) for each pending function that is released</param>
    public PageAlignedRangeList(Action<int, int> alignedRangeAction, Action<ulong, TranslatedFunction> alignedFunctionAction)
    {
        _alignedRangeAction = alignedRangeAction;
        _alignedFunctionAction = alignedFunctionAction;
        _pendingFunctions = new();
        _ranges = new();
    }

    /// <summary>
    /// Checks if a function with the given address is still pending.
    /// </summary>
    /// <param name="address">Address the function was added with</param>
    /// <returns>True if the function is pending, false otherwise</returns>
    public bool Has(ulong address)
    {
        return _pendingFunctions.Exists(pending => pending.Item2 == address);
    }

    /// <summary>
    /// Adds a pending function and processes any range that now covers whole pages.
    /// </summary>
    /// <param name="offset">Offset of the function code</param>
    /// <param name="size">Size of the function code</param>
    /// <param name="address">Address passed back to the function callback</param>
    /// <param name="function">Function passed back to the function callback</param>
    public void Add(int offset, int size, ulong address, TranslatedFunction function)
    {
        Range added = new(offset, size);

        Insert(added);
        _pendingFunctions.Add((added, address, function));
        ProcessAlignedRanges();
    }

    /// <summary>
    /// Completes every pending range by force-allocating the rest of the pages it touches,
    /// then processes the ranges as if they had been page-aligned all along.
    /// </summary>
    /// <param name="allocator">Allocator where the padding is force-allocated</param>
    public void Pad(CacheMemoryAllocator allocator)
    {
        int pageSize = (int)MemoryBlock.GetPageSize();

        for (int i = 0; i < _ranges.Count; i++)
        {
            Range current = _ranges[i];

            int rangeStart = current.Offset;
            int rangeEnd = current.Offset + current.Size;

            // Grow the range outwards to page boundaries.
            int alignedStart = BitUtils.AlignDown(rangeStart, pageSize);
            int alignedEnd = BitUtils.AlignUp(rangeEnd, pageSize);
            int alignedSize = alignedEnd - alignedStart;

            if (alignedStart < rangeStart)
            {
                allocator.ForceAllocation(alignedStart, rangeStart - alignedStart);
            }

            if (alignedEnd > rangeEnd)
            {
                allocator.ForceAllocation(rangeEnd, alignedEnd - rangeEnd);
            }

            _alignedRangeAction(alignedStart, alignedSize);

            // The range is complete. Step the index back so the loop visits the element that takes its place.
            _ranges.RemoveAt(i);
            i--;

            ProcessPendingFunctions(i, alignedEnd);
        }
    }

    /// <summary>
    /// Processes the whole pages covered by each pending range and keeps only the partial pages pending.
    /// </summary>
    private void ProcessAlignedRanges()
    {
        int pageSize = (int)MemoryBlock.GetPageSize();

        for (int i = 0; i < _ranges.Count; i++)
        {
            Range current = _ranges[i];

            // Shrink the range inwards to page boundaries.
            int alignedStart = BitUtils.AlignUp(current.Offset, pageSize);
            int alignedEnd = BitUtils.AlignDown(current.Offset + current.Size, pageSize);
            int alignedSize = alignedEnd - alignedStart;

            if (alignedSize > 0)
            {
                _alignedRangeAction(alignedStart, alignedSize);
                SplitAt(ref i, alignedStart, alignedEnd);
                ProcessPendingFunctions(i, alignedEnd);
            }
        }
    }

    /// <summary>
    /// Invokes the function callback for every pending function that lies entirely on the gap
    /// between two pending ranges, and removes those functions from the pending list.
    /// </summary>
    /// <param name="rangeIndex">Index on the range list where processing just happened, might be -1 or equal to the range count</param>
    /// <param name="alignedEnd">End offset of the span that was just processed</param>
    private void ProcessPendingFunctions(int rangeIndex, int alignedEnd)
    {
        // Move back to the range that precedes the processed span, when the index points past it.
        bool pastLastRange = rangeIndex > 0 && rangeIndex == _ranges.Count;
        bool afterProcessedSpan = rangeIndex >= 0 && rangeIndex < _ranges.Count && _ranges[rangeIndex].Offset >= alignedEnd;

        if (pastLastRange || afterProcessedSpan)
        {
            rangeIndex--;
        }

        // The gap starts where the previous pending range ends (or at zero if there is none)...
        int gapStart = rangeIndex >= 0
            ? _ranges[rangeIndex].Offset + _ranges[rangeIndex].Size
            : 0;

        // ... and ends where the next pending range starts (or never, if there is none).
        int gapEnd = rangeIndex < _ranges.Count - 1
            ? _ranges[rangeIndex + 1].Offset
            : int.MaxValue;

        int i = 0;

        while (i < _pendingFunctions.Count)
        {
            (Range range, ulong address, TranslatedFunction function) = _pendingFunctions[i];

            bool insideGap = range.Offset >= gapStart && range.Offset + range.Size <= gapEnd;

            if (insideGap)
            {
                _alignedFunctionAction(address, function);

                // Do not advance, the next element now sits on the same index.
                _pendingFunctions.RemoveAt(i);
            }
            else
            {
                i++;
            }
        }
    }

    /// <summary>
    /// Inserts a range at its sorted position, merging it with the next and/or previous
    /// range when they are exactly adjacent.
    /// </summary>
    /// <param name="range">Range to insert</param>
    private void Insert(Range range)
    {
        int position = _ranges.BinarySearch(range);

        if (position < 0)
        {
            position = ~position;
        }

        if (position < _ranges.Count)
        {
            Range following = _ranges[position];

            if (following.Offset == range.Offset + range.Size)
            {
                // Absorb the range that starts right where the new one ends.
                range = new Range(range.Offset, range.Size + following.Size);
                _ranges.RemoveAt(position);
            }
        }

        if (position > 0)
        {
            Range preceding = _ranges[position - 1];

            if (preceding.Offset + preceding.Size == range.Offset)
            {
                // Absorb the range that ends right where the new one starts.
                range = new Range(range.Offset - preceding.Size, range.Size + preceding.Size);
                position--;
                _ranges.RemoveAt(position);
            }
        }

        _ranges.Insert(position, range);
    }

    /// <summary>
    /// Removes the span [<paramref name="alignedStart"/>, <paramref name="alignedEnd"/>) from the range at
    /// <paramref name="index"/>, keeping whatever is left before and/or after it on the list.
    /// </summary>
    /// <param name="index">Index of the range to split, updated to account for replaced or removed elements</param>
    /// <param name="alignedStart">Start offset of the span to remove</param>
    /// <param name="alignedEnd">End offset of the span to remove</param>
    private void SplitAt(ref int index, int alignedStart, int alignedEnd)
    {
        Range range = _ranges[index];

        int rangeEnd = range.Offset + range.Size;

        if (range.Offset < alignedStart)
        {
            // Keep the head, and move on to the position right after it.
            _ranges[index] = new(range.Offset, alignedStart - range.Offset);
            index++;

            if (rangeEnd > alignedEnd)
            {
                // Keep the tail as well.
                _ranges.Insert(index, new(alignedEnd, rangeEnd - alignedEnd));
            }
        }
        else if (rangeEnd > alignedEnd)
        {
            // Only a tail is left.
            _ranges[index] = new(alignedEnd, rangeEnd - alignedEnd);
        }
        else if (range.Offset == alignedStart && rangeEnd == alignedEnd)
        {
            Debug.Assert(range.Offset == alignedStart && range.Offset + range.Size == alignedEnd);

            // Nothing is left of the range.
            _ranges.RemoveAt(index);
            index--;
        }
    }
}
|
||||
}
|
||||
Reference in New Issue
Block a user