Implement a new JIT for Arm devices (#6057)

* Implement a new JIT for Arm devices

* Auto-format

* Make a lot of Assembler members read-only

* More read-only

* Fix more warnings

* ObjectDisposedException.ThrowIf

* New JIT cache for platforms that enforce W^X, currently unused

* Remove unused using

* Fix assert

* Pass memory manager type around

* Safe memory manager mode support + other improvements

* Actual safe memory manager mode masking support

* PR feedback
This commit is contained in:
gdkchan
2024-01-20 11:11:28 -03:00
committed by GitHub
parent 331c07807f
commit 427b7d06b5
135 changed files with 43322 additions and 24 deletions

View File

@@ -0,0 +1,29 @@
using ARMeilleure.Common;
using ARMeilleure.Memory;
using Ryujinx.Cpu.LightningJit.Arm64.Target.Arm64;
using System;
using System.Runtime.InteropServices;
namespace Ryujinx.Cpu.LightningJit.Arm64
{
/// <summary>
/// Front door for compiling Arm64 guest code, dispatching to the backend that matches the host architecture.
/// </summary>
static class A64Compiler
{
    /// <summary>
    /// Compiles the guest function starting at <paramref name="address"/>.
    /// </summary>
    /// <param name="cpuPreset">CPU preset forwarded to the backend compiler</param>
    /// <param name="memoryManager">Memory manager forwarded to the backend compiler</param>
    /// <param name="address">Guest address of the function to compile</param>
    /// <param name="funcTable">Function table forwarded to the backend compiler</param>
    /// <param name="dispatchStubPtr">Dispatch stub pointer forwarded to the backend compiler</param>
    /// <param name="targetArch">Host architecture that code should be generated for</param>
    /// <returns>The compiled function produced by the backend</returns>
    /// <exception cref="PlatformNotSupportedException"><paramref name="targetArch"/> is not <see cref="Architecture.Arm64"/></exception>
    public static CompiledFunction Compile(
        CpuPreset cpuPreset,
        IMemoryManager memoryManager,
        ulong address,
        AddressTable<ulong> funcTable,
        IntPtr dispatchStubPtr,
        Architecture targetArch)
    {
        // Arm64 is the only host architecture with a backend for this guest.
        if (targetArch != Architecture.Arm64)
        {
            throw new PlatformNotSupportedException();
        }

        return Compiler.Compile(cpuPreset, memoryManager, address, funcTable, dispatchStubPtr);
    }
}
}

View File

@@ -0,0 +1,138 @@
using Ryujinx.Cpu.LightningJit.Graph;
using System.Collections.Generic;
using System.Diagnostics;
namespace Ryujinx.Cpu.LightningJit.Arm64
{
/// <summary>
/// A contiguous run of guest instructions, plus the edges linking it to other blocks.
/// </summary>
class Block : IBlock
{
    /// <summary>Index given to the block by <see cref="Number"/>.</summary>
    public int Index { get; private set; }

    private readonly List<Block> _predecessors = new();
    private readonly List<Block> _successors = new();

    public int PredecessorsCount => _predecessors.Count;
    public int SuccessorsCount => _successors.Count;

    /// <summary>Guest address of the first instruction.</summary>
    public readonly ulong Address;

    /// <summary>Guest address right after the last instruction (each instruction is 4 bytes).</summary>
    public readonly ulong EndAddress;

    public readonly List<InstInfo> Instructions;
    public readonly bool EndsWithBranch;
    public readonly bool IsTruncated;
    public readonly bool IsLoopEnd;

    /// <summary>
    /// Creates a block covering the address range [<paramref name="address"/>, <paramref name="endAddress"/>).
    /// </summary>
    /// <param name="address">Guest address of the first instruction</param>
    /// <param name="endAddress">Guest address right after the last instruction</param>
    /// <param name="instructions">Instructions of the block, one per 4 bytes of the range</param>
    /// <param name="endsWithBranch">Value stored on <see cref="EndsWithBranch"/></param>
    /// <param name="isTruncated">Value stored on <see cref="IsTruncated"/></param>
    /// <param name="isLoopEnd">Value stored on <see cref="IsLoopEnd"/></param>
    public Block(ulong address, ulong endAddress, List<InstInfo> instructions, bool endsWithBranch, bool isTruncated, bool isLoopEnd)
    {
        // The address range and the instruction list must describe the same number of instructions.
        Debug.Assert((int)((endAddress - address) / 4) == instructions.Count);

        Address = address;
        EndAddress = endAddress;
        Instructions = instructions;
        EndsWithBranch = endsWithBranch;
        IsTruncated = isTruncated;
        IsLoopEnd = isLoopEnd;
    }

    /// <summary>
    /// Splits the block in two at <paramref name="address"/>.
    /// </summary>
    /// <param name="address">Guest address where the second block starts</param>
    /// <returns>
    /// The block before the split point (with all flags cleared) and the block from the split point onwards
    /// (which inherits the flags of this block). Neither has any predecessor or successor set.
    /// </returns>
    public (Block, Block) SplitAtAddress(ulong address)
    {
        int leftCount = (int)((address - Address) / 4);
        int rightCount = Instructions.Count - leftCount;

        // Technically those are valid, but we don't want to create empty blocks.
        Debug.Assert(leftCount != 0);
        Debug.Assert(rightCount != 0);

        Block left = new(
            Address,
            address,
            Instructions.GetRange(0, leftCount),
            endsWithBranch: false,
            isTruncated: false,
            isLoopEnd: false);

        Block right = new(
            address,
            EndAddress,
            Instructions.GetRange(leftCount, rightCount),
            EndsWithBranch,
            IsTruncated,
            IsLoopEnd);

        return (left, right);
    }

    /// <summary>Sets <see cref="Index"/> to <paramref name="index"/>.</summary>
    public void Number(int index) => Index = index;

    /// <summary>Adds <paramref name="block"/> as a successor, unless it already is one.</summary>
    public void AddSuccessor(Block block)
    {
        if (_successors.Contains(block))
        {
            return;
        }

        _successors.Add(block);
    }

    /// <summary>Adds <paramref name="block"/> as a predecessor, unless it already is one.</summary>
    public void AddPredecessor(Block block)
    {
        if (_predecessors.Contains(block))
        {
            return;
        }

        _predecessors.Add(block);
    }

    public IBlock GetSuccessor(int index) => _successors[index];

    public IBlock GetPredecessor(int index) => _predecessors[index];

    /// <summary>
    /// Combines the register usage of all instructions of the block.
    /// </summary>
    /// <returns>
    /// Usage where a register only counts as read if it is read before being written inside the block,
    /// and counts as written if any instruction writes it. All masks are zero for an empty block.
    /// </returns>
    public RegisterUse ComputeUseMasks()
    {
        int count = Instructions.Count;

        if (count == 0)
        {
            return new(0u, 0u, 0u, 0u, 0u, 0u);
        }

        RegisterUse combined = Instructions[0].RegisterUse;

        for (int i = 1; i < count; i++)
        {
            RegisterUse next = Instructions[i].RegisterUse;

            // Reads of registers that an earlier instruction wrote are satisfied inside the block.
            combined = new(combined.Read | (next.Read & ~combined.Write), combined.Write | next.Write);
        }

        return combined;
    }

    /// <summary>
    /// Checks if the block is not truncated and its last instruction is a call or an exception.
    /// </summary>
    public bool EndsWithContextLoad() => !IsTruncated && EndsWithContextStoreAndLoad();

    /// <summary>
    /// Checks if the last instruction of the block is a call or an exception.
    /// </summary>
    public bool EndsWithContextStore() => EndsWithContextStoreAndLoad();

    private bool EndsWithContextStoreAndLoad()
    {
        if (Instructions.Count != 0)
        {
            InstName terminator = Instructions[^1].Name;

            return terminator.IsCall() || terminator.IsException();
        }

        return false;
    }
}
}

View File

@@ -0,0 +1,20 @@
namespace Ryujinx.Cpu.LightningJit.Arm64
{
/// <summary>
/// Helpers to decode the signed, word-scaled branch offsets found on A64 instruction encodings.
/// </summary>
static class ImmUtils
{
    /// <summary>
    /// Gets the signed 14-bit immediate at encoding bits 5 to 18, multiplied by 4.
    /// </summary>
    public static int ExtractSImm14Times4(uint encoding)
    {
        return SignExtendTimes4(encoding >> 5, 14);
    }

    /// <summary>
    /// Gets the signed 19-bit immediate at encoding bits 5 to 23, multiplied by 4.
    /// </summary>
    public static int ExtractSImm19Times4(uint encoding)
    {
        return SignExtendTimes4(encoding >> 5, 19);
    }

    /// <summary>
    /// Gets the signed 26-bit immediate at encoding bits 0 to 25, multiplied by 4.
    /// </summary>
    public static int ExtractSImm26Times4(uint encoding)
    {
        return SignExtendTimes4(encoding, 26);
    }

    /// <summary>
    /// Sign extends the low <paramref name="bits"/> bits of <paramref name="field"/> and multiplies the result by 4.
    /// </summary>
    private static int SignExtendTimes4(uint field, int bits)
    {
        int unusedBits = 32 - bits;

        // Moving the field to the top of the word and back with an arithmetic shift
        // discards the unrelated upper bits and replicates the sign bit.
        int signExtended = (int)(field << unusedBits) >> unusedBits;

        return signExtended * 4;
    }
}
}

View File

@@ -0,0 +1,108 @@
using System;
namespace Ryujinx.Cpu.LightningJit.Arm64
{
/// <summary>
/// Describes which register fields an instruction encoding has, and how they are used.
/// The field positions and the read/write meaning below are the ones applied by RegisterUtils.
/// </summary>
[Flags]
enum InstFlags
{
None = 0,
// Has a register on the Rd field (encoding bits 0 to 4), which is written.
Rd = 1 << 0,
// Same as Rd, but index 31 is kept as a register index instead of being treated as the zero register.
RdSP = Rd | (1 << 1),
// Rd is also read by the instruction.
ReadRd = 1 << 2,
// Has a register on the Rt field (encoding bits 0 to 4), written unless ReadRt is also set.
Rt = 1 << 3,
// Rt is the first register of a sequence of consecutive FP/SIMD registers.
RtSeq = Rt | (1 << 4),
// Rt (and Rt2, if present) are read rather than written.
ReadRt = 1 << 5,
// Has a register on the Rt2 field (encoding bits 10 to 14).
Rt2 = 1 << 6,
// Has a register on the Rn field (encoding bits 5 to 9), which is read.
Rn = 1 << 7,
// Rn is the first register of a sequence of consecutive FP/SIMD registers.
RnSeq = Rn | (1 << 8),
// Same as Rn, but index 31 is kept as a register index instead of being treated as the zero register.
RnSP = Rn | (1 << 9),
// Has a register on the Rm field (encoding bits 16 to 20), which is read.
Rm = 1 << 10,
// Has a register on the Rs field (encoding bits 16 to 20), which is written.
Rs = 1 << 11,
// Has a register on the Ra field (encoding bits 10 to 14), which is read.
Ra = 1 << 12,
// NOTE(review): Nzcv, C, S and Qc are not consumed by the register helpers in RegisterUtils;
// presumably they describe condition/status flag usage - confirm against the decoder.
Nzcv = 1 << 13,
C = 1 << 14,
S = 1 << 15,
Qc = 1 << 16,
// Register fields refer to FP/SIMD registers, except where one of the flags below says otherwise.
FpSimd = 1 << 17,
// FP/SIMD instruction where Rn is a general purpose register.
FpSimdFromGpr = FpSimd | (1 << 18),
// FP/SIMD instruction where Rd is a general purpose register.
FpSimdToGpr = FpSimd | (1 << 19),
// Direction depends on the encoding (bit 16 set means Rn is the general purpose register).
FpSimdFromToGpr = FpSimdFromGpr | FpSimdToGpr,
// On FP/SIMD instructions, marks Rn and Rm as general purpose registers.
Memory = 1 << 20,
// Rn is written by the instruction in addition to being read.
MemWBack = 1 << 21,
// Everything below is a combination of the flags above, named after its components.
RdFpSimd = Rd | FpSimd,
RdReadRd = Rd | ReadRd,
RdReadRdRn = Rd | ReadRd | Rn,
RdReadRdRnFpSimd = Rd | ReadRd | Rn | FpSimd,
RdReadRdRnFpSimdFromGpr = Rd | ReadRd | Rn | FpSimdFromGpr,
RdReadRdRnQcFpSimd = Rd | ReadRd | Rn | Qc | FpSimd,
RdReadRdRnRmFpSimd = Rd | ReadRd | Rn | Rm | FpSimd,
RdReadRdRnRmQcFpSimd = Rd | ReadRd | Rn | Rm | Qc | FpSimd,
RdRn = Rd | Rn,
RdRnFpSimd = Rd | Rn | FpSimd,
RdRnFpSimdFromGpr = Rd | Rn | FpSimdFromGpr,
RdRnFpSimdToGpr = Rd | Rn | FpSimdToGpr,
RdRnQcFpSimd = Rd | Rn | Qc | FpSimd,
RdRnRm = Rd | Rn | Rm,
RdRnRmC = Rd | Rn | Rm | C,
RdRnRmCS = Rd | Rn | Rm | C | S,
RdRnRmFpSimd = Rd | Rn | Rm | FpSimd,
RdRnRmNzcv = Rd | Rn | Rm | Nzcv,
RdRnRmNzcvFpSimd = Rd | Rn | Rm | Nzcv | FpSimd,
RdRnRmQcFpSimd = Rd | Rn | Rm | Qc | FpSimd,
RdRnRmRa = Rd | Rn | Rm | Ra,
RdRnRmRaFpSimd = Rd | Rn | Rm | Ra | FpSimd,
RdRnRmS = Rd | Rn | Rm | S,
RdRnRsS = Rd | Rn | Rs | S,
RdRnS = Rd | Rn | S,
RdRnSeqRmFpSimd = Rd | RnSeq | Rm | FpSimd,
RdRnSFpSimd = Rd | Rn | S | FpSimd,
RdRnSFpSimdFromToGpr = Rd | Rn | S | FpSimdFromToGpr,
RdRnSP = Rd | RnSP,
RdRnSPRmS = Rd | RnSP | Rm | S,
RdRnSPS = Rd | RnSP | S,
RdSPRn = RdSP | Rn,
RdSPRnSP = RdSP | RnSP,
RdSPRnSPRm = RdSP | RnSP | Rm,
RnC = Rn | C,
RnNzcvS = Rn | Nzcv | S,
RnRm = Rn | Rm,
RnRmNzcvS = Rn | Rm | Nzcv | S,
RnRmNzcvSFpSimd = Rn | Rm | Nzcv | S | FpSimd,
RnRmSFpSimd = Rn | Rm | S | FpSimd,
RnSPRm = RnSP | Rm,
RtFpSimd = Rt | FpSimd,
RtReadRt = Rt | ReadRt,
RtReadRtRnSP = Rt | ReadRt | RnSP,
RtReadRtRnSPFpSimd = Rt | ReadRt | RnSP | FpSimd,
RtReadRtRnSPFpSimdMemWBack = Rt | ReadRt | RnSP | FpSimd | MemWBack,
RtReadRtRnSPMemWBack = Rt | ReadRt | RnSP | MemWBack,
RtReadRtRnSPRm = Rt | ReadRt | RnSP | Rm,
RtReadRtRnSPRmFpSimd = Rt | ReadRt | RnSP | Rm | FpSimd,
RtReadRtRnSPRmFpSimdMemWBack = Rt | ReadRt | RnSP | Rm | FpSimd | MemWBack,
RtReadRtRnSPRs = Rt | ReadRt | RnSP | Rs,
RtReadRtRnSPRsS = Rt | ReadRt | RnSP | Rs | S,
RtReadRtRt2RnSP = Rt | ReadRt | Rt2 | RnSP,
RtReadRtRt2RnSPFpSimd = Rt | ReadRt | Rt2 | RnSP | FpSimd,
RtReadRtRt2RnSPFpSimdMemWBack = Rt | ReadRt | Rt2 | RnSP | FpSimd | MemWBack,
RtReadRtRt2RnSPMemWBack = Rt | ReadRt | Rt2 | RnSP | MemWBack,
RtReadRtRt2RnSPRs = Rt | ReadRt | Rt2 | RnSP | Rs,
RtReadRtRt2RnSPS = Rt | ReadRt | Rt2 | RnSP | S,
RtRnSP = Rt | RnSP,
RtRnSPFpSimd = Rt | RnSP | FpSimd,
RtRnSPFpSimdMemWBack = Rt | RnSP | FpSimd | MemWBack,
RtRnSPMemWBack = Rt | RnSP | MemWBack,
RtRnSPRm = Rt | RnSP | Rm,
RtRnSPRmFpSimd = Rt | RnSP | Rm | FpSimd,
RtRnSPRmFpSimdMemWBack = Rt | RnSP | Rm | FpSimd | MemWBack,
RtRnSPRs = Rt | RnSP | Rs,
RtRt2RnSP = Rt | Rt2 | RnSP,
RtRt2RnSPFpSimd = Rt | Rt2 | RnSP | FpSimd,
RtRt2RnSPFpSimdMemWBack = Rt | Rt2 | RnSP | FpSimd | MemWBack,
RtRt2RnSPMemWBack = Rt | Rt2 | RnSP | MemWBack,
RtSeqReadRtRnSPFpSimd = RtSeq | ReadRt | RnSP | FpSimd,
RtSeqReadRtRnSPRmFpSimdMemWBack = RtSeq | ReadRt | RnSP | Rm | FpSimd | MemWBack,
RtSeqRnSPFpSimd = RtSeq | RnSP | FpSimd,
RtSeqRnSPRmFpSimdMemWBack = RtSeq | RnSP | Rm | FpSimd | MemWBack,
}
}

View File

@@ -0,0 +1,22 @@
using Ryujinx.Cpu.LightningJit.Graph;
namespace Ryujinx.Cpu.LightningJit.Arm64
{
/// <summary>
/// Immutable record with everything that is known about one decoded guest instruction.
/// </summary>
readonly struct InstInfo
{
    /// <summary>Raw 32-bit instruction encoding.</summary>
    public readonly uint Encoding;

    /// <summary>Name identifying the instruction.</summary>
    public readonly InstName Name;

    /// <summary>Flags describing the register fields of the instruction.</summary>
    public readonly InstFlags Flags;

    /// <summary>Address form of the instruction.</summary>
    public readonly AddressForm AddressForm;

    /// <summary>Registers read and written by the instruction.</summary>
    public readonly RegisterUse RegisterUse;

    /// <summary>
    /// Creates a new instruction record from its components.
    /// </summary>
    /// <param name="encoding">Raw 32-bit instruction encoding</param>
    /// <param name="name">Name identifying the instruction</param>
    /// <param name="flags">Flags describing the register fields of the instruction</param>
    /// <param name="addressForm">Address form of the instruction</param>
    /// <param name="registerUse">Registers read and written by the instruction</param>
    public InstInfo(uint encoding, InstName name, InstFlags flags, AddressForm addressForm, in RegisterUse registerUse)
    {
        this.Encoding = encoding;
        this.Name = name;
        this.Flags = flags;
        this.AddressForm = addressForm;
        this.RegisterUse = registerUse;
    }
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,64 @@
using Ryujinx.Cpu.LightningJit.Graph;
using System;
using System.Collections.Generic;
namespace Ryujinx.Cpu.LightningJit.Arm64
{
/// <summary>
/// A list of blocks, together with register usage information for them.
/// </summary>
class MultiBlock : IBlockList
{
    public readonly List<Block> Blocks;

    /// <summary>Read masks returned by <see cref="DataFlow.GetGlobalUses"/>, indexed by block index.</summary>
    public readonly RegisterMask[] ReadMasks;

    /// <summary>Write masks returned by <see cref="DataFlow.GetGlobalUses"/>, indexed by block index.</summary>
    public readonly RegisterMask[] WriteMasks;

    public readonly RegisterMask GlobalUseMask;
    public readonly bool HasHostCall;
    public readonly bool HasMemoryInstruction;

    /// <summary>Taken from the last block of the list.</summary>
    public readonly bool IsTruncated;

    public int Count => Blocks.Count;

    public IBlock this[int index] => Blocks[index];

    /// <summary>
    /// Creates a new multi-block from a non-empty list of blocks.
    /// </summary>
    /// <param name="blocks">Blocks of the function; must have at least one element</param>
    /// <param name="globalUseMask">Value stored on <see cref="GlobalUseMask"/></param>
    /// <param name="hasHostCall">Value stored on <see cref="HasHostCall"/></param>
    /// <param name="hasMemoryInstruction">Value stored on <see cref="HasMemoryInstruction"/></param>
    public MultiBlock(List<Block> blocks, RegisterMask globalUseMask, bool hasHostCall, bool hasMemoryInstruction)
    {
        Blocks = blocks;

        // The data flow analysis accesses the blocks through this object, so Blocks must be set first.
        (RegisterMask[] readMasks, RegisterMask[] writeMasks) = DataFlow.GetGlobalUses(this);

        ReadMasks = readMasks;
        WriteMasks = writeMasks;
        GlobalUseMask = globalUseMask;
        HasHostCall = hasHostCall;
        HasMemoryInstruction = hasMemoryInstruction;
        IsTruncated = blocks[^1].IsTruncated;
    }

    /// <summary>
    /// Writes the edges, the general purpose register masks and the instructions of every block to the console.
    /// </summary>
    public void PrintDebugInfo()
    {
        foreach (Block block in Blocks)
        {
            Console.WriteLine($"bb {block.Index}");

            List<int> predecessorIndices = new();
            List<int> successorIndices = new();

            for (int pIndex = 0; pIndex < block.PredecessorsCount; pIndex++)
            {
                predecessorIndices.Add(block.GetPredecessor(pIndex).Index);
            }

            for (int sIndex = 0; sIndex < block.SuccessorsCount; sIndex++)
            {
                successorIndices.Add(block.GetSuccessor(sIndex).Index);
            }

            Console.WriteLine($" predecessors: {string.Join(' ', predecessorIndices)}");
            Console.WriteLine($" successors: {string.Join(' ', successorIndices)}");

            // The second value is the read mask of the block alone, for comparison with the global one.
            Console.WriteLine($" gpr read mask: 0x{ReadMasks[block.Index].GprMask:X} 0x{block.ComputeUseMasks().Read.GprMask:X}");
            Console.WriteLine($" gpr write mask: 0x{WriteMasks[block.Index].GprMask:X}");

            for (int index = 0; index < block.Instructions.Count; index++)
            {
                InstInfo instruction = block.Instructions[index];

                Console.WriteLine($" {index} 0x{instruction.Encoding:X8} {instruction.Name}");
            }
        }
    }
}
}

View File

@@ -0,0 +1,154 @@
using Ryujinx.Cpu.LightningJit.CodeGen.Arm64;
using System;
using System.Diagnostics;
using System.Numerics;
namespace Ryujinx.Cpu.LightningJit.Arm64
{
/// <summary>
/// Tracks which host general purpose registers are in use by a function, and hands out
/// the free ones for fixed pointers, reserved register replacements and temporaries.
/// </summary>
class RegisterAllocator
{
    public const int MaxTemps = 1;
    public const int MaxTempsInclFixed = MaxTemps + 2;

    private uint _gprMask;
    private readonly uint _fpSimdMask;
    private readonly uint _pStateMask;
    private uint _tempGprsMask;
    private readonly int[] _registerMap;

    /// <summary>Register allocated at construction for the context pointer.</summary>
    public int FixedContextRegister { get; }

    /// <summary>Register allocated at construction for the page table pointer.</summary>
    public int FixedPageTableRegister { get; }

    /// <summary>
    /// In-use general purpose registers without the reserved ones, plus every register
    /// that was handed out by the allocator during construction.
    /// </summary>
    public uint AllGprMask => (_gprMask & ~RegisterUtils.ReservedRegsMask) | _tempGprsMask;
    public uint AllFpSimdMask => _fpSimdMask;
    public uint AllPStateMask => _pStateMask;

    /// <summary>
    /// Creates a new allocator for a function.
    /// </summary>
    /// <param name="gprMask">Mask of the general purpose registers used by the function</param>
    /// <param name="fpSimdMask">Mask of the FP/SIMD registers used by the function</param>
    /// <param name="pStateMask">Mask of the PSTATE flags used by the function</param>
    /// <param name="hasHostCall">True to prefer callee saved registers for the fixed registers</param>
    /// <exception cref="InvalidOperationException">There are not enough free registers</exception>
    public RegisterAllocator(uint gprMask, uint fpSimdMask, uint pStateMask, bool hasHostCall)
    {
        _gprMask = gprMask;
        _fpSimdMask = fpSimdMask;
        _pStateMask = pStateMask;

        // If the function has calls, we can avoid the need to spill those registers across
        // calls by putting them on callee saved registers.
        FixedContextRegister = hasHostCall
            ? AllocateAndMarkTempGprRegisterWithPreferencing()
            : AllocateAndMarkTempGprRegister();

        FixedPageTableRegister = hasHostCall
            ? AllocateAndMarkTempGprRegisterWithPreferencing()
            : AllocateAndMarkTempGprRegister();

        _tempGprsMask = (1u << FixedContextRegister) | (1u << FixedPageTableRegister);

        // Every register maps to itself, until a reserved one gets a replacement below.
        _registerMap = new int[32];

        for (int register = 0; register < _registerMap.Length; register++)
        {
            _registerMap[register] = register;
        }

        RemapUsedReservedRegisters();

        // Allocate all temporaries at once so that they are distinct and get marked,
        // then release them again so that they can be allocated on demand later.
        Span<int> temps = stackalloc int[MaxTemps];

        for (int tIndex = 0; tIndex < temps.Length; tIndex++)
        {
            temps[tIndex] = AllocateAndMarkTempGprRegister();
        }

        for (int tIndex = 0; tIndex < temps.Length; tIndex++)
        {
            FreeTempGprRegister(temps[tIndex]);
        }
    }

    /// <summary>
    /// Gives a replacement register to each reserved register that the function uses.
    /// </summary>
    private void RemapUsedReservedRegisters()
    {
        // The expression "pending & (pending - 1)" clears the lowest set bit.
        for (uint pending = _gprMask & RegisterUtils.ReservedRegsMask; pending != 0; pending &= pending - 1)
        {
            int reservedIndex = BitOperations.TrailingZeroCount(pending);

            _registerMap[reservedIndex] = AllocateAndMarkTempGprRegister();
        }
    }

    /// <summary>
    /// Gets the register that replaces the register <paramref name="index"/>.
    /// </summary>
    /// <param name="index">Register index, between 0 and 31</param>
    /// <returns>The replacement, or <paramref name="index"/> itself if the register has none</returns>
    public int RemapReservedGprRegister(int index) => _registerMap[index];

    private int AllocateAndMarkTempGprRegister()
    {
        int allocated = AllocateTempGprRegister();

        _tempGprsMask |= 1u << allocated;

        return allocated;
    }

    private int AllocateAndMarkTempGprRegisterWithPreferencing()
    {
        int allocated = AllocateTempRegisterWithPreferencing();

        _tempGprsMask |= 1u << allocated;

        return allocated;
    }

    /// <summary>
    /// Allocates the lowest free general purpose register that is not reserved.
    /// </summary>
    /// <returns>Index of the allocated register</returns>
    /// <exception cref="InvalidOperationException">There is no free register</exception>
    public int AllocateTempGprRegister() => AllocateTempRegister(ref _gprMask);

    /// <summary>
    /// Marks the general purpose register <paramref name="index"/> as free.
    /// </summary>
    public void FreeTempGprRegister(int index) => FreeTempRegister(ref _gprMask, index);

    private int AllocateTempRegisterWithPreferencing()
    {
        uint freeCalleeSaved = ~_gprMask & AbiConstants.GprCalleeSavedRegsMask;
        int candidate = BitOperations.TrailingZeroCount(freeCalleeSaved);

        // Only the first free callee saved register is considered. If there is none,
        // or if it is reserved, any free register is used instead.
        bool candidateIsUsable = candidate < 32 && ((1u << candidate) & RegisterUtils.ReservedRegsMask) == 0;

        if (!candidateIsUsable)
        {
            return AllocateTempRegister(ref _gprMask);
        }

        _gprMask |= 1u << candidate;

        return candidate;
    }

    private static int AllocateTempRegister(ref uint mask)
    {
        uint available = ~(mask | RegisterUtils.ReservedRegsMask);
        int index = BitOperations.TrailingZeroCount(available);

        // A count equal to the bit width means that no bit was set.
        if (index == 32)
        {
            throw new InvalidOperationException("No free registers.");
        }

        mask |= 1u << index;

        return index;
    }

    private static void FreeTempRegister(ref uint mask, int index)
    {
        mask &= ~(1u << index);
    }
}
}

View File

@@ -0,0 +1,495 @@
using System.Diagnostics;
namespace Ryujinx.Cpu.LightningJit.Arm64
{
static class RegisterUtils
{
private const int RdRtBit = 0;
private const int RnBit = 5;
private const int RmRsBit = 16;
private const int RaRt2Bit = 10;
// Some of these registers have specific roles and can't be used as general purpose registers.
// X18 - Reserved for platform specific usage.
// X29 - Frame pointer.
// X30 - Return address.
// X31 - Not an actual register, in some cases maps to SP, and in others to ZR.
public const uint ReservedRegsMask = (1u << 18) | (1u << 29) | (1u << 30) | (1u << 31);
public const int LrIndex = 30;
public const int SpIndex = 31;
public const int ZrIndex = 31;
public const int SpecialZrIndex = 32;
/// <summary>
/// Replaces the general purpose registers of an instruction with the ones chosen by the register allocator.
/// </summary>
/// <param name="regAlloc">Register allocator that provides the replacement registers</param>
/// <param name="flags">Flags describing the register fields of the instruction</param>
/// <param name="encoding">Instruction encoding</param>
/// <returns>The encoding with the general purpose register fields replaced</returns>
public static uint RemapRegisters(RegisterAllocator regAlloc, InstFlags flags, uint encoding)
{
    bool isFpSimd = flags.HasFlag(InstFlags.FpSimd);
    bool isMemory = flags.HasFlag(InstFlags.Memory);

    // On FP/SIMD instructions, a field is only replaced when it holds a general purpose register.
    bool rdIsGpr = !isFpSimd || IsFpToGpr(flags, encoding);

    if (flags.HasFlag(InstFlags.Rd) && rdIsGpr)
    {
        encoding = ReplaceGprRegister(regAlloc, encoding, RdRtBit, flags.HasFlag(InstFlags.RdSP));
    }

    bool rnIsGpr = !isFpSimd || IsFpFromGpr(flags, encoding) || isMemory;

    if (flags.HasFlag(InstFlags.Rn) && rnIsGpr)
    {
        encoding = ReplaceGprRegister(regAlloc, encoding, RnBit, flags.HasFlag(InstFlags.RnSP));
    }

    if (isFpSimd)
    {
        // The offset register of a FP/SIMD memory access is the only other field that can be a GPR.
        if (flags.HasFlag(InstFlags.Rm) && isMemory)
        {
            encoding = ReplaceGprRegister(regAlloc, encoding, RmRsBit);
        }

        return encoding;
    }

    if (flags.HasFlag(InstFlags.Rm) || flags.HasFlag(InstFlags.Rs))
    {
        encoding = ReplaceGprRegister(regAlloc, encoding, RmRsBit);
    }

    if (flags.HasFlag(InstFlags.Ra) || flags.HasFlag(InstFlags.Rt2))
    {
        encoding = ReplaceGprRegister(regAlloc, encoding, RaRt2Bit);
    }

    if (flags.HasFlag(InstFlags.Rt))
    {
        encoding = ReplaceGprRegister(regAlloc, encoding, RdRtBit);
    }

    return encoding;
}
/// <summary>
/// Sets the Rt field (encoding bits 0 to 4) to <paramref name="newIndex"/>.
/// </summary>
/// <param name="encoding">Instruction encoding</param>
/// <param name="newIndex">New register index, between 0 and 31</param>
/// <returns>The encoding with the new register</returns>
public static uint ReplaceRt(uint encoding, int newIndex) => ReplaceRegister(encoding, newIndex, RdRtBit);
/// <summary>
/// Sets the Rn field (encoding bits 5 to 9) to <paramref name="newIndex"/>.
/// </summary>
/// <param name="encoding">Instruction encoding</param>
/// <param name="newIndex">New register index, between 0 and 31</param>
/// <returns>The encoding with the new register</returns>
public static uint ReplaceRn(uint encoding, int newIndex) => ReplaceRegister(encoding, newIndex, RnBit);
/// <summary>
/// Replaces the 5-bit register field that starts at bit <paramref name="bit"/>.
/// </summary>
/// <param name="encoding">Instruction encoding</param>
/// <param name="newIndex">New register index, between 0 and 31</param>
/// <param name="bit">Position of the least significant bit of the field</param>
/// <returns>The encoding with the new register</returns>
private static uint ReplaceRegister(uint encoding, int newIndex, int bit)
{
    uint fieldMask = 0x1fu << bit;

    return (encoding & ~fieldMask) | ((uint)newIndex << bit);
}
/// <summary>
/// Replaces the general purpose register on a field with the one chosen by the register allocator.
/// </summary>
/// <param name="regAlloc">Register allocator that provides the replacement register</param>
/// <param name="encoding">Instruction encoding</param>
/// <param name="bit">Position of the least significant bit of the register field</param>
/// <param name="hasSP">True if index 31 is a register for this field, false if it is the zero register</param>
/// <returns>The encoding with the new register</returns>
private static uint ReplaceGprRegister(RegisterAllocator regAlloc, uint encoding, int bit, bool hasSP = false)
{
    int currentIndex = (int)(encoding >> bit) & 0x1f;

    // The zero register is not a real register, so there is nothing to replace.
    bool isZeroRegister = currentIndex == ZrIndex && !hasSP;

    if (isZeroRegister)
    {
        return encoding;
    }

    return ReplaceRegister(encoding, regAlloc.RemapReservedGprRegister(currentIndex), bit);
}
/// <summary>
/// Computes which registers are read by an instruction.
/// </summary>
/// <remarks>
/// FP/SIMD operands are taken directly from the register field, because index 31 is
/// the regular register V31 there (only general purpose fields can encode a zero register).
/// Lists of consecutive FP/SIMD registers wrap around from V31 to V0.
/// </remarks>
/// <param name="name">Instruction name, used to find the length of a Rt register list</param>
/// <param name="flags">Flags describing the register fields of the instruction</param>
/// <param name="encoding">Instruction encoding</param>
/// <returns>Mask of the general purpose registers read, and mask of the FP/SIMD registers read</returns>
public static (uint, uint) PopulateReadMasks(InstName name, InstFlags flags, uint encoding)
{
    uint gprMask = 0;
    uint fpSimdMask = 0;

    if (flags.HasFlag(InstFlags.FpSimd))
    {
        if (flags.HasFlag(InstFlags.Rd) && flags.HasFlag(InstFlags.ReadRd))
        {
            if (IsFpToGpr(flags, encoding))
            {
                gprMask |= MaskFromIndex(ExtractRd(flags, encoding));
            }
            else
            {
                fpSimdMask |= FpSimdSequenceMask(encoding, RdRtBit, 1);
            }
        }

        if (flags.HasFlag(InstFlags.Rn))
        {
            if (flags.HasFlag(InstFlags.RnSeq))
            {
                fpSimdMask |= FpSimdSequenceMask(encoding, RnBit, GetRnSequenceCount(encoding));
            }
            else if (IsFpFromGpr(flags, encoding) || flags.HasFlag(InstFlags.Memory))
            {
                gprMask |= MaskFromIndex(ExtractRn(flags, encoding));
            }
            else
            {
                fpSimdMask |= FpSimdSequenceMask(encoding, RnBit, 1);
            }
        }

        if (flags.HasFlag(InstFlags.Rm))
        {
            if (flags.HasFlag(InstFlags.Memory))
            {
                gprMask |= MaskFromIndex(ExtractRm(flags, encoding));
            }
            else
            {
                fpSimdMask |= FpSimdSequenceMask(encoding, RmRsBit, 1);
            }
        }

        if (flags.HasFlag(InstFlags.Ra))
        {
            fpSimdMask |= FpSimdSequenceMask(encoding, RaRt2Bit, 1);
        }

        if (flags.HasFlag(InstFlags.ReadRt))
        {
            if (flags.HasFlag(InstFlags.Rt))
            {
                int count = flags.HasFlag(InstFlags.RtSeq) ? GetRtSequenceCount(name, encoding) : 1;

                fpSimdMask |= FpSimdSequenceMask(encoding, RdRtBit, count);
            }

            if (flags.HasFlag(InstFlags.Rt2))
            {
                fpSimdMask |= FpSimdSequenceMask(encoding, RaRt2Bit, 1);
            }
        }
    }
    else
    {
        if (flags.HasFlag(InstFlags.Rd) && flags.HasFlag(InstFlags.ReadRd))
        {
            gprMask |= MaskFromIndex(ExtractRd(flags, encoding));
        }

        if (flags.HasFlag(InstFlags.Rn))
        {
            gprMask |= MaskFromIndex(ExtractRn(flags, encoding));
        }

        if (flags.HasFlag(InstFlags.Rm))
        {
            gprMask |= MaskFromIndex(ExtractRm(flags, encoding));
        }

        if (flags.HasFlag(InstFlags.Ra))
        {
            gprMask |= MaskFromIndex(ExtractRa(flags, encoding));
        }

        if (flags.HasFlag(InstFlags.ReadRt))
        {
            if (flags.HasFlag(InstFlags.Rt))
            {
                gprMask |= MaskFromIndex(ExtractRt(flags, encoding));
            }

            if (flags.HasFlag(InstFlags.Rt2))
            {
                gprMask |= MaskFromIndex(ExtractRt2(flags, encoding));
            }
        }
    }

    return (gprMask, fpSimdMask);

    // Builds the mask of "count" consecutive FP/SIMD registers, starting at the register on the
    // 5-bit field at "bit". The rotation makes the register after V31 be V0.
    static uint FpSimdSequenceMask(uint encoding, int bit, int count)
    {
        uint current = 1u << (int)((encoding >> bit) & 0x1f);
        uint result = 0;

        for (int index = 0; index < count; index++)
        {
            result |= current;
            current = (current << 1) | (current >> 31);
        }

        return result;
    }
}
/// <summary>
/// Computes which registers are written by an instruction.
/// </summary>
/// <remarks>
/// FP/SIMD operands are taken directly from the register field, because index 31 is
/// the regular register V31 there (only general purpose fields can encode a zero register).
/// Lists of consecutive FP/SIMD registers wrap around from V31 to V0.
/// </remarks>
/// <param name="name">Instruction name, used to find the length of a Rt register list</param>
/// <param name="flags">Flags describing the register fields of the instruction</param>
/// <param name="encoding">Instruction encoding</param>
/// <returns>Mask of the general purpose registers written, and mask of the FP/SIMD registers written</returns>
public static (uint, uint) PopulateWriteMasks(InstName name, InstFlags flags, uint encoding)
{
    uint gprMask = 0;
    uint fpSimdMask = 0;

    // With write back, the base register receives the updated address.
    if (flags.HasFlag(InstFlags.MemWBack))
    {
        gprMask |= MaskFromIndex(ExtractRn(flags, encoding));
    }

    if (flags.HasFlag(InstFlags.FpSimd))
    {
        if (flags.HasFlag(InstFlags.Rd))
        {
            if (IsFpToGpr(flags, encoding))
            {
                gprMask |= MaskFromIndex(ExtractRd(flags, encoding));
            }
            else
            {
                fpSimdMask |= FpSimdSequenceMask(encoding, RdRtBit, 1);
            }
        }

        // Rt and Rt2 are destinations only when the instruction does not read them.
        if (!flags.HasFlag(InstFlags.ReadRt))
        {
            if (flags.HasFlag(InstFlags.Rt))
            {
                int count = flags.HasFlag(InstFlags.RtSeq) ? GetRtSequenceCount(name, encoding) : 1;

                fpSimdMask |= FpSimdSequenceMask(encoding, RdRtBit, count);
            }

            if (flags.HasFlag(InstFlags.Rt2))
            {
                fpSimdMask |= FpSimdSequenceMask(encoding, RaRt2Bit, 1);
            }
        }
    }
    else
    {
        if (flags.HasFlag(InstFlags.Rd))
        {
            gprMask |= MaskFromIndex(ExtractRd(flags, encoding));
        }

        if (!flags.HasFlag(InstFlags.ReadRt))
        {
            if (flags.HasFlag(InstFlags.Rt))
            {
                gprMask |= MaskFromIndex(ExtractRt(flags, encoding));
            }

            if (flags.HasFlag(InstFlags.Rt2))
            {
                gprMask |= MaskFromIndex(ExtractRt2(flags, encoding));
            }
        }

        if (flags.HasFlag(InstFlags.Rs))
        {
            gprMask |= MaskFromIndex(ExtractRs(flags, encoding));
        }
    }

    return (gprMask, fpSimdMask);

    // Builds the mask of "count" consecutive FP/SIMD registers, starting at the register on the
    // 5-bit field at "bit". The rotation makes the register after V31 be V0.
    static uint FpSimdSequenceMask(uint encoding, int bit, int count)
    {
        uint current = 1u << (int)((encoding >> bit) & 0x1f);
        uint result = 0;

        for (int index = 0; index < count; index++)
        {
            result |= current;
            current = (current << 1) | (current >> 31);
        }

        return result;
    }
}
/// <summary>
/// Converts a register index into a single bit mask.
/// </summary>
/// <param name="index">Register index, or <see cref="SpecialZrIndex"/> for the zero register</param>
/// <returns>Mask with the bit of the register set, or zero for the zero register</returns>
private static uint MaskFromIndex(int index)
{
    // The zero register has no bit on the masks, as it holds no state.
    return index < SpecialZrIndex ? 1u << index : 0u;
}
/// <summary>
/// Checks if the Rn field of a FP/SIMD instruction holds a general purpose register.
/// </summary>
/// <param name="flags">Flags describing the register fields of the instruction</param>
/// <param name="encoding">Instruction encoding</param>
/// <returns>True if Rn is a general purpose register</returns>
private static bool IsFpFromGpr(InstFlags flags, uint encoding)
{
    const InstFlags BothDirections = InstFlags.FpSimdFromGpr | InstFlags.FpSimdToGpr;

    if ((flags & BothDirections) != BothDirections)
    {
        return flags.HasFlag(InstFlags.FpSimdFromGpr);
    }

    // FMOV (general): the direction is part of the encoding, bit 16 is set when moving from a GPR.
    return (encoding & (1u << 16)) != 0;
}
/// <summary>
/// Checks if the Rd field of a FP/SIMD instruction holds a general purpose register.
/// </summary>
/// <param name="flags">Flags describing the register fields of the instruction</param>
/// <param name="encoding">Instruction encoding</param>
/// <returns>True if Rd is a general purpose register</returns>
private static bool IsFpToGpr(InstFlags flags, uint encoding)
{
    const InstFlags BothDirections = InstFlags.FpSimdFromGpr | InstFlags.FpSimdToGpr;

    if ((flags & BothDirections) != BothDirections)
    {
        return flags.HasFlag(InstFlags.FpSimdToGpr);
    }

    // FMOV (general): the direction is part of the encoding, bit 16 is clear when moving to a GPR.
    return (encoding & (1u << 16)) == 0;
}
/// <summary>
/// Gets the number of consecutive registers, starting at Rt, accessed by a SIMD structure load or store.
/// </summary>
/// <param name="name">Instruction name</param>
/// <param name="encoding">Instruction encoding</param>
/// <returns>Number of registers on the list, 1 for instructions that are not listed</returns>
private static int GetRtSequenceCount(InstName name, uint encoding)
{
    return name switch
    {
        // For those encodings, the register count depends on the value of bits 12 to 15.
        InstName.Ld1AdvsimdMultAsNoPostIndex or
        InstName.Ld1AdvsimdMultAsPostIndex or
        InstName.St1AdvsimdMultAsNoPostIndex or
        InstName.St1AdvsimdMultAsPostIndex => ((encoding >> 12) & 0xf) switch
        {
            0b0000 or 0b0010 => 4,
            0b0100 or 0b0110 => 3,
            0b1000 or 0b1010 => 2,
            _ => 1,
        },

        InstName.Ld1rAdvsimdAsNoPostIndex or
        InstName.Ld1rAdvsimdAsPostIndex or
        InstName.Ld1AdvsimdSnglAsNoPostIndex or
        InstName.Ld1AdvsimdSnglAsPostIndex or
        InstName.St1AdvsimdSnglAsNoPostIndex or
        InstName.St1AdvsimdSnglAsPostIndex => 1,

        InstName.Ld2rAdvsimdAsNoPostIndex or
        InstName.Ld2rAdvsimdAsPostIndex or
        InstName.Ld2AdvsimdMultAsNoPostIndex or
        InstName.Ld2AdvsimdMultAsPostIndex or
        InstName.Ld2AdvsimdSnglAsNoPostIndex or
        InstName.Ld2AdvsimdSnglAsPostIndex or
        InstName.St2AdvsimdMultAsNoPostIndex or
        InstName.St2AdvsimdMultAsPostIndex or
        InstName.St2AdvsimdSnglAsNoPostIndex or
        InstName.St2AdvsimdSnglAsPostIndex => 2,

        InstName.Ld3rAdvsimdAsNoPostIndex or
        InstName.Ld3rAdvsimdAsPostIndex or
        InstName.Ld3AdvsimdMultAsNoPostIndex or
        InstName.Ld3AdvsimdMultAsPostIndex or
        InstName.Ld3AdvsimdSnglAsNoPostIndex or
        InstName.Ld3AdvsimdSnglAsPostIndex or
        InstName.St3AdvsimdMultAsNoPostIndex or
        InstName.St3AdvsimdMultAsPostIndex or
        InstName.St3AdvsimdSnglAsNoPostIndex or
        InstName.St3AdvsimdSnglAsPostIndex => 3,

        InstName.Ld4rAdvsimdAsNoPostIndex or
        InstName.Ld4rAdvsimdAsPostIndex or
        InstName.Ld4AdvsimdMultAsNoPostIndex or
        InstName.Ld4AdvsimdMultAsPostIndex or
        InstName.Ld4AdvsimdSnglAsNoPostIndex or
        InstName.Ld4AdvsimdSnglAsPostIndex or
        InstName.St4AdvsimdMultAsNoPostIndex or
        InstName.St4AdvsimdMultAsPostIndex or
        InstName.St4AdvsimdSnglAsNoPostIndex or
        InstName.St4AdvsimdSnglAsPostIndex => 4,

        _ => 1,
    };
}
/// <summary>
/// Gets the number of consecutive registers, starting at Rn, used by an instruction with a Rn register list.
/// </summary>
/// <param name="encoding">Instruction encoding</param>
/// <returns>Value of the 2-bit field at encoding bits 13 and 14 plus one, so between 1 and 4</returns>
private static int GetRnSequenceCount(uint encoding)
{
    uint lengthField = (encoding >> 13) & 3u;

    return (int)lengthField + 1;
}
/// <summary>
/// Gets the register on the Rd field (encoding bits 0 to 4).
/// </summary>
/// <param name="flags">Flags of the instruction, must include <see cref="InstFlags.Rd"/></param>
/// <param name="encoding">Instruction encoding</param>
/// <returns>
/// The register index, or <see cref="SpecialZrIndex"/> if the index is 31
/// and the flags do not include <see cref="InstFlags.RdSP"/>
/// </returns>
public static int ExtractRd(InstFlags flags, uint encoding)
{
    Debug.Assert(flags.HasFlag(InstFlags.Rd));

    int rd = (int)(encoding >> RdRtBit) & 0x1f;
    bool isZeroRegister = rd == ZrIndex && !flags.HasFlag(InstFlags.RdSP);

    return isZeroRegister ? SpecialZrIndex : rd;
}
/// <summary>
/// Gets the raw value of the Rn field (encoding bits 5 to 9).
/// </summary>
/// <param name="encoding">Instruction encoding</param>
/// <returns>The register index, between 0 and 31</returns>
public static int ExtractRn(uint encoding) => (int)(encoding >> RnBit) & 0x1f;

/// <summary>
/// Gets the register on the Rn field (encoding bits 5 to 9).
/// </summary>
/// <param name="flags">Flags of the instruction, must include <see cref="InstFlags.Rn"/></param>
/// <param name="encoding">Instruction encoding</param>
/// <returns>
/// The register index, or <see cref="SpecialZrIndex"/> if the index is 31
/// and the flags do not include <see cref="InstFlags.RnSP"/>
/// </returns>
public static int ExtractRn(InstFlags flags, uint encoding)
{
    Debug.Assert(flags.HasFlag(InstFlags.Rn));

    int rn = ExtractRn(encoding);
    bool isZeroRegister = rn == ZrIndex && !flags.HasFlag(InstFlags.RnSP);

    return isZeroRegister ? SpecialZrIndex : rn;
}
/// <summary>
/// Gets the raw value of the Rm field (encoding bits 16 to 20).
/// </summary>
/// <param name="encoding">Instruction encoding</param>
/// <returns>The register index, between 0 and 31</returns>
public static int ExtractRm(uint encoding) => (int)(encoding >> RmRsBit) & 0x1f;

/// <summary>
/// Gets the register on the Rm field (encoding bits 16 to 20).
/// </summary>
/// <param name="flags">Flags of the instruction, must include <see cref="InstFlags.Rm"/></param>
/// <param name="encoding">Instruction encoding</param>
/// <returns>The register index, or <see cref="SpecialZrIndex"/> if the index is 31</returns>
public static int ExtractRm(InstFlags flags, uint encoding)
{
    Debug.Assert(flags.HasFlag(InstFlags.Rm));

    int rm = ExtractRm(encoding);

    if (rm == ZrIndex)
    {
        return SpecialZrIndex;
    }

    return rm;
}
/// <summary>
/// Gets the raw value of the Rs field (encoding bits 16 to 20).
/// </summary>
/// <param name="encoding">Instruction encoding</param>
/// <returns>The register index, between 0 and 31</returns>
public static int ExtractRs(uint encoding) => (int)(encoding >> RmRsBit) & 0x1f;

/// <summary>
/// Gets the register on the Rs field (encoding bits 16 to 20).
/// </summary>
/// <param name="flags">Flags of the instruction, must include <see cref="InstFlags.Rs"/></param>
/// <param name="encoding">Instruction encoding</param>
/// <returns>The register index, or <see cref="SpecialZrIndex"/> if the index is 31</returns>
public static int ExtractRs(InstFlags flags, uint encoding)
{
    Debug.Assert(flags.HasFlag(InstFlags.Rs));

    int rs = ExtractRs(encoding);

    if (rs == ZrIndex)
    {
        return SpecialZrIndex;
    }

    return rs;
}
/// <summary>
/// Gets the register on the Ra field (encoding bits 10 to 14).
/// </summary>
/// <param name="flags">Flags of the instruction, must include <see cref="InstFlags.Ra"/></param>
/// <param name="encoding">Instruction encoding</param>
/// <returns>The register index, or <see cref="SpecialZrIndex"/> if the index is 31</returns>
public static int ExtractRa(InstFlags flags, uint encoding)
{
    Debug.Assert(flags.HasFlag(InstFlags.Ra));

    int ra = (int)(encoding >> RaRt2Bit) & 0x1f;

    if (ra == ZrIndex)
    {
        return SpecialZrIndex;
    }

    return ra;
}
/// <summary>
/// Gets the raw value of the Rt field (encoding bits 0 to 4).
/// </summary>
/// <param name="encoding">Instruction encoding</param>
/// <returns>The register index, between 0 and 31</returns>
public static int ExtractRt(uint encoding) => (int)(encoding >> RdRtBit) & 0x1f;

/// <summary>
/// Gets the register on the Rt field (encoding bits 0 to 4).
/// </summary>
/// <param name="flags">Flags of the instruction, must include <see cref="InstFlags.Rt"/></param>
/// <param name="encoding">Instruction encoding</param>
/// <returns>The register index, or <see cref="SpecialZrIndex"/> if the index is 31</returns>
public static int ExtractRt(InstFlags flags, uint encoding)
{
    Debug.Assert(flags.HasFlag(InstFlags.Rt));

    int rt = ExtractRt(encoding);

    if (rt == ZrIndex)
    {
        return SpecialZrIndex;
    }

    return rt;
}
/// <summary>
/// Gets the register on the Rt2 field (encoding bits 10 to 14).
/// </summary>
/// <param name="flags">Flags of the instruction, must include <see cref="InstFlags.Rt2"/></param>
/// <param name="encoding">Instruction encoding</param>
/// <returns>The register index, or <see cref="SpecialZrIndex"/> if the index is 31</returns>
public static int ExtractRt2(InstFlags flags, uint encoding)
{
    Debug.Assert(flags.HasFlag(InstFlags.Rt2));

    int rt2 = (int)(encoding >> RaRt2Bit) & 0x1f;

    if (rt2 == ZrIndex)
    {
        return SpecialZrIndex;
    }

    return rt2;
}
}
}

View File

@@ -0,0 +1,743 @@
using ARMeilleure.Common;
using ARMeilleure.Memory;
using Ryujinx.Cpu.LightningJit.CodeGen;
using Ryujinx.Cpu.LightningJit.CodeGen.Arm64;
using Ryujinx.Cpu.LightningJit.Graph;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Numerics;
namespace Ryujinx.Cpu.LightningJit.Arm64.Target.Arm64
{
static class Compiler
{
private const int Encodable26BitsOffsetLimit = 0x2000000;
/// <summary>
/// State shared by the code emission helpers while one function is being compiled, along with
/// the helpers that move guest registers between host registers and the context in memory.
/// </summary>
private readonly struct Context
{
    public readonly CodeWriter Writer;
    public readonly RegisterAllocator RegisterAllocator;
    public readonly TailMerger TailMerger;
    public readonly AddressTable<ulong> FuncTable;
    public readonly IntPtr DispatchStubPointer;

    private readonly MultiBlock _multiBlock;
    private readonly RegisterSaveRestore _registerSaveRestore;
    private readonly IntPtr _pageTablePointer;

    public Context(
        CodeWriter writer,
        RegisterAllocator registerAllocator,
        TailMerger tailMerger,
        RegisterSaveRestore registerSaveRestore,
        MultiBlock multiBlock,
        AddressTable<ulong> funcTable,
        IntPtr dispatchStubPointer,
        IntPtr pageTablePointer)
    {
        Writer = writer;
        RegisterAllocator = registerAllocator;
        TailMerger = tailMerger;
        FuncTable = funcTable;
        DispatchStubPointer = dispatchStubPointer;
        _multiBlock = multiBlock;
        _registerSaveRestore = registerSaveRestore;
        _pageTablePointer = pageTablePointer;
    }

    /// <summary>Gets the host register that holds the guest link register (X30).</summary>
    public readonly int GetLrRegisterIndex() => RemapGprRegister(RegisterUtils.LrIndex);

    /// <summary>Gets the host register that holds the guest register <paramref name="index"/>.</summary>
    public readonly int RemapGprRegister(int index) => RegisterAllocator.RemapReservedGprRegister(index);

    public readonly int GetReservedStackOffset() => _registerSaveRestore.GetReservedStackOffset();

    /// <summary>
    /// Emits the function prologue, the set up of the fixed registers,
    /// and the loads of the guest registers read by the entry block.
    /// </summary>
    public readonly void WritePrologue()
    {
        Assembler asm = new(Writer);

        _registerSaveRestore.WritePrologue(ref asm);

        // If needed, set up the fixed registers with the pointers we will use.
        // The context pointer is passed as first argument, so it is already in place
        // when the fixed context register happens to be register 0.
        int contextRegister = RegisterAllocator.FixedContextRegister;

        if (contextRegister != 0)
        {
            asm.Mov(Register(contextRegister), Register(0));
        }

        // The page table or address space base is at a fixed memory location and considered constant.
        if (_multiBlock.HasMemoryInstruction)
        {
            asm.Mov(Register(RegisterAllocator.FixedPageTableRegister), (ulong)_pageTablePointer);
        }

        // This assumes that the block with the index 0 is always the entry block.
        LoadFromContext(ref asm, _multiBlock.ReadMasks[0]);
    }

    /// <summary>
    /// Emits the function epilogue, without storing any guest register to the context.
    /// </summary>
    public readonly void WriteEpilogueWithoutContext()
    {
        Assembler asm = new(Writer);

        _registerSaveRestore.WriteEpilogue(ref asm);
    }

    /// <summary>
    /// Emits loads for the guest registers read by any of the successors of a block.
    /// Nothing is emitted if the block has no successors.
    /// </summary>
    /// <param name="blockIndex">Index of the block that was just emitted</param>
    public void LoadFromContextAfterCall(int blockIndex)
    {
        Block block = _multiBlock.Blocks[blockIndex];
        int successorsCount = block.SuccessorsCount;

        if (successorsCount == 0)
        {
            return;
        }

        Assembler asm = new(Writer);

        // Execution can continue on any successor, so load the union of what they read.
        RegisterMask combinedReads = _multiBlock.ReadMasks[block.GetSuccessor(0).Index];

        for (int sIndex = 1; sIndex < successorsCount; sIndex++)
        {
            combinedReads |= _multiBlock.ReadMasks[block.GetSuccessor(sIndex).Index];
        }

        LoadFromContext(ref asm, combinedReads);
    }

    private void LoadFromContext(ref Assembler asm, RegisterMask readMask)
    {
        LoadGprFromContext(ref asm, readMask.GprMask, NativeContextOffsets.GprBaseOffset);
        LoadFpSimdFromContext(ref asm, readMask.FpSimdMask, NativeContextOffsets.FpSimdBaseOffset);
        LoadPStateFromContext(ref asm, readMask.PStateMask, NativeContextOffsets.FlagsBaseOffset);
    }

    /// <summary>
    /// Emits stores for the guest registers on the write mask of a block.
    /// </summary>
    /// <param name="blockIndex">Index of the block being emitted</param>
    /// <param name="newLrValue">If set, value stored as the guest link register instead of the host register value</param>
    public void StoreToContextBeforeCall(int blockIndex, ulong? newLrValue = null)
    {
        Assembler asm = new(Writer);

        StoreToContext(ref asm, _multiBlock.WriteMasks[blockIndex], newLrValue);
    }

    private void StoreToContext(ref Assembler asm, RegisterMask writeMask, ulong? newLrValue)
    {
        StoreGprToContext(ref asm, writeMask.GprMask, NativeContextOffsets.GprBaseOffset, newLrValue);
        StoreFpSimdToContext(ref asm, writeMask.FpSimdMask, NativeContextOffsets.FpSimdBaseOffset);
        StorePStateToContext(ref asm, writeMask.PStateMask, NativeContextOffsets.FlagsBaseOffset);
    }

    private void LoadGprFromContext(ref Assembler asm, uint mask, int baseOffset)
    {
        Operand contextPtr = Register(RegisterAllocator.FixedContextRegister);

        while (mask != 0)
        {
            int reg = BitOperations.TrailingZeroCount(mask);
            int offset = baseOffset + reg * 8;

            // Two adjacent registers can be loaded at once, as long as the offset can be encoded.
            bool nextIsOnMask = reg < 31 && ((mask >> (reg + 1)) & 1u) != 0;

            if (nextIsOnMask && offset < RegisterSaveRestore.Encodable9BitsOffsetLimit)
            {
                mask &= ~(3u << reg);

                Operand first = Register(RegisterAllocator.RemapReservedGprRegister(reg));
                Operand second = Register(RegisterAllocator.RemapReservedGprRegister(reg + 1));

                asm.LdpRiUn(first, second, contextPtr, offset);
            }
            else
            {
                mask &= ~(1u << reg);

                asm.LdrRiUn(Register(RegisterAllocator.RemapReservedGprRegister(reg)), contextPtr, offset);
            }
        }
    }

    private void LoadFpSimdFromContext(ref Assembler asm, uint mask, int baseOffset)
    {
        Operand contextPtr = Register(RegisterAllocator.FixedContextRegister);

        // The expression "mask & (mask - 1)" clears the lowest set bit.
        for (; mask != 0; mask &= mask - 1)
        {
            int reg = BitOperations.TrailingZeroCount(mask);

            asm.LdrRiUn(Register(reg, OperandType.V128), contextPtr, baseOffset + reg * 16);
        }
    }

    private void LoadPStateFromContext(ref Assembler asm, uint mask, int baseOffset)
    {
        if (mask == 0)
        {
            return;
        }

        Operand contextPtr = Register(RegisterAllocator.FixedContextRegister);

        // The flags go through a temporary register, as NZCV can't be loaded from memory directly.
        int scratch = RegisterAllocator.AllocateTempGprRegister();
        Operand flags = Register(scratch, OperandType.I32);

        asm.LdrRiUn(flags, contextPtr, baseOffset);
        asm.MsrNzcv(flags);

        RegisterAllocator.FreeTempGprRegister(scratch);
    }

    private void StoreGprToContext(ref Assembler asm, uint mask, int baseOffset, ulong? newLrValue)
    {
        Operand contextPtr = Register(RegisterAllocator.FixedContextRegister);

        int lrOverride = -1;

        if (newLrValue.HasValue)
        {
            // This is required for BLR X30 instructions, where we need to get the target address
            // before it is overwritten with the return address that the call would write there.
            lrOverride = RegisterAllocator.AllocateTempGprRegister();

            asm.Mov(Register(lrOverride), newLrValue.Value);
        }

        while (mask != 0)
        {
            int reg = BitOperations.TrailingZeroCount(mask);
            int offset = baseOffset + reg * 8;

            // Two adjacent registers can be stored at once, as long as the offset can be encoded.
            bool nextIsOnMask = reg < 31 && ((mask >> (reg + 1)) & 1u) != 0;

            if (nextIsOnMask && offset < RegisterSaveRestore.Encodable9BitsOffsetLimit)
            {
                mask &= ~(3u << reg);

                Operand first = Register(RemapReservedGprRegister(reg, lrOverride));
                Operand second = Register(RemapReservedGprRegister(reg + 1, lrOverride));

                asm.StpRiUn(first, second, contextPtr, offset);
            }
            else
            {
                mask &= ~(1u << reg);

                asm.StrRiUn(Register(RemapReservedGprRegister(reg, lrOverride)), contextPtr, offset);
            }
        }

        if (lrOverride >= 0)
        {
            RegisterAllocator.FreeTempGprRegister(lrOverride);
        }
    }

    /// <summary>
    /// Gets the host register to store for the guest register <paramref name="index"/>, which is
    /// <paramref name="tempRegister"/> for the link register if it is not negative.
    /// </summary>
    private int RemapReservedGprRegister(int index, int tempRegister)
    {
        bool useOverride = tempRegister >= 0 && index == RegisterUtils.LrIndex;

        return useOverride ? tempRegister : RegisterAllocator.RemapReservedGprRegister(index);
    }

    private void StoreFpSimdToContext(ref Assembler asm, uint mask, int baseOffset)
    {
        Operand contextPtr = Register(RegisterAllocator.FixedContextRegister);

        // The expression "mask & (mask - 1)" clears the lowest set bit.
        for (; mask != 0; mask &= mask - 1)
        {
            int reg = BitOperations.TrailingZeroCount(mask);

            asm.StrRiUn(Register(reg, OperandType.V128), contextPtr, baseOffset + reg * 16);
        }
    }

    private void StorePStateToContext(ref Assembler asm, uint mask, int baseOffset)
    {
        if (mask == 0)
        {
            return;
        }

        Operand contextPtr = Register(RegisterAllocator.FixedContextRegister);

        // The flags go through a temporary register, as NZCV can't be stored to memory directly.
        int scratch = RegisterAllocator.AllocateTempGprRegister();
        Operand flags = Register(scratch, OperandType.I32);

        asm.MrsNzcv(flags);
        asm.StrRiUn(flags, contextPtr, baseOffset);

        RegisterAllocator.FreeTempGprRegister(scratch);
    }
}
/// <summary>
/// Information recorded about a branch instruction, to be used at a later point of the compilation.
/// </summary>
private readonly struct PendingBranch
{
    /// <summary>Index of the block that the branch belongs to.</summary>
    public readonly int BlockIndex;

    /// <summary>Guest address associated with the branch.</summary>
    public readonly ulong Pc;

    /// <summary>Name of the branch instruction.</summary>
    public readonly InstName Name;

    /// <summary>Encoding of the branch instruction.</summary>
    public readonly uint Encoding;

    /// <summary>Code writer position associated with the branch.</summary>
    public readonly int WriterPointer;

    public PendingBranch(int blockIndex, ulong pc, InstName name, uint encoding, int writerPointer)
    {
        (BlockIndex, Pc, Name, Encoding, WriterPointer) = (blockIndex, pc, name, encoding, writerPointer);
    }
}
/// <summary>
/// Compiles the guest Arm64 function starting at <paramref name="address"/> into host code.
/// </summary>
/// <remarks>
/// The guest code is first decoded into a <see cref="MultiBlock"/>, then every block is emitted in order.
/// A branch that ends a block other than the last one is emitted as a placeholder and patched after
/// all blocks have been written, when the output position of each block start is known.
/// </remarks>
/// <param name="cpuPreset">CPU preset forwarded to the decoder</param>
/// <param name="memoryManager">Memory manager the guest code is decoded from; also supplies the address space bits, manager type and page table pointer</param>
/// <param name="address">Guest address of the first instruction to compile</param>
/// <param name="funcTable">Function table forwarded to the call and branch emitters</param>
/// <param name="dispatchStubPtr">Dispatch stub pointer forwarded to the call and branch emitters</param>
/// <returns>The generated code, together with the number of guest bytes between <paramref name="address"/> and the end of the compiled code</returns>
public static CompiledFunction Compile(CpuPreset cpuPreset, IMemoryManager memoryManager, ulong address, AddressTable<ulong> funcTable, IntPtr dispatchStubPtr)
{
MultiBlock multiBlock = Decoder.DecodeMulti(cpuPreset, memoryManager, address);
// Maps the guest address of each block start to the writer position where its code begins.
Dictionary<ulong, int> targets = new();
// Branches emitted as placeholders, patched after the main loop.
List<PendingBranch> pendingBranches = new();
uint gprUseMask = multiBlock.GlobalUseMask.GprMask;
uint fpSimdUseMask = multiBlock.GlobalUseMask.FpSimdMask;
uint pStateUseMask = multiBlock.GlobalUseMask.PStateMask;
CodeWriter writer = new();
RegisterAllocator regAlloc = new(gprUseMask, fpSimdUseMask, pStateUseMask, multiBlock.HasHostCall);
// Stack space for spilling registers around calls is only reserved if the function has a host call.
RegisterSaveRestore rsr = new(
regAlloc.AllGprMask & AbiConstants.GprCalleeSavedRegsMask,
regAlloc.AllFpSimdMask & AbiConstants.FpSimdCalleeSavedRegsMask,
OperandType.FP64,
multiBlock.HasHostCall,
multiBlock.HasHostCall ? CalculateStackSizeForCallSpill(regAlloc.AllGprMask, regAlloc.AllFpSimdMask, regAlloc.AllPStateMask) : 0);
TailMerger tailMerger = new();
Context context = new(writer, regAlloc, tailMerger, rsr, multiBlock, funcTable, dispatchStubPtr, memoryManager.PageTablePointer);
context.WritePrologue();
// Guest address of the instruction currently being emitted.
ulong pc = address;
for (int blockIndex = 0; blockIndex < multiBlock.Blocks.Count; blockIndex++)
{
Block block = multiBlock.Blocks[blockIndex];
Debug.Assert(block.Address == pc);
targets.Add(pc, writer.InstructionPointer);
// When the block ends with a branch, that last instruction is not emitted by this loop,
// it is handled separately further below.
int instCount = block.EndsWithBranch ? block.Instructions.Count - 1 : block.Instructions.Count;
for (int index = 0; index < instCount; index++)
{
InstInfo instInfo = block.Instructions[index];
uint encoding = RegisterUtils.RemapRegisters(regAlloc, instInfo.Flags, instInfo.Encoding);
if (instInfo.AddressForm != AddressForm.None)
{
InstEmitMemory.RewriteInstruction(
memoryManager.AddressSpaceBits,
memoryManager.Type,
writer,
regAlloc,
instInfo.Name,
instInfo.Flags,
instInfo.AddressForm,
pc,
encoding);
}
else if (instInfo.Name == InstName.Sys)
{
InstEmitMemory.RewriteSysInstruction(memoryManager.AddressSpaceBits, memoryManager.Type, writer, regAlloc, encoding);
}
else if (instInfo.Name.IsSystem())
{
// Some system instructions need the guest state stored to the context before
// they are emitted, and loaded back from it afterwards.
bool needsContextStoreLoad = InstEmitSystem.NeedsContextStoreLoad(instInfo.Name);
if (needsContextStoreLoad)
{
context.StoreToContextBeforeCall(blockIndex);
}
InstEmitSystem.RewriteInstruction(writer, regAlloc, tailMerger, instInfo.Name, pc, encoding, rsr.GetReservedStackOffset());
if (needsContextStoreLoad)
{
context.LoadFromContextAfterCall(blockIndex);
}
}
else
{
// Everything else is emitted as is, with only the registers remapped.
writer.WriteInstruction(encoding);
}
pc += 4UL;
}
if (block.IsLoopEnd)
{
// If this is a loop, the code might run for a long time uninterrupted.
// We insert a "sync point" here to ensure the loop can be interrupted if needed.
InstEmitSystem.WriteSyncPoint(writer, context.RegisterAllocator, tailMerger, context.GetReservedStackOffset());
}
// The last instruction of the last block is handled after the loop.
if (blockIndex < multiBlock.Blocks.Count - 1)
{
InstInfo lastInstructionInfo = block.Instructions[^1];
InstName lastInstructionName = lastInstructionInfo.Name;
InstFlags lastInstructionFlags = lastInstructionInfo.Flags;
uint lastInstructionEncoding = lastInstructionInfo.Encoding;
lastInstructionEncoding = RegisterUtils.RemapRegisters(regAlloc, lastInstructionFlags, lastInstructionEncoding);
if (lastInstructionName.IsCall())
{
// NOTE(review): pc + 4 is presumably the return address that becomes the new LR value; confirm against StoreToContextBeforeCall.
context.StoreToContextBeforeCall(blockIndex, pc + 4UL);
InstEmitSystem.RewriteCallInstruction(
writer,
regAlloc,
tailMerger,
context.WriteEpilogueWithoutContext,
funcTable,
dispatchStubPtr,
lastInstructionName,
pc,
lastInstructionEncoding,
context.GetReservedStackOffset());
context.LoadFromContextAfterCall(blockIndex);
pc += 4UL;
}
else if (lastInstructionName == InstName.Ret)
{
RewriteBranchInstruction(context, blockIndex, lastInstructionName, pc, lastInstructionEncoding);
pc += 4UL;
}
else if (block.EndsWithBranch)
{
// The target position might not be known yet, so reserve one instruction
// and remember where it is, it will be patched after all blocks are emitted.
pendingBranches.Add(new(blockIndex, pc, lastInstructionName, lastInstructionEncoding, writer.InstructionPointer));
writer.WriteInstruction(0u); // Placeholder.
pc += 4UL;
}
}
}
int lastBlockIndex = multiBlock.Blocks[^1].Index;
if (multiBlock.IsTruncated)
{
// Decoding stopped early, continue execution at the next guest address with a tail call.
Assembler asm = new(writer);
WriteTailCallConstant(context, ref asm, lastBlockIndex, pc);
}
else
{
InstInfo lastInstructionInfo = multiBlock.Blocks[^1].Instructions[^1];
InstName lastInstructionName = lastInstructionInfo.Name;
InstFlags lastInstructionFlags = lastInstructionInfo.Flags;
uint lastInstructionEncoding = lastInstructionInfo.Encoding;
lastInstructionEncoding = RegisterUtils.RemapRegisters(regAlloc, lastInstructionFlags, lastInstructionEncoding);
RewriteBranchInstruction(context, lastBlockIndex, lastInstructionName, pc, lastInstructionEncoding);
pc += 4;
}
// All block positions are known now, patch the placeholders.
foreach (PendingBranch pendingBranch in pendingBranches)
{
RewriteBranchInstructionWithTarget(
context,
pendingBranch.BlockIndex,
pendingBranch.Name,
pendingBranch.Pc,
pendingBranch.Encoding,
pendingBranch.WriterPointer,
targets);
}
tailMerger.WriteReturn(writer, context.WriteEpilogueWithoutContext);
return new(writer.AsByteSpan(), (int)(pc - address));
}
/// <summary>
/// Calculates the stack size, in bytes, needed to spill registers around a call.
/// </summary>
/// <param name="gprUseMask">Mask of the general purpose registers in use</param>
/// <param name="fpSimdUseMask">Mask of the FP/SIMD registers in use</param>
/// <param name="pStateUseMask">Mask of the PState flags in use</param>
/// <returns>8 bytes per non callee saved GPR, 16 bytes per FP/SIMD register, plus 8 bytes if any flag is in use</returns>
private static int CalculateStackSizeForCallSpill(uint gprUseMask, uint fpSimdUseMask, uint pStateUseMask)
{
    int gprBytes = BitOperations.PopCount(gprUseMask & ~AbiConstants.GprCalleeSavedRegsMask) * 8;

    // Callee saved FP/SIMD registers are not discarded here, because only their lower 64 bits
    // are callee saved, which is not enough if the function uses the full register.
    // We could do better, but it's likely not worth it since this case happens very rarely in practice.
    int fpSimdBytes = BitOperations.PopCount(fpSimdUseMask) * 16;

    int pStateBytes = pStateUseMask != 0 ? 8 : 0;

    return gprBytes + fpSimdBytes + pStateBytes;
}
/// <summary>
/// Emits the code for a branch instruction whose targets are all reached by leaving the function
/// (through a tail call or a return), rather than by jumping to another block of this function.
/// </summary>
/// <param name="context">Compilation context</param>
/// <param name="blockIndex">Index of the block that ends with the branch</param>
/// <param name="name">Name of the branch instruction</param>
/// <param name="pc">Guest address of the branch instruction</param>
/// <param name="encoding">Encoding of the branch instruction, with the registers already remapped by the caller</param>
private static void RewriteBranchInstruction(in Context context, int blockIndex, InstName name, ulong pc, uint encoding)
{
CodeWriter writer = context.Writer;
Assembler asm = new(writer);
int originalOffset;
ulong nextAddress = pc + 4UL;
ulong targetAddress;
switch (name)
{
case InstName.BUncond:
// Direct branch, the target guest address is a constant.
originalOffset = ImmUtils.ExtractSImm26Times4(encoding);
targetAddress = pc + (ulong)originalOffset;
WriteTailCallConstant(context, ref asm, blockIndex, targetAddress);
break;
case InstName.Bl:
case InstName.Blr:
case InstName.Br:
if (name == InstName.Bl)
{
// Direct call: write the return address to LR, then tail call the constant target.
asm.Mov(Register(context.GetLrRegisterIndex()), nextAddress);
int imm = ImmUtils.ExtractSImm26Times4(encoding);
WriteTailCallConstant(context, ref asm, blockIndex, pc + (ulong)imm);
}
else
{
// Indirect branch or call, the target is on a register.
bool isCall = name == InstName.Blr;
if (isCall)
{
// NOTE(review): nextAddress is presumably stored as the new LR value; confirm against StoreToContextBeforeCall.
context.StoreToContextBeforeCall(blockIndex, nextAddress);
}
else
{
context.StoreToContextBeforeCall(blockIndex);
}
InstEmitSystem.RewriteCallInstruction(
context.Writer,
context.RegisterAllocator,
context.TailMerger,
context.WriteEpilogueWithoutContext,
context.FuncTable,
context.DispatchStubPointer,
name,
pc,
encoding,
context.GetReservedStackOffset(),
isTail: true);
}
break;
case InstName.Ret:
int rnIndex = RegisterUtils.ExtractRn(encoding);
if (rnIndex == RegisterUtils.ZrIndex)
{
// The return address register is the zero register, so the target is address 0.
WriteTailCallConstant(context, ref asm, blockIndex, 0UL);
}
else
{
rnIndex = context.RemapGprRegister(rnIndex);
context.StoreToContextBeforeCall(blockIndex);
// The return address is moved to register 0 before the merged return is emitted.
if (rnIndex != 0)
{
asm.Mov(Register(0), Register(rnIndex));
}
context.TailMerger.AddUnconditionalReturn(writer, asm);
}
break;
case InstName.BCond:
case InstName.Cbnz:
case InstName.Cbz:
case InstName.Tbnz:
case InstName.Tbz:
uint branchMask;
if (name == InstName.Tbnz || name == InstName.Tbz)
{
// Test and branch instructions have a 14-bit offset field.
originalOffset = ImmUtils.ExtractSImm14Times4(encoding);
branchMask = 0x3fff;
}
else
{
// The other conditional branches have a 19-bit offset field.
originalOffset = ImmUtils.ExtractSImm19Times4(encoding);
branchMask = 0x7ffff;
}
targetAddress = pc + (ulong)originalOffset;
// Layout: [conditional branch] [tail call to the next address] [tail call to the branch target].
// The conditional branch is written last, once the position of the "taken" code is known.
int branchIndex = writer.InstructionPointer;
writer.WriteInstruction(0u); // Reserved for branch.
WriteTailCallConstant(context, ref asm, blockIndex, nextAddress);
int targetIndex = writer.InstructionPointer;
// The offset field starts at bit 5 and is the distance in instructions.
writer.WriteInstructionAt(branchIndex, (encoding & ~(branchMask << 5)) | (uint)(((targetIndex - branchIndex) & branchMask) << 5));
WriteTailCallConstant(context, ref asm, blockIndex, targetAddress);
break;
default:
Debug.Fail($"Unknown branch instruction \"{name}\".");
break;
}
}
/// <summary>
/// Patches a branch that was previously emitted as a placeholder.
/// </summary>
/// <remarks>
/// If the target address is the start of a block of this function and is within range, the branch
/// is patched to jump straight to it. Otherwise, it is patched to jump to a tail call to the target
/// address, which is emitted at the current end of the code.
/// </remarks>
/// <param name="context">Compilation context</param>
/// <param name="blockIndex">Index of the block that ends with the branch</param>
/// <param name="name">Name of the branch instruction</param>
/// <param name="pc">Guest address of the branch instruction</param>
/// <param name="encoding">Encoding of the branch instruction, with the registers already remapped by the caller</param>
/// <param name="branchIndex">Writer position of the placeholder to patch</param>
/// <param name="targets">Writer position of the start of each block, keyed by guest address</param>
private static void RewriteBranchInstructionWithTarget(
in Context context,
int blockIndex,
InstName name,
ulong pc,
uint encoding,
int branchIndex,
Dictionary<ulong, int> targets)
{
CodeWriter writer = context.Writer;
Assembler asm = new(writer);
// Distance from the branch to its destination, in instructions.
int delta;
int targetIndex;
int originalOffset;
ulong targetAddress;
switch (name)
{
case InstName.BUncond:
originalOffset = ImmUtils.ExtractSImm26Times4(encoding);
targetAddress = pc + (ulong)originalOffset;
if (targets.TryGetValue(targetAddress, out targetIndex))
{
delta = targetIndex - branchIndex;
if (delta >= -Encodable26BitsOffsetLimit && delta < Encodable26BitsOffsetLimit)
{
// The target is a block of this function, branch directly to it.
writer.WriteInstructionAt(branchIndex, (encoding & ~0x3ffffffu) | (uint)(delta & 0x3ffffff));
break;
}
}
// Otherwise, branch to a tail call that is emitted at the current end of the code.
targetIndex = writer.InstructionPointer;
delta = targetIndex - branchIndex;
writer.WriteInstructionAt(branchIndex, (encoding & ~0x3ffffffu) | (uint)(delta & 0x3ffffff));
WriteTailCallConstant(context, ref asm, blockIndex, targetAddress);
break;
case InstName.BCond:
case InstName.Cbnz:
case InstName.Cbz:
case InstName.Tbnz:
case InstName.Tbz:
uint branchMask;
if (name == InstName.Tbnz || name == InstName.Tbz)
{
// Test and branch instructions have a 14-bit offset field.
originalOffset = ImmUtils.ExtractSImm14Times4(encoding);
branchMask = 0x3fff;
}
else
{
// The other conditional branches have a 19-bit offset field.
originalOffset = ImmUtils.ExtractSImm19Times4(encoding);
branchMask = 0x7ffff;
}
// The offset field is signed, so half of its range is available in each direction.
int branchMax = (int)(branchMask + 1) / 2;
targetAddress = pc + (ulong)originalOffset;
if (targets.TryGetValue(targetAddress, out targetIndex))
{
delta = targetIndex - branchIndex;
if (delta >= -branchMax && delta < branchMax)
{
// The target is a block of this function and is in range, branch directly to it.
writer.WriteInstructionAt(branchIndex, (encoding & ~(branchMask << 5)) | (uint)((delta & branchMask) << 5));
break;
}
}
// Otherwise, the "taken" path is a tail call that is emitted at the current end of the code.
targetIndex = writer.InstructionPointer;
delta = targetIndex - branchIndex;
if (delta >= -branchMax && delta < branchMax)
{
writer.WriteInstructionAt(branchIndex, (encoding & ~(branchMask << 5)) | (uint)((delta & branchMask) << 5));
WriteTailCallConstant(context, ref asm, blockIndex, targetAddress);
}
else
{
// If the branch target is too far away, we use a regular unconditional branch
// instruction instead which has a much higher range.
// We branch directly to the end of the function, where we put the conditional branch,
// and then branch back to the next instruction or return the branch target depending
// on the branch being taken or not.
uint branchInst = 0x14000000u | ((uint)delta & 0x3ffffff);
Debug.Assert(ImmUtils.ExtractSImm26Times4(branchInst) == delta * 4);
writer.WriteInstructionAt(branchIndex, branchInst);
int movedBranchIndex = writer.InstructionPointer;
writer.WriteInstruction(0u); // Placeholder
// Not taken: branch back to the instruction that follows the original branch.
asm.B((branchIndex + 1 - writer.InstructionPointer) * 4);
// Taken: the moved conditional branch skips the branch back, landing on the tail call below.
delta = writer.InstructionPointer - movedBranchIndex;
writer.WriteInstructionAt(movedBranchIndex, (encoding & ~(branchMask << 5)) | (uint)((delta & branchMask) << 5));
WriteTailCallConstant(context, ref asm, blockIndex, targetAddress);
}
break;
default:
Debug.Fail($"Unknown branch instruction \"{name}\".");
break;
}
}
/// <summary>
/// Stores the state of the block to the context, then emits a tail call
/// to the constant guest address <paramref name="address"/>.
/// </summary>
/// <param name="context">Compilation context</param>
/// <param name="asm">Assembler used to emit the call</param>
/// <param name="blockIndex">Index of the block the call is emitted for</param>
/// <param name="address">Guest address to call</param>
private static void WriteTailCallConstant(in Context context, ref Assembler asm, int blockIndex, ulong address)
{
    context.StoreToContextBeforeCall(blockIndex);

    Operand guestTarget = new(OperandKind.Constant, OperandType.I64, address);

    InstEmitSystem.WriteCallWithGuestAddress(
        context.Writer,
        ref asm,
        context.RegisterAllocator,
        context.TailMerger,
        context.WriteEpilogueWithoutContext,
        context.FuncTable,
        context.DispatchStubPointer,
        context.GetReservedStackOffset(),
        0UL,
        guestTarget,
        isTail: true);
}
/// <summary>
/// Creates a register operand for the register with index <paramref name="register"/>.
/// </summary>
/// <param name="register">Register index</param>
/// <param name="type">Operand type, 64-bit integer by default</param>
/// <returns>The new register operand</returns>
private static Operand Register(int register, OperandType type = OperandType.I64)
    => new(register, RegisterType.Integer, type);
}
}

View File

@@ -0,0 +1,384 @@
using ARMeilleure.Memory;
using Ryujinx.Cpu.LightningJit.Graph;
using System.Collections.Generic;
using System.Diagnostics;
using System.Numerics;
namespace Ryujinx.Cpu.LightningJit.Arm64.Target.Arm64
{
static class Decoder
{
// Number of instructions after which Decode stops adding to a block and marks it as truncated.
private const int MaxInstructionsPerBlock = 1000;
// PState mask with bits 31:28 set, used by Decode for instructions that read or write all the NZCV flags.
private const uint NzcvFlags = 0xfu << 28;
// PState mask with only bit 29 set, used by Decode for instructions that only read the carry flag.
private const uint CFlag = 0x1u << 29;
/// <summary>
/// Decodes the contiguous run of blocks that starts at <paramref name="address"/>.
/// </summary>
/// <remarks>
/// Blocks are decoded back to back until one is truncated or has no next block.
/// They are then split at the collected branch targets, numbered and linked together.
/// </remarks>
/// <param name="cpuPreset">CPU preset used to decode the instructions</param>
/// <param name="memoryManager">Memory manager the guest code is read from</param>
/// <param name="address">Guest address of the first instruction</param>
/// <returns>The decoded blocks, with the accumulated register use mask and flags</returns>
public static MultiBlock DecodeMulti(CpuPreset cpuPreset, IMemoryManager memoryManager, ulong address)
{
    List<Block> blocks = new();
    List<ulong> branchTargets = new();

    RegisterMask useMask = RegisterMask.Zero;
    bool hasHostCall = false;
    bool hasMemoryInstruction = false;

    ulong nextAddress = address;
    bool keepDecoding;

    do
    {
        Block decoded = Decode(cpuPreset, memoryManager, nextAddress, ref useMask, ref hasHostCall, ref hasMemoryInstruction);

        if (!decoded.IsTruncated && TryGetBranchTarget(decoded, out ulong targetAddress))
        {
            branchTargets.Add(targetAddress);
        }

        blocks.Add(decoded);

        // A truncated block always ends the function.
        keepDecoding = !decoded.IsTruncated && HasNextBlock(decoded, decoded.EndAddress - 4UL, branchTargets);
        nextAddress = decoded.EndAddress;
    }
    while (keepDecoding);

    // SplitBlocks walks the targets in ascending order.
    branchTargets.Sort();
    SplitBlocks(blocks, branchTargets);
    NumberAndLinkBlocks(blocks);

    return new(blocks, useMask, hasHostCall, hasMemoryInstruction);
}
/// <summary>
/// Tries to get the target address of the last instruction of <paramref name="block"/>.
/// </summary>
/// <param name="block">Block to get the branch target from</param>
/// <param name="targetAddress">Branch target address, or 0 if the last instruction has no immediate target</param>
/// <returns>True if the last instruction is a branch with an immediate target, false otherwise</returns>
private static bool TryGetBranchTarget(Block block, out ulong targetAddress)
{
    InstInfo lastInst = block.Instructions[^1];
    ulong lastInstAddress = block.EndAddress - 4UL;

    return TryGetBranchTarget(lastInst.Name, lastInstAddress, lastInst.Encoding, out targetAddress);
}
/// <summary>
/// Tries to get the target address of a branch with an immediate offset.
/// </summary>
/// <param name="name">Instruction name</param>
/// <param name="pc">Guest address of the instruction</param>
/// <param name="encoding">Instruction encoding</param>
/// <param name="targetAddress">Branch target address, or 0 if the instruction is not one of the supported branches</param>
/// <returns>True for B, B.cond, CBZ, CBNZ, TBZ and TBNZ, false for everything else</returns>
private static bool TryGetBranchTarget(InstName name, ulong pc, uint encoding, out ulong targetAddress)
{
    bool isUnconditional = name == InstName.BUncond;
    bool isTestBranch = name is InstName.Tbnz or InstName.Tbz;
    bool isCompareBranch = name is InstName.BCond or InstName.Cbnz or InstName.Cbz;

    if (!isUnconditional && !isTestBranch && !isCompareBranch)
    {
        targetAddress = 0;

        return false;
    }

    int offset;

    if (isUnconditional)
    {
        offset = ImmUtils.ExtractSImm26Times4(encoding);
    }
    else if (isTestBranch)
    {
        offset = ImmUtils.ExtractSImm14Times4(encoding);
    }
    else
    {
        offset = ImmUtils.ExtractSImm19Times4(encoding);
    }

    targetAddress = pc + (ulong)offset;

    return true;
}
/// <summary>
/// Splits the blocks so that every branch target that falls inside of a block becomes the start of a block.
/// </summary>
/// <param name="blocks">Blocks to split, the list is modified in place</param>
/// <param name="branchTargets">Branch target addresses; the caller sorts them in ascending order before calling</param>
private static void SplitBlocks(List<Block> blocks, List<ulong> branchTargets)
{
// Index of the next branch target that was not handled yet.
int btIndex = 0;
while (btIndex < branchTargets.Count)
{
for (int blockIndex = 0; blockIndex < blocks.Count && btIndex < branchTargets.Count; blockIndex++)
{
Block block = blocks[blockIndex];
ulong currentBranchTarget = branchTargets[btIndex];
// Consume all the targets that are inside the current block.
while (currentBranchTarget >= block.Address && currentBranchTarget < block.EndAddress)
{
if (block.Address != currentBranchTarget)
{
// The target is in the middle of the block, split it there.
// The left half takes the place of the original block, and the right half
// is placed right after, so the next iteration of the outer loop visits it.
(Block leftBlock, Block rightBlock) = block.SplitAtAddress(currentBranchTarget);
blocks.Insert(blockIndex, leftBlock);
blocks[blockIndex + 1] = rightBlock;
block = leftBlock;
}
btIndex++;
// Skip duplicates of the target that was just handled.
while (btIndex < branchTargets.Count && branchTargets[btIndex] == currentBranchTarget)
{
btIndex++;
}
if (btIndex >= branchTargets.Count)
{
break;
}
currentBranchTarget = branchTargets[btIndex];
}
}
// Move past the current target before scanning the blocks again, which ensures the loop terminates.
Debug.Assert(btIndex < int.MaxValue);
btIndex++;
}
}
/// <summary>
/// Assigns each block its index on the list, and links each non-truncated block
/// to its successors (the next block and/or the branch target block).
/// </summary>
/// <param name="blocks">Blocks to number and link</param>
private static void NumberAndLinkBlocks(List<Block> blocks)
{
    Dictionary<ulong, Block> blocksByAddress = new();

    foreach (Block block in blocks)
    {
        blocksByAddress.Add(block.Address, block);
    }

    for (int number = 0; number < blocks.Count; number++)
    {
        Block current = blocks[number];

        current.Number(number);

        if (current.IsTruncated)
        {
            continue;
        }

        InstName lastName = current.Instructions[^1].Name;

        // Conditional branches and calls continue on the next block, returns never do.
        // For everything else, it depends on the block ending with a branch or not.
        bool fallsThrough = lastName switch
        {
            InstName.BCond or
            InstName.Cbnz or
            InstName.Cbz or
            InstName.Tbnz or
            InstName.Tbz or
            InstName.Bl or
            InstName.Blr => true,
            InstName.Ret => false,
            _ => !current.EndsWithBranch,
        };

        bool branches = lastName is
            InstName.BUncond or
            InstName.BCond or
            InstName.Cbnz or
            InstName.Cbz or
            InstName.Tbnz or
            InstName.Tbz;

        if (fallsThrough && blocksByAddress.TryGetValue(current.EndAddress, out Block nextBlock))
        {
            current.AddSuccessor(nextBlock);
            nextBlock.AddPredecessor(current);
        }

        if (branches &&
            TryGetBranchTarget(current, out ulong targetAddress) &&
            blocksByAddress.TryGetValue(targetAddress, out Block branchBlock))
        {
            current.AddSuccessor(branchBlock);
            branchBlock.AddPredecessor(current);
        }
    }
}
/// <summary>
/// Checks if decoding should continue with the block that follows <paramref name="block"/>.
/// </summary>
/// <param name="block">Block that was just decoded</param>
/// <param name="pc">Guest address of the last instruction of the block</param>
/// <param name="branchTargets">Branch targets found so far</param>
/// <returns>True if the block right after <paramref name="block"/> should be decoded, false otherwise</returns>
private static bool HasNextBlock(in Block block, ulong pc, List<ulong> branchTargets)
{
    InstName lastName = block.Instructions[^1].Name;
    ulong followingAddress = pc + 4UL;

    if (lastName == InstName.BUncond)
    {
        // Continue if something branches to the following instruction,
        // or if this is a short forward branch (less than 0x1000 bytes ahead).
        if (branchTargets.Contains(followingAddress))
        {
            return true;
        }

        return TryGetBranchTarget(block, out ulong targetAddress) && targetAddress >= pc && targetAddress < pc + 0x1000;
    }

    if (lastName is
        InstName.BCond or
        InstName.Bl or
        InstName.Blr or
        InstName.Cbnz or
        InstName.Cbz or
        InstName.Tbnz or
        InstName.Tbz)
    {
        return true;
    }

    if (lastName == InstName.Br)
    {
        return false;
    }

    if (lastName == InstName.Ret)
    {
        return branchTargets.Contains(followingAddress);
    }

    return !block.EndsWithBranch;
}
/// <summary>
/// Decodes a single block, starting at <paramref name="address"/> and ending at the first
/// control flow or exception instruction, or earlier if the block has to be truncated.
/// </summary>
/// <param name="cpuPreset">CPU preset whose version and features are used for the instruction table lookup</param>
/// <param name="memoryManager">Memory manager the guest code is read from</param>
/// <param name="address">Guest address of the first instruction of the block</param>
/// <param name="useMask">Registers and flags used so far; updated with the ones used by this block</param>
/// <param name="hasHostCall">Set to true if the block needs a host call, never reset to false</param>
/// <param name="hasMemoryInstruction">Set to true if the block has a memory or SYS instruction, never reset to false</param>
/// <returns>The decoded block</returns>
private static Block Decode(
CpuPreset cpuPreset,
IMemoryManager memoryManager,
ulong address,
ref RegisterMask useMask,
ref bool hasHostCall,
ref bool hasMemoryInstruction)
{
ulong startAddress = address;
List<InstInfo> insts = new();
uint gprUseMask = useMask.GprMask;
uint fpSimdUseMask = useMask.FpSimdMask;
uint pStateUseMask = useMask.PStateMask;
uint encoding;
InstName name;
InstFlags flags;
bool isControlFlow;
bool isTruncated = false;
do
{
encoding = memoryManager.Read<uint>(address);
address += 4UL;
(name, flags, AddressForm addressForm) = InstTable.GetInstNameAndFlags(encoding, cpuPreset.Version, cpuPreset.Features);
// Privileged instructions are decoded as permanently undefined instead.
if (name.IsPrivileged())
{
name = InstName.UdfPermUndef;
flags = InstFlags.None;
addressForm = AddressForm.None;
}
(uint instGprReadMask, uint instFpSimdReadMask) = RegisterUtils.PopulateReadMasks(name, flags, encoding);
(uint instGprWriteMask, uint instFpSimdWriteMask) = RegisterUtils.PopulateWriteMasks(name, flags, encoding);
// Calls also write LR.
if (name.IsCall())
{
instGprWriteMask |= 1u << RegisterUtils.LrIndex;
}
uint tempGprUseMask = gprUseMask | instGprReadMask | instGprWriteMask;
// Stop before this instruction if including it would leave less free registers
// than the amount of temporaries required, or if the block is already too long.
if (CalculateAvailableTemps(tempGprUseMask) < CalculateRequiredGprTemps(tempGprUseMask) || insts.Count >= MaxInstructionsPerBlock)
{
isTruncated = true;
// The instruction is not part of the block, so undo the address increment.
address -= 4UL;
break;
}
gprUseMask = tempGprUseMask;
uint instPStateReadMask = 0;
uint instPStateWriteMask = 0;
if (flags.HasFlag(InstFlags.Nzcv) || IsMrsNzcv(encoding))
{
instPStateReadMask = NzcvFlags;
}
else if (flags.HasFlag(InstFlags.C))
{
instPStateReadMask = CFlag;
}
if (flags.HasFlag(InstFlags.S) || IsMsrNzcv(encoding))
{
instPStateWriteMask = NzcvFlags;
}
if (flags.HasFlag(InstFlags.Memory) || name == InstName.Sys)
{
hasMemoryInstruction = true;
}
fpSimdUseMask |= instFpSimdReadMask | instFpSimdWriteMask;
pStateUseMask |= instPStateReadMask | instPStateWriteMask;
if (name.IsSystemOrCall() && !hasHostCall)
{
hasHostCall = name.IsCall() || InstEmitSystem.NeedsCall(encoding);
}
isControlFlow = name.IsControlFlowOrException();
RegisterUse registerUse = new(
instGprReadMask,
instGprWriteMask,
instFpSimdReadMask,
instFpSimdWriteMask,
instPStateReadMask,
instPStateWriteMask);
insts.Add(new(encoding, name, flags, addressForm, registerUse));
}
while (!isControlFlow);
bool isLoopEnd = false;
// A block that ends with a backwards branch is treated as the end of a loop.
// This also sets hasHostCall; the compiler emits a sync point for blocks marked as loop end.
if (!isTruncated && IsBackwardsBranch(name, encoding))
{
hasHostCall = true;
isLoopEnd = true;
}
useMask = new(gprUseMask, fpSimdUseMask, pStateUseMask);
// The block "ends with branch" if it was not truncated and the last instruction is not an exception.
return new(startAddress, address, insts, !isTruncated && !name.IsException(), isTruncated, isLoopEnd);
}
/// <summary>
/// Checks if the encoding matches 0xd53b4200 on all bits but the lower 5,
/// which Decode treats as an instruction that reads the NZCV flags.
/// </summary>
/// <param name="encoding">Instruction encoding</param>
/// <returns>True if the encoding matches, false otherwise</returns>
private static bool IsMrsNzcv(uint encoding)
{
    // Force the lower 5 bits to 1 on both sides, so they are ignored by the comparison.
    const uint Pattern = 0xd53b4200u | 0x1fu;

    return (encoding | 0x1fu) == Pattern;
}
/// <summary>
/// Checks if the encoding matches 0xd51b4200 on all bits but the lower 5,
/// which Decode treats as an instruction that writes the NZCV flags.
/// </summary>
/// <param name="encoding">Instruction encoding</param>
/// <returns>True if the encoding matches, false otherwise</returns>
private static bool IsMsrNzcv(uint encoding)
{
    // Force the lower 5 bits to 1 on both sides, so they are ignored by the comparison.
    const uint Pattern = 0xd51b4200u | 0x1fu;

    return (encoding | 0x1fu) == Pattern;
}
/// <summary>
/// Checks if the instruction is a branch with a negative immediate offset.
/// </summary>
/// <param name="name">Instruction name</param>
/// <param name="encoding">Instruction encoding</param>
/// <returns>True for B, B.cond, CBZ, CBNZ, TBZ and TBNZ with a negative offset, false for everything else</returns>
private static bool IsBackwardsBranch(InstName name, uint encoding) => name switch
{
    InstName.BUncond => ImmUtils.ExtractSImm26Times4(encoding) < 0,
    InstName.Tbnz or InstName.Tbz => ImmUtils.ExtractSImm14Times4(encoding) < 0,
    InstName.BCond or InstName.Cbnz or InstName.Cbz => ImmUtils.ExtractSImm19Times4(encoding) < 0,
    _ => false,
};
/// <summary>
/// Calculates how many temporary registers are needed for a given register use mask:
/// one for each reserved register in use, plus <see cref="RegisterAllocator.MaxTempsInclFixed"/>.
/// </summary>
/// <param name="gprUseMask">Mask of the general purpose registers in use</param>
/// <returns>Number of temporary registers needed</returns>
private static int CalculateRequiredGprTemps(uint gprUseMask)
{
    uint reservedRegsInUse = gprUseMask & RegisterUtils.ReservedRegsMask;
    int tempsForReservedRegs = BitOperations.PopCount(reservedRegsInUse);

    return tempsForReservedRegs + RegisterAllocator.MaxTempsInclFixed;
}
/// <summary>
/// Calculates how many registers are left for use as temporaries,
/// which are those that are neither in use nor reserved.
/// </summary>
/// <param name="gprUseMask">Mask of the general purpose registers in use</param>
/// <returns>Number of registers available</returns>
private static int CalculateAvailableTemps(uint gprUseMask)
{
    uint freeRegs = ~gprUseMask & ~RegisterUtils.ReservedRegsMask;

    return BitOperations.PopCount(freeRegs);
}
}
}

View File

@@ -0,0 +1,593 @@
using ARMeilleure.Memory;
using Ryujinx.Cpu.LightningJit.CodeGen;
using Ryujinx.Cpu.LightningJit.CodeGen.Arm64;
using System;
using System.Diagnostics;
namespace Ryujinx.Cpu.LightningJit.Arm64.Target.Arm64
{
static class InstEmitMemory
{
// Mask and value that RewriteInstruction tests on base register memory instructions.
// On a match, it sets bit 15 (the "ordered" flag) so that exclusive access operations become ordered.
private const uint XMask = 0x3f808000u;
private const uint XValue = 0x8000000u;
/// <summary>
/// Rewrites a SYS instruction so that the guest address on Rt is translated first.
/// </summary>
/// <remarks>
/// The instruction is written unmodified if Rt is the zero register.
/// </remarks>
/// <param name="asBits">Number of bits of the guest address space</param>
/// <param name="mmType">Memory manager type</param>
/// <param name="writer">Code writer the instructions are written to</param>
/// <param name="regAlloc">Register allocator, used to get a temporary register</param>
/// <param name="encoding">Instruction encoding</param>
public static void RewriteSysInstruction(int asBits, MemoryManagerType mmType, CodeWriter writer, RegisterAllocator regAlloc, uint encoding)
{
    int guestRt = RegisterUtils.ExtractRt(encoding);

    if (guestRt != RegisterUtils.ZrIndex)
    {
        int scratch = regAlloc.AllocateTempGprRegister();

        Operand translated = new(scratch, RegisterType.Integer, OperandType.I64);
        Operand guestAddress = new(guestRt, RegisterType.Integer, OperandType.I64);

        Assembler asm = new(writer);

        WriteAddressTranslation(asBits, mmType, regAlloc, ref asm, translated, guestAddress);

        // The instruction now takes the translated address from the temporary register.
        writer.WriteInstruction(RegisterUtils.ReplaceRt(encoding, scratch));

        regAlloc.FreeTempGprRegister(scratch);
    }
    else
    {
        writer.WriteInstruction(encoding);
    }
}
/// <summary>
/// Rewrites a memory instruction, picking the rewrite that matches its address form.
/// </summary>
/// <remarks>
/// Instructions with an address form that is not handled here are written unmodified.
/// </remarks>
/// <param name="asBits">Number of bits of the guest address space</param>
/// <param name="mmType">Memory manager type</param>
/// <param name="writer">Code writer the instructions are written to</param>
/// <param name="regAlloc">Register allocator</param>
/// <param name="name">Instruction name</param>
/// <param name="flags">Instruction flags</param>
/// <param name="addressForm">Address form of the instruction</param>
/// <param name="pc">Guest address of the instruction</param>
/// <param name="encoding">Instruction encoding</param>
public static void RewriteInstruction(
    int asBits,
    MemoryManagerType mmType,
    CodeWriter writer,
    RegisterAllocator regAlloc,
    InstName name,
    InstFlags flags,
    AddressForm addressForm,
    ulong pc,
    uint encoding)
{
    switch (addressForm)
    {
        case AddressForm.BaseRegister:
        case AddressForm.StructNoOffset:
            // Some applications use unordered memory instructions in places where
            // they do need proper ordering, and only work on some CPUs.
            // To work around this, make all exclusive access operations ordered.
            if (addressForm == AddressForm.BaseRegister && (encoding & XMask) == XValue)
            {
                // Set ordered flag.
                encoding |= 1u << 15;
            }

            RewriteBaseRegisterMemoryInstruction(asBits, mmType, writer, regAlloc, encoding);
            return;

        case AddressForm.OffsetReg:
            RewriteOffsetRegMemoryInstruction(asBits, mmType, writer, regAlloc, flags, encoding);
            return;

        case AddressForm.PostIndexed:
            RewritePostIndexedMemoryInstruction(asBits, mmType, writer, regAlloc, flags, encoding);
            return;

        case AddressForm.PreIndexed:
            RewritePreIndexedMemoryInstruction(asBits, mmType, writer, regAlloc, flags, encoding);
            return;

        case AddressForm.SignedScaled:
            RewriteSignedScaledMemoryInstruction(asBits, mmType, writer, regAlloc, flags, encoding);
            return;

        case AddressForm.UnsignedScaled:
            RewriteUnsignedScaledMemoryInstruction(asBits, mmType, writer, regAlloc, flags, encoding);
            return;

        case AddressForm.BasePlusOffset:
            RewriteBasePlusOffsetMemoryInstruction(asBits, mmType, writer, regAlloc, encoding);
            return;

        case AddressForm.Literal:
            RewriteLiteralMemoryInstruction(asBits, mmType, writer, regAlloc, name, pc, encoding);
            return;

        case AddressForm.StructPostIndexedReg:
            RewriteStructPostIndexedRegMemoryInstruction(asBits, mmType, writer, regAlloc, encoding);
            return;
    }

    writer.WriteInstruction(encoding);
}
/// <summary>
/// Rewrites a memory instruction with a register offset. The guest address is calculated and
/// translated on a temporary register, which the instruction then uses as base with no offset.
/// </summary>
/// <param name="asBits">Number of bits of the guest address space</param>
/// <param name="mmType">Memory manager type</param>
/// <param name="writer">Code writer the instructions are written to</param>
/// <param name="regAlloc">Register allocator, used to get a temporary register</param>
/// <param name="flags">Instruction flags</param>
/// <param name="encoding">Instruction encoding</param>
private static void RewriteOffsetRegMemoryInstruction(int asBits, MemoryManagerType mmType, CodeWriter writer, RegisterAllocator regAlloc, InstFlags flags, uint encoding)
{
    // TODO: Some unallocated encoding cases.

    ArmExtensionType extensionType = (ArmExtensionType)((encoding >> 13) & 7);

    // The offset is only shifted when bit 12 is set, and the shift amount is the access size.
    int shift = 0;

    if ((encoding & (1u << 12)) != 0)
    {
        uint sizeLog2 = encoding >> 30;

        if (flags.HasFlag(InstFlags.FpSimd))
        {
            sizeLog2 |= (encoding >> 21) & 4u;
        }

        shift = (int)sizeLog2;
    }

    int scratch = regAlloc.AllocateTempGprRegister();

    Operand hostAddress = new(scratch, RegisterType.Integer, OperandType.I64);
    Operand guestBase = new(RegisterUtils.ExtractRn(encoding), RegisterType.Integer, OperandType.I64);
    Operand guestIndex = new(RegisterUtils.ExtractRm(encoding), RegisterType.Integer, OperandType.I64);

    Assembler asm = new(writer);

    asm.Add(hostAddress, guestBase, guestIndex, extensionType, shift);

    WriteAddressTranslation(asBits, mmType, regAlloc, ref asm, hostAddress, hostAddress);

    // Register -> Unsigned offset
    uint rewritten = RegisterUtils.ReplaceRn(encoding, scratch);
    rewritten = (rewritten & ~(0xfffu << 10)) | (1u << 24);

    writer.WriteInstruction(rewritten);

    regAlloc.FreeTempGprRegister(scratch);
}
/// <summary>
/// Rewrites a post-indexed memory instruction. The access is done through a temporary register
/// with the translated address, and the guest base register is updated afterwards.
/// </summary>
/// <param name="asBits">Number of bits of the guest address space</param>
/// <param name="mmType">Memory manager type</param>
/// <param name="writer">Code writer the instructions are written to</param>
/// <param name="regAlloc">Register allocator, used to get a temporary register</param>
/// <param name="flags">Instruction flags</param>
/// <param name="encoding">Instruction encoding</param>
private static void RewritePostIndexedMemoryInstruction(int asBits, MemoryManagerType mmType, CodeWriter writer, RegisterAllocator regAlloc, InstFlags flags, uint encoding)
{
    bool isPair = flags.HasFlag(InstFlags.Rt2);
    int writebackAmount = isPair ? ExtractSImm7Scaled(flags, encoding) : ExtractSImm9(encoding);

    int scratch = regAlloc.AllocateTempGprRegister();

    Operand hostBase = new(scratch, RegisterType.Integer, OperandType.I64);
    Operand guestBase = new(RegisterUtils.ExtractRn(encoding), RegisterType.Integer, OperandType.I64);

    Assembler asm = new(writer);

    WriteAddressTranslation(asBits, mmType, regAlloc, ref asm, hostBase, guestBase);

    uint rewritten = RegisterUtils.ReplaceRn(encoding, scratch);

    // Pairs: Post-index -> Signed offset (zero offset, bits 23 and 24 flipped).
    // Others: Post-index -> Unsigned offset (zero offset, bit 24 set).
    rewritten = isPair
        ? (rewritten & ~(0x7fu << 15)) ^ (3u << 23)
        : (rewritten & ~(0xfffu << 10)) | (1u << 24);

    writer.WriteInstruction(rewritten);

    // The base register is updated after the access.
    WriteAddConstant(ref asm, guestBase, guestBase, writebackAmount);

    regAlloc.FreeTempGprRegister(scratch);
}
/// <summary>
/// Rewrites a pre-indexed memory instruction. The guest base register is updated first, and
/// the access is then done through a temporary register with the translated address.
/// </summary>
/// <param name="asBits">Number of bits of the guest address space</param>
/// <param name="mmType">Memory manager type</param>
/// <param name="writer">Code writer the instructions are written to</param>
/// <param name="regAlloc">Register allocator, used to get a temporary register</param>
/// <param name="flags">Instruction flags</param>
/// <param name="encoding">Instruction encoding</param>
private static void RewritePreIndexedMemoryInstruction(int asBits, MemoryManagerType mmType, CodeWriter writer, RegisterAllocator regAlloc, InstFlags flags, uint encoding)
{
    bool isPair = flags.HasFlag(InstFlags.Rt2);
    int writebackAmount = isPair ? ExtractSImm7Scaled(flags, encoding) : ExtractSImm9(encoding);

    int scratch = regAlloc.AllocateTempGprRegister();

    Operand hostBase = new(scratch, RegisterType.Integer, OperandType.I64);
    Operand guestBase = new(RegisterUtils.ExtractRn(encoding), RegisterType.Integer, OperandType.I64);

    Assembler asm = new(writer);

    // The base register is updated before the access.
    WriteAddConstant(ref asm, guestBase, guestBase, writebackAmount);
    WriteAddressTranslation(asBits, mmType, regAlloc, ref asm, hostBase, guestBase);

    uint rewritten = RegisterUtils.ReplaceRn(encoding, scratch);

    // Pairs: Pre-index -> Signed offset (zero offset, bit 23 cleared).
    // Others: Pre-index -> Unsigned offset (zero offset, bit 24 set).
    rewritten = isPair
        ? rewritten & ~((0x7fu << 15) | (1u << 23))
        : (rewritten & ~(0xfffu << 10)) | (1u << 24);

    writer.WriteInstruction(rewritten);

    regAlloc.FreeTempGprRegister(scratch);
}
/// <summary>
/// Rewrites a memory instruction with a scaled signed 7-bit immediate offset, located at bits 21:15.
/// </summary>
private static void RewriteSignedScaledMemoryInstruction(int asBits, MemoryManagerType mmType, CodeWriter writer, RegisterAllocator regAlloc, InstFlags flags, uint encoding)
{
    const uint OffsetFieldMask = 0x7fu << 15;

    int offset = ExtractSImm7Scaled(flags, encoding);

    RewriteMemoryInstruction(asBits, mmType, writer, regAlloc, encoding, offset, OffsetFieldMask);
}
/// <summary>
/// Rewrites a memory instruction with a scaled unsigned 12-bit immediate offset, located at bits 21:10.
/// </summary>
private static void RewriteUnsignedScaledMemoryInstruction(int asBits, MemoryManagerType mmType, CodeWriter writer, RegisterAllocator regAlloc, InstFlags flags, uint encoding)
{
    const uint OffsetFieldMask = 0xfffu << 10;

    int offset = ExtractUImm12Scaled(flags, encoding);

    RewriteMemoryInstruction(asBits, mmType, writer, regAlloc, encoding, offset, OffsetFieldMask);
}
/// <summary>
/// Rewrites a memory instruction that addresses memory with only a base register, without any offset.
/// </summary>
private static void RewriteBaseRegisterMemoryInstruction(int asBits, MemoryManagerType mmType, CodeWriter writer, RegisterAllocator regAlloc, uint encoding)
    => RewriteMemoryInstruction(asBits, mmType, writer, regAlloc, encoding, imm: 0, immMask: 0u);
/// <summary>
/// Rewrites a memory instruction with a signed 9-bit immediate offset, located at bits 20:12.
/// </summary>
private static void RewriteBasePlusOffsetMemoryInstruction(int asBits, MemoryManagerType mmType, CodeWriter writer, RegisterAllocator regAlloc, uint encoding)
{
    const uint OffsetFieldMask = 0x1ffu << 12;

    int offset = ExtractSImm9(encoding);

    RewriteMemoryInstruction(asBits, mmType, writer, regAlloc, encoding, offset, OffsetFieldMask);
}
/// <summary>
/// Rewrites a memory instruction with an optional immediate offset. The translated address is
/// placed on a temporary register, which replaces the base register of the instruction.
/// </summary>
/// <remarks>
/// If the offset can be folded, it is left on the instruction and only the base address is translated.
/// Otherwise, the offset is added before the translation, and cleared from the instruction.
/// </remarks>
/// <param name="asBits">Number of bits of the guest address space</param>
/// <param name="mmType">Memory manager type</param>
/// <param name="writer">Code writer the instructions are written to</param>
/// <param name="regAlloc">Register allocator, used to get a temporary register</param>
/// <param name="encoding">Instruction encoding</param>
/// <param name="imm">Immediate offset of the instruction, in bytes</param>
/// <param name="immMask">Mask of the bits of the encoding where the offset is located</param>
private static void RewriteMemoryInstruction(int asBits, MemoryManagerType mmType, CodeWriter writer, RegisterAllocator regAlloc, uint encoding, int imm, uint immMask)
{
    int scratch = regAlloc.AllocateTempGprRegister();

    Operand hostBase = new(scratch, RegisterType.Integer, OperandType.I64);
    Operand guestBase = new(RegisterUtils.ExtractRn(encoding), RegisterType.Integer, OperandType.I64);

    Assembler asm = new(writer);

    bool offsetStaysOnInstruction = CanFoldOffset(mmType, imm);
    int translationOffset = offsetStaysOnInstruction ? 0 : imm;

    WriteAddressTranslation(asBits, mmType, regAlloc, ref asm, hostBase, guestBase, translationOffset);

    uint rewritten = RegisterUtils.ReplaceRn(encoding, scratch);

    if (!offsetStaysOnInstruction)
    {
        // The offset is already part of the translated address.
        rewritten &= ~immMask;
    }

    writer.WriteInstruction(rewritten);

    regAlloc.FreeTempGprRegister(scratch);
}
/// <summary>
/// Rewrites a PC-relative ("literal") instruction, so that it no longer depends on the host PC.
/// </summary>
/// <remarks>
/// ADR and ADRP become a move of the calculated guest address to the destination register.
/// Literal loads and prefetches get the guest target address translated on a temporary register,
/// and then access memory through it.
/// Nothing is emitted when the destination is the integer zero register.
/// </remarks>
/// <param name="asBits">Number of bits of the guest address space</param>
/// <param name="mmType">Memory manager type</param>
/// <param name="writer">Code writer the instructions are written to</param>
/// <param name="regAlloc">Register allocator, used to get a temporary register</param>
/// <param name="name">Instruction name</param>
/// <param name="pc">Guest address of the instruction</param>
/// <param name="encoding">Instruction encoding</param>
private static void RewriteLiteralMemoryInstruction(int asBits, MemoryManagerType mmType, CodeWriter writer, RegisterAllocator regAlloc, InstName name, ulong pc, uint encoding)
{
    Assembler asm = new(writer);

    ulong targetAddress;
    long imm;

    int rtIndex = (int)(encoding & 0x1f);
    bool isFpSimdLoad = name == InstName.LdrLitFpsimd;

    // Index 31 only means "zero register" for integer destinations, where the result is discarded.
    // For SIMD&FP loads it is V31, which is a regular register, so the load can't be skipped.
    // For prefetches, the field is not a register at all.
    if (rtIndex == RegisterUtils.ZrIndex && !isFpSimdLoad && name != InstName.PrfmLit)
    {
        return;
    }

    Operand rt;

    if (isFpSimdLoad)
    {
        uint opc = encoding >> 30;

        // TODO: Undefined if opc is invalid?

        rt = new(rtIndex, RegisterType.Vector, opc switch
        {
            0 => OperandType.FP32,
            1 => OperandType.FP64,
            _ => OperandType.V128,
        });
    }
    else
    {
        // LDR (literal) loads a 32-bit value when bit 30 is clear. The operand type has to be 32-bit
        // in this case, otherwise a 64-bit load would be emitted and the upper half of the register
        // would get the contents of the next 4 bytes rather than zero.
        bool is32BitLoad = name == InstName.LdrLitGen && (encoding & (1u << 30)) == 0;

        rt = new(rtIndex, RegisterType.Integer, is32BitLoad ? OperandType.I32 : OperandType.I64);
    }

    switch (name)
    {
        case InstName.Adr:
        case InstName.Adrp:
            // The 21-bit immediate is split in immlo (bits 30:29) and immhi (bits 23:5).
            imm = ((long)(encoding >> 29) & 3) | ((long)(encoding >> 3) & 0x1ffffc);

            // Move the sign bit to the top, so that the shift right below sign extends the value.
            imm <<= 43;

            if (name == InstName.Adrp)
            {
                // The immediate is a page count, so it ends up shifted left by 12,
                // and it is relative to the start of the page that contains the instruction.
                imm >>= 31;
                targetAddress = (pc & ~0xfffUL) + (ulong)imm;
            }
            else
            {
                imm >>= 43;
                targetAddress = pc + (ulong)imm;
            }

            asm.Mov(rt, targetAddress);
            break;

        case InstName.LdrLitGen:
        case InstName.LdrswLit:
        case InstName.LdrLitFpsimd:
        case InstName.PrfmLit:
            // Bits 23:5 are a signed word offset. Shifting bit 23 to the top and then back to
            // bit 20 sign extends it, and leaves the offset multiplied by 4.
            imm = encoding & ~0x1fu;
            imm <<= 40;
            imm >>= 43;
            targetAddress = pc + (ulong)imm;

            int tempRegister = regAlloc.AllocateTempGprRegister();
            Operand rn = new(tempRegister, RegisterType.Integer, OperandType.I64);

            WriteAddressTranslation(asBits, mmType, regAlloc, ref asm, rn, targetAddress);

            switch (name)
            {
                case InstName.LdrLitGen:
                case InstName.LdrLitFpsimd:
                    asm.LdrRiUn(rt, rn, 0);
                    break;
                case InstName.LdrswLit:
                    asm.LdrswRiUn(rt, rn, 0);
                    break;
                case InstName.PrfmLit:
                    asm.PrfmR(rt, rn);
                    break;
            }

            regAlloc.FreeTempGprRegister(tempRegister);
            break;

        default:
            Debug.Fail($"Invalid literal memory instruction '{name}'.");
            break;
    }
}
/// <summary>
/// Rewrites a post-indexed SIMD structure load or store. The access is done through a temporary
/// register with the translated address, and the guest base register is updated afterwards.
/// </summary>
/// <remarks>
/// If Rm is the register with index 31, the base register is incremented by the amount of bytes
/// transferred, which is calculated from the encoding. Otherwise, it is incremented by the value of Rm.
/// </remarks>
/// <param name="asBits">Number of bits of the guest address space</param>
/// <param name="mmType">Memory manager type</param>
/// <param name="writer">Code writer the instructions are written to</param>
/// <param name="regAlloc">Register allocator, used to get a temporary register</param>
/// <param name="encoding">Instruction encoding</param>
private static void RewriteStructPostIndexedRegMemoryInstruction(int asBits, MemoryManagerType mmType, CodeWriter writer, RegisterAllocator regAlloc, uint encoding)
{
// TODO: Some unallocated encoding cases.
int tempRegister = regAlloc.AllocateTempGprRegister();
Operand rn = new(tempRegister, RegisterType.Integer, OperandType.I64);
Operand guestAddress = new(RegisterUtils.ExtractRn(encoding), RegisterType.Integer, OperandType.I64);
// Rm has to be extracted here, because the field is cleared from the encoding below.
int rmIndex = RegisterUtils.ExtractRm(encoding);
Assembler asm = new(writer);
WriteAddressTranslation(asBits, mmType, regAlloc, ref asm, rn, guestAddress);
encoding = RegisterUtils.ReplaceRn(encoding, tempRegister);
encoding &= ~((0x1fu << 16) | (1u << 23)); // Post-index -> No offset
writer.WriteInstruction(encoding);
if (rmIndex == RegisterUtils.ZrIndex)
{
// Immediate form, the increment is the total size of the transfer.
// None of the bits read from the encoding below were modified above.
bool isSingleStruct = (encoding & (1u << 24)) != 0;
int offset;
if (isSingleStruct)
{
// Number of elements in the structure, from bits 13 and 21.
int sElems = (int)(((encoding >> 12) & 2u) | ((encoding >> 21) & 1u)) + 1;
int size = (int)(encoding >> 10) & 3;
int s = (int)(encoding >> 12) & 1;
int scale = (int)(encoding >> 14) & 3;
int l = (int)(encoding >> 22) & 1;
// After this switch, scale is the base 2 logarithm of the element size in bytes.
// The undefined cases are not handled, the checks are only there to document them.
switch (scale)
{
case 1:
if ((size & 1) != 0)
{
// Undef.
}
break;
case 2:
if ((size & 2) != 0 ||
((size & 1) != 0 && s != 0))
{
// Undef.
}
if ((size & 1) != 0)
{
scale = 3;
}
break;
case 3:
if (l == 0 || s != 0)
{
// Undef.
}
scale = size;
break;
}
int eBytes = 1 << scale;
offset = eBytes * sElems;
}
else
{
// Multiple structures: bits 15:12 select how many registers are transferred.
int reps;
int sElems;
switch ((encoding >> 12) & 0xf)
{
case 0b0000:
reps = 1;
sElems = 4;
break;
case 0b0010:
reps = 4;
sElems = 1;
break;
case 0b0100:
reps = 1;
sElems = 3;
break;
case 0b0110:
reps = 3;
sElems = 1;
break;
case 0b0111:
reps = 1;
sElems = 1;
break;
case 0b1000:
reps = 1;
sElems = 2;
break;
case 0b1010:
reps = 2;
sElems = 1;
break;
default:
// Undef.
// Both are set to 0, so the offset calculated below is 0 too.
reps = 0;
sElems = 0;
break;
}
int size = (int)(encoding >> 10) & 3;
bool q = (encoding & (1u << 30)) != 0;
if (!q && size == 3 && sElems != 1)
{
// Undef.
}
// Each register transferred is 16 bytes if bit 30 is set, or 8 bytes otherwise.
offset = reps * (q ? 16 : 8) * sElems;
}
asm.Add(guestAddress, guestAddress, new Operand(OperandKind.Constant, OperandType.I32, (ulong)offset));
}
else
{
// Register form, the increment is the value of Rm.
Operand guestOffset = new(rmIndex, RegisterType.Integer, OperandType.I64);
asm.Add(guestAddress, guestAddress, guestOffset);
}
regAlloc.FreeTempGprRegister(tempRegister);
}
/// <summary>
/// Emits code that adds <paramref name="offset"/> to a guest address, and then translates
/// the result to a host address.
/// </summary>
/// <param name="asBits">Number of bits of the guest address space</param>
/// <param name="mmType">Memory manager type</param>
/// <param name="regAlloc">Register allocator</param>
/// <param name="asm">Assembler used to emit the code</param>
/// <param name="destination">Register that receives the host address</param>
/// <param name="guestAddress">Register with the guest address</param>
/// <param name="offset">Offset to add to the guest address before the translation, in bytes</param>
private static void WriteAddressTranslation(
    int asBits,
    MemoryManagerType mmType,
    RegisterAllocator regAlloc,
    ref Assembler asm,
    Operand destination,
    Operand guestAddress,
    int offset)
{
    if (offset == 0)
    {
        WriteAddressTranslation(asBits, mmType, regAlloc, ref asm, destination, guestAddress);

        return;
    }

    // They are assumed to be on different registers, otherwise this operation will thrash the address.
    Debug.Assert(destination.Value != guestAddress.Value);

    if (Math.Abs(offset) < 0x1000)
    {
        // Encode as 12-bit immediate.
        WriteAddConstant(ref asm, destination, guestAddress, offset);
    }
    else
    {
        // Too high to encode as 12-bit immediate, do a separate move.
        asm.Mov(destination, (ulong)offset);
        asm.Add(destination, destination, guestAddress);
    }

    // The address with the offset is now on the destination register.
    WriteAddressTranslation(asBits, mmType, regAlloc, ref asm, destination, destination);
}
/// <summary>
/// Emits code that translates a guest address given as a constant: the constant is first
/// moved into <paramref name="destination"/>, which is then translated in place.
/// </summary>
private static void WriteAddressTranslation(
    int asBits,
    MemoryManagerType mmType,
    RegisterAllocator regAlloc,
    ref Assembler asm,
    Operand destination,
    ulong guestAddress)
{
    // Materialize the constant; from here on the destination register is both input and output.
    asm.Mov(destination, guestAddress);

    Operand addressRegister = destination;

    WriteAddressTranslation(asBits, mmType, regAlloc, ref asm, destination, addressRegister);
}
/// <summary>
/// Emits code that adds the page table base register to a guest address held in a register,
/// writing the sum to <paramref name="destination"/>.
/// </summary>
/// <remarks>
/// For <see cref="MemoryManagerType.HostMapped"/> the guest address is first masked down to
/// <paramref name="asBits"/> bits; <see cref="MemoryManagerType.HostMappedUnsafe"/> skips the mask.
/// </remarks>
/// <exception cref="NotImplementedException">Thrown for any other memory manager type.</exception>
private static void WriteAddressTranslation(
    int asBits,
    MemoryManagerType mmType,
    RegisterAllocator regAlloc,
    ref Assembler asm,
    Operand destination,
    Operand guestAddress)
{
    Operand pageTableBase = new(regAlloc.FixedPageTableRegister, RegisterType.Integer, OperandType.I64);

    if (mmType is not (MemoryManagerType.HostMapped or MemoryManagerType.HostMappedUnsafe))
    {
        throw new NotImplementedException(mmType.ToString());
    }

    Operand addend = guestAddress;

    if (mmType == MemoryManagerType.HostMapped)
    {
        // Keep only the low asBits bits of the guest address before adding the base.
        ulong addressMask = ulong.MaxValue >> (64 - asBits);

        asm.And(destination, guestAddress, new Operand(OperandKind.Constant, OperandType.I64, addressMask));
        addend = destination;
    }

    asm.Add(destination, pageTableBase, addend);
}
/// <summary>
/// Emits <c>rd = rn + value</c> for a signed constant, using a subtraction of the negated
/// value when <paramref name="value"/> is negative so the immediate operand is never negative.
/// </summary>
private static void WriteAddConstant(ref Assembler asm, Operand rd, Operand rn, int value)
{
    bool isNegative = value < 0;
    Operand magnitude = new(OperandKind.Constant, OperandType.I32, (ulong)(isNegative ? -value : value));

    if (isNegative)
    {
        asm.Sub(rd, rn, magnitude);
    }
    else
    {
        asm.Add(rd, rn, magnitude);
    }
}
/// <summary>
/// Returns true only for <see cref="MemoryManagerType.HostMappedUnsafe"/>.
/// </summary>
/// <remarks>
/// The <paramref name="offset"/> argument is currently not consulted; the answer depends on the
/// memory manager type alone.
/// </remarks>
private static bool CanFoldOffset(MemoryManagerType mmType, int offset)
    => mmType == MemoryManagerType.HostMappedUnsafe;
/// <summary>
/// Extracts the signed 7-bit immediate of an encoding and scales it by <c>4 &lt;&lt; opc</c>,
/// where opc is read from bits 31:30 when <see cref="InstFlags.FpSimd"/> is set and from
/// bit 31 alone otherwise.
/// </summary>
private static int ExtractSImm7Scaled(InstFlags flags, uint encoding)
{
    int opcLowBit = flags.HasFlag(InstFlags.FpSimd) ? 30 : 31;
    uint opc = encoding >> opcLowBit;
    int scaleShift = (int)(2 + opc);

    return ExtractSImm7(encoding) << scaleShift;
}
/// <summary>
/// Returns bits 21:15 of <paramref name="encoding"/> as a sign-extended 7-bit integer.
/// </summary>
private static int ExtractSImm7(uint encoding)
{
    // Move the 7-bit field to the top of the word, then arithmetic-shift it back down
    // so that its most significant bit is replicated as the sign.
    const int UnusedHighBits = 32 - 7;

    return ((int)(encoding >> 15) << UnusedHighBits) >> UnusedHighBits;
}
/// <summary>
/// Returns bits 20:12 of <paramref name="encoding"/> as a sign-extended 9-bit integer.
/// </summary>
private static int ExtractSImm9(uint encoding)
{
    // Move the 9-bit field to the top of the word, then arithmetic-shift it back down
    // so that its most significant bit is replicated as the sign.
    const int UnusedHighBits = 32 - 9;

    return ((int)(encoding >> 12) << UnusedHighBits) >> UnusedHighBits;
}
/// <summary>
/// Extracts the unsigned 12-bit immediate of an encoding and shifts it left by the access
/// size read from bits 31:30, extended with bit 23 as a third size bit when
/// <see cref="InstFlags.FpSimd"/> is set.
/// </summary>
private static int ExtractUImm12Scaled(InstFlags flags, uint encoding)
{
    // Bit 23 lands on bit 2 after the shift, extending the 2-bit size field for FP/SIMD accesses.
    uint extraSizeBit = flags.HasFlag(InstFlags.FpSimd) ? (encoding >> 21) & 4u : 0u;
    uint sizeShift = (encoding >> 30) | extraSizeBit;

    return ExtractUImm12(encoding) << (int)sizeShift;
}
/// <summary>
/// Returns bits 21:10 of <paramref name="encoding"/> as an unsigned 12-bit value.
/// </summary>
private static int ExtractUImm12(uint encoding)
{
    const uint FieldMask = 0xfffu;

    return (int)((encoding >> 10) & FieldMask);
}
}
}

View File

@@ -0,0 +1,610 @@
using ARMeilleure.Common;
using Ryujinx.Cpu.LightningJit.CodeGen;
using Ryujinx.Cpu.LightningJit.CodeGen.Arm64;
using System;
using System.Diagnostics;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
namespace Ryujinx.Cpu.LightningJit.Arm64.Target.Arm64
{
static class InstEmitSystem
{
// Signature shared by the NativeInterface.Break, SupervisorCall and Undefined handlers
// whose function pointers are obtained further down in this class.
private delegate void SoftwareInterruptHandler(ulong address, int imm);
// Parameterless callback returning a 64-bit value; used for NativeInterface.GetCntpctEl0.
private delegate ulong Get64();
// Parameterless callback returning a boolean; used for NativeInterface.CheckSynchronization.
private delegate bool GetBool();
/// <summary>
/// Emits host code for a guest system instruction.
/// </summary>
/// <remarks>
/// BRK, SVC and UDF become calls to their managed handlers followed by a synchronization point.
/// Reads of TPIDRRO_EL0 and TPIDR_EL0 and writes of TPIDR_EL0 are redirected to the native context.
/// CTR_EL0 reads are replaced with a constant on Apple operating systems, and CNTPCT_EL0
/// reads become a managed call. Every other encoding is written out unchanged.
/// </remarks>
/// <param name="writer">Code writer that receives the emitted instructions.</param>
/// <param name="regAlloc">Register allocator providing the fixed context register and spill masks.</param>
/// <param name="tailMerger">Tail merger used by the synchronization point to emit returns.</param>
/// <param name="name">Decoded instruction name.</param>
/// <param name="pc">Guest address of the instruction, passed to the handlers.</param>
/// <param name="encoding">Raw 32-bit instruction encoding.</param>
/// <param name="spillBaseOffset">Stack offset where registers are saved around managed calls.</param>
public static void RewriteInstruction(
    CodeWriter writer,
    RegisterAllocator regAlloc,
    TailMerger tailMerger,
    InstName name,
    ulong pc,
    uint encoding,
    int spillBaseOffset)
{
    if (name == InstName.Brk)
    {
        Assembler asm = new(writer);

        WriteCall(ref asm, regAlloc, GetBrkHandlerPtr(), spillBaseOffset, null, pc, encoding);
        WriteSyncPoint(writer, ref asm, regAlloc, tailMerger, spillBaseOffset);
    }
    else if (name == InstName.Svc)
    {
        // The 16-bit immediate starts at bit 5 of the encoding.
        uint svcId = (ushort)(encoding >> 5);

        Assembler asm = new(writer);

        WriteCall(ref asm, regAlloc, GetSvcHandlerPtr(), spillBaseOffset, null, pc, svcId);
        WriteSyncPoint(writer, ref asm, regAlloc, tailMerger, spillBaseOffset);
    }
    else if (name == InstName.UdfPermUndef)
    {
        Assembler asm = new(writer);

        WriteCall(ref asm, regAlloc, GetUdfHandlerPtr(), spillBaseOffset, null, pc, encoding);
        WriteSyncPoint(writer, ref asm, regAlloc, tailMerger, spillBaseOffset);
    }
    else if ((encoding & ~0x1f) == 0xd53bd060) // mrs x0, tpidrro_el0
    {
        uint rd = encoding & 0x1f;

        // A read into the zero register has no visible effect, so nothing is emitted for it.
        if (rd != RegisterUtils.ZrIndex)
        {
            Assembler asm = new(writer);

            asm.LdrRiUn(Register((int)rd), Register(regAlloc.FixedContextRegister), NativeContextOffsets.TpidrroEl0Offset);
        }
    }
    else if ((encoding & ~0x1f) == 0xd53bd040) // mrs x0, tpidr_el0
    {
        uint rd = encoding & 0x1f;

        if (rd != RegisterUtils.ZrIndex)
        {
            Assembler asm = new(writer);

            asm.LdrRiUn(Register((int)rd), Register(regAlloc.FixedContextRegister), NativeContextOffsets.TpidrEl0Offset);
        }
    }
    else if ((encoding & ~0x1f) == 0xd53b0020 && IsAppleOS()) // mrs x0, ctr_el0
    {
        uint rd = encoding & 0x1f;

        if (rd != RegisterUtils.ZrIndex)
        {
            Assembler asm = new(writer);

            // TODO: Use host value? But that register can't be accessed on macOS...
            asm.Mov(Register((int)rd, OperandType.I32), 0x8444c004);
        }
    }
    else if ((encoding & ~0x1f) == 0xd53be020) // mrs x0, cntpct_el0
    {
        uint rd = encoding & 0x1f;

        if (rd != RegisterUtils.ZrIndex)
        {
            Assembler asm = new(writer);

            WriteCall(ref asm, regAlloc, GetCntpctEl0Ptr(), spillBaseOffset, (int)rd);
        }
    }
    else if ((encoding & ~0x1f) == 0xd51bd040) // msr tpidr_el0, x0
    {
        // Unlike the reads above, a write whose source is the zero register is NOT a no-op:
        // "msr tpidr_el0, xzr" must clear the guest TPIDR_EL0. Register 31 as the source of
        // a store is also the zero register, so the same store covers that case.
        uint rt = encoding & 0x1f;

        Assembler asm = new(writer);

        asm.StrRiUn(Register((int)rt), Register(regAlloc.FixedContextRegister), NativeContextOffsets.TpidrEl0Offset);
    }
    else
    {
        writer.WriteInstruction(encoding);
    }
}
/// <summary>
/// Tells whether the given encoding is one of the instructions this method flags as needing
/// a call: any SVC, a CNTPCT_EL0 read, or (on Apple operating systems only) a CTR_EL0 read.
/// </summary>
public static bool NeedsCall(uint encoding)
{
    // svc #imm16, with the 16-bit immediate field masked out.
    bool isSvc = (encoding & ~(0xffffu << 5)) == 0xd4000001u;

    if (isSvc)
    {
        return true;
    }

    // System register read with the destination register field masked out.
    long registerRead = encoding & ~0x1f;

    return registerRead switch
    {
        0xd53b0020 => IsAppleOS(), // mrs x0, ctr_el0
        0xd53be020 => true, // mrs x0, cntpct_el0
        _ => false,
    };
}
/// <summary>
/// Returns true when running on macOS or iOS, as reported by <see cref="OperatingSystem"/>.
/// </summary>
private static bool IsAppleOS()
{
    if (OperatingSystem.IsMacOS())
    {
        return true;
    }

    return OperatingSystem.IsIOS();
}
/// <summary>
/// Returns true only for <see cref="InstName.Svc"/>.
/// </summary>
public static bool NeedsContextStoreLoad(InstName name) => name is InstName.Svc;
// Held in a static field so the delegate stays reachable for the lifetime of the process:
// the garbage collector does not see references held by emitted native code, and the function
// pointer is only valid while the delegate it was created from is alive.
private static readonly SoftwareInterruptHandler _breakHandler = NativeInterface.Break;

/// <summary>
/// Gets a native-callable function pointer for <c>NativeInterface.Break</c>.
/// </summary>
private static IntPtr GetBrkHandlerPtr()
{
    return Marshal.GetFunctionPointerForDelegate<SoftwareInterruptHandler>(_breakHandler);
}
// Held in a static field so the delegate stays reachable for the lifetime of the process:
// the garbage collector does not see references held by emitted native code, and the function
// pointer is only valid while the delegate it was created from is alive.
private static readonly SoftwareInterruptHandler _supervisorCallHandler = NativeInterface.SupervisorCall;

/// <summary>
/// Gets a native-callable function pointer for <c>NativeInterface.SupervisorCall</c>.
/// </summary>
private static IntPtr GetSvcHandlerPtr()
{
    return Marshal.GetFunctionPointerForDelegate<SoftwareInterruptHandler>(_supervisorCallHandler);
}
// Held in a static field so the delegate stays reachable for the lifetime of the process:
// the garbage collector does not see references held by emitted native code, and the function
// pointer is only valid while the delegate it was created from is alive.
private static readonly SoftwareInterruptHandler _undefinedHandler = NativeInterface.Undefined;

/// <summary>
/// Gets a native-callable function pointer for <c>NativeInterface.Undefined</c>.
/// </summary>
private static IntPtr GetUdfHandlerPtr()
{
    return Marshal.GetFunctionPointerForDelegate<SoftwareInterruptHandler>(_undefinedHandler);
}
// Held in a static field so the delegate stays reachable for the lifetime of the process:
// the garbage collector does not see references held by emitted native code, and the function
// pointer is only valid while the delegate it was created from is alive.
private static readonly Get64 _getCntpctEl0 = NativeInterface.GetCntpctEl0;

/// <summary>
/// Gets a native-callable function pointer for <c>NativeInterface.GetCntpctEl0</c>.
/// </summary>
private static IntPtr GetCntpctEl0Ptr()
{
    return Marshal.GetFunctionPointerForDelegate<Get64>(_getCntpctEl0);
}
// Held in a static field so the delegate stays reachable for the lifetime of the process:
// the garbage collector does not see references held by emitted native code, and the function
// pointer is only valid while the delegate it was created from is alive.
private static readonly GetBool _checkSynchronization = NativeInterface.CheckSynchronization;

/// <summary>
/// Gets a native-callable function pointer for <c>NativeInterface.CheckSynchronization</c>.
/// </summary>
private static IntPtr CheckSynchronizationPtr()
{
    return Marshal.GetFunctionPointerForDelegate<GetBool>(_checkSynchronization);
}
/// <summary>
/// Emits a synchronization point using a fresh assembler created over <paramref name="writer"/>.
/// </summary>
public static void WriteSyncPoint(
    CodeWriter writer,
    RegisterAllocator regAlloc,
    TailMerger tailMerger,
    int spillBaseOffset)
{
    Assembler assembler = new Assembler(writer);

    WriteSyncPoint(writer, ref assembler, regAlloc, tailMerger, spillBaseOffset);
}
/// <summary>
/// Emits a synchronization point: the counter stored in the native context is loaded and, if
/// it is zero, <c>NativeInterface.CheckSynchronization</c> is called (returning from the
/// function when that call yields zero) and the counter is reloaded. In both cases the counter
/// is then decremented and stored back.
/// </summary>
private static void WriteSyncPoint(
    CodeWriter writer,
    ref Assembler asm,
    RegisterAllocator regAlloc,
    TailMerger tailMerger,
    int spillBaseOffset)
{
    int counterRegister = regAlloc.AllocateTempGprRegister();
    uint counterRegisterMask = 1u << counterRegister;
    Operand counter = Register(counterRegister, OperandType.I32);

    asm.LdrRiUn(counter, Register(regAlloc.FixedContextRegister), NativeContextOffsets.CounterOffset);

    // Emit the branch with a zero offset for now; it is patched once the target is known.
    int skipBranchIndex = writer.InstructionPointer;

    asm.Cbnz(counter, 0);

    // Slow path, taken only when the counter is zero. The counter register is excluded from
    // the spill/fill since it is reloaded from the context afterwards.
    WriteSpill(ref asm, regAlloc, counterRegisterMask, spillBaseOffset, counterRegister);

    // Register 0 holds the call target unless it is the counter register, in which case register 1 is used.
    Operand callTarget = Register(counterRegister == 0 ? 1 : 0);

    asm.Mov(callTarget, (ulong)CheckSynchronizationPtr());
    asm.Blr(callTarget);

    tailMerger.AddConditionalZeroReturn(writer, asm, Register(0, OperandType.I32));

    WriteFill(ref asm, regAlloc, counterRegisterMask, spillBaseOffset, counterRegister);

    asm.LdrRiUn(counter, Register(regAlloc.FixedContextRegister), NativeContextOffsets.CounterOffset);

    // Patch the branch so that a non-zero counter skips everything emitted since it.
    uint skipBranch = writer.ReadInstructionAt(skipBranchIndex);
    int skipDistance = writer.InstructionPointer - skipBranchIndex;
    uint encodedDistance = ((uint)skipDistance & 0x7ffff) << 5;

    writer.WriteInstructionAt(skipBranchIndex, skipBranch | encodedDistance);

    asm.Sub(counter, counter, new Operand(OperandKind.Constant, OperandType.I32, 1));
    asm.StrRiUn(counter, Register(regAlloc.FixedContextRegister), NativeContextOffsets.CounterOffset);

    regAlloc.FreeTempGprRegister(counterRegister);
}
/// <summary>
/// Emits host code for a guest unconditional branch or call (B, BL, BR, BLR) by resolving the
/// guest target and delegating to <see cref="WriteCallWithGuestAddress"/>.
/// </summary>
/// <remarks>
/// B and BL use the constant <c>pc + imm26 * 4</c> as target. BR and BLR use the (remapped)
/// source register, or the constant zero when the source is the zero register.
/// Any other instruction name triggers a debug failure and emits nothing.
/// </remarks>
public static void RewriteCallInstruction(
    CodeWriter writer,
    RegisterAllocator regAlloc,
    TailMerger tailMerger,
    Action writeEpilogue,
    AddressTable<ulong> funcTable,
    IntPtr dispatchStubPtr,
    InstName name,
    ulong pc,
    uint encoding,
    int spillBaseOffset,
    bool isTail = false)
{
    Assembler asm = new(writer);

    Operand target;

    switch (name)
    {
        case InstName.BUncond:
        case InstName.Bl:
            {
                int displacement = ImmUtils.ExtractSImm26Times4(encoding);

                target = new Operand(OperandKind.Constant, OperandType.I64, pc + (ulong)displacement);
                break;
            }

        case InstName.Blr:
        case InstName.Br:
            {
                int sourceRegister = RegisterUtils.ExtractRn(encoding);

                if (sourceRegister == RegisterUtils.ZrIndex)
                {
                    target = new Operand(OperandKind.Constant, OperandType.I64, 0UL);
                }
                else
                {
                    target = Register(regAlloc.RemapReservedGprRegister(sourceRegister));
                }

                break;
            }

        default:
            Debug.Fail($"Unknown branch instruction \"{name}\".");
            return;
    }

    WriteCallWithGuestAddress(
        writer,
        ref asm,
        regAlloc,
        tailMerger,
        writeEpilogue,
        funcTable,
        dispatchStubPtr,
        spillBaseOffset,
        pc,
        target,
        isTail);
}
/// <summary>
/// Emits a call (or, for tail calls, a jump) to the host code for a guest target address.
/// </summary>
/// <remarks>
/// The guest target is stored in the native context's dispatch address slot, register 0 is
/// loaded with the context register, and the host target is taken either from the function
/// table entry of a constant guest address or from <paramref name="funcPtr"/>.
/// For non-tail calls, the value left in register 0 after the call is compared against
/// <c>pc + 4</c> and a conditional return is emitted for the not-equal case.
/// </remarks>
/// <param name="writer">Code writer, passed to the tail merger.</param>
/// <param name="asm">Assembler that receives the emitted instructions.</param>
/// <param name="regAlloc">Register allocator providing the fixed context register.</param>
/// <param name="tailMerger">Tail merger that records the conditional return.</param>
/// <param name="writeEpilogue">Callback that emits the function epilogue; invoked only for tail calls.</param>
/// <param name="funcTable">Optional function table; when null, <paramref name="funcPtr"/> is always used.</param>
/// <param name="funcPtr">Host address used when the target cannot be looked up in the table.</param>
/// <param name="spillBaseOffset">Stack offset used for the spill and fill around non-tail calls.</param>
/// <param name="pc">Guest address of the branch instruction.</param>
/// <param name="guestAddress">Guest target, either a constant or a register operand.</param>
/// <param name="isTail">True to emit a tail call (epilogue followed by a branch).</param>
public static unsafe void WriteCallWithGuestAddress(
    CodeWriter writer,
    ref Assembler asm,
    RegisterAllocator regAlloc,
    TailMerger tailMerger,
    Action writeEpilogue,
    AddressTable<ulong> funcTable,
    IntPtr funcPtr,
    int spillBaseOffset,
    ulong pc,
    Operand guestAddress,
    bool isTail = false)
{
    bool targetIsConstant = guestAddress.Kind == OperandKind.Constant;

    if (targetIsConstant)
    {
        // A constant has to go through a scratch register before it can be stored.
        int scratchRegister = regAlloc.AllocateTempGprRegister();

        asm.Mov(Register(scratchRegister), guestAddress.Value);
        asm.StrRiUn(Register(scratchRegister), Register(regAlloc.FixedContextRegister), NativeContextOffsets.DispatchAddressOffset);

        regAlloc.FreeTempGprRegister(scratchRegister);
    }
    else
    {
        asm.StrRiUn(guestAddress, Register(regAlloc.FixedContextRegister), NativeContextOffsets.DispatchAddressOffset);
    }

    // Register 1 carries the host target, unless it is the context register, in which case register 2 is used.
    int targetRegister = regAlloc.FixedContextRegister == 1 ? 2 : 1;

    if (!isTail)
    {
        WriteSpillSkipContext(ref asm, regAlloc, spillBaseOffset);
    }

    Operand hostTarget = Register(targetRegister);

    if (regAlloc.FixedContextRegister != 0)
    {
        asm.Mov(Register(0), Register(regAlloc.FixedContextRegister));
    }

    if (targetIsConstant && funcTable != null)
    {
        // Load the host pointer from the table entry: page-aligned base plus a 12-bit offset.
        ulong entryAddress = (ulong)Unsafe.AsPointer(ref funcTable.GetValue(guestAddress.Value));

        asm.Mov(hostTarget, entryAddress & ~0xfffUL);
        asm.LdrRiUn(hostTarget, hostTarget, (int)(entryAddress & 0xfffUL));
    }
    else
    {
        asm.Mov(hostTarget, (ulong)funcPtr);
    }

    if (isTail)
    {
        writeEpilogue();

        asm.Br(hostTarget);

        return;
    }

    asm.Blr(hostTarget);

    // Return when the value in register 0 is not the address of the next guest instruction.
    ulong returnAddress = pc + 4UL;

    asm.Mov(hostTarget, returnAddress);
    asm.Cmp(Register(0), hostTarget);

    tailMerger.AddConditionalReturn(writer, asm, ArmCondition.Ne);

    WriteFillSkipContext(ref asm, regAlloc, spillBaseOffset);
}
/// <summary>
/// Emits a call to a native function with constant integer arguments, spilling registers
/// before the call and filling them afterwards.
/// </summary>
/// <param name="asm">Assembler that receives the emitted instructions.</param>
/// <param name="regAlloc">Register allocator describing which registers are in use.</param>
/// <param name="funcPtr">Address of the function to call.</param>
/// <param name="spillBaseOffset">Stack offset where the registers are saved.</param>
/// <param name="resultRegister">Register that receives the value of register 0 after the call, or null to discard it.</param>
/// <param name="callArgs">Constant arguments, loaded into registers 0, 1, 2... in order.</param>
private static void WriteCall(
    ref Assembler asm,
    RegisterAllocator regAlloc,
    IntPtr funcPtr,
    int spillBaseOffset,
    int? resultRegister,
    params ulong[] callArgs)
{
    bool hasResult = resultRegister.HasValue;

    // The result register is excluded from the spill/fill, otherwise the fill would overwrite it.
    uint resultMask = hasResult ? 1u << resultRegister.Value : 0u;

    // The function address goes on the first register after the arguments,
    // skipping the result register if that is where it would land.
    int targetRegister = callArgs.Length;

    if (hasResult && targetRegister == resultRegister.Value)
    {
        targetRegister++;
    }

    WriteSpill(ref asm, regAlloc, resultMask, spillBaseOffset, targetRegister);

    // We only support up to 7 arguments right now.
    // ABI defines the first 8 integer arguments to be passed on registers X0-X7.
    // We need at least one register to put the function address on, so that reduces the number of
    // registers we can use for that by one.
    Debug.Assert(callArgs.Length < 8);

    for (int argIndex = 0; argIndex < callArgs.Length; argIndex++)
    {
        asm.Mov(Register(argIndex), callArgs[argIndex]);
    }

    Operand callTarget = Register(targetRegister);

    asm.Mov(callTarget, (ulong)funcPtr);
    asm.Blr(callTarget);

    if (hasResult && resultRegister.Value != 0)
    {
        asm.Mov(Register(resultRegister.Value), Register(0));
    }

    WriteFill(ref asm, regAlloc, resultMask, spillBaseOffset, targetRegister);
}
/// <summary>
/// Emits the stores that save registers to the stack; see <see cref="WriteSpillOrFill"/>.
/// </summary>
private static void WriteSpill(
    ref Assembler asm,
    RegisterAllocator regAlloc,
    uint exceptMask,
    int spillOffset,
    int tempRegister)
    => WriteSpillOrFill(ref asm, regAlloc, exceptMask, spillOffset, tempRegister, spill: true);
/// <summary>
/// Emits the loads that restore registers from the stack; see <see cref="WriteSpillOrFill"/>.
/// </summary>
private static void WriteFill(
    ref Assembler asm,
    RegisterAllocator regAlloc,
    uint exceptMask,
    int spillOffset,
    int tempRegister)
    => WriteSpillOrFill(ref asm, regAlloc, exceptMask, spillOffset, tempRegister, spill: false);
/// <summary>
/// Emits the stores (spill) or loads (fill) that save or restore the registers in use around a call.
/// </summary>
/// <remarks>
/// The stack layout, starting at <paramref name="spillOffset"/>, is: every general purpose
/// register in use that is neither callee saved nor in <paramref name="exceptMask"/> (8 bytes
/// each), then the NZCV flags if any flag is in use (8 bytes), padding up to a 16-byte multiple,
/// then every FP/SIMD register in use (16 bytes each). Adjacent registers are saved as pairs
/// while the offset is below <c>RegisterSaveRestore.Encodable9BitsOffsetLimit</c>.
/// </remarks>
/// <param name="asm">Assembler that receives the emitted instructions.</param>
/// <param name="regAlloc">Register allocator providing the in-use masks and the register remapping.</param>
/// <param name="exceptMask">General purpose registers to leave out of the save/restore.</param>
/// <param name="spillOffset">Stack offset of the first saved register.</param>
/// <param name="tempRegister">Scratch register used to move the flags to and from the stack.</param>
/// <param name="spill">True to emit stores, false to emit loads.</param>
private static void WriteSpillOrFill(
    ref Assembler asm,
    RegisterAllocator regAlloc,
    uint exceptMask,
    int spillOffset,
    int tempRegister,
    bool spill)
{
    Operand stackPointer = Register(RegisterUtils.SpIndex);

    uint pendingGprs = regAlloc.AllGprMask & ~(AbiConstants.GprCalleeSavedRegsMask | exceptMask);

    if (regAlloc.AllPStateMask != 0 && !spill)
    {
        // We must reload the status register before reloading the GPRs,
        // since we might otherwise trash one of them by using it as temp register.
        Operand flags = Register(tempRegister, OperandType.I32);

        // The flags slot sits right after the GPR slots, which take 8 bytes per register.
        int flagsOffset = spillOffset + BitOperations.PopCount(pendingGprs) * 8;

        asm.LdrRiUn(flags, stackPointer, flagsOffset);
        asm.MsrNzcv(flags);
    }

    while (pendingGprs != 0)
    {
        int index = BitOperations.TrailingZeroCount(pendingGprs);

        bool pairWithNext = index < 31 &&
            (pendingGprs & (2u << index)) != 0 &&
            spillOffset < RegisterSaveRestore.Encodable9BitsOffsetLimit;

        Operand first = Register(regAlloc.RemapReservedGprRegister(index));

        if (pairWithNext)
        {
            Operand second = Register(regAlloc.RemapReservedGprRegister(index + 1));

            if (spill)
            {
                asm.StpRiUn(first, second, stackPointer, spillOffset);
            }
            else
            {
                asm.LdpRiUn(first, second, stackPointer, spillOffset);
            }

            pendingGprs &= ~(3u << index);
            spillOffset += 16;
        }
        else
        {
            if (spill)
            {
                asm.StrRiUn(first, stackPointer, spillOffset);
            }
            else
            {
                asm.LdrRiUn(first, stackPointer, spillOffset);
            }

            pendingGprs &= ~(1u << index);
            spillOffset += 8;
        }
    }

    if (regAlloc.AllPStateMask != 0)
    {
        if (spill)
        {
            Operand flags = Register(tempRegister, OperandType.I32);

            asm.MrsNzcv(flags);
            asm.StrRiUn(flags, stackPointer, spillOffset);
        }

        // On fill the flags were already reloaded above; only account for their slot.
        spillOffset += 8;
    }

    // Round the offset up to a multiple of 16 before the vector registers.
    spillOffset += spillOffset & 8;

    uint pendingVectors = regAlloc.AllFpSimdMask;

    while (pendingVectors != 0)
    {
        int index = BitOperations.TrailingZeroCount(pendingVectors);

        bool pairWithNext = index < 31 &&
            (pendingVectors & (2u << index)) != 0 &&
            spillOffset < RegisterSaveRestore.Encodable9BitsOffsetLimit;

        Operand first = Register(index, OperandType.V128);

        if (pairWithNext)
        {
            Operand second = Register(index + 1, OperandType.V128);

            if (spill)
            {
                asm.StpRiUn(first, second, stackPointer, spillOffset);
            }
            else
            {
                asm.LdpRiUn(first, second, stackPointer, spillOffset);
            }

            pendingVectors &= ~(3u << index);
            spillOffset += 32;
        }
        else
        {
            if (spill)
            {
                asm.StrRiUn(first, stackPointer, spillOffset);
            }
            else
            {
                asm.LdrRiUn(first, stackPointer, spillOffset);
            }

            pendingVectors &= ~(1u << index);
            spillOffset += 16;
        }
    }
}
/// <summary>
/// Emits the stores for the reduced register set handled by <see cref="WriteSpillOrFillSkipContext"/>.
/// </summary>
private static void WriteSpillSkipContext(ref Assembler asm, RegisterAllocator regAlloc, int spillOffset)
    => WriteSpillOrFillSkipContext(ref asm, regAlloc, spillOffset, spill: true);
/// <summary>
/// Emits the loads for the reduced register set handled by <see cref="WriteSpillOrFillSkipContext"/>.
/// </summary>
private static void WriteFillSkipContext(ref Assembler asm, RegisterAllocator regAlloc, int spillOffset)
    => WriteSpillOrFillSkipContext(ref asm, regAlloc, spillOffset, spill: false);
/// <summary>
/// Emits the stores (spill) or loads (fill) for the fixed context and page table registers only,
/// and only for those that are in use and not callee saved.
/// </summary>
/// <param name="asm">Assembler that receives the emitted instructions.</param>
/// <param name="regAlloc">Register allocator providing the fixed registers, in-use mask and remapping.</param>
/// <param name="spillOffset">Stack offset of the first saved register.</param>
/// <param name="spill">True to emit stores, false to emit loads.</param>
private static void WriteSpillOrFillSkipContext(ref Assembler asm, RegisterAllocator regAlloc, int spillOffset, bool spill)
{
    Operand stackPointer = Register(RegisterUtils.SpIndex);

    uint fixedRegisters = (1u << regAlloc.FixedContextRegister) | (1u << regAlloc.FixedPageTableRegister);
    uint pending = regAlloc.AllGprMask & fixedRegisters;

    pending &= ~AbiConstants.GprCalleeSavedRegsMask;

    while (pending != 0)
    {
        int index = BitOperations.TrailingZeroCount(pending);

        // Two adjacent registers share one pair instruction while the offset is still encodable.
        bool pairWithNext = index < 31 &&
            (pending & (2u << index)) != 0 &&
            spillOffset < RegisterSaveRestore.Encodable9BitsOffsetLimit;

        Operand first = Register(regAlloc.RemapReservedGprRegister(index));

        if (pairWithNext)
        {
            Operand second = Register(regAlloc.RemapReservedGprRegister(index + 1));

            if (spill)
            {
                asm.StpRiUn(first, second, stackPointer, spillOffset);
            }
            else
            {
                asm.LdpRiUn(first, second, stackPointer, spillOffset);
            }

            pending &= ~(3u << index);
            spillOffset += 16;
        }
        else
        {
            if (spill)
            {
                asm.StrRiUn(first, stackPointer, spillOffset);
            }
            else
            {
                asm.LdrRiUn(first, stackPointer, spillOffset);
            }

            pending &= ~(1u << index);
            spillOffset += 8;
        }
    }
}
/// <summary>
/// Creates an integer register operand with the given index and operand type (64-bit by default).
/// </summary>
private static Operand Register(int register, OperandType type = OperandType.I64)
    => new(register, RegisterType.Integer, type);
}
}

File diff suppressed because it is too large Load Diff