Implement a new JIT for Arm devices (#6057)

* Implement a new JIT for Arm devices

* Auto-format

* Make a lot of Assembler members read-only

* More read-only

* Fix more warnings

* ObjectDisposedException.ThrowIf

* New JIT cache for platforms that enforce W^X, currently unused

* Remove unused using

* Fix assert

* Pass memory manager type around

* Safe memory manager mode support + other improvements

* Actual safe memory manager mode masking support

* PR feedback
This commit is contained in:
gdkchan
2024-01-20 11:11:28 -03:00
committed by GitHub
parent 331c07807f
commit 427b7d06b5
135 changed files with 43322 additions and 24 deletions

View File

@@ -0,0 +1,789 @@
using ARMeilleure.Common;
using ARMeilleure.Memory;
using Ryujinx.Cpu.LightningJit.CodeGen;
using Ryujinx.Cpu.LightningJit.CodeGen.Arm64;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Numerics;
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
/// <summary>
/// Translates a decoded multi-block region of guest Arm32 (Arm or Thumb) code
/// into a single host Arm64 function.
/// </summary>
static class Compiler
{
    // Guest GPRs eligible for register allocation (bits 0-14, i.e. r0-r14).
    public const uint UsableGprsMask = 0x7fff;
    // Guest FP/SIMD registers eligible for register allocation (bits 0-15).
    public const uint UsableFpSimdMask = 0xffff;
    // NZCV flag bits, stored in the top 4 bits of the guest flags word.
    public const uint UsablePStateMask = 0xf0000000;
    // Largest instruction-count offset encodable in a 26-bit branch immediate.
    private const int Encodable26BitsOffsetLimit = 0x2000000;

    // State shared by the post-codegen stages: pending-branch rewriting and
    // prologue/epilogue emission.
    private readonly struct Context
    {
        public readonly CodeWriter Writer;
        public readonly RegisterAllocator RegisterAllocator;
        public readonly MemoryManagerType MemoryManagerType;
        public readonly TailMerger TailMerger;
        public readonly AddressTable<ulong> FuncTable;
        public readonly IntPtr DispatchStubPointer;
        private readonly RegisterSaveRestore _registerSaveRestore;
        private readonly IntPtr _pageTablePointer;

        public Context(
            CodeWriter writer,
            RegisterAllocator registerAllocator,
            MemoryManagerType mmType,
            TailMerger tailMerger,
            AddressTable<ulong> funcTable,
            RegisterSaveRestore registerSaveRestore,
            IntPtr dispatchStubPointer,
            IntPtr pageTablePointer)
        {
            Writer = writer;
            RegisterAllocator = registerAllocator;
            MemoryManagerType = mmType;
            TailMerger = tailMerger;
            FuncTable = funcTable;
            _registerSaveRestore = registerSaveRestore;
            DispatchStubPointer = dispatchStubPointer;
            _pageTablePointer = pageTablePointer;
        }

        // Stack bytes reserved by the prologue (delegated to RegisterSaveRestore).
        public readonly int GetReservedStackOffset()
        {
            return _registerSaveRestore.GetReservedStackOffset();
        }

        // Emits the prologue into a temporary writer and splices it in at the
        // given position; the prologue content is only known after codegen,
        // once the set of used registers is final.
        public readonly void WritePrologueAt(int instructionPointer)
        {
            CodeWriter writer = new();
            Assembler asm = new(writer);
            _registerSaveRestore.WritePrologue(ref asm);
            // If needed, set up the fixed registers with the pointers we will use.
            // First one is the context pointer (passed as first argument),
            // second one is the page table or address space base, it is at a fixed memory location and considered constant.
            if (RegisterAllocator.FixedContextRegister != 0)
            {
                asm.Mov(Register(RegisterAllocator.FixedContextRegister), Register(0));
            }
            asm.Mov(Register(RegisterAllocator.FixedPageTableRegister), (ulong)_pageTablePointer);
            LoadFromContext(ref asm);
            // Write the prologue at the specified position in our writer.
            Writer.WriteInstructionsAt(instructionPointer, writer);
        }

        // Emits the epilogue (register restore) without writing guest state
        // back to the context; callers store to context separately when needed.
        public readonly void WriteEpilogueWithoutContext()
        {
            Assembler asm = new(Writer);
            _registerSaveRestore.WriteEpilogue(ref asm);
        }

        // Loads all used guest GPRs, FP/SIMD registers and flags from the native context.
        public void LoadFromContext()
        {
            Assembler asm = new(Writer);
            LoadFromContext(ref asm);
        }

        private void LoadFromContext(ref Assembler asm)
        {
            LoadGprFromContext(ref asm, RegisterAllocator.UsedGprsMask & UsableGprsMask, NativeContextOffsets.GprBaseOffset);
            LoadFpSimdFromContext(ref asm, RegisterAllocator.UsedFpSimdMask & UsableFpSimdMask, NativeContextOffsets.FpSimdBaseOffset);
            LoadPStateFromContext(ref asm, UsablePStateMask, NativeContextOffsets.FlagsBaseOffset);
        }

        // Stores all used guest GPRs, FP/SIMD registers and flags back to the native context.
        public void StoreToContext()
        {
            Assembler asm = new(Writer);
            StoreToContext(ref asm);
        }

        private void StoreToContext(ref Assembler asm)
        {
            StoreGprToContext(ref asm, RegisterAllocator.UsedGprsMask & UsableGprsMask, NativeContextOffsets.GprBaseOffset);
            StoreFpSimdToContext(ref asm, RegisterAllocator.UsedFpSimdMask & UsableFpSimdMask, NativeContextOffsets.FpSimdBaseOffset);
            StorePStateToContext(ref asm, UsablePStateMask, NativeContextOffsets.FlagsBaseOffset);
        }

        private void LoadGprFromContext(ref Assembler asm, uint mask, int baseOffset)
        {
            Operand contextPtr = Register(RegisterAllocator.FixedContextRegister);
            while (mask != 0)
            {
                int reg = BitOperations.TrailingZeroCount(mask);
                int offset = baseOffset + reg * 8;
                // Use a single LDP when the next register is also needed and the
                // offset still fits the paired-load encoding.
                if (reg < 31 && (mask & (2u << reg)) != 0 && offset < RegisterSaveRestore.Encodable9BitsOffsetLimit)
                {
                    mask &= ~(3u << reg);
                    asm.LdpRiUn(Register(reg), Register(reg + 1), contextPtr, offset);
                }
                else
                {
                    mask &= ~(1u << reg);
                    asm.LdrRiUn(Register(reg), contextPtr, offset);
                }
            }
        }

        private void LoadFpSimdFromContext(ref Assembler asm, uint mask, int baseOffset)
        {
            Operand contextPtr = Register(RegisterAllocator.FixedContextRegister);
            while (mask != 0)
            {
                int reg = BitOperations.TrailingZeroCount(mask);
                int offset = baseOffset + reg * 16; // 16 bytes per 128-bit register.
                mask &= ~(1u << reg);
                asm.LdrRiUn(Register(reg, OperandType.V128), contextPtr, offset);
            }
        }

        private void LoadPStateFromContext(ref Assembler asm, uint mask, int baseOffset)
        {
            if (mask == 0)
            {
                return;
            }
            Operand contextPtr = Register(RegisterAllocator.FixedContextRegister);
            using ScopedRegister tempRegister = RegisterAllocator.AllocateTempGprRegisterScoped();
            // The flags word stores NZCV in the same bit positions that MSR NZCV
            // expects (top 4 bits), so no shifting is needed.
            asm.LdrRiUn(tempRegister.Operand, contextPtr, baseOffset);
            asm.MsrNzcv(tempRegister.Operand);
        }

        private void StoreGprToContext(ref Assembler asm, uint mask, int baseOffset)
        {
            Operand contextPtr = Register(RegisterAllocator.FixedContextRegister);
            while (mask != 0)
            {
                int reg = BitOperations.TrailingZeroCount(mask);
                int offset = baseOffset + reg * 8;
                // Use a single STP when the next register is also needed and the
                // offset still fits the paired-store encoding.
                if (reg < 31 && (mask & (2u << reg)) != 0 && offset < RegisterSaveRestore.Encodable9BitsOffsetLimit)
                {
                    mask &= ~(3u << reg);
                    asm.StpRiUn(Register(reg), Register(reg + 1), contextPtr, offset);
                }
                else
                {
                    mask &= ~(1u << reg);
                    asm.StrRiUn(Register(reg), contextPtr, offset);
                }
            }
        }

        private void StoreFpSimdToContext(ref Assembler asm, uint mask, int baseOffset)
        {
            Operand contextPtr = Register(RegisterAllocator.FixedContextRegister);
            while (mask != 0)
            {
                int reg = BitOperations.TrailingZeroCount(mask);
                int offset = baseOffset + reg * 16; // 16 bytes per 128-bit register.
                mask &= ~(1u << reg);
                asm.StrRiUn(Register(reg, OperandType.V128), contextPtr, offset);
            }
        }

        private void StorePStateToContext(ref Assembler asm, uint mask, int baseOffset)
        {
            if (mask == 0)
            {
                return;
            }
            Operand contextPtr = Register(RegisterAllocator.FixedContextRegister);
            using ScopedRegister tempRegister = RegisterAllocator.AllocateTempGprRegisterScoped();
            using ScopedRegister tempRegister2 = RegisterAllocator.AllocateTempGprRegisterScoped();
            // Merge the current host NZCV into the top 4 bits of the stored flags
            // word, preserving the lower 28 bits of existing guest state.
            asm.LdrRiUn(tempRegister.Operand, contextPtr, baseOffset);
            asm.MrsNzcv(tempRegister2.Operand);
            asm.And(tempRegister.Operand, tempRegister.Operand, InstEmitCommon.Const(0xfffffff));
            asm.Orr(tempRegister.Operand, tempRegister.Operand, tempRegister2.Operand);
            asm.StrRiUn(tempRegister.Operand, contextPtr, baseOffset);
        }
    }

    /// <summary>
    /// Compiles the guest function starting at <paramref name="address"/> into Arm64 code.
    /// </summary>
    /// <param name="cpuPreset">CPU preset passed to the decoder</param>
    /// <param name="memoryManager">Memory manager used to read guest code and for address translation</param>
    /// <param name="address">Guest address of the function entry point</param>
    /// <param name="funcTable">Table mapping guest addresses to translated function pointers</param>
    /// <param name="dispatchStubPtr">Pointer to the dispatch stub used for calls to untranslated targets</param>
    /// <param name="isThumb">True if the code at <paramref name="address"/> is Thumb, false if Arm</param>
    /// <returns>The compiled host code together with the guest byte size that was translated</returns>
    public static CompiledFunction Compile(CpuPreset cpuPreset, IMemoryManager memoryManager, ulong address, AddressTable<ulong> funcTable, IntPtr dispatchStubPtr, bool isThumb)
    {
        MultiBlock multiBlock = Decoder<InstEmit>.DecodeMulti(cpuPreset, memoryManager, address, isThumb);
        // Maps guest addresses to positions in the output code, used later to
        // resolve pending branches that target code inside this function.
        Dictionary<ulong, int> targets = new();
        CodeWriter writer = new();
        RegisterAllocator regAlloc = new();
        Assembler asm = new(writer);
        CodeGenContext cgContext = new(writer, asm, regAlloc, memoryManager.Type, isThumb);
        ArmCondition lastCondition = ArmCondition.Al;
        int lastConditionIp = 0;
        // Required for load/store to context.
        regAlloc.EnsureTempGprRegisters(2);
        ulong pc = address;
        for (int blockIndex = 0; blockIndex < multiBlock.Blocks.Count; blockIndex++)
        {
            Block block = multiBlock.Blocks[blockIndex];
            Debug.Assert(block.Address == pc);
            targets.Add(pc, writer.InstructionPointer);
            for (int index = 0; index < block.Instructions.Count; index++)
            {
                InstInfo instInfo = block.Instructions[index];
                if (index < block.Instructions.Count - 1)
                {
                    cgContext.SetNextInstruction(block.Instructions[index + 1]);
                }
                else
                {
                    cgContext.SetNextInstruction(default);
                }
                SetConditionalStart(cgContext, ref lastCondition, ref lastConditionIp, instInfo.Name, instInfo.Flags, instInfo.Encoding);
                if (block.IsLoopEnd && index == block.Instructions.Count - 1)
                {
                    // If this is a loop, the code might run for a long time uninterrupted.
                    // We insert a "sync point" here to ensure the loop can be interrupted if needed.
                    cgContext.AddPendingSyncPoint();
                    asm.B(0); // Placeholder, patched later by RewriteHostCall.
                }
                cgContext.SetPc((uint)pc);
                instInfo.EmitFunc(cgContext, instInfo.Encoding);
                if (cgContext.ConsumeNzcvModified())
                {
                    // The emitted code modified NZCV, so close any active
                    // conditional-skip region before it becomes stale.
                    ForceConditionalEnd(cgContext, ref lastCondition, lastConditionIp);
                }
                cgContext.UpdateItState();
                pc += instInfo.Flags.HasFlag(InstFlags.Thumb16) ? 2UL : 4UL;
            }
            if (Decoder<InstEmit>.WritesToPC(block.Instructions[^1].Encoding, block.Instructions[^1].Name, block.Instructions[^1].Flags, block.IsThumb))
            {
                // If the block ends with a PC register write, then we have a branch from register.
                InstEmitCommon.SetThumbFlag(cgContext, regAlloc.RemapGprRegister(RegisterUtils.PcRegister));
                cgContext.AddPendingIndirectBranch(block.Instructions[^1].Name, RegisterUtils.PcRegister);
                asm.B(0); // Placeholder, patched later during branch rewriting.
            }
            ForceConditionalEnd(cgContext, ref lastCondition, lastConditionIp);
        }
        // Register save/restore is computed after codegen, once the full set of
        // used registers is known.
        RegisterSaveRestore rsr = new(
            regAlloc.UsedGprsMask & AbiConstants.GprCalleeSavedRegsMask,
            regAlloc.UsedFpSimdMask & AbiConstants.FpSimdCalleeSavedRegsMask,
            OperandType.FP64,
            multiBlock.HasHostCall,
            multiBlock.HasHostCall ? CalculateStackSizeForCallSpill(regAlloc.UsedGprsMask, regAlloc.UsedFpSimdMask, UsablePStateMask) : 0);
        TailMerger tailMerger = new();
        Context context = new(writer, regAlloc, memoryManager.Type, tailMerger, funcTable, rsr, dispatchStubPtr, memoryManager.PageTablePointer);
        InstInfo lastInstruction = multiBlock.Blocks[^1].Instructions[^1];
        bool lastInstIsConditional = GetCondition(lastInstruction, isThumb) != ArmCondition.Al;
        // If execution can fall through the end of the region, emit a tail call
        // to the next guest address.
        if (multiBlock.IsTruncated || lastInstIsConditional || lastInstruction.Name.IsCall() || IsConditionalBranch(lastInstruction))
        {
            WriteTailCallConstant(context, ref asm, (uint)pc);
        }
        IEnumerable<PendingBranch> pendingBranches = cgContext.GetPendingBranches();
        foreach (PendingBranch pendingBranch in pendingBranches)
        {
            RewriteBranchInstructionWithTarget(context, pendingBranch, targets);
        }
        tailMerger.WriteReturn(writer, context.WriteEpilogueWithoutContext);
        context.WritePrologueAt(0);
        return new(writer.AsByteSpan(), (int)(pc - address));
    }

    // Computes the stack bytes needed to spill caller-saved state across a host call.
    private static int CalculateStackSizeForCallSpill(uint gprUseMask, uint fpSimdUseMask, uint pStateUseMask)
    {
        // Note that we don't discard callee saved FP/SIMD register because only the lower 64 bits is callee saved,
        // so if the function is using the full register, that won't be enough.
        // We could do better, but it's likely not worth it since this case happens very rarely in practice.
        return BitOperations.PopCount(gprUseMask & ~AbiConstants.GprCalleeSavedRegsMask) * 8 +
            BitOperations.PopCount(fpSimdUseMask) * 16 +
            (pStateUseMask != 0 ? 8 : 0);
    }

    // Opens a conditional-skip region when the current instruction's condition
    // differs from the active one: emits a placeholder branch with the inverted
    // condition that will later be patched to jump past the conditional code.
    private static void SetConditionalStart(
        CodeGenContext context,
        ref ArmCondition condition,
        ref int instructionPointer,
        InstName name,
        InstFlags flags,
        uint encoding)
    {
        if (!context.ConsumeItCondition(out ArmCondition currentCond))
        {
            currentCond = GetCondition(name, flags, encoding, context.IsThumb);
        }
        if (currentCond != condition)
        {
            WriteConditionalEnd(context, condition, instructionPointer);
            condition = currentCond;
            if (currentCond != ArmCondition.Al)
            {
                instructionPointer = context.CodeWriter.InstructionPointer;
                context.Arm64Assembler.B(currentCond.Invert(), 0); // Offset patched by WriteConditionalEnd.
            }
        }
    }

    // True if the instruction is an Arm-mode conditional B (condition in bits 28-31).
    private static bool IsConditionalBranch(in InstInfo instInfo)
    {
        return instInfo.Name == InstName.B && (ArmCondition)(instInfo.Encoding >> 28) != ArmCondition.Al;
    }

    private static ArmCondition GetCondition(in InstInfo instInfo, bool isThumb)
    {
        return GetCondition(instInfo.Name, instInfo.Flags, instInfo.Encoding, isThumb);
    }

    private static ArmCondition GetCondition(InstName name, InstFlags flags, uint encoding, bool isThumb)
    {
        // For branch, we handle conditional execution on the instruction itself.
        bool hasCond = flags.HasFlag(InstFlags.Cond) && !CanHandleConditionalInstruction(name, encoding, isThumb);
        return hasCond ? (ArmCondition)(encoding >> 28) : ArmCondition.Al;
    }

    // True if the instruction's emitter handles the condition itself, so no
    // conditional-skip region is needed around it.
    private static bool CanHandleConditionalInstruction(InstName name, uint encoding, bool isThumb)
    {
        if (name == InstName.B)
        {
            return true;
        }
        // We can use CSEL for conditional MOV from registers, as long the instruction is not setting flags.
        // We don't handle thumb right now because the condition comes from the IT block which would be more complicated to handle.
        if (name == InstName.MovR && !isThumb && (encoding & (1u << 20)) == 0)
        {
            return true;
        }
        return false;
    }

    // Closes any active conditional-skip region and resets the condition to AL.
    private static void ForceConditionalEnd(CodeGenContext context, ref ArmCondition condition, int instructionPointer)
    {
        WriteConditionalEnd(context, condition, instructionPointer);
        condition = ArmCondition.Al;
    }

    // Patches the placeholder conditional branch emitted by SetConditionalStart
    // with the now-known forward offset (19-bit immediate at bits 5-23).
    private static void WriteConditionalEnd(CodeGenContext context, ArmCondition condition, int instructionPointer)
    {
        if (condition != ArmCondition.Al)
        {
            int delta = context.CodeWriter.InstructionPointer - instructionPointer;
            uint branchInst = context.CodeWriter.ReadInstructionAt(instructionPointer) | (((uint)delta & 0x7ffff) << 5);
            Debug.Assert((int)((branchInst & ~0x1fu) << 8) >> 11 == delta * 4);
            context.CodeWriter.WriteInstructionAt(instructionPointer, branchInst);
        }
    }

    // Dispatches a pending branch to the rewriter matching its type.
    private static void RewriteBranchInstructionWithTarget(in Context context, in PendingBranch pendingBranch, Dictionary<ulong, int> targets)
    {
        switch (pendingBranch.BranchType)
        {
            case BranchType.Branch:
                RewriteBranchInstructionWithTarget(context, pendingBranch.Name, pendingBranch.TargetAddress, pendingBranch.WriterPointer, targets);
                break;
            case BranchType.Call:
                RewriteCallInstructionWithTarget(context, pendingBranch.TargetAddress, pendingBranch.NextAddress, pendingBranch.WriterPointer);
                break;
            case BranchType.IndirectBranch:
                RewriteIndirectBranchInstructionWithTarget(context, pendingBranch.Name, pendingBranch.TargetAddress, pendingBranch.WriterPointer);
                break;
            case BranchType.TableBranchByte:
            case BranchType.TableBranchHalfword:
                RewriteTableBranchInstructionWithTarget(
                    context,
                    pendingBranch.BranchType == BranchType.TableBranchHalfword,
                    pendingBranch.TargetAddress,
                    pendingBranch.NextAddress,
                    pendingBranch.WriterPointer);
                break;
            case BranchType.IndirectCall:
                RewriteIndirectCallInstructionWithTarget(context, pendingBranch.TargetAddress, pendingBranch.NextAddress, pendingBranch.WriterPointer);
                break;
            case BranchType.SyncPoint:
            case BranchType.SoftwareInterrupt:
            case BranchType.ReadCntpct:
                RewriteHostCall(context, pendingBranch.Name, pendingBranch.BranchType, pendingBranch.TargetAddress, pendingBranch.NextAddress, pendingBranch.WriterPointer);
                break;
            default:
                Debug.Fail($"Invalid branch type '{pendingBranch.BranchType}'");
                break;
        }
    }

    // Resolves a direct branch: patches the placeholder to jump to the in-function
    // target when known and in range, otherwise emits an out-of-line tail call.
    private static void RewriteBranchInstructionWithTarget(in Context context, InstName name, uint targetAddress, int branchIndex, Dictionary<ulong, int> targets)
    {
        CodeWriter writer = context.Writer;
        Assembler asm = new(writer);
        int delta;
        int targetIndex;
        uint encoding = writer.ReadInstructionAt(branchIndex);
        if (encoding == 0x14000000)
        {
            // Unconditional branch.
            if (targets.TryGetValue(targetAddress, out targetIndex))
            {
                delta = targetIndex - branchIndex;
                if (delta >= -Encodable26BitsOffsetLimit && delta < Encodable26BitsOffsetLimit)
                {
                    writer.WriteInstructionAt(branchIndex, encoding | (uint)(delta & 0x3ffffff));
                    return;
                }
            }
            // Target not in this function (or out of range): branch to a tail
            // call stub emitted at the current end of the code.
            targetIndex = writer.InstructionPointer;
            delta = targetIndex - branchIndex;
            writer.WriteInstructionAt(branchIndex, encoding | (uint)(delta & 0x3ffffff));
            WriteTailCallConstant(context, ref asm, targetAddress);
        }
        else
        {
            // Conditional branch.
            uint branchMask = 0x7ffff; // 19-bit immediate.
            int branchMax = (int)(branchMask + 1) / 2;
            if (targets.TryGetValue(targetAddress, out targetIndex))
            {
                delta = targetIndex - branchIndex;
                if (delta >= -branchMax && delta < branchMax)
                {
                    writer.WriteInstructionAt(branchIndex, encoding | (uint)((delta & branchMask) << 5));
                    return;
                }
            }
            targetIndex = writer.InstructionPointer;
            delta = targetIndex - branchIndex;
            if (delta >= -branchMax && delta < branchMax)
            {
                writer.WriteInstructionAt(branchIndex, encoding | (uint)((delta & branchMask) << 5));
                WriteTailCallConstant(context, ref asm, targetAddress);
            }
            else
            {
                // If the branch target is too far away, we use a regular unconditional branch
                // instruction instead which has a much higher range.
                // We branch directly to the end of the function, where we put the conditional branch,
                // and then branch back to the next instruction or return the branch target depending
                // on the branch being taken or not.
                uint branchInst = 0x14000000u | ((uint)delta & 0x3ffffff);
                Debug.Assert((int)(branchInst << 6) >> 4 == delta * 4);
                writer.WriteInstructionAt(branchIndex, branchInst);
                int movedBranchIndex = writer.InstructionPointer;
                writer.WriteInstruction(0u); // Placeholder
                asm.B((branchIndex + 1 - writer.InstructionPointer) * 4);
                delta = writer.InstructionPointer - movedBranchIndex;
                writer.WriteInstructionAt(movedBranchIndex, encoding | (uint)((delta & branchMask) << 5));
                WriteTailCallConstant(context, ref asm, targetAddress);
            }
        }
        Debug.Assert(name == InstName.B || name == InstName.Cbnz, $"Unknown branch instruction \"{name}\".");
    }

    // Resolves a direct call: sets LR to the return address, calls through the
    // function table/dispatch stub, then branches back past the placeholder.
    private static void RewriteCallInstructionWithTarget(in Context context, uint targetAddress, uint nextAddress, int branchIndex)
    {
        CodeWriter writer = context.Writer;
        Assembler asm = new(writer);
        WriteBranchToCurrentPosition(context, branchIndex);
        asm.Mov(context.RegisterAllocator.RemapGprRegister(RegisterUtils.LrRegister), nextAddress);
        context.StoreToContext();
        InstEmitFlow.WriteCallWithGuestAddress(
            writer,
            ref asm,
            context.RegisterAllocator,
            context.TailMerger,
            context.WriteEpilogueWithoutContext,
            context.FuncTable,
            context.DispatchStubPointer,
            context.GetReservedStackOffset(),
            nextAddress,
            InstEmitCommon.Const((int)targetAddress));
        context.LoadFromContext();
        // Branch back to the next instruction (after the call).
        asm.B((branchIndex + 1 - writer.InstructionPointer) * 4);
    }

    // Resolves a branch-to-register: returns directly for return-like patterns
    // (BX LR, LDM with PC), otherwise tail-calls through the dispatcher.
    private static void RewriteIndirectBranchInstructionWithTarget(in Context context, InstName name, uint targetRegister, int branchIndex)
    {
        CodeWriter writer = context.Writer;
        Assembler asm = new(writer);
        WriteBranchToCurrentPosition(context, branchIndex);
        using ScopedRegister target = context.RegisterAllocator.AllocateTempGprRegisterScoped();
        // Clear bit 0, which selects the Arm/Thumb instruction set rather than
        // being part of the branch target address.
        asm.And(target.Operand, context.RegisterAllocator.RemapGprRegister((int)targetRegister), InstEmitCommon.Const(~1));
        context.StoreToContext();
        if ((name == InstName.Bx && targetRegister == RegisterUtils.LrRegister) ||
            name == InstName.Ldm ||
            name == InstName.Ldmda ||
            name == InstName.Ldmdb ||
            name == InstName.Ldmib)
        {
            // Arm32 does not have a return instruction, instead returns are implemented
            // either using BX LR (for leaf functions), or POP { ... PC }.
            asm.Mov(Register(0), target.Operand);
            context.TailMerger.AddUnconditionalReturn(writer, asm);
        }
        else
        {
            InstEmitFlow.WriteCallWithGuestAddress(
                writer,
                ref asm,
                context.RegisterAllocator,
                context.TailMerger,
                context.WriteEpilogueWithoutContext,
                context.FuncTable,
                context.DispatchStubPointer,
                context.GetReservedStackOffset(),
                0u,
                target.Operand,
                isTail: true);
        }
    }

    // Resolves a TBB/TBH table branch: loads the byte or halfword entry, scales
    // it, adds it to PC and tail-calls through the dispatcher.
    private static void RewriteTableBranchInstructionWithTarget(in Context context, bool halfword, uint rn, uint rm, int branchIndex)
    {
        CodeWriter writer = context.Writer;
        Assembler asm = new(writer);
        WriteBranchToCurrentPosition(context, branchIndex);
        using ScopedRegister target = context.RegisterAllocator.AllocateTempGprRegisterScoped();
        // Table entry address = rn + rm * entry size (1 or 2 bytes).
        asm.Add(
            target.Operand,
            context.RegisterAllocator.RemapGprRegister((int)rn),
            context.RegisterAllocator.RemapGprRegister((int)rm),
            ArmShiftType.Lsl,
            halfword ? 1 : 0);
        InstEmitMemory.WriteAddressTranslation(context.MemoryManagerType, context.RegisterAllocator, asm, target.Operand, target.Operand);
        if (halfword)
        {
            asm.LdrhRiUn(target.Operand, target.Operand, 0);
        }
        else
        {
            asm.LdrbRiUn(target.Operand, target.Operand, 0);
        }
        // Branch target = PC + entry * 2.
        asm.Add(target.Operand, context.RegisterAllocator.RemapGprRegister(RegisterUtils.PcRegister), target.Operand, ArmShiftType.Lsl, 1);
        context.StoreToContext();
        InstEmitFlow.WriteCallWithGuestAddress(
            writer,
            ref asm,
            context.RegisterAllocator,
            context.TailMerger,
            context.WriteEpilogueWithoutContext,
            context.FuncTable,
            context.DispatchStubPointer,
            context.GetReservedStackOffset(),
            0u,
            target.Operand,
            isTail: true);
    }

    // Resolves a call-to-register (BLX reg): sets LR, calls through the
    // dispatcher, then branches back past the placeholder.
    private static void RewriteIndirectCallInstructionWithTarget(in Context context, uint targetRegister, uint nextAddress, int branchIndex)
    {
        CodeWriter writer = context.Writer;
        Assembler asm = new(writer);
        WriteBranchToCurrentPosition(context, branchIndex);
        using ScopedRegister target = context.RegisterAllocator.AllocateTempGprRegisterScoped();
        // Clear bit 0 (instruction set select) from the target address.
        asm.And(target.Operand, context.RegisterAllocator.RemapGprRegister((int)targetRegister), InstEmitCommon.Const(~1));
        asm.Mov(context.RegisterAllocator.RemapGprRegister(RegisterUtils.LrRegister), nextAddress);
        context.StoreToContext();
        InstEmitFlow.WriteCallWithGuestAddress(
            writer,
            ref asm,
            context.RegisterAllocator,
            context.TailMerger,
            context.WriteEpilogueWithoutContext,
            context.FuncTable,
            context.DispatchStubPointer,
            context.GetReservedStackOffset(),
            nextAddress & ~1u,
            target.Operand);
        context.LoadFromContext();
        // Branch back to the next instruction (after the call).
        asm.B((branchIndex + 1 - writer.InstructionPointer) * 4);
    }

    // Resolves a host-call placeholder (sync point, SWI, counter read): patches
    // the placeholder to branch here, emits the host call, then branches back.
    private static void RewriteHostCall(in Context context, InstName name, BranchType type, uint imm, uint pc, int branchIndex)
    {
        CodeWriter writer = context.Writer;
        Assembler asm = new(writer);
        uint encoding = writer.ReadInstructionAt(branchIndex);
        int targetIndex = writer.InstructionPointer;
        int delta = targetIndex - branchIndex;
        writer.WriteInstructionAt(branchIndex, encoding | (uint)(delta & 0x3ffffff));
        switch (type)
        {
            case BranchType.SyncPoint:
                InstEmitSystem.WriteSyncPoint(context.Writer, context.RegisterAllocator, context.TailMerger, context.GetReservedStackOffset());
                break;
            case BranchType.SoftwareInterrupt:
                context.StoreToContext();
                switch (name)
                {
                    case InstName.Bkpt:
                        InstEmitSystem.WriteBkpt(context.Writer, context.RegisterAllocator, context.TailMerger, context.GetReservedStackOffset(), pc, imm);
                        break;
                    case InstName.Svc:
                        InstEmitSystem.WriteSvc(context.Writer, context.RegisterAllocator, context.TailMerger, context.GetReservedStackOffset(), pc, imm);
                        break;
                    case InstName.Udf:
                        InstEmitSystem.WriteUdf(context.Writer, context.RegisterAllocator, context.TailMerger, context.GetReservedStackOffset(), pc, imm);
                        break;
                }
                context.LoadFromContext();
                break;
            case BranchType.ReadCntpct:
                InstEmitSystem.WriteReadCntpct(context.Writer, context.RegisterAllocator, context.GetReservedStackOffset(), (int)imm, (int)pc);
                break;
            default:
                Debug.Fail($"Invalid branch type '{type}'");
                break;
        }
        // Branch back to the next instruction.
        asm.B((branchIndex + 1 - writer.InstructionPointer) * 4);
    }

    // Patches the placeholder at branchIndex to branch to the current position.
    // If the placeholder is immediately before it, the branch is unnecessary
    // and simply removed.
    private static void WriteBranchToCurrentPosition(in Context context, int branchIndex)
    {
        CodeWriter writer = context.Writer;
        int targetIndex = writer.InstructionPointer;
        if (branchIndex + 1 == targetIndex)
        {
            writer.RemoveLastInstruction();
        }
        else
        {
            uint encoding = writer.ReadInstructionAt(branchIndex);
            int delta = targetIndex - branchIndex;
            writer.WriteInstructionAt(branchIndex, encoding | (uint)(delta & 0x3ffffff));
        }
    }

    // Emits a tail call to a constant guest address: stores guest state and
    // exits through the function table/dispatch stub.
    private static void WriteTailCallConstant(in Context context, ref Assembler asm, uint address)
    {
        context.StoreToContext();
        InstEmitFlow.WriteCallWithGuestAddress(
            context.Writer,
            ref asm,
            context.RegisterAllocator,
            context.TailMerger,
            context.WriteEpilogueWithoutContext,
            context.FuncTable,
            context.DispatchStubPointer,
            context.GetReservedStackOffset(),
            0u,
            InstEmitCommon.Const((int)address),
            isTail: true);
    }

    // Creates an integer register operand (64-bit by default).
    private static Operand Register(int register, OperandType type = OperandType.I64)
    {
        return new Operand(register, RegisterType.Integer, type);
    }

    // Intentionally empty: statistics hook, no-op in this implementation.
    public static void PrintStats()
    {
    }
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,87 @@
using Ryujinx.Cpu.LightningJit.CodeGen;
using Ryujinx.Cpu.LightningJit.CodeGen.Arm64;
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
/// <summary>
/// Emitters for the Arm32 unsigned sum of absolute differences instructions
/// (USAD8, USADA8), lowered to scalar Arm64 code.
/// </summary>
static class InstEmitAbsDiff
{
    /// <summary>
    /// USAD8: rd = sum over the four byte lanes of |rn[lane] - rm[lane]|.
    /// </summary>
    public static void Usad8(CodeGenContext context, uint rd, uint rn, uint rm)
    {
        using ScopedRegister sum = context.RegisterAllocator.AllocateTempGprRegisterScoped();
        using ScopedRegister diff = context.RegisterAllocator.AllocateTempGprRegisterScoped();
        using ScopedRegister laneN = context.RegisterAllocator.AllocateTempGprRegisterScoped();
        using ScopedRegister laneM = context.RegisterAllocator.AllocateTempGprRegisterScoped();
        Operand rdOperand = InstEmitCommon.GetOutputGpr(context, rd);
        Operand rnOperand = InstEmitCommon.GetInputGpr(context, rn);
        Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);
        for (int lane = 0; lane < 4; lane++)
        {
            // Extract one byte from each source register.
            context.Arm64Assembler.Ubfx(laneN.Operand, rnOperand, lane * 8, 8);
            context.Arm64Assembler.Ubfx(laneM.Operand, rmOperand, lane * 8, 8);
            // Lane 0 seeds the accumulator directly; later lanes go through a scratch register.
            Operand dest = lane == 0 ? sum.Operand : diff.Operand;
            context.Arm64Assembler.Sub(dest, laneN.Operand, laneM.Operand);
            EmitAbs(context, dest);
            if (lane == 3)
            {
                // Final lane writes the total straight into the destination register.
                context.Arm64Assembler.Add(rdOperand, sum.Operand, dest);
            }
            else if (lane != 0)
            {
                context.Arm64Assembler.Add(sum.Operand, sum.Operand, dest);
            }
        }
    }

    /// <summary>
    /// USADA8: rd = ra + sum over the four byte lanes of |rn[lane] - rm[lane]|.
    /// </summary>
    public static void Usada8(CodeGenContext context, uint rd, uint rn, uint rm, uint ra)
    {
        using ScopedRegister sum = context.RegisterAllocator.AllocateTempGprRegisterScoped();
        using ScopedRegister diff = context.RegisterAllocator.AllocateTempGprRegisterScoped();
        using ScopedRegister laneN = context.RegisterAllocator.AllocateTempGprRegisterScoped();
        using ScopedRegister laneM = context.RegisterAllocator.AllocateTempGprRegisterScoped();
        Operand rdOperand = InstEmitCommon.GetOutputGpr(context, rd);
        Operand rnOperand = InstEmitCommon.GetInputGpr(context, rn);
        Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);
        Operand raOperand = InstEmitCommon.GetInputGpr(context, ra);
        for (int lane = 0; lane < 4; lane++)
        {
            // Extract one byte from each source register.
            context.Arm64Assembler.Ubfx(laneN.Operand, rnOperand, lane * 8, 8);
            context.Arm64Assembler.Ubfx(laneM.Operand, rmOperand, lane * 8, 8);
            Operand dest = lane == 0 ? sum.Operand : diff.Operand;
            context.Arm64Assembler.Sub(dest, laneN.Operand, laneM.Operand);
            EmitAbs(context, dest);
            if (lane != 0)
            {
                context.Arm64Assembler.Add(sum.Operand, sum.Operand, dest);
            }
        }
        // Fold in the accumulator operand last.
        context.Arm64Assembler.Add(rdOperand, sum.Operand, raOperand);
    }

    // Replaces value with its absolute value, branch-free:
    // r = (value + (value >> 31)) ^ (value >> 31), an add-then-invert
    // conditional negation driven by the sign bit.
    private static void EmitAbs(CodeGenContext context, Operand value)
    {
        using ScopedRegister scratch = context.RegisterAllocator.AllocateTempGprRegisterScoped();
        context.Arm64Assembler.Add(scratch.Operand, value, value, ArmShiftType.Asr, 31);
        context.Arm64Assembler.Eor(value, scratch.Operand, value, ArmShiftType.Asr, 31);
    }
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,103 @@
using Ryujinx.Cpu.LightningJit.CodeGen;
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
/// <summary>
/// Emitters for the Arm32 bit-manipulation instructions, each mapped onto the
/// directly corresponding Arm64 instruction.
/// </summary>
static class InstEmitBit
{
    /// <summary>BFC: clears bits lsb..msb of rd.</summary>
    public static void Bfc(CodeGenContext context, uint rd, uint lsb, uint msb)
    {
        // A range with msb < lsb is documented as "unpredictable"; emit nothing.
        if (msb >= lsb)
        {
            Operand dest = InstEmitCommon.GetOutputGpr(context, rd);
            context.Arm64Assembler.Bfc(dest, (int)lsb, (int)(msb - lsb + 1));
        }
    }

    /// <summary>BFI: inserts the low bits of rn into bits lsb..msb of rd.</summary>
    public static void Bfi(CodeGenContext context, uint rd, uint rn, uint lsb, uint msb)
    {
        // A range with msb < lsb is documented as "unpredictable"; emit nothing.
        if (msb >= lsb)
        {
            Operand dest = InstEmitCommon.GetOutputGpr(context, rd);
            Operand source = InstEmitCommon.GetInputGpr(context, rn);
            context.Arm64Assembler.Bfi(dest, source, (int)lsb, (int)(msb - lsb + 1));
        }
    }

    /// <summary>CLZ: counts leading zeros of rm into rd.</summary>
    public static void Clz(CodeGenContext context, uint rd, uint rm)
    {
        context.Arm64Assembler.Clz(InstEmitCommon.GetOutputGpr(context, rd), InstEmitCommon.GetInputGpr(context, rm));
    }

    /// <summary>RBIT: reverses the bit order of rm into rd.</summary>
    public static void Rbit(CodeGenContext context, uint rd, uint rm)
    {
        context.Arm64Assembler.Rbit(InstEmitCommon.GetOutputGpr(context, rd), InstEmitCommon.GetInputGpr(context, rm));
    }

    /// <summary>REV: reverses the byte order of rm into rd.</summary>
    public static void Rev(CodeGenContext context, uint rd, uint rm)
    {
        context.Arm64Assembler.Rev(InstEmitCommon.GetOutputGpr(context, rd), InstEmitCommon.GetInputGpr(context, rm));
    }

    /// <summary>REV16: reverses the bytes within each halfword of rm into rd.</summary>
    public static void Rev16(CodeGenContext context, uint rd, uint rm)
    {
        context.Arm64Assembler.Rev16(InstEmitCommon.GetOutputGpr(context, rd), InstEmitCommon.GetInputGpr(context, rm));
    }

    /// <summary>REVSH: byte-reverses the halfwords, then sign-extends the low halfword.</summary>
    public static void Revsh(CodeGenContext context, uint rd, uint rm)
    {
        Operand dest = InstEmitCommon.GetOutputGpr(context, rd);
        context.Arm64Assembler.Rev16(dest, InstEmitCommon.GetInputGpr(context, rm));
        context.Arm64Assembler.Sxth(dest, dest);
    }

    /// <summary>SBFX: signed bitfield extract from rn into rd.</summary>
    public static void Sbfx(CodeGenContext context, uint rd, uint rn, uint lsb, uint widthMinus1)
    {
        // A field extending past bit 31 is documented as "unpredictable"; emit nothing.
        if (lsb + widthMinus1 <= 31)
        {
            Operand dest = InstEmitCommon.GetOutputGpr(context, rd);
            Operand source = InstEmitCommon.GetInputGpr(context, rn);
            context.Arm64Assembler.Sbfx(dest, source, (int)lsb, (int)widthMinus1 + 1);
        }
    }

    /// <summary>UBFX: unsigned bitfield extract from rn into rd.</summary>
    public static void Ubfx(CodeGenContext context, uint rd, uint rn, uint lsb, uint widthMinus1)
    {
        // A field extending past bit 31 is documented as "unpredictable"; emit nothing.
        if (lsb + widthMinus1 <= 31)
        {
            Operand dest = InstEmitCommon.GetOutputGpr(context, rd);
            Operand source = InstEmitCommon.GetInputGpr(context, rn);
            context.Arm64Assembler.Ubfx(dest, source, (int)lsb, (int)widthMinus1 + 1);
        }
    }
}
}

View File

@@ -0,0 +1,263 @@
using Ryujinx.Cpu.LightningJit.CodeGen;
using Ryujinx.Cpu.LightningJit.CodeGen.Arm64;
using System;
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
static class InstEmitCommon
{
// Wraps a 32-bit immediate as a constant operand; the unsigned cast keeps the
// bit pattern of negative values unchanged.
public static Operand Const(int value)
{
    return new Operand(OperandKind.Constant, OperandType.I32, (uint)value);
}
// Returns the host operand mapped to a guest GPR read. Reads of the PC
// register first materialize the current instruction address into the
// mapped register.
public static Operand GetInputGpr(CodeGenContext context, uint register)
{
    Operand mapped = context.RegisterAllocator.RemapGprRegister((int)register);
    if (register == RegisterUtils.PcRegister)
    {
        context.Arm64Assembler.Mov(mapped, context.Pc);
    }
    return mapped;
}
// Returns the host operand mapped to a guest GPR write (no PC special-casing
// is needed for outputs).
public static Operand GetOutputGpr(CodeGenContext context, uint register) =>
    context.RegisterAllocator.RemapGprRegister((int)register);
// Reads the host NZCV register and shifts the four flags down so they occupy
// bits 0-3 of flagsOut.
public static void GetCurrentFlags(CodeGenContext context, Operand flagsOut)
{
    Assembler asm = context.Arm64Assembler;
    asm.MrsNzcv(flagsOut);
    asm.Lsr(flagsOut, flagsOut, Const(28));
}
// Writes flags held in bits 0-3 of nzcvFlags back to the host NZCV register,
// which expects them in bits 28-31.
public static void RestoreNzcvFlags(CodeGenContext context, Operand nzcvFlags)
{
    using ScopedRegister shifted = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    context.Arm64Assembler.Lsl(shifted.Operand, nzcvFlags, Const(28));
    context.Arm64Assembler.MsrNzcv(shifted.Operand);
}
// Arm64 zeros the carry and overflow flags for logical operations, but Arm32
// keeps them unchanged. This re-inserts the saved C and V values (bits 28-29
// of NZCV) after such an operation has zeroed them.
public static void RestoreCvFlags(CodeGenContext context, Operand cvFlags)
{
    using ScopedRegister flags = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    context.Arm64Assembler.MrsNzcv(flags.Operand);
    context.Arm64Assembler.Bfi(flags.Operand, cvFlags, 28, 2);
    context.Arm64Assembler.MsrNzcv(flags.Operand);
}
// Sets the Thumb flag (bit 5 of the flags word) in the native context via a
// read-modify-write sequence.
public static void SetThumbFlag(CodeGenContext context)
{
    Operand nativeCtx = InstEmitSystem.Register(context.RegisterAllocator.FixedContextRegister);
    using ScopedRegister flags = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    context.Arm64Assembler.LdrRiUn(flags.Operand, nativeCtx, NativeContextOffsets.FlagsBaseOffset);
    context.Arm64Assembler.Orr(flags.Operand, flags.Operand, Const(1 << 5));
    context.Arm64Assembler.StrRiUn(flags.Operand, nativeCtx, NativeContextOffsets.FlagsBaseOffset);
}
/// <summary>
/// Sets the Thumb (T) execution state bit from bit 0 of <paramref name="value"/>,
/// as used by interworking branches where the target address LSB selects the
/// instruction set.
/// </summary>
public static void SetThumbFlag(CodeGenContext context, Operand value)
{
    Operand ctx = InstEmitSystem.Register(context.RegisterAllocator.FixedContextRegister);

    using ScopedRegister tempRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    // Insert bit 0 of value into bit 5 (T flag) of the context flags field.
    context.Arm64Assembler.LdrRiUn(tempRegister.Operand, ctx, NativeContextOffsets.FlagsBaseOffset);
    context.Arm64Assembler.Bfi(tempRegister.Operand, value, 5, 1);
    context.Arm64Assembler.StrRiUn(tempRegister.Operand, ctx, NativeContextOffsets.FlagsBaseOffset);
}
/// <summary>
/// Clears the Thumb (T) execution state bit (bit 5 of the flags field in the
/// guest native context).
/// </summary>
public static void ClearThumbFlag(CodeGenContext context)
{
    Operand ctx = InstEmitSystem.Register(context.RegisterAllocator.FixedContextRegister);

    using ScopedRegister tempRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    // Read-modify-write of the context flags field: clear bit 5 (T flag).
    context.Arm64Assembler.LdrRiUn(tempRegister.Operand, ctx, NativeContextOffsets.FlagsBaseOffset);
    context.Arm64Assembler.Bfc(tempRegister.Operand, 5, 1);
    context.Arm64Assembler.StrRiUn(tempRegister.Operand, ctx, NativeContextOffsets.FlagsBaseOffset);
}
/// <summary>
/// Emits a unary operation over both signed 16-bit halves of Rn, packing the
/// low 16 bits of each per-element result back into Rd.
/// </summary>
/// <param name="elementAction">Emits the operation: (dest, sign-extended source element).</param>
public static void EmitSigned16BitPair(CodeGenContext context, uint rd, uint rn, Action<Operand, Operand> elementAction)
{
    using ScopedRegister tempD = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister tempD2 = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister tempN = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    Operand rdOperand = GetOutputGpr(context, rd);
    Operand rnOperand = GetInputGpr(context, rn);

    // Low element: sign-extend, operate, keep low 16 bits in tempD2.
    context.Arm64Assembler.Sxth(tempN.Operand, rnOperand);

    elementAction(tempD.Operand, tempN.Operand);

    context.Arm64Assembler.Uxth(tempD2.Operand, tempD.Operand);

    // High element: arithmetic shift acts as the sign extension, then operate.
    context.Arm64Assembler.Asr(tempN.Operand, rnOperand, Const(16));

    elementAction(tempD.Operand, tempN.Operand);

    // Rd = low result | (high result << 16).
    context.Arm64Assembler.Orr(rdOperand, tempD2.Operand, tempD.Operand, ArmShiftType.Lsl, 16);
}
/// <summary>
/// Emits a binary operation over matching signed 16-bit halves of Rn and Rm,
/// packing the low 16 bits of each per-element result back into Rd.
/// </summary>
/// <param name="elementAction">Emits the operation: (dest, element of Rn, element of Rm), both sign-extended.</param>
public static void EmitSigned16BitPair(CodeGenContext context, uint rd, uint rn, uint rm, Action<Operand, Operand, Operand> elementAction)
{
    using ScopedRegister tempD = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister tempD2 = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister tempN = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister tempM = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    Operand rdOperand = GetOutputGpr(context, rd);
    Operand rnOperand = GetInputGpr(context, rn);
    Operand rmOperand = GetInputGpr(context, rm);

    // Low elements: sign-extend both operands and operate.
    context.Arm64Assembler.Sxth(tempN.Operand, rnOperand);
    context.Arm64Assembler.Sxth(tempM.Operand, rmOperand);

    elementAction(tempD.Operand, tempN.Operand, tempM.Operand);

    context.Arm64Assembler.Uxth(tempD2.Operand, tempD.Operand);

    // High elements: arithmetic shift acts as the sign extension.
    context.Arm64Assembler.Asr(tempN.Operand, rnOperand, Const(16));
    context.Arm64Assembler.Asr(tempM.Operand, rmOperand, Const(16));

    elementAction(tempD.Operand, tempN.Operand, tempM.Operand);

    // Rd = low result | (high result << 16).
    context.Arm64Assembler.Orr(rdOperand, tempD2.Operand, tempD.Operand, ArmShiftType.Lsl, 16);
}
/// <summary>
/// Emits a binary operation over crossed ("exchanged") signed 16-bit halves:
/// low(Rn) is paired with high(Rm), and high(Rn) with low(Rm), as used by the
/// ASX/SAX family. The element index (0 = low of Rd, 1 = high of Rd) is passed
/// to the callback so it can pick add vs. subtract.
/// </summary>
public static void EmitSigned16BitXPair(CodeGenContext context, uint rd, uint rn, uint rm, Action<Operand, Operand, Operand, int> elementAction)
{
    using ScopedRegister tempD = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister tempD2 = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister tempN = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister tempM = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    Operand rdOperand = GetOutputGpr(context, rd);
    Operand rnOperand = GetInputGpr(context, rn);
    Operand rmOperand = GetInputGpr(context, rm);

    // Element 0: low(Rn) op high(Rm).
    context.Arm64Assembler.Sxth(tempN.Operand, rnOperand);
    context.Arm64Assembler.Asr(tempM.Operand, rmOperand, Const(16));

    elementAction(tempD.Operand, tempN.Operand, tempM.Operand, 0);

    context.Arm64Assembler.Uxth(tempD2.Operand, tempD.Operand);

    // Element 1: high(Rn) op low(Rm).
    context.Arm64Assembler.Asr(tempN.Operand, rnOperand, Const(16));
    context.Arm64Assembler.Sxth(tempM.Operand, rmOperand);

    elementAction(tempD.Operand, tempN.Operand, tempM.Operand, 1);

    // Rd = element 0 | (element 1 << 16).
    context.Arm64Assembler.Orr(rdOperand, tempD2.Operand, tempD.Operand, ArmShiftType.Lsl, 16);
}
/// <summary>
/// Emits a binary operation over the four signed 8-bit lanes of Rn and Rm,
/// packing each per-lane result back into the matching byte of Rd.
/// </summary>
public static void EmitSigned8BitPair(CodeGenContext context, uint rd, uint rn, uint rm, Action<Operand, Operand, Operand> elementAction) =>
    Emit8BitPair(context, rd, rn, rm, elementAction, unsigned: false);
/// <summary>
/// Emits a binary operation over matching unsigned 16-bit halves of Rn and Rm,
/// packing the low 16 bits of each per-element result back into Rd.
/// </summary>
/// <param name="elementAction">Emits the operation: (dest, element of Rn, element of Rm), both zero-extended.</param>
public static void EmitUnsigned16BitPair(CodeGenContext context, uint rd, uint rn, uint rm, Action<Operand, Operand, Operand> elementAction)
{
    using ScopedRegister tempD = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister tempD2 = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister tempN = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister tempM = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    Operand rdOperand = GetOutputGpr(context, rd);
    Operand rnOperand = GetInputGpr(context, rn);
    Operand rmOperand = GetInputGpr(context, rm);

    // Low elements: zero-extend both operands and operate.
    context.Arm64Assembler.Uxth(tempN.Operand, rnOperand);
    context.Arm64Assembler.Uxth(tempM.Operand, rmOperand);

    elementAction(tempD.Operand, tempN.Operand, tempM.Operand);

    context.Arm64Assembler.Uxth(tempD2.Operand, tempD.Operand);

    // High elements: logical shift acts as the zero extension.
    context.Arm64Assembler.Lsr(tempN.Operand, rnOperand, Const(16));
    context.Arm64Assembler.Lsr(tempM.Operand, rmOperand, Const(16));

    elementAction(tempD.Operand, tempN.Operand, tempM.Operand);

    // Rd = low result | (high result << 16).
    context.Arm64Assembler.Orr(rdOperand, tempD2.Operand, tempD.Operand, ArmShiftType.Lsl, 16);
}
/// <summary>
/// Emits a binary operation over crossed ("exchanged") unsigned 16-bit halves:
/// low(Rn) is paired with high(Rm), and high(Rn) with low(Rm), as used by the
/// ASX/SAX family. The element index (0 = low of Rd, 1 = high of Rd) is passed
/// to the callback so it can pick add vs. subtract.
/// </summary>
public static void EmitUnsigned16BitXPair(CodeGenContext context, uint rd, uint rn, uint rm, Action<Operand, Operand, Operand, int> elementAction)
{
    using ScopedRegister tempD = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister tempD2 = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister tempN = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister tempM = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    Operand rdOperand = GetOutputGpr(context, rd);
    Operand rnOperand = GetInputGpr(context, rn);
    Operand rmOperand = GetInputGpr(context, rm);

    // Element 0: low(Rn) op high(Rm).
    context.Arm64Assembler.Uxth(tempN.Operand, rnOperand);
    context.Arm64Assembler.Lsr(tempM.Operand, rmOperand, Const(16));

    elementAction(tempD.Operand, tempN.Operand, tempM.Operand, 0);

    context.Arm64Assembler.Uxth(tempD2.Operand, tempD.Operand);

    // Element 1: high(Rn) op low(Rm).
    context.Arm64Assembler.Lsr(tempN.Operand, rnOperand, Const(16));
    context.Arm64Assembler.Uxth(tempM.Operand, rmOperand);

    elementAction(tempD.Operand, tempN.Operand, tempM.Operand, 1);

    // Rd = element 0 | (element 1 << 16).
    context.Arm64Assembler.Orr(rdOperand, tempD2.Operand, tempD.Operand, ArmShiftType.Lsl, 16);
}
/// <summary>
/// Emits a binary operation over the four unsigned 8-bit lanes of Rn and Rm,
/// packing each per-lane result back into the matching byte of Rd.
/// </summary>
public static void EmitUnsigned8BitPair(CodeGenContext context, uint rd, uint rn, uint rm, Action<Operand, Operand, Operand> elementAction) =>
    Emit8BitPair(context, rd, rn, rm, elementAction, unsigned: true);
/// <summary>
/// Shared implementation for the 8-bit lane emitters: extracts each byte lane
/// of Rn and Rm (sign- or zero-extended), invokes the per-lane operation, and
/// packs the low byte of each result back into the corresponding byte of Rd.
/// </summary>
private static void Emit8BitPair(CodeGenContext context, uint rd, uint rn, uint rm, Action<Operand, Operand, Operand> elementAction, bool unsigned)
{
    using ScopedRegister tempD = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister tempD2 = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister tempN = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister tempM = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    Operand rdOperand = GetOutputGpr(context, rd);
    Operand rnOperand = GetInputGpr(context, rn);
    Operand rmOperand = GetInputGpr(context, rm);

    for (int b = 0; b < 4; b++)
    {
        // Extract byte lane b from both operands, extended to 32 bits.
        if (unsigned)
        {
            context.Arm64Assembler.Ubfx(tempN.Operand, rnOperand, b * 8, 8);
            context.Arm64Assembler.Ubfx(tempM.Operand, rmOperand, b * 8, 8);
        }
        else
        {
            context.Arm64Assembler.Sbfx(tempN.Operand, rnOperand, b * 8, 8);
            context.Arm64Assembler.Sbfx(tempM.Operand, rmOperand, b * 8, 8);
        }

        elementAction(tempD.Operand, tempN.Operand, tempM.Operand);

        // Accumulate results in tempD2; only the final lane writes Rd, so Rd
        // may safely alias Rn or Rm.
        if (b == 0)
        {
            context.Arm64Assembler.Uxtb(tempD2.Operand, tempD.Operand);
        }
        else if (b < 3)
        {
            context.Arm64Assembler.Uxtb(tempD.Operand, tempD.Operand);
            context.Arm64Assembler.Orr(tempD2.Operand, tempD2.Operand, tempD.Operand, ArmShiftType.Lsl, b * 8);
        }
        else
        {
            // Last lane: no Uxtb needed since the shift by 24 discards the
            // high bits of the lane result.
            context.Arm64Assembler.Orr(rdOperand, tempD2.Operand, tempD.Operand, ArmShiftType.Lsl, 24);
        }
    }
}
/// <summary>
/// Combines the split SIMD register number fields of an Arm32 encoding into a
/// flat register index. For size 3 the extra bit is the top bit of the index;
/// otherwise it is the bottom bit (single-precision style numbering).
/// </summary>
public static uint CombineV(uint low4, uint high1, uint size)
{
    if (size == 3)
    {
        return low4 | (high1 << 4);
    }

    return high1 | (low4 << 1);
}
/// <summary>
/// Combines a 4-bit low field and a 1-bit high field into a 5-bit register
/// index (high bit on top).
/// </summary>
public static uint CombineV(uint low4, uint high1)
{
    return (high1 << 4) | low4;
}
/// <summary>
/// Combines a 1-bit low field and a 4-bit high field into a 5-bit register
/// index (low bit on the bottom, single-precision style numbering).
/// </summary>
public static uint CombineVF(uint low1, uint high4)
{
    return (high4 << 1) | low1;
}
}
}

View File

@@ -0,0 +1,26 @@
using Ryujinx.Cpu.LightningJit.CodeGen;
using System;
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
static class InstEmitCrc32
{
    /// <summary>
    /// Emits Arm64 CRC32 for the Arm32 CRC32 instruction. The size field is
    /// clamped to 2 (word), the widest element the 32-bit instruction encodes.
    /// </summary>
    public static void Crc32(CodeGenContext context, uint rd, uint rn, uint rm, uint sz)
    {
        Emit(context, rd, rn, rm, sz, castagnoli: false);
    }

    /// <summary>
    /// Emits Arm64 CRC32C (Castagnoli polynomial) for the Arm32 CRC32C
    /// instruction. The size field is clamped to 2 (word).
    /// </summary>
    public static void Crc32c(CodeGenContext context, uint rd, uint rn, uint rm, uint sz)
    {
        Emit(context, rd, rn, rm, sz, castagnoli: true);
    }

    // Shared body: resolve the guest registers, then emit the selected CRC
    // flavor with the clamped element size.
    private static void Emit(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, bool castagnoli)
    {
        Operand rdOperand = InstEmitCommon.GetOutputGpr(context, rd);
        Operand rnOperand = InstEmitCommon.GetInputGpr(context, rn);
        Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);

        uint size = Math.Min(2u, sz);

        if (castagnoli)
        {
            context.Arm64Assembler.Crc32c(rdOperand, rnOperand, rmOperand, size);
        }
        else
        {
            context.Arm64Assembler.Crc32(rdOperand, rnOperand, rmOperand, size);
        }
    }
}
}

View File

@@ -0,0 +1,25 @@
using Ryujinx.Cpu.LightningJit.CodeGen;
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
static class InstEmitDivide
{
    /// <summary>
    /// Emits Arm64 SDIV for the Arm32 SDIV instruction (signed division).
    /// </summary>
    public static void Sdiv(CodeGenContext context, uint rd, uint rn, uint rm)
    {
        Emit(context, rd, rn, rm, unsigned: false);
    }

    /// <summary>
    /// Emits Arm64 UDIV for the Arm32 UDIV instruction (unsigned division).
    /// </summary>
    public static void Udiv(CodeGenContext context, uint rd, uint rn, uint rm)
    {
        Emit(context, rd, rn, rm, unsigned: true);
    }

    // Shared body: resolve the guest registers, then emit the matching Arm64
    // divide instruction.
    private static void Emit(CodeGenContext context, uint rd, uint rn, uint rm, bool unsigned)
    {
        Operand rdOperand = InstEmitCommon.GetOutputGpr(context, rd);
        Operand rnOperand = InstEmitCommon.GetInputGpr(context, rn);
        Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);

        if (unsigned)
        {
            context.Arm64Assembler.Udiv(rdOperand, rnOperand, rmOperand);
        }
        else
        {
            context.Arm64Assembler.Sdiv(rdOperand, rnOperand, rmOperand);
        }
    }
}
}

View File

@@ -0,0 +1,191 @@
using Ryujinx.Cpu.LightningJit.CodeGen;
using Ryujinx.Cpu.LightningJit.CodeGen.Arm64;
using System;
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
static class InstEmitExtension
{
    // Emitters for the Arm32 extend and extend-accumulate instructions
    // (SXTB/SXTH/SXTB16, UXTB/UXTH/UXTB16 and their accumulate forms), where
    // the source register may be rotated by 0/8/16/24 bits before extension.

    public static void Sxtab(CodeGenContext context, uint rd, uint rn, uint rm, uint rotate)
    {
        // Rd = Rn + SignExtend8(Rm ror rotate).
        EmitRotated(context, ArmExtensionType.Sxtb, rd, rn, rm, rotate);
    }

    public static void Sxtab16(CodeGenContext context, uint rd, uint rn, uint rm, uint rotate)
    {
        EmitExtendAccumulate8(context, rd, rn, rm, rotate, unsigned: false);
    }

    public static void Sxtah(CodeGenContext context, uint rd, uint rn, uint rm, uint rotate)
    {
        // Rd = Rn + SignExtend16(Rm ror rotate).
        EmitRotated(context, ArmExtensionType.Sxth, rd, rn, rm, rotate);
    }

    public static void Sxtb(CodeGenContext context, uint rd, uint rm, uint rotate)
    {
        EmitRotated(context, context.Arm64Assembler.Sxtb, rd, rm, rotate);
    }

    public static void Sxtb16(CodeGenContext context, uint rd, uint rm, uint rotate)
    {
        // Sign-extends the bytes in lanes 0 and 2 of (Rm ror rotate) to two
        // 16-bit values in Rd.
        Operand rdOperand = InstEmitCommon.GetOutputGpr(context, rd);
        Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);

        using ScopedRegister tempRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();
        using ScopedRegister tempRegister2 = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        // Keep only bytes 0 and 2 of the (optionally rotated) source.
        if (rotate != 0)
        {
            context.Arm64Assembler.Ror(tempRegister.Operand, rmOperand, InstEmitCommon.Const((int)rotate * 8));
            context.Arm64Assembler.And(rdOperand, tempRegister.Operand, InstEmitCommon.Const(0xff00ff));
        }
        else
        {
            context.Arm64Assembler.And(rdOperand, rmOperand, InstEmitCommon.Const(0xff00ff));
        }

        // Sign-extend by broadcasting sign bits.
        // 0x800080 isolates the sign bit of each byte lane; (s << 9) - s
        // equals s * 0x1ff, which smears that bit across bits 8-15 of the
        // lane, and ORing it back in completes the 16-bit sign extension.
        context.Arm64Assembler.And(tempRegister.Operand, rdOperand, InstEmitCommon.Const(0x800080));
        context.Arm64Assembler.Lsl(tempRegister2.Operand, tempRegister.Operand, InstEmitCommon.Const(9));
        context.Arm64Assembler.Sub(tempRegister.Operand, tempRegister2.Operand, tempRegister.Operand);
        context.Arm64Assembler.Orr(rdOperand, rdOperand, tempRegister.Operand);
    }

    public static void Sxth(CodeGenContext context, uint rd, uint rm, uint rotate)
    {
        EmitRotated(context, context.Arm64Assembler.Sxth, rd, rm, rotate);
    }

    public static void Uxtab(CodeGenContext context, uint rd, uint rn, uint rm, uint rotate)
    {
        // Rd = Rn + ZeroExtend8(Rm ror rotate).
        EmitRotated(context, ArmExtensionType.Uxtb, rd, rn, rm, rotate);
    }

    public static void Uxtab16(CodeGenContext context, uint rd, uint rn, uint rm, uint rotate)
    {
        EmitExtendAccumulate8(context, rd, rn, rm, rotate, unsigned: true);
    }

    public static void Uxtah(CodeGenContext context, uint rd, uint rn, uint rm, uint rotate)
    {
        // Rd = Rn + ZeroExtend16(Rm ror rotate).
        EmitRotated(context, ArmExtensionType.Uxth, rd, rn, rm, rotate);
    }

    public static void Uxtb(CodeGenContext context, uint rd, uint rm, uint rotate)
    {
        EmitRotated(context, context.Arm64Assembler.Uxtb, rd, rm, rotate);
    }

    public static void Uxtb16(CodeGenContext context, uint rd, uint rm, uint rotate)
    {
        // Zero-extends bytes 0 and 2 of (Rm ror rotate): a single mask keeping
        // those lanes is the whole operation.
        Operand rdOperand = InstEmitCommon.GetOutputGpr(context, rd);
        Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);

        if (rotate != 0)
        {
            using ScopedRegister tempRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();

            context.Arm64Assembler.Ror(tempRegister.Operand, rmOperand, InstEmitCommon.Const((int)rotate * 8));
            context.Arm64Assembler.And(rdOperand, tempRegister.Operand, InstEmitCommon.Const(0xff00ff));
        }
        else
        {
            context.Arm64Assembler.And(rdOperand, rmOperand, InstEmitCommon.Const(0xff00ff));
        }
    }

    public static void Uxth(CodeGenContext context, uint rd, uint rm, uint rotate)
    {
        EmitRotated(context, context.Arm64Assembler.Uxth, rd, rm, rotate);
    }

    // Two-operand form: applies the extension "action" (e.g. Sxtb) to the
    // optionally rotated source.
    private static void EmitRotated(CodeGenContext context, Action<Operand, Operand> action, uint rd, uint rm, uint rotate)
    {
        Operand rdOperand = InstEmitCommon.GetOutputGpr(context, rd);
        Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);

        if (rotate != 0)
        {
            using ScopedRegister tempRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();

            context.Arm64Assembler.Ror(tempRegister.Operand, rmOperand, InstEmitCommon.Const((int)rotate * 8));

            action(rdOperand, tempRegister.Operand);
        }
        else
        {
            action(rdOperand, rmOperand);
        }
    }

    // Accumulate form: Rd = Rn + Extend(Rm ror rotate), using Arm64's
    // extended-register ADD to fold the extension into the addition.
    private static void EmitRotated(CodeGenContext context, ArmExtensionType extensionType, uint rd, uint rn, uint rm, uint rotate)
    {
        Operand rdOperand = InstEmitCommon.GetOutputGpr(context, rd);
        Operand rnOperand = InstEmitCommon.GetInputGpr(context, rn);
        Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);

        if (rotate != 0)
        {
            using ScopedRegister tempRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();

            context.Arm64Assembler.Ror(tempRegister.Operand, rmOperand, InstEmitCommon.Const((int)rotate * 8));
            context.Arm64Assembler.Add(rdOperand, rnOperand, tempRegister.Operand, extensionType);
        }
        else
        {
            context.Arm64Assembler.Add(rdOperand, rnOperand, rmOperand, extensionType);
        }
    }

    // SXTAB16/UXTAB16: per-halfword accumulate of extended bytes 0 and 2.
    private static void EmitExtendAccumulate8(CodeGenContext context, uint rd, uint rn, uint rm, uint rotate, bool unsigned)
    {
        Operand rdOperand = InstEmitCommon.GetOutputGpr(context, rd);
        Operand rnOperand = InstEmitCommon.GetInputGpr(context, rn);
        Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);

        if (rotate != 0)
        {
            using ScopedRegister tempRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();

            context.Arm64Assembler.Ror(tempRegister.Operand, rmOperand, InstEmitCommon.Const((int)rotate * 8));

            EmitExtendAccumulate8Core(context, rdOperand, rnOperand, tempRegister.Operand, unsigned);
        }
        else
        {
            EmitExtendAccumulate8Core(context, rdOperand, rnOperand, rmOperand, unsigned);
        }
    }

    // Core of SXTAB16/UXTAB16: for each 16-bit half of Rn, add the extended
    // byte from the corresponding half of Rm, then repack the two 16-bit
    // results into Rd.
    private static void EmitExtendAccumulate8Core(CodeGenContext context, Operand rd, Operand rn, Operand rm, bool unsigned)
    {
        using ScopedRegister tempD = context.RegisterAllocator.AllocateTempGprRegisterScoped();
        using ScopedRegister tempD2 = context.RegisterAllocator.AllocateTempGprRegisterScoped();
        using ScopedRegister tempN = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        // Low half: extend low(Rn), add extended byte 0 of Rm.
        if (unsigned)
        {
            context.Arm64Assembler.Uxth(tempN.Operand, rn);
        }
        else
        {
            context.Arm64Assembler.Sxth(tempN.Operand, rn);
        }

        context.Arm64Assembler.Add(tempD.Operand, tempN.Operand, rm, unsigned ? ArmExtensionType.Uxtb : ArmExtensionType.Sxtb);
        context.Arm64Assembler.Uxth(tempD2.Operand, tempD.Operand);

        // High half: shift acts as the extension of high(Rn); byte 2 of Rm is
        // shifted into byte position 0 before the extending ADD.
        if (unsigned)
        {
            context.Arm64Assembler.Lsr(tempN.Operand, rn, InstEmitCommon.Const(16));
        }
        else
        {
            context.Arm64Assembler.Asr(tempN.Operand, rn, InstEmitCommon.Const(16));
        }

        context.Arm64Assembler.Lsr(tempD.Operand, rm, InstEmitCommon.Const(16));
        context.Arm64Assembler.Add(tempD.Operand, tempN.Operand, tempD.Operand, unsigned ? ArmExtensionType.Uxtb : ArmExtensionType.Sxtb);

        // Rd = low half | (high half << 16).
        context.Arm64Assembler.Orr(rd, tempD2.Operand, tempD.Operand, ArmShiftType.Lsl, 16);
    }
}
}

View File

@@ -0,0 +1,256 @@
using ARMeilleure.Common;
using Ryujinx.Cpu.LightningJit.CodeGen;
using Ryujinx.Cpu.LightningJit.CodeGen.Arm64;
using System;
using System.Diagnostics;
using System.Numerics;
using System.Runtime.CompilerServices;
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
static class InstEmitFlow
{
    // Emitters for the Arm32 branch/call instructions. Branch targets are not
    // known at this point, so placeholder branches (offset 0) are emitted and
    // registered as "pending" on the context to be patched later.

    private const int SpIndex = 31;

    public static void B(CodeGenContext context, int imm, ArmCondition condition)
    {
        context.AddPendingBranch(InstName.B, imm);

        // Placeholder branch; patched when the branch list is resolved.
        if (condition == ArmCondition.Al)
        {
            context.Arm64Assembler.B(0);
        }
        else
        {
            context.Arm64Assembler.B(condition, 0);
        }
    }

    public static void Bl(CodeGenContext context, int imm, bool sourceIsThumb, bool targetIsThumb)
    {
        // Return address: Thumb encodes its state in bit 0; Arm return address
        // is the following instruction (PC - 4 relative to the pipelined PC).
        uint nextAddress = sourceIsThumb ? context.Pc | 1u : context.Pc - 4;
        // BLX-style immediate targets are word-aligned when switching to Arm.
        uint targetAddress = targetIsThumb ? context.Pc + (uint)imm : (context.Pc & ~3u) + (uint)imm;

        // Interworking: update the T flag when the instruction set changes.
        if (sourceIsThumb != targetIsThumb)
        {
            if (targetIsThumb)
            {
                InstEmitCommon.SetThumbFlag(context);
            }
            else
            {
                InstEmitCommon.ClearThumbFlag(context);
            }
        }

        context.AddPendingCall(targetAddress, nextAddress);

        context.Arm64Assembler.B(0);
    }

    public static void Blx(CodeGenContext context, uint rm, bool sourceIsThumb)
    {
        Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);

        // Bit 0 of Rm selects the target instruction set.
        InstEmitCommon.SetThumbFlag(context, rmOperand);

        uint nextAddress = sourceIsThumb ? (context.Pc - 2) | 1u : context.Pc - 4;

        context.AddPendingIndirectCall(rm, nextAddress);

        context.Arm64Assembler.B(0);
    }

    public static void Bx(CodeGenContext context, uint rm)
    {
        Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);

        // Bit 0 of Rm selects the target instruction set.
        InstEmitCommon.SetThumbFlag(context, rmOperand);

        context.AddPendingIndirectBranch(InstName.Bx, rm);

        context.Arm64Assembler.B(0);
    }

    public static void Cbnz(CodeGenContext context, uint rn, int imm, bool op)
    {
        Operand rnOperand = InstEmitCommon.GetInputGpr(context, rn);

        context.AddPendingBranch(InstName.Cbnz, imm);

        // op selects CBNZ vs. CBZ; placeholder offset patched later.
        if (op)
        {
            context.Arm64Assembler.Cbnz(rnOperand, 0);
        }
        else
        {
            context.Arm64Assembler.Cbz(rnOperand, 0);
        }
    }

    public static void It(CodeGenContext context, uint firstCond, uint mask)
    {
        // Decodes the Thumb IT block: the number of trailing zeros in the mask
        // determines the block length, and each mask bit selects base vs.
        // inverted condition for the corresponding instruction.
        Debug.Assert(mask != 0);

        int instCount = 4 - BitOperations.TrailingZeroCount(mask);

        Span<ArmCondition> conditions = stackalloc ArmCondition[instCount];

        int i = 0;

        for (int index = 5 - instCount; index < 4; index++)
        {
            bool invert = (mask & (1u << index)) != 0;

            if (invert)
            {
                conditions[i++] = ((ArmCondition)firstCond).Invert();
            }
            else
            {
                conditions[i++] = (ArmCondition)firstCond;
            }
        }

        // The first instruction of the block always uses the base condition.
        conditions[i] = (ArmCondition)firstCond;

        context.SetItBlockStart(conditions);
    }

    public static void Tbb(CodeGenContext context, uint rn, uint rm, bool h)
    {
        // Table branch: the resolver needs the current PC in the remapped PC
        // register; h selects halfword (TBH) vs. byte (TBB) table entries.
        context.Arm64Assembler.Mov(context.RegisterAllocator.RemapGprRegister(RegisterUtils.PcRegister), context.Pc);

        context.AddPendingTableBranch(rn, rm, h);

        context.Arm64Assembler.B(0);
    }

    /// <summary>
    /// Emits the call sequence into the dispatch stub (or directly through the
    /// function table for constant targets): stores the guest target address in
    /// the native context, spills the fixed registers if needed, loads the
    /// callee pointer and either tail-branches or calls and re-dispatches when
    /// the returned address is not the expected fall-through address.
    /// </summary>
    public unsafe static void WriteCallWithGuestAddress(
        CodeWriter writer,
        ref Assembler asm,
        RegisterAllocator regAlloc,
        TailMerger tailMerger,
        Action writeEpilogue,
        AddressTable<ulong> funcTable,
        IntPtr funcPtr,
        int spillBaseOffset,
        uint nextAddress,
        Operand guestAddress,
        bool isTail = false)
    {
        int tempRegister;

        // Publish the guest target address for the dispatcher.
        if (guestAddress.Kind == OperandKind.Constant)
        {
            tempRegister = regAlloc.AllocateTempGprRegister();

            asm.Mov(Register(tempRegister), guestAddress.Value);
            asm.StrRiUn(Register(tempRegister), Register(regAlloc.FixedContextRegister), NativeContextOffsets.DispatchAddressOffset);

            regAlloc.FreeTempGprRegister(tempRegister);
        }
        else
        {
            asm.StrRiUn(guestAddress, Register(regAlloc.FixedContextRegister), NativeContextOffsets.DispatchAddressOffset);
        }

        // Pick a scratch register that can't clash with the context register.
        tempRegister = regAlloc.FixedContextRegister == 1 ? 2 : 1;

        if (!isTail)
        {
            WriteSpillSkipContext(ref asm, regAlloc, spillBaseOffset);
        }

        Operand rn = Register(tempRegister);

        // The callee takes the native context pointer as its first argument.
        if (regAlloc.FixedContextRegister != 0)
        {
            asm.Mov(Register(0), Register(regAlloc.FixedContextRegister));
        }

        if (guestAddress.Kind == OperandKind.Constant && funcTable != null)
        {
            // Constant target: load the translated function pointer from its
            // address table slot (page base via MOV, low 12 bits via LDR).
            ulong funcPtrLoc = (ulong)Unsafe.AsPointer(ref funcTable.GetValue(guestAddress.Value));

            asm.Mov(rn, funcPtrLoc & ~0xfffUL);
            asm.LdrRiUn(rn, rn, (int)(funcPtrLoc & 0xfffUL));
        }
        else
        {
            asm.Mov(rn, (ulong)funcPtr);
        }

        if (isTail)
        {
            writeEpilogue();
            asm.Br(rn);
        }
        else
        {
            asm.Blr(rn);

            // The callee returns the next guest address in X0; if it differs
            // from the static fall-through address, return to the dispatcher.
            asm.Mov(rn, nextAddress);
            asm.Cmp(Register(0), rn);

            tailMerger.AddConditionalReturn(writer, asm, ArmCondition.Ne);

            WriteFillSkipContext(ref asm, regAlloc, spillBaseOffset);
        }
    }

    private static void WriteSpillSkipContext(ref Assembler asm, RegisterAllocator regAlloc, int spillOffset)
    {
        WriteSpillOrFillSkipContext(ref asm, regAlloc, spillOffset, spill: true);
    }

    private static void WriteFillSkipContext(ref Assembler asm, RegisterAllocator regAlloc, int spillOffset)
    {
        WriteSpillOrFillSkipContext(ref asm, regAlloc, spillOffset, spill: false);
    }

    // Saves or restores only the fixed context and page table registers (when
    // they are in use) to/from the stack spill area, using STP/LDP for
    // adjacent register pairs when the offset is encodable.
    private static void WriteSpillOrFillSkipContext(ref Assembler asm, RegisterAllocator regAlloc, int spillOffset, bool spill)
    {
        uint gprMask = regAlloc.UsedGprsMask & ((1u << regAlloc.FixedContextRegister) | (1u << regAlloc.FixedPageTableRegister));

        while (gprMask != 0)
        {
            int reg = BitOperations.TrailingZeroCount(gprMask);

            if (reg < 31 && (gprMask & (2u << reg)) != 0 && spillOffset < RegisterSaveRestore.Encodable9BitsOffsetLimit)
            {
                // Two consecutive registers: use a single pair access.
                if (spill)
                {
                    asm.StpRiUn(Register(reg), Register(reg + 1), Register(SpIndex), spillOffset);
                }
                else
                {
                    asm.LdpRiUn(Register(reg), Register(reg + 1), Register(SpIndex), spillOffset);
                }

                gprMask &= ~(3u << reg);
                spillOffset += 16;
            }
            else
            {
                if (spill)
                {
                    asm.StrRiUn(Register(reg), Register(SpIndex), spillOffset);
                }
                else
                {
                    asm.LdrRiUn(Register(reg), Register(SpIndex), spillOffset);
                }

                gprMask &= ~(1u << reg);
                spillOffset += 8;
            }
        }
    }

    private static Operand Register(int register, OperandType type = OperandType.I64)
    {
        return new Operand(register, RegisterType.Integer, type);
    }
}
}

View File

@@ -0,0 +1,265 @@
using Ryujinx.Cpu.LightningJit.CodeGen;
using Ryujinx.Cpu.LightningJit.CodeGen.Arm64;
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
static class InstEmitGE
{
    // Emitters for the Arm32 parallel add/subtract instructions that set the
    // GE flags (SADD16, UADD8, SASX, ...) and for SEL, which consumes them.
    // The GE flags are kept in bits 16-19 of the flags field of the guest
    // native context.

    public static void Sadd16(CodeGenContext context, uint rd, uint rn, uint rm)
    {
        EmitAddSub(context, rd, rn, rm, is16Bit: true, add: true, unsigned: false);
    }

    public static void Sadd8(CodeGenContext context, uint rd, uint rn, uint rm)
    {
        EmitAddSub(context, rd, rn, rm, is16Bit: false, add: true, unsigned: false);
    }

    public static void Sasx(CodeGenContext context, uint rd, uint rn, uint rm)
    {
        EmitAsxSax(context, rd, rn, rm, isAsx: true, unsigned: false);
    }

    public static void Sel(CodeGenContext context, uint rd, uint rn, uint rm)
    {
        // SEL: for each byte lane, Rd = GE[lane] ? Rn : Rm.
        Operand rdOperand = InstEmitCommon.GetOutputGpr(context, rd);
        Operand rnOperand = InstEmitCommon.GetInputGpr(context, rn);
        Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);

        using ScopedRegister geFlags = context.RegisterAllocator.AllocateTempGprRegisterScoped();
        using ScopedRegister tempRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        ExtractGEFlags(context, geFlags.Operand);

        // Broadcast compact GE flags (one bit to one byte, 0b1111 -> 0x1010101).
        // Multiplying by 0x204081 (1 + 2^7 + 2^14 + 2^21) shifts each flag bit
        // into the LSB of its own byte lane; the AND keeps just those bits.
        context.Arm64Assembler.Mov(tempRegister.Operand, 0x204081u);
        context.Arm64Assembler.Mul(geFlags.Operand, geFlags.Operand, tempRegister.Operand);
        context.Arm64Assembler.And(geFlags.Operand, geFlags.Operand, InstEmitCommon.Const(0x1010101));

        // Build mask from expanded flags (0x1010101 -> 0xFFFFFFFF).
        // (f << 8) - f == f * 0xff: turns each set lane LSB into a full 0xFF.
        context.Arm64Assembler.Lsl(tempRegister.Operand, geFlags.Operand, InstEmitCommon.Const(8));
        context.Arm64Assembler.Sub(geFlags.Operand, tempRegister.Operand, geFlags.Operand);

        // Result = (n & mask) | (m & ~mask).
        context.Arm64Assembler.And(tempRegister.Operand, geFlags.Operand, rnOperand);
        context.Arm64Assembler.Bic(rdOperand, rmOperand, geFlags.Operand);
        context.Arm64Assembler.Orr(rdOperand, rdOperand, tempRegister.Operand);
    }

    public static void Ssax(CodeGenContext context, uint rd, uint rn, uint rm)
    {
        EmitAsxSax(context, rd, rn, rm, isAsx: false, unsigned: false);
    }

    public static void Ssub16(CodeGenContext context, uint rd, uint rn, uint rm)
    {
        EmitAddSub(context, rd, rn, rm, is16Bit: true, add: false, unsigned: false);
    }

    public static void Ssub8(CodeGenContext context, uint rd, uint rn, uint rm)
    {
        EmitAddSub(context, rd, rn, rm, is16Bit: false, add: false, unsigned: false);
    }

    public static void Uadd16(CodeGenContext context, uint rd, uint rn, uint rm)
    {
        EmitAddSub(context, rd, rn, rm, is16Bit: true, add: true, unsigned: true);
    }

    public static void Uadd8(CodeGenContext context, uint rd, uint rn, uint rm)
    {
        EmitAddSub(context, rd, rn, rm, is16Bit: false, add: true, unsigned: true);
    }

    public static void Uasx(CodeGenContext context, uint rd, uint rn, uint rm)
    {
        EmitAsxSax(context, rd, rn, rm, isAsx: true, unsigned: true);
    }

    public static void Usax(CodeGenContext context, uint rd, uint rn, uint rm)
    {
        EmitAsxSax(context, rd, rn, rm, isAsx: false, unsigned: true);
    }

    public static void Usub16(CodeGenContext context, uint rd, uint rn, uint rm)
    {
        EmitAddSub(context, rd, rn, rm, is16Bit: true, add: false, unsigned: true);
    }

    public static void Usub8(CodeGenContext context, uint rd, uint rn, uint rm)
    {
        EmitAddSub(context, rd, rn, rm, is16Bit: false, add: false, unsigned: true);
    }

    // Parallel add/sub with GE flag update. Per element, the GE bit is the
    // carry-out for unsigned addition (bit 16 or 8 of the widened result), and
    // the inverted sign bit of the widened result otherwise.
    private static void EmitAddSub(CodeGenContext context, uint rd, uint rn, uint rm, bool is16Bit, bool add, bool unsigned)
    {
        // NOTE(review): these three operands appear unused below (the pair
        // helpers re-resolve the registers themselves); their only effect is
        // the PC-materialization side effect of GetInputGpr — confirm.
        Operand rdOperand = InstEmitCommon.GetOutputGpr(context, rd);
        Operand rnOperand = InstEmitCommon.GetInputGpr(context, rn);
        Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);

        using ScopedRegister geFlags = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        // e: bit position inside the compact GE accumulator (2 per 16-bit
        // element, 1 per 8-bit element).
        int e = 0;

        void Emit(Operand d, Operand n, Operand m)
        {
            if (add)
            {
                context.Arm64Assembler.Add(d, n, m);
            }
            else
            {
                context.Arm64Assembler.Sub(d, n, m);
            }

            if (unsigned && add)
            {
                // Unsigned add: GE = carry-out of the element-wide addition.
                if (e == 0)
                {
                    context.Arm64Assembler.Lsr(geFlags.Operand, d, InstEmitCommon.Const(is16Bit ? 16 : 8));
                }
                else
                {
                    using ScopedRegister tempRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();

                    context.Arm64Assembler.Lsr(tempRegister.Operand, d, InstEmitCommon.Const(is16Bit ? 16 : 8));
                    context.Arm64Assembler.Orr(geFlags.Operand, geFlags.Operand, tempRegister.Operand, ArmShiftType.Lsl, e);
                }
            }
            else
            {
                // Signed ops and unsigned sub: GE = NOT(sign of widened result).
                using ScopedRegister tempRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();

                context.Arm64Assembler.Mvn(tempRegister.Operand, d);

                if (e == 0)
                {
                    context.Arm64Assembler.Lsr(geFlags.Operand, tempRegister.Operand, InstEmitCommon.Const(31));
                }
                else
                {
                    context.Arm64Assembler.Lsr(tempRegister.Operand, tempRegister.Operand, InstEmitCommon.Const(31));
                    context.Arm64Assembler.Orr(geFlags.Operand, geFlags.Operand, tempRegister.Operand, ArmShiftType.Lsl, e);
                }
            }

            e += is16Bit ? 2 : 1;
        }

        if (is16Bit)
        {
            if (unsigned)
            {
                InstEmitCommon.EmitUnsigned16BitPair(context, rd, rn, rm, Emit);
            }
            else
            {
                InstEmitCommon.EmitSigned16BitPair(context, rd, rn, rm, Emit);
            }

            // Duplicate bits.
            // Each 16-bit element drives two GE bits, so copy bit 0 -> 1 and 2 -> 3.
            context.Arm64Assembler.Orr(geFlags.Operand, geFlags.Operand, geFlags.Operand, ArmShiftType.Lsl, 1);
        }
        else
        {
            if (unsigned)
            {
                InstEmitCommon.EmitUnsigned8BitPair(context, rd, rn, rm, Emit);
            }
            else
            {
                InstEmitCommon.EmitSigned8BitPair(context, rd, rn, rm, Emit);
            }
        }

        UpdateGEFlags(context, geFlags.Operand);
    }

    // ASX/SAX (crossed add/sub) with GE flag update; element index selects
    // add vs. subtract depending on the isAsx direction.
    private static void EmitAsxSax(CodeGenContext context, uint rd, uint rn, uint rm, bool isAsx, bool unsigned)
    {
        // NOTE(review): these three operands appear unused below (the pair
        // helpers re-resolve the registers themselves); their only effect is
        // the PC-materialization side effect of GetInputGpr — confirm.
        Operand rdOperand = InstEmitCommon.GetOutputGpr(context, rd);
        Operand rnOperand = InstEmitCommon.GetInputGpr(context, rn);
        Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);

        using ScopedRegister geFlags = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        void Emit(Operand d, Operand n, Operand m, int e)
        {
            // ASX: subtract on element 0, add on element 1. SAX: the reverse.
            bool add = e == (isAsx ? 1 : 0);

            if (add)
            {
                context.Arm64Assembler.Add(d, n, m);
            }
            else
            {
                context.Arm64Assembler.Sub(d, n, m);
            }

            if (unsigned && add)
            {
                // Unsigned add: GE = carry-out (bit 16 of the widened sum).
                if (e == 0)
                {
                    context.Arm64Assembler.Lsr(geFlags.Operand, d, InstEmitCommon.Const(16));
                }
                else
                {
                    using ScopedRegister tempRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();

                    context.Arm64Assembler.Lsr(tempRegister.Operand, d, InstEmitCommon.Const(16));
                    context.Arm64Assembler.Orr(geFlags.Operand, geFlags.Operand, tempRegister.Operand, ArmShiftType.Lsl, e * 2);
                }
            }
            else
            {
                // Signed ops and unsigned sub: GE = NOT(sign of widened result).
                using ScopedRegister tempRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();

                context.Arm64Assembler.Mvn(tempRegister.Operand, d);

                if (e == 0)
                {
                    context.Arm64Assembler.Lsr(geFlags.Operand, tempRegister.Operand, InstEmitCommon.Const(31));
                }
                else
                {
                    context.Arm64Assembler.Lsr(tempRegister.Operand, tempRegister.Operand, InstEmitCommon.Const(31));
                    context.Arm64Assembler.Orr(geFlags.Operand, geFlags.Operand, tempRegister.Operand, ArmShiftType.Lsl, e * 2);
                }
            }
        }

        if (unsigned)
        {
            InstEmitCommon.EmitUnsigned16BitXPair(context, rd, rn, rm, Emit);
        }
        else
        {
            InstEmitCommon.EmitSigned16BitXPair(context, rd, rn, rm, Emit);
        }

        // Duplicate bits.
        // Each 16-bit element drives two GE bits, so copy bit 0 -> 1 and 2 -> 3.
        context.Arm64Assembler.Orr(geFlags.Operand, geFlags.Operand, geFlags.Operand, ArmShiftType.Lsl, 1);

        UpdateGEFlags(context, geFlags.Operand);
    }

    /// <summary>
    /// Writes the 4 GE bits (bits 0-3 of <paramref name="flags"/>) into bits
    /// 16-19 of the flags field in the guest native context.
    /// </summary>
    public static void UpdateGEFlags(CodeGenContext context, Operand flags)
    {
        Operand ctx = InstEmitSystem.Register(context.RegisterAllocator.FixedContextRegister);

        using ScopedRegister tempRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        context.Arm64Assembler.LdrRiUn(tempRegister.Operand, ctx, NativeContextOffsets.FlagsBaseOffset);
        context.Arm64Assembler.Bfi(tempRegister.Operand, flags, 16, 4);
        context.Arm64Assembler.StrRiUn(tempRegister.Operand, ctx, NativeContextOffsets.FlagsBaseOffset);
    }

    /// <summary>
    /// Reads the 4 GE bits from bits 16-19 of the flags field in the guest
    /// native context into bits 0-3 of <paramref name="flags"/>.
    /// </summary>
    public static void ExtractGEFlags(CodeGenContext context, Operand flags)
    {
        Operand ctx = InstEmitSystem.Register(context.RegisterAllocator.FixedContextRegister);

        context.Arm64Assembler.LdrRiUn(flags, ctx, NativeContextOffsets.FlagsBaseOffset);
        context.Arm64Assembler.Ubfx(flags, flags, 16, 4);
    }
}
}

View File

@@ -0,0 +1,178 @@
using Ryujinx.Cpu.LightningJit.CodeGen;
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
static class InstEmitHalve
{
/// <summary>Signed halving add, 16-bit lanes: Rd[i] = (Rn[i] + Rm[i]) >> 1.</summary>
public static void Shadd16(CodeGenContext context, uint rd, uint rn, uint rm) =>
    EmitHadd(context, rd, rn, rm, 0x7fff7fff, unsigned: false);
/// <summary>Signed halving add, 8-bit lanes: Rd[i] = (Rn[i] + Rm[i]) >> 1.</summary>
public static void Shadd8(CodeGenContext context, uint rd, uint rn, uint rm) =>
    EmitHadd(context, rd, rn, rm, 0x7f7f7f7f, unsigned: false);
/// <summary>Signed halving subtract, 16-bit lanes: Rd[i] = (Rn[i] - Rm[i]) >> 1.</summary>
public static void Shsub16(CodeGenContext context, uint rd, uint rn, uint rm) =>
    EmitHsub(context, rd, rn, rm, 0x7fff7fff, unsigned: false);
/// <summary>Signed halving subtract, 8-bit lanes: Rd[i] = (Rn[i] - Rm[i]) >> 1.</summary>
public static void Shsub8(CodeGenContext context, uint rd, uint rn, uint rm) =>
    EmitHsub(context, rd, rn, rm, 0x7f7f7f7f, unsigned: false);
/// <summary>
/// Signed halving add/subtract with exchange: low = (low(Rn) - high(Rm)) >> 1,
/// high = (high(Rn) + low(Rm)) >> 1.
/// </summary>
public static void Shasx(CodeGenContext context, uint rd, uint rn, uint rm)
{
    InstEmitCommon.EmitSigned16BitXPair(context, rd, rn, rm, (d, n, m, e) =>
    {
        if (e == 0)
        {
            context.Arm64Assembler.Sub(d, n, m);
        }
        else
        {
            context.Arm64Assembler.Add(d, n, m);
        }

        // Lsr is safe here despite signed elements: only the low 16 bits of
        // the halved value are kept, and bits 15:0 of a >>1 are the same for
        // logical and arithmetic shifts.
        context.Arm64Assembler.Lsr(d, d, InstEmitCommon.Const(1));
    });
}
/// <summary>
/// Signed halving subtract/add with exchange: low = (low(Rn) + high(Rm)) >> 1,
/// high = (high(Rn) - low(Rm)) >> 1.
/// </summary>
public static void Shsax(CodeGenContext context, uint rd, uint rn, uint rm)
{
    InstEmitCommon.EmitSigned16BitXPair(context, rd, rn, rm, (d, n, m, e) =>
    {
        if (e == 0)
        {
            context.Arm64Assembler.Add(d, n, m);
        }
        else
        {
            context.Arm64Assembler.Sub(d, n, m);
        }

        // Lsr is safe here despite signed elements: only the low 16 bits of
        // the halved value are kept, and bits 15:0 of a >>1 are the same for
        // logical and arithmetic shifts.
        context.Arm64Assembler.Lsr(d, d, InstEmitCommon.Const(1));
    });
}
/// <summary>Unsigned halving add, 16-bit lanes: Rd[i] = (Rn[i] + Rm[i]) >> 1.</summary>
public static void Uhadd16(CodeGenContext context, uint rd, uint rn, uint rm) =>
    EmitHadd(context, rd, rn, rm, 0x7fff7fff, unsigned: true);
public static void Uhadd8(CodeGenContext context, uint rd, uint rn, uint rm)
{
EmitHadd(context, rd, rn, rm, 0x7f7f7f7f, unsigned: true);
}
public static void Uhasx(CodeGenContext context, uint rd, uint rn, uint rm)
{
InstEmitCommon.EmitUnsigned16BitXPair(context, rd, rn, rm, (d, n, m, e) =>
{
if (e == 0)
{
context.Arm64Assembler.Sub(d, n, m);
}
else
{
context.Arm64Assembler.Add(d, n, m);
}
context.Arm64Assembler.Lsr(d, d, InstEmitCommon.Const(1));
});
}
public static void Uhsax(CodeGenContext context, uint rd, uint rn, uint rm)
{
InstEmitCommon.EmitUnsigned16BitXPair(context, rd, rn, rm, (d, n, m, e) =>
{
if (e == 0)
{
context.Arm64Assembler.Add(d, n, m);
}
else
{
context.Arm64Assembler.Sub(d, n, m);
}
context.Arm64Assembler.Lsr(d, d, InstEmitCommon.Const(1));
});
}
public static void Uhsub16(CodeGenContext context, uint rd, uint rn, uint rm)
{
EmitHsub(context, rd, rn, rm, 0x7fff7fff, unsigned: true);
}
public static void Uhsub8(CodeGenContext context, uint rd, uint rn, uint rm)
{
EmitHsub(context, rd, rn, rm, 0x7f7f7f7f, unsigned: true);
}
private static void EmitHadd(CodeGenContext context, uint rd, uint rn, uint rm, int mask, bool unsigned)
{
Operand rdOperand = InstEmitCommon.GetOutputGpr(context, rd);
Operand rnOperand = InstEmitCommon.GetInputGpr(context, rn);
Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);
using ScopedRegister res = context.RegisterAllocator.AllocateTempGprRegisterScoped();
using ScopedRegister carry = context.RegisterAllocator.AllocateTempGprRegisterScoped();
// This relies on the equality x+y == ((x&y) << 1) + (x^y).
// Note that x^y always contains the LSB of the result.
// Since we want to calculate (x+y)/2, we can instead calculate (x&y) + ((x^y)>>1).
// We mask by 0x7F/0x7FFF to remove the LSB so that it doesn't leak into the field below.
context.Arm64Assembler.And(res.Operand, rmOperand, rnOperand);
context.Arm64Assembler.Eor(carry.Operand, rmOperand, rnOperand);
context.Arm64Assembler.Lsr(rdOperand, carry.Operand, InstEmitCommon.Const(1));
context.Arm64Assembler.And(rdOperand, rdOperand, InstEmitCommon.Const(mask));
context.Arm64Assembler.Add(rdOperand, rdOperand, res.Operand);
if (!unsigned)
{
// Propagates the sign bit from (x^y)>>1 upwards by one.
context.Arm64Assembler.And(carry.Operand, carry.Operand, InstEmitCommon.Const(~mask));
context.Arm64Assembler.Eor(rdOperand, rdOperand, carry.Operand);
}
}
private static void EmitHsub(CodeGenContext context, uint rd, uint rn, uint rm, int mask, bool unsigned)
{
Operand rdOperand = InstEmitCommon.GetOutputGpr(context, rd);
Operand rnOperand = InstEmitCommon.GetInputGpr(context, rn);
Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);
using ScopedRegister carry = context.RegisterAllocator.AllocateTempGprRegisterScoped();
using ScopedRegister left = context.RegisterAllocator.AllocateTempGprRegisterScoped();
using ScopedRegister right = context.RegisterAllocator.AllocateTempGprRegisterScoped();
// This relies on the equality x-y == (x^y) - (((x^y)&y) << 1).
// Note that x^y always contains the LSB of the result.
// Since we want to calculate (x+y)/2, we can instead calculate ((x^y)>>1) - ((x^y)&y).
context.Arm64Assembler.Eor(carry.Operand, rmOperand, rnOperand);
context.Arm64Assembler.Lsr(left.Operand, carry.Operand, InstEmitCommon.Const(1));
context.Arm64Assembler.And(right.Operand, carry.Operand, rmOperand);
// We must now perform a partitioned subtraction.
// We can do this because minuend contains 7/15 bit fields.
// We use the extra bit in minuend as a bit to borrow from; we set this bit.
// We invert this bit at the end as this tells us if that bit was borrowed from.
context.Arm64Assembler.Orr(rdOperand, left.Operand, InstEmitCommon.Const(~mask));
context.Arm64Assembler.Sub(rdOperand, rdOperand, right.Operand);
context.Arm64Assembler.Eor(rdOperand, rdOperand, InstEmitCommon.Const(~mask));
if (!unsigned)
{
// We then sign extend the result into this bit.
context.Arm64Assembler.And(carry.Operand, carry.Operand, InstEmitCommon.Const(~mask));
context.Arm64Assembler.Eor(rdOperand, rdOperand, carry.Operand);
}
}
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,350 @@
using Ryujinx.Cpu.LightningJit.CodeGen;
using Ryujinx.Cpu.LightningJit.CodeGen.Arm64;
using System.Diagnostics;
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
// Emits AArch64 code for the A32 move-class instructions: MOV/MVN in their
// immediate, shifted-register and register-shifted forms, MOVT and PKH.
// The flag-setting (S) variants save the incoming flags, recompute N/Z with a
// TST on the result, then restore C/V via InstEmitCommon.RestoreCvFlags.
static class InstEmitMove
{
    // MVN (immediate): rd = ~imm. For rotated immediates, bit 31 of imm is the
    // shifter carry-out and is folded into the saved flags.
    public static void MvnI(CodeGenContext context, uint rd, uint imm, bool immRotated, bool s)
    {
        Operand rdOperand = InstEmitCommon.GetOutputGpr(context, rd);

        if (s)
        {
            using ScopedRegister flagsRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();

            InstEmitCommon.GetCurrentFlags(context, flagsRegister.Operand);

            if (immRotated)
            {
                // NOTE(review): the carry-out is written to bit 29 here, but MovI
                // below writes it to bit 1 for the same purpose. One of the two
                // should match the layout produced by GetCurrentFlags (not visible
                // in this file section) — verify which bit position is correct.
                if ((imm & (1u << 31)) != 0)
                {
                    context.Arm64Assembler.Orr(flagsRegister.Operand, flagsRegister.Operand, InstEmitCommon.Const(1 << 29));
                }
                else
                {
                    context.Arm64Assembler.Bfc(flagsRegister.Operand, 29, 1);
                }
            }

            context.Arm64Assembler.Mov(rdOperand, ~imm);
            context.Arm64Assembler.Tst(rdOperand, rdOperand); // Sets N and Z from the result.

            InstEmitCommon.RestoreCvFlags(context, flagsRegister.Operand);

            context.SetNzcvModified();
        }
        else
        {
            context.Arm64Assembler.Mov(rdOperand, ~imm);
        }
    }

    // MVN (register, shifted by immediate): rd = ~(rm shifted). When S is set
    // the shift helper also produces the shifter carry-out into flagsRegister.
    public static void MvnR(CodeGenContext context, uint rd, uint rm, uint sType, uint imm5, bool s)
    {
        Operand rdOperand = InstEmitCommon.GetOutputGpr(context, rd);
        Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);

        using ScopedRegister tempRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        ScopedRegister flagsRegister = default;

        if (s)
        {
            flagsRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();

            InstEmitCommon.GetCurrentFlags(context, flagsRegister.Operand);

            rmOperand = InstEmitAlu.GetMShiftedByImmediate(context, tempRegister.Operand, rmOperand, imm5, sType, flagsRegister.Operand);
        }
        else
        {
            rmOperand = InstEmitAlu.GetMShiftedByImmediate(context, tempRegister.Operand, rmOperand, imm5, sType);
        }

        context.Arm64Assembler.Mvn(rdOperand, rmOperand);

        if (s)
        {
            InstEmitCommon.RestoreCvFlags(context, flagsRegister.Operand);

            flagsRegister.Dispose();

            context.SetNzcvModified();
        }
    }

    // MVN (register, shifted by register): rd = ~(rm shifted by rs).
    public static void MvnRr(CodeGenContext context, uint rd, uint rm, uint sType, uint rs, bool s)
    {
        Operand rdOperand = InstEmitCommon.GetOutputGpr(context, rd);
        Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);
        Operand rsOperand = InstEmitCommon.GetInputGpr(context, rs);

        using ScopedRegister tempRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        ScopedRegister flagsRegister = default;

        if (s)
        {
            flagsRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();

            InstEmitCommon.GetCurrentFlags(context, flagsRegister.Operand);

            rmOperand = InstEmitAlu.GetMShiftedByReg(context, tempRegister.Operand, rmOperand, rsOperand, sType, flagsRegister.Operand);
        }
        else
        {
            rmOperand = InstEmitAlu.GetMShiftedByReg(context, tempRegister.Operand, rmOperand, rsOperand, sType);
        }

        context.Arm64Assembler.Mvn(rdOperand, rmOperand);

        if (s)
        {
            InstEmitCommon.RestoreCvFlags(context, flagsRegister.Operand);

            flagsRegister.Dispose();

            context.SetNzcvModified();
        }
    }

    // MOV (immediate): rd = imm. For rotated immediates, bit 31 of imm is the
    // shifter carry-out and is folded into the saved flags.
    public static void MovI(CodeGenContext context, uint rd, uint imm, bool immRotated, bool s)
    {
        Operand rdOperand = InstEmitCommon.GetOutputGpr(context, rd);

        if (s)
        {
            using ScopedRegister flagsRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();

            InstEmitCommon.GetCurrentFlags(context, flagsRegister.Operand);

            if (immRotated)
            {
                // NOTE(review): carry-out written to bit 1 here vs. bit 29 in MvnI
                // above — see the note there; confirm against GetCurrentFlags.
                if ((imm & (1u << 31)) != 0)
                {
                    context.Arm64Assembler.Orr(flagsRegister.Operand, flagsRegister.Operand, InstEmitCommon.Const(2));
                }
                else
                {
                    context.Arm64Assembler.Bfc(flagsRegister.Operand, 1, 1);
                }
            }

            context.Arm64Assembler.Mov(rdOperand, imm);
            context.Arm64Assembler.Tst(rdOperand, rdOperand); // Sets N and Z from the result.

            InstEmitCommon.RestoreCvFlags(context, flagsRegister.Operand);

            context.SetNzcvModified();
        }
        else
        {
            context.Arm64Assembler.Mov(rdOperand, imm);
        }
    }

    // MOV (register, shifted by immediate). The non flag-setting form with an
    // encodable shift maps directly to a single A64 shift instruction.
    public static void MovR(CodeGenContext context, uint rd, uint rm, uint sType, uint imm5, bool s)
    {
        Operand rdOperand = InstEmitCommon.GetOutputGpr(context, rd);
        Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);

        if (InstEmitAlu.CanShift(sType, imm5) && !s)
        {
            if (imm5 != 0)
            {
                switch ((ArmShiftType)sType)
                {
                    case ArmShiftType.Lsl:
                        context.Arm64Assembler.Lsl(rdOperand, rmOperand, InstEmitCommon.Const((int)imm5));
                        break;
                    case ArmShiftType.Lsr:
                        context.Arm64Assembler.Lsr(rdOperand, rmOperand, InstEmitCommon.Const((int)imm5));
                        break;
                    case ArmShiftType.Asr:
                        context.Arm64Assembler.Asr(rdOperand, rmOperand, InstEmitCommon.Const((int)imm5));
                        break;
                    case ArmShiftType.Ror:
                        context.Arm64Assembler.Ror(rdOperand, rmOperand, InstEmitCommon.Const((int)imm5));
                        break;
                }
            }
            else
            {
                context.Arm64Assembler.Mov(rdOperand, rmOperand);
            }
        }
        else
        {
            using ScopedRegister tempRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();

            ScopedRegister flagsRegister = default;

            if (s)
            {
                flagsRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();

                InstEmitCommon.GetCurrentFlags(context, flagsRegister.Operand);

                rmOperand = InstEmitAlu.GetMShiftedByImmediate(context, tempRegister.Operand, rmOperand, imm5, sType, flagsRegister.Operand);
            }
            else
            {
                rmOperand = InstEmitAlu.GetMShiftedByImmediate(context, tempRegister.Operand, rmOperand, imm5, sType, null);
            }

            context.Arm64Assembler.Mov(rdOperand, rmOperand);

            if (s)
            {
                context.Arm64Assembler.Tst(rdOperand, rdOperand);

                InstEmitCommon.RestoreCvFlags(context, flagsRegister.Operand);

                flagsRegister.Dispose();

                context.SetNzcvModified();
            }
        }
    }

    // Conditional MOV (register). Emits CSEL instead of a branch; as a peephole,
    // a following plain MOV with the inverse condition and the same rd is fused
    // into the same CSEL and skipped.
    public static void MovR(CodeGenContext context, uint cond, uint rd, uint rm, uint sType, uint imm5, bool s)
    {
        if (context.ConsumeSkipNextInstruction())
        {
            // This instruction was already fused into the previous CSEL.
            return;
        }

        if ((ArmCondition)cond >= ArmCondition.Al || s)
        {
            // Unconditional (or flag-setting) — use the plain emitter.
            MovR(context, rd, rm, sType, imm5, s);

            return;
        }

        using ScopedRegister tempRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        Operand rdOperand = InstEmitCommon.GetOutputGpr(context, rd);
        Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);

        if (InstEmitAlu.CanShift(sType, imm5))
        {
            if (imm5 != 0)
            {
                switch ((ArmShiftType)sType)
                {
                    case ArmShiftType.Lsl:
                        context.Arm64Assembler.Lsl(tempRegister.Operand, rmOperand, InstEmitCommon.Const((int)imm5));
                        break;
                    case ArmShiftType.Lsr:
                        context.Arm64Assembler.Lsr(tempRegister.Operand, rmOperand, InstEmitCommon.Const((int)imm5));
                        break;
                    case ArmShiftType.Asr:
                        context.Arm64Assembler.Asr(tempRegister.Operand, rmOperand, InstEmitCommon.Const((int)imm5));
                        break;
                    case ArmShiftType.Ror:
                        context.Arm64Assembler.Ror(tempRegister.Operand, rmOperand, InstEmitCommon.Const((int)imm5));
                        break;
                }

                context.Arm64Assembler.Csel(rdOperand, tempRegister.Operand, rdOperand, (ArmCondition)cond);
            }
            else
            {
                Operand other = rdOperand;

                InstInfo nextInstruction = context.PeekNextInstruction();

                if (nextInstruction.Name == InstName.MovR)
                {
                    // If this instruction is followed by another move with the inverse condition,
                    // we can just put it into the second operand of the CSEL instruction and skip the next move.
                    InstCondb28w4Sb20w1Rdb12w4Imm5b7w5Stypeb5w2Rmb0w4 nextInst = new(nextInstruction.Encoding);

                    if (nextInst.Rd == rd &&
                        nextInst.S == 0 &&
                        nextInst.Stype == 0 &&
                        nextInst.Imm5 == 0 &&
                        nextInst.Cond == (cond ^ 1u) &&
                        nextInst.Rm != RegisterUtils.PcRegister)
                    {
                        other = InstEmitCommon.GetInputGpr(context, nextInst.Rm);

                        context.SetSkipNextInstruction();
                    }
                }

                context.Arm64Assembler.Csel(rdOperand, rmOperand, other, (ArmCondition)cond);
            }
        }
        else
        {
            rmOperand = InstEmitAlu.GetMShiftedByImmediate(context, tempRegister.Operand, rmOperand, imm5, sType, null);

            context.Arm64Assembler.Csel(rdOperand, rmOperand, rdOperand, (ArmCondition)cond);
        }
    }

    // MOV (register, shifted by register): rd = rm shifted by rs.
    public static void MovRr(CodeGenContext context, uint rd, uint rm, uint sType, uint rs, bool s)
    {
        Operand rdOperand = InstEmitCommon.GetOutputGpr(context, rd);
        Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);
        Operand rsOperand = InstEmitCommon.GetInputGpr(context, rs);

        if (!s)
        {
            // The shift helper can write straight into rd.
            InstEmitAlu.GetMShiftedByReg(context, rdOperand, rmOperand, rsOperand, sType);
        }
        else
        {
            using ScopedRegister tempRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();
            using ScopedRegister flagsRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();

            InstEmitCommon.GetCurrentFlags(context, flagsRegister.Operand);

            rmOperand = InstEmitAlu.GetMShiftedByReg(context, tempRegister.Operand, rmOperand, rsOperand, sType, flagsRegister.Operand);

            context.Arm64Assembler.Mov(rdOperand, rmOperand);
            context.Arm64Assembler.Tst(rdOperand, rdOperand);

            InstEmitCommon.RestoreCvFlags(context, flagsRegister.Operand);

            context.SetNzcvModified();
        }
    }

    // MOVT: writes imm into the top halfword of rd, keeping the bottom half.
    // rd is fetched as an input because MOVK reads the untouched half.
    public static void Movt(CodeGenContext context, uint rd, uint imm)
    {
        Operand rdOperand = InstEmitCommon.GetInputGpr(context, rd);

        context.Arm64Assembler.Movk(rdOperand, (int)imm, 1);
    }

    // PKHBT/PKHTB: packs one halfword from rn with one from (optionally shifted)
    // rm. Implemented with EXTR to splice the halves, then a ROR by 16 to put
    // them in their final positions.
    public static void Pkh(CodeGenContext context, uint rd, uint rn, uint rm, bool tb, uint imm5)
    {
        Operand rdOperand = InstEmitCommon.GetOutputGpr(context, rd);
        Operand rnOperand = InstEmitCommon.GetInputGpr(context, rn);
        Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);

        if (!tb && imm5 == 0)
        {
            // PKHBT with no shift: a single EXTR suffices.
            context.Arm64Assembler.Extr(rdOperand, rnOperand, rmOperand, 16);
        }
        else
        {
            using ScopedRegister tempRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();

            if (tb)
            {
                // PKHTB: rm is arithmetically shifted; imm5 == 0 encodes ASR #32
                // in A32, hence the shift amount of 31 here (32-bit operand).
                context.Arm64Assembler.Asr(tempRegister.Operand, rmOperand, InstEmitCommon.Const(imm5 == 0 ? 31 : (int)imm5));
                context.Arm64Assembler.Extr(rdOperand, tempRegister.Operand, rnOperand, 16);
            }
            else
            {
                // PKHBT: rm is logically shifted left before packing.
                context.Arm64Assembler.Lsl(tempRegister.Operand, rmOperand, InstEmitCommon.Const((int)imm5));
                context.Arm64Assembler.Extr(rdOperand, rnOperand, tempRegister.Operand, 16);
            }
        }

        context.Arm64Assembler.Ror(rdOperand, rdOperand, InstEmitCommon.Const(16));
    }
}
}

View File

@@ -0,0 +1,603 @@
using Ryujinx.Cpu.LightningJit.CodeGen;
using System;
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
static class InstEmitMultiply
{
// MLA: rd = rn * rm + ra, lowered to a single AArch64 MADD.
public static void Mla(CodeGenContext context, uint rd, uint rn, uint rm, uint ra)
{
    Operand dest = InstEmitCommon.GetOutputGpr(context, rd);
    Operand left = InstEmitCommon.GetInputGpr(context, rn);
    Operand right = InstEmitCommon.GetInputGpr(context, rm);
    Operand addend = InstEmitCommon.GetInputGpr(context, ra);

    context.Arm64Assembler.Madd(dest, left, right, addend);
}
// MLS: rd = ra - rn * rm, lowered to a single AArch64 MSUB.
public static void Mls(CodeGenContext context, uint rd, uint rn, uint rm, uint ra)
{
    Operand dest = InstEmitCommon.GetOutputGpr(context, rd);
    Operand left = InstEmitCommon.GetInputGpr(context, rn);
    Operand right = InstEmitCommon.GetInputGpr(context, rm);
    Operand minuend = InstEmitCommon.GetInputGpr(context, ra);

    context.Arm64Assembler.Msub(dest, left, right, minuend);
}
// MUL: rd = rn * rm. With "s" set, N and Z are recomputed from the result
// (via TST) while C and V are preserved through the saved flags register.
public static void Mul(CodeGenContext context, uint rd, uint rn, uint rm, bool s)
{
    Operand dest = InstEmitCommon.GetOutputGpr(context, rd);
    Operand left = InstEmitCommon.GetInputGpr(context, rn);
    Operand right = InstEmitCommon.GetInputGpr(context, rm);

    if (!s)
    {
        context.Arm64Assembler.Mul(dest, left, right);

        return;
    }

    using ScopedRegister flagsRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    InstEmitCommon.GetCurrentFlags(context, flagsRegister.Operand);

    context.Arm64Assembler.Mul(dest, left, right);
    context.Arm64Assembler.Tst(dest, dest);

    InstEmitCommon.RestoreCvFlags(context, flagsRegister.Operand);

    context.SetNzcvModified();
}
// SMLABB/SMLABT/SMLATB/SMLATT: rd = selected_half(rn) * selected_half(rm) + ra,
// signed 16x16+32. SelectSignedHalfword (defined elsewhere in this file)
// presumably sign-extends the chosen half into the temp register.
public static void Smlabb(CodeGenContext context, uint rd, uint rn, uint rm, uint ra, bool nHigh, bool mHigh)
{
    using ScopedRegister tempN = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister tempM = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister tempA = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    // 64-bit views of the same physical registers.
    Operand tempM64 = new(OperandKind.Register, OperandType.I64, tempM.Operand.Value);
    Operand tempA64 = new(OperandKind.Register, OperandType.I64, tempA.Operand.Value);

    Operand rdOperand = InstEmitCommon.GetOutputGpr(context, rd);
    Operand rnOperand = InstEmitCommon.GetInputGpr(context, rn);
    Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);
    Operand raOperand = InstEmitCommon.GetInputGpr(context, ra);

    SelectSignedHalfword(context, tempN.Operand, rnOperand, nHigh);
    SelectSignedHalfword(context, tempM.Operand, rmOperand, mHigh);

    context.Arm64Assembler.Sxtw(tempA64, raOperand);

    // Full 64-bit result lands in tempN's register (SMADDL destination).
    context.Arm64Assembler.Smaddl(tempN.Operand, tempN.Operand, tempM.Operand, tempA64);

    // CheckResultOverflow (elsewhere in this file) presumably raises the Q flag
    // when the 64-bit result does not fit in 32 bits — confirm; tempM64 looks
    // like a scratch operand.
    CheckResultOverflow(context, tempM64, tempN.Operand);

    context.Arm64Assembler.Mov(rdOperand, tempN.Operand);
}
// SMLAD/SMLADX: dual signed 16x16 multiply, both products added to ra
// ("x" swaps the halfwords of rm first).
public static void Smlad(CodeGenContext context, uint rd, uint rn, uint rm, uint ra, bool x)
{
    EmitSmladSmlsd(context, rd, rn, rm, ra, x, add: true);
}
// SMLAL: signed 32x32+64 multiply-accumulate into the rdHi:rdLo pair.
public static void Smlal(CodeGenContext context, uint rdLo, uint rdHi, uint rn, uint rm, bool s)
{
    EmitMultiplyAddLong(context, context.Arm64Assembler.Smaddl, rdLo, rdHi, rn, rm, s);
}
// SMLALBB/SMLALBT/SMLALTB/SMLALTT: signed 16x16+64 multiply-accumulate.
// Packs rdHi:rdLo into a 64-bit accumulator, does the halfword multiply-add,
// then splits the result back into the pair.
public static void Smlalbb(CodeGenContext context, uint rdLo, uint rdHi, uint rn, uint rm, bool nHigh, bool mHigh)
{
    Operand rdLoOperand = InstEmitCommon.GetOutputGpr(context, rdLo);
    Operand rdHiOperand = InstEmitCommon.GetOutputGpr(context, rdHi);
    Operand rnOperand = InstEmitCommon.GetInputGpr(context, rn);
    Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);

    // 64-bit views of the destination pair's registers.
    Operand rdLoOperand64 = new(OperandKind.Register, OperandType.I64, rdLoOperand.Value);
    Operand rdHiOperand64 = new(OperandKind.Register, OperandType.I64, rdHiOperand.Value);

    using ScopedRegister tempN = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister tempM = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister tempA = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    SelectSignedHalfword(context, tempN.Operand, rnOperand, nHigh);
    SelectSignedHalfword(context, tempM.Operand, rmOperand, mHigh);

    Operand tempA64 = new(OperandKind.Register, OperandType.I64, tempA.Operand.Value);

    // tempA64 = (rdHi << 32) | rdLo. The ORR with the 32-bit rdLo view relies on
    // rdLo being kept zero-extended (see the zero-extend MOVs in this file).
    context.Arm64Assembler.Lsl(tempA64, rdHiOperand64, InstEmitCommon.Const(32));
    context.Arm64Assembler.Orr(tempA64, tempA64, rdLoOperand);

    context.Arm64Assembler.Smaddl(rdLoOperand64, tempN.Operand, tempM.Operand, tempA64);

    if (rdLo != rdHi)
    {
        // High word of the 64-bit result goes back to rdHi.
        context.Arm64Assembler.Lsr(rdHiOperand64, rdLoOperand64, InstEmitCommon.Const(32));
    }

    context.Arm64Assembler.Mov(rdLoOperand, rdLoOperand); // Zero-extend.
}
// SMLALD/SMLALDX: dual signed 16x16 multiply, products added, accumulated into
// the rdHi:rdLo pair ("x" swaps the halfwords of rm first).
public static void Smlald(CodeGenContext context, uint rdLo, uint rdHi, uint rn, uint rm, bool x)
{
    EmitSmlaldSmlsld(context, rdLo, rdHi, rn, rm, x, add: true);
}
// SMLAWB/SMLAWT: rd = (rn * selected_half(rm) + (ra << 16)) >> 16,
// a signed 32x16 multiply-accumulate keeping the top 32 bits of the
// 48-bit product.
public static void Smlawb(CodeGenContext context, uint rd, uint rn, uint rm, uint ra, bool mHigh)
{
    using ScopedRegister tempN = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister tempM = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister tempA = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    // 64-bit views of the temp registers.
    Operand tempN64 = new(OperandKind.Register, OperandType.I64, tempN.Operand.Value);
    Operand tempM64 = new(OperandKind.Register, OperandType.I64, tempM.Operand.Value);
    Operand tempA64 = new(OperandKind.Register, OperandType.I64, tempA.Operand.Value);

    Operand rdOperand = InstEmitCommon.GetOutputGpr(context, rd);
    Operand rnOperand = InstEmitCommon.GetInputGpr(context, rn);
    Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);
    Operand raOperand = InstEmitCommon.GetInputGpr(context, ra);

    SelectSignedHalfword(context, tempM.Operand, rmOperand, mHigh);

    // Pre-shift the accumulator so it aligns with the 48-bit product.
    context.Arm64Assembler.Sxtw(tempA64, raOperand);
    context.Arm64Assembler.Lsl(tempA64, tempA64, InstEmitCommon.Const(16));

    context.Arm64Assembler.Smaddl(tempN.Operand, rnOperand, tempM.Operand, tempA64);

    // Drop the low 16 bits of the product to get the A32 result.
    context.Arm64Assembler.Asr(tempN64, tempN64, InstEmitCommon.Const(16));

    // Presumably sets Q on 32-bit overflow — defined elsewhere; confirm.
    CheckResultOverflow(context, tempM64, tempN.Operand);

    context.Arm64Assembler.Mov(rdOperand, tempN.Operand);
}
// SMLSD/SMLSDX: dual signed 16x16 multiply, difference of products added to ra.
public static void Smlsd(CodeGenContext context, uint rd, uint rn, uint rm, uint ra, bool x)
{
    EmitSmladSmlsd(context, rd, rn, rm, ra, x, add: false);
}
// SMLSLD/SMLSLDX: dual signed 16x16 multiply, difference of products
// accumulated into the rdHi:rdLo pair.
public static void Smlsld(CodeGenContext context, uint rdLo, uint rdHi, uint rn, uint rm, bool x)
{
    EmitSmlaldSmlsld(context, rdLo, rdHi, rn, rm, x, add: false);
}
// SMMLA/SMMLAR: rd = high word of (ra:0 + rn * rm), optionally rounded.
public static void Smmla(CodeGenContext context, uint rd, uint rn, uint rm, uint ra, bool r)
{
    EmitSmmlaSmmls(context, rd, rn, rm, ra, r, add: true);
}
// SMMLS/SMMLSR: rd = high word of (ra:0 - rn * rm), optionally rounded.
public static void Smmls(CodeGenContext context, uint rd, uint rn, uint rm, uint ra, bool r)
{
    EmitSmmlaSmmls(context, rd, rn, rm, ra, r, add: false);
}
// SMMUL/SMMULR: rd = high 32 bits of the signed 64-bit product rn * rm.
// With "r" set, 0x80000000 is added to the product first so the high word
// is rounded rather than truncated.
public static void Smmul(CodeGenContext context, uint rd, uint rn, uint rm, bool r)
{
    Operand dest = InstEmitCommon.GetOutputGpr(context, rd);
    Operand left = InstEmitCommon.GetInputGpr(context, rn);
    Operand right = InstEmitCommon.GetInputGpr(context, rm);

    // 64-bit view of the destination register, receives the full product.
    Operand dest64 = new(OperandKind.Register, OperandType.I64, dest.Value);

    context.Arm64Assembler.Smull(dest64, left, right);

    if (r)
    {
        using ScopedRegister roundConst = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        // Rounding bias: half of 2^32.
        context.Arm64Assembler.Mov(roundConst.Operand, 0x80000000u);
        context.Arm64Assembler.Add(dest64, dest64, roundConst.Operand);
    }

    context.Arm64Assembler.Lsr(dest64, dest64, InstEmitCommon.Const(32));
}
// SMUAD/SMUADX: dual signed 16x16 multiply, products added.
public static void Smuad(CodeGenContext context, uint rd, uint rn, uint rm, bool x)
{
    EmitSmuadSmusd(context, rd, rn, rm, x, add: true);
}
// SMULBB/SMULBT/SMULTB/SMULTT: signed 16x16 -> 32 multiply of the selected
// halfwords of rn and rm (nHigh/mHigh pick top or bottom half).
public static void Smulbb(CodeGenContext context, uint rd, uint rn, uint rm, bool nHigh, bool mHigh)
{
    Operand rdOperand = InstEmitCommon.GetOutputGpr(context, rd);
    Operand rnOperand = InstEmitCommon.GetInputGpr(context, rn);
    Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);

    // 64-bit view of the destination register for the SMULL result.
    Operand rdOperand64 = new(OperandKind.Register, OperandType.I64, rdOperand.Value);

    using ScopedRegister tempN = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister tempM = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    // SelectSignedHalfword presumably sign-extends the chosen half — defined
    // elsewhere in this file.
    SelectSignedHalfword(context, tempN.Operand, rnOperand, nHigh);
    SelectSignedHalfword(context, tempM.Operand, rmOperand, mHigh);

    context.Arm64Assembler.Smull(rdOperand64, tempN.Operand, tempM.Operand);

    context.Arm64Assembler.Mov(rdOperand, rdOperand); // Zero-extend.
}
// SMULL: signed 32x32 -> 64 multiply into the rdHi:rdLo pair.
public static void Smull(CodeGenContext context, uint rdLo, uint rdHi, uint rn, uint rm, bool s)
{
    EmitMultiplyLong(context, context.Arm64Assembler.Smull, rdLo, rdHi, rn, rm, s);
}
// SMULWB/SMULWT: rd = (rn * selected_half(rm)) >> 16, keeping the top 32 bits
// of the 48-bit signed product.
public static void Smulwb(CodeGenContext context, uint rd, uint rn, uint rm, bool mHigh)
{
    using ScopedRegister tempN = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister tempM = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    // 64-bit views of the temp registers.
    Operand tempN64 = new(OperandKind.Register, OperandType.I64, tempN.Operand.Value);
    Operand tempM64 = new(OperandKind.Register, OperandType.I64, tempM.Operand.Value);

    Operand rdOperand = InstEmitCommon.GetOutputGpr(context, rd);
    Operand rnOperand = InstEmitCommon.GetInputGpr(context, rn);
    Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);

    SelectSignedHalfword(context, tempM.Operand, rmOperand, mHigh);

    // Full 64-bit product into tempN's register, then drop the low 16 bits.
    context.Arm64Assembler.Smull(tempN.Operand, rnOperand, tempM.Operand);
    context.Arm64Assembler.Asr(tempN64, tempN64, InstEmitCommon.Const(16));

    // Presumably sets Q on 32-bit overflow — defined elsewhere; confirm.
    CheckResultOverflow(context, tempM64, tempN.Operand);

    context.Arm64Assembler.Mov(rdOperand, tempN.Operand);
}
// SMUSD/SMUSDX: dual signed 16x16 multiply, difference of products.
public static void Smusd(CodeGenContext context, uint rd, uint rn, uint rm, bool x)
{
    EmitSmuadSmusd(context, rd, rn, rm, x, add: false);
}
// UMAAL: rdHi:rdLo = rn * rm + rdLo + rdHi (unsigned, cannot overflow 64 bits).
// The 32-bit operands used as 64-bit addends rely on the JIT keeping guest
// GPRs zero-extended in host registers (see the zero-extend MOVs in this file).
public static void Umaal(CodeGenContext context, uint rdLo, uint rdHi, uint rn, uint rm)
{
    Operand rdLoOperand = InstEmitCommon.GetOutputGpr(context, rdLo);
    Operand rdHiOperand = InstEmitCommon.GetOutputGpr(context, rdHi);
    Operand rnOperand = InstEmitCommon.GetInputGpr(context, rn);
    Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);

    // 64-bit views of the destination pair's registers.
    Operand rdLoOperand64 = new(OperandKind.Register, OperandType.I64, rdLoOperand.Value);
    Operand rdHiOperand64 = new(OperandKind.Register, OperandType.I64, rdHiOperand.Value);

    if (rdLo == rdHi)
    {
        // rdLo and rdHi alias: compute into a temp so the second addend is not
        // clobbered before it is consumed.
        using ScopedRegister tempRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        Operand tempRegister64 = new(OperandKind.Register, OperandType.I64, tempRegister.Operand.Value);

        context.Arm64Assembler.Umaddl(tempRegister64, rnOperand, rmOperand, rdLoOperand64);
        context.Arm64Assembler.Add(rdLoOperand64, tempRegister64, rdHiOperand64);
    }
    else
    {
        context.Arm64Assembler.Umaddl(rdLoOperand64, rnOperand, rmOperand, rdLoOperand64);
        context.Arm64Assembler.Add(rdLoOperand64, rdLoOperand64, rdHiOperand64);
    }

    if (rdLo != rdHi)
    {
        // High word of the 64-bit result goes back to rdHi.
        context.Arm64Assembler.Lsr(rdHiOperand64, rdLoOperand64, InstEmitCommon.Const(32));
    }

    context.Arm64Assembler.Mov(rdLoOperand, rdLoOperand); // Zero-extend.
}
// UMLAL: unsigned 32x32+64 multiply-accumulate into the rdHi:rdLo pair.
public static void Umlal(CodeGenContext context, uint rdLo, uint rdHi, uint rn, uint rm, bool s)
{
    EmitMultiplyAddLong(context, context.Arm64Assembler.Umaddl, rdLo, rdHi, rn, rm, s);
}
// UMULL: unsigned 32x32 -> 64 multiply into the rdHi:rdLo pair.
public static void Umull(CodeGenContext context, uint rdLo, uint rdHi, uint rn, uint rm, bool s)
{
    EmitMultiplyLong(context, context.Arm64Assembler.Umull, rdLo, rdHi, rn, rm, s);
}
// Shared helper for SMULL/UMULL: writes the 64-bit product rn * rm into the
// rdHi:rdLo pair using the supplied 64-bit multiply emitter ("action").
// With "s" set, N and Z are recomputed from the 64-bit result while C and V
// are preserved through the saved flags register.
private static void EmitMultiplyLong(CodeGenContext context, Action<Operand, Operand, Operand> action, uint rdLo, uint rdHi, uint rn, uint rm, bool s)
{
    Operand rdLoOperand = InstEmitCommon.GetOutputGpr(context, rdLo);
    Operand rdHiOperand = InstEmitCommon.GetOutputGpr(context, rdHi);
    Operand rnOperand = InstEmitCommon.GetInputGpr(context, rn);
    Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);

    // 64-bit views of the destination pair's registers.
    Operand rdLoOperand64 = new(OperandKind.Register, OperandType.I64, rdLoOperand.Value);
    Operand rdHiOperand64 = new(OperandKind.Register, OperandType.I64, rdHiOperand.Value);

    if (s)
    {
        using ScopedRegister flagsRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        InstEmitCommon.GetCurrentFlags(context, flagsRegister.Operand);

        action(rdLoOperand64, rnOperand, rmOperand);
        context.Arm64Assembler.Tst(rdLoOperand64, rdLoOperand64);

        InstEmitCommon.RestoreCvFlags(context, flagsRegister.Operand);

        // Fix: record that NZCV was changed, consistent with Mul and
        // EmitMultiplyAddLong — this was previously missing, so the JIT would
        // not know the flags had been modified on the S path.
        context.SetNzcvModified();
    }
    else
    {
        action(rdLoOperand64, rnOperand, rmOperand);
    }

    if (rdLo != rdHi)
    {
        // High word of the 64-bit result goes back to rdHi.
        context.Arm64Assembler.Lsr(rdHiOperand64, rdLoOperand64, InstEmitCommon.Const(32));
    }

    context.Arm64Assembler.Mov(rdLoOperand, rdLoOperand); // Zero-extend.
}
// Shared helper for SMLAL/UMLAL: rdHi:rdLo = rn * rm + rdHi:rdLo, using the
// supplied 64-bit multiply-add emitter ("action"). With "s" set, N and Z are
// recomputed from the 64-bit result while C and V are preserved.
private static void EmitMultiplyAddLong(CodeGenContext context, Action<Operand, Operand, Operand, Operand> action, uint rdLo, uint rdHi, uint rn, uint rm, bool s)
{
    Operand rdLoOperand = InstEmitCommon.GetOutputGpr(context, rdLo);
    Operand rdHiOperand = InstEmitCommon.GetOutputGpr(context, rdHi);
    Operand rnOperand = InstEmitCommon.GetInputGpr(context, rn);
    Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);

    // 64-bit views of the destination pair's registers.
    Operand rdLoOperand64 = new(OperandKind.Register, OperandType.I64, rdLoOperand.Value);
    Operand rdHiOperand64 = new(OperandKind.Register, OperandType.I64, rdHiOperand.Value);

    using ScopedRegister raRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    Operand raOperand64 = new(OperandKind.Register, OperandType.I64, raRegister.Operand.Value);

    // raOperand64 = (rdHi << 32) | rdLo, the 64-bit accumulator. The ORR with
    // the 32-bit rdLo view relies on rdLo being kept zero-extended.
    context.Arm64Assembler.Lsl(raOperand64, rdHiOperand64, InstEmitCommon.Const(32));
    context.Arm64Assembler.Orr(raOperand64, raOperand64, rdLoOperand);

    if (s)
    {
        using ScopedRegister flagsRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        InstEmitCommon.GetCurrentFlags(context, flagsRegister.Operand);

        action(rdLoOperand64, rnOperand, rmOperand, raOperand64);
        context.Arm64Assembler.Tst(rdLoOperand64, rdLoOperand64);

        InstEmitCommon.RestoreCvFlags(context, flagsRegister.Operand);

        context.SetNzcvModified();
    }
    else
    {
        action(rdLoOperand64, rnOperand, rmOperand, raOperand64);
    }

    if (rdLo != rdHi)
    {
        // High word of the 64-bit result goes back to rdHi.
        context.Arm64Assembler.Lsr(rdHiOperand64, rdLoOperand64, InstEmitCommon.Const(32));
    }

    context.Arm64Assembler.Mov(rdLoOperand, rdLoOperand); // Zero-extend.
}
// Shared helper for SMLAD/SMLSD (and their X variants): multiplies the bottom
// halfwords and the top halfwords of rn/rm, adds or subtracts the two products,
// then adds ra. "x" rotates rm by 16 first so its halfwords are exchanged.
private static void EmitSmladSmlsd(CodeGenContext context, uint rd, uint rn, uint rm, uint ra, bool x, bool add)
{
    Operand rdOperand = InstEmitCommon.GetOutputGpr(context, rd);
    Operand rnOperand = InstEmitCommon.GetInputGpr(context, rn);
    Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);
    Operand raOperand = InstEmitCommon.GetInputGpr(context, ra);

    // 64-bit view of the destination register.
    Operand rdOperand64 = new(OperandKind.Register, OperandType.I64, rdOperand.Value);

    using ScopedRegister tempN = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister tempM = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister tempA = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    // 64-bit views of the temp registers.
    Operand tempN64 = new(OperandKind.Register, OperandType.I64, tempN.Operand.Value);
    Operand tempM64 = new(OperandKind.Register, OperandType.I64, tempM.Operand.Value);
    Operand tempA64 = new(OperandKind.Register, OperandType.I64, tempA.Operand.Value);

    ScopedRegister swapTemp = default;

    if (x)
    {
        // X form: exchange the halfwords of rm before multiplying.
        swapTemp = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        context.Arm64Assembler.Ror(swapTemp.Operand, rmOperand, InstEmitCommon.Const(16));

        rmOperand = swapTemp.Operand;
    }

    // Bottom product: sign-extend the low halves and multiply in 64 bits.
    context.Arm64Assembler.Sxth(tempN64, rnOperand);
    context.Arm64Assembler.Sxth(tempM64, rmOperand);
    context.Arm64Assembler.Sxtw(tempA64, raOperand);

    context.Arm64Assembler.Mul(rdOperand64, tempN64, tempM64);

    // Top product: arithmetic shift brings the high halves down sign-extended.
    context.Arm64Assembler.Asr(tempN.Operand, rnOperand, InstEmitCommon.Const(16));
    context.Arm64Assembler.Asr(tempM.Operand, rmOperand, InstEmitCommon.Const(16));

    if (add)
    {
        context.Arm64Assembler.Smaddl(rdOperand64, tempN.Operand, tempM.Operand, rdOperand64);
    }
    else
    {
        context.Arm64Assembler.Smsubl(rdOperand64, tempN.Operand, tempM.Operand, rdOperand64);
    }

    context.Arm64Assembler.Add(rdOperand64, rdOperand64, tempA64);

    // Presumably sets Q on 32-bit overflow — defined elsewhere; confirm.
    CheckResultOverflow(context, tempM64, rdOperand64);

    context.Arm64Assembler.Mov(rdOperand, rdOperand); // Zero-extend.

    if (x)
    {
        swapTemp.Dispose();
    }
}
// Shared helper for SMLALD/SMLSLD (and their X variants): dual signed 16x16
// multiply, products added or subtracted, result accumulated into the 64-bit
// rdHi:rdLo pair. "x" rotates rm by 16 first so its halfwords are exchanged.
private static void EmitSmlaldSmlsld(CodeGenContext context, uint rdLo, uint rdHi, uint rn, uint rm, bool x, bool add)
{
    Operand rdLoOperand = InstEmitCommon.GetOutputGpr(context, rdLo);
    Operand rdHiOperand = InstEmitCommon.GetOutputGpr(context, rdHi);
    Operand rnOperand = InstEmitCommon.GetInputGpr(context, rn);
    Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);

    // 64-bit views of the destination pair's registers.
    Operand rdLoOperand64 = new(OperandKind.Register, OperandType.I64, rdLoOperand.Value);
    Operand rdHiOperand64 = new(OperandKind.Register, OperandType.I64, rdHiOperand.Value);

    using ScopedRegister tempN = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister tempM = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister tempA = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    // 64-bit views of the temp registers.
    Operand tempN64 = new(OperandKind.Register, OperandType.I64, tempN.Operand.Value);
    Operand tempM64 = new(OperandKind.Register, OperandType.I64, tempM.Operand.Value);
    Operand tempA64 = new(OperandKind.Register, OperandType.I64, tempA.Operand.Value);

    ScopedRegister swapTemp = default;

    if (x)
    {
        // X form: exchange the halfwords of rm before multiplying.
        swapTemp = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        context.Arm64Assembler.Ror(swapTemp.Operand, rmOperand, InstEmitCommon.Const(16));

        rmOperand = swapTemp.Operand;
    }

    // Bottom product from the sign-extended low halves.
    context.Arm64Assembler.Sxth(tempN64, rnOperand);
    context.Arm64Assembler.Sxth(tempM64, rmOperand);

    context.Arm64Assembler.Mul(rdLoOperand64, tempN64, tempM64);

    // Top product from the sign-extended high halves.
    context.Arm64Assembler.Asr(tempN.Operand, rnOperand, InstEmitCommon.Const(16));
    context.Arm64Assembler.Asr(tempM.Operand, rmOperand, InstEmitCommon.Const(16));

    if (add)
    {
        context.Arm64Assembler.Smaddl(rdLoOperand64, tempN.Operand, tempM.Operand, rdLoOperand64);
    }
    else
    {
        context.Arm64Assembler.Smsubl(rdLoOperand64, tempN.Operand, tempM.Operand, rdLoOperand64);
    }

    // Add the 64-bit accumulator (rdHi << 32) | rdLo; the ORR with the 32-bit
    // rdLo view relies on rdLo being kept zero-extended.
    context.Arm64Assembler.Lsl(tempA64, rdHiOperand64, InstEmitCommon.Const(32));
    context.Arm64Assembler.Orr(tempA64, tempA64, rdLoOperand);
    context.Arm64Assembler.Add(rdLoOperand64, rdLoOperand64, tempA64);

    if (rdLo != rdHi)
    {
        // High word of the 64-bit result goes back to rdHi.
        context.Arm64Assembler.Lsr(rdHiOperand64, rdLoOperand64, InstEmitCommon.Const(32));
    }

    context.Arm64Assembler.Mov(rdLoOperand, rdLoOperand); // Zero-extend.

    if (x)
    {
        swapTemp.Dispose();
    }
}
// SMMLA/SMMLS: signed most-significant-word multiply accumulate/subtract.
// Produces the top 32 bits of (ra << 32) +/- (rn * rm), optionally rounded.
private static void EmitSmmlaSmmls(CodeGenContext context, uint rd, uint rn, uint rm, uint ra, bool r, bool add)
{
    Operand rdOperand = InstEmitCommon.GetOutputGpr(context, rd);
    Operand rnOperand = InstEmitCommon.GetInputGpr(context, rn);
    Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);
    Operand raOperand = InstEmitCommon.GetInputGpr(context, ra);

    // 64-bit register views of the destination and the addend.
    Operand rdOperand64 = new(OperandKind.Register, OperandType.I64, rdOperand.Value);
    Operand raOperand64 = new(OperandKind.Register, OperandType.I64, raOperand.Value);

    using ScopedRegister accTemp = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    Operand accTemp64 = new(OperandKind.Register, OperandType.I64, accTemp.Operand.Value);

    // Move the 32-bit addend into the upper half of a 64-bit accumulator.
    context.Arm64Assembler.Lsl(accTemp64, raOperand64, InstEmitCommon.Const(32));

    if (add)
    {
        context.Arm64Assembler.Smaddl(rdOperand64, rnOperand, rmOperand, accTemp64);
    }
    else
    {
        context.Arm64Assembler.Smsubl(rdOperand64, rnOperand, rmOperand, accTemp64);
    }

    if (r)
    {
        // Round to nearest: add 0x80000000 before taking the high word.
        // The 32-bit move leaves the upper half of the temp register clear.
        context.Arm64Assembler.Mov(accTemp.Operand, 0x80000000u);
        context.Arm64Assembler.Add(rdOperand64, rdOperand64, accTemp64);
    }

    // Keep only the most significant word of the 64-bit result.
    context.Arm64Assembler.Lsr(rdOperand64, rdOperand64, InstEmitCommon.Const(32));
}
// SMUAD/SMUSD: two signed 16x16 halfword multiplies, then the sum (add)
// or difference (sub) of the products. When x is set, the halfwords of
// Rm are exchanged before multiplying.
private static void EmitSmuadSmusd(CodeGenContext context, uint rd, uint rn, uint rm, bool x, bool add)
{
    Operand rdOperand = InstEmitCommon.GetOutputGpr(context, rd);
    Operand rnOperand = InstEmitCommon.GetInputGpr(context, rn);
    Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);

    Operand rdOperand64 = new(OperandKind.Register, OperandType.I64, rdOperand.Value);

    using ScopedRegister halfN = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister halfM = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    Operand halfN64 = new(OperandKind.Register, OperandType.I64, halfN.Operand.Value);
    Operand halfM64 = new(OperandKind.Register, OperandType.I64, halfM.Operand.Value);

    ScopedRegister rotTemp = default;

    if (x)
    {
        // Exchange the halfwords of Rm with a 16-bit rotate.
        rotTemp = context.RegisterAllocator.AllocateTempGprRegisterScoped();
        context.Arm64Assembler.Ror(rotTemp.Operand, rmOperand, InstEmitCommon.Const(16));
        rmOperand = rotTemp.Operand;
    }

    // Product of the low halfwords.
    context.Arm64Assembler.Sxth(halfN64, rnOperand);
    context.Arm64Assembler.Sxth(halfM64, rmOperand);
    context.Arm64Assembler.Mul(rdOperand64, halfN64, halfM64);

    // Product of the high halfwords, added to or subtracted from the first.
    context.Arm64Assembler.Asr(halfN.Operand, rnOperand, InstEmitCommon.Const(16));
    context.Arm64Assembler.Asr(halfM.Operand, rmOperand, InstEmitCommon.Const(16));

    if (add)
    {
        context.Arm64Assembler.Smaddl(rdOperand64, halfN.Operand, halfM.Operand, rdOperand64);
    }
    else
    {
        context.Arm64Assembler.Smsubl(rdOperand64, halfN.Operand, halfM.Operand, rdOperand64);
    }

    context.Arm64Assembler.Mov(rdOperand, rdOperand); // Zero-extend.

    if (x)
    {
        rotTemp.Dispose();
    }
}
// Sign-extends one halfword of "source" into "dest": the low halfword via
// SXTH, or the high halfword via an arithmetic shift right by 16.
private static void SelectSignedHalfword(CodeGenContext context, Operand dest, Operand source, bool high)
{
    if (!high)
    {
        context.Arm64Assembler.Sxth(dest, source);
    }
    else
    {
        context.Arm64Assembler.Asr(dest, source, InstEmitCommon.Const(16));
    }
}
// Sets PSTATE.Q if "result" does not fit in a signed 32-bit integer.
// "temp64" is a scratch operand used as a 64-bit view; "result" is assumed
// to hold the full-width value to be checked.
private static void CheckResultOverflow(CodeGenContext context, Operand temp64, Operand result)
{
    // temp64 = SXTW(result) - result: zero iff the value equals its own
    // 32-bit sign extension, i.e. iff there was no overflow.
    context.Arm64Assembler.Sxtw(temp64, result);
    context.Arm64Assembler.Sub(temp64, temp64, result);

    // Emit a CBZ with a placeholder (zero) offset; remember where it is so
    // it can be patched once the size of the skipped code is known.
    int branchIndex = context.CodeWriter.InstructionPointer;
    context.Arm64Assembler.Cbz(temp64, 0);

    // Set Q flag if we had an overflow.
    InstEmitSaturate.SetQFlag(context);

    // Patch the CBZ to skip the SetQFlag code: the instruction delta is
    // written into the 19-bit immediate field at bits [23:5].
    int delta = context.CodeWriter.InstructionPointer - branchIndex;
    context.CodeWriter.WriteInstructionAt(branchIndex, context.CodeWriter.ReadInstructionAt(branchIndex) | (uint)((delta & 0x7ffff) << 5));
}
}
}

View File

@@ -0,0 +1,344 @@
using System;
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
// Emits A64 code for the A32 Advanced SIMD (NEON) arithmetic instructions.
// Each handler forwards to an InstEmitNeonCommon emitter; where an
// instruction has unsigned/signed forms, the "u"/"op" field selects the
// corresponding U*/S* A64 opcode.
static class InstEmitNeonArithmetic
{
    public static void Vaba(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size, uint q) =>
        InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, size, q, u ? context.Arm64Assembler.Uaba : context.Arm64Assembler.Saba, null);

    public static void Vabal(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size) =>
        InstEmitNeonCommon.EmitVectorBinaryLong(context, rd, rn, rm, size, u ? context.Arm64Assembler.Uabal : context.Arm64Assembler.Sabal);

    public static void VabdF(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
        InstEmitNeonCommon.EmitVectorBinaryF(context, rd, rn, rm, sz, q, context.Arm64Assembler.FabdV, context.Arm64Assembler.FabdVH);

    public static void VabdI(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size, uint q) =>
        InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, size, q, u ? context.Arm64Assembler.Uabd : context.Arm64Assembler.Sabd, null);

    public static void Vabdl(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size) =>
        InstEmitNeonCommon.EmitVectorBinaryLong(context, rd, rn, rm, size, u ? context.Arm64Assembler.Uabdl : context.Arm64Assembler.Sabdl);

    public static void Vabs(CodeGenContext context, uint rd, uint rm, bool f, uint size, uint q)
    {
        if (!f)
        {
            InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, size, q, context.Arm64Assembler.AbsV);
            return;
        }

        InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.FabsSingleAndDouble, context.Arm64Assembler.FabsHalf);
    }

    public static void VaddF(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
        InstEmitNeonCommon.EmitVectorBinaryF(context, rd, rn, rm, sz, q, context.Arm64Assembler.FaddSingleAndDouble, context.Arm64Assembler.FaddHalf);

    public static void VaddI(CodeGenContext context, uint rd, uint rn, uint rm, uint size, uint q) =>
        InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, size, q, context.Arm64Assembler.AddV, context.Arm64Assembler.AddS);

    public static void Vaddhn(CodeGenContext context, uint rd, uint rn, uint rm, uint size) =>
        InstEmitNeonCommon.EmitVectorBinaryNarrow(context, rd, rn, rm, size, context.Arm64Assembler.Addhn);

    public static void Vaddl(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size) =>
        InstEmitNeonCommon.EmitVectorBinaryLong(context, rd, rn, rm, size, u ? context.Arm64Assembler.Uaddl : context.Arm64Assembler.Saddl);

    public static void Vaddw(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size) =>
        InstEmitNeonCommon.EmitVectorBinaryWide(context, rd, rn, rm, size, u ? context.Arm64Assembler.Uaddw : context.Arm64Assembler.Saddw);

    public static void VfmaF(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
        InstEmitNeonCommon.EmitVectorTernaryRdF(context, rd, rn, rm, sz, q, context.Arm64Assembler.FmlaVecSingleAndDouble, context.Arm64Assembler.FmlaVecHalf);

    public static void VfmsF(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
        InstEmitNeonCommon.EmitVectorTernaryRdF(context, rd, rn, rm, sz, q, context.Arm64Assembler.FmlsVecSingleAndDouble, context.Arm64Assembler.FmlsVecHalf);

    public static void Vhadd(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size, uint q) =>
        InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, size, q, u ? context.Arm64Assembler.Uhadd : context.Arm64Assembler.Shadd, null);

    public static void Vhsub(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size, uint q) =>
        InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, size, q, u ? context.Arm64Assembler.Uhsub : context.Arm64Assembler.Shsub, null);

    public static void Vmaxnm(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
        InstEmitNeonCommon.EmitVectorBinaryF(context, rd, rn, rm, sz, q, context.Arm64Assembler.FmaxnmSingleAndDouble, context.Arm64Assembler.FmaxnmHalf);

    public static void VmaxF(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
        InstEmitNeonCommon.EmitVectorBinaryF(context, rd, rn, rm, sz, q, context.Arm64Assembler.FmaxSingleAndDouble, context.Arm64Assembler.FmaxHalf);

    public static void VmaxI(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size, uint q) =>
        InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, size, q, u ? context.Arm64Assembler.Umax : context.Arm64Assembler.Smax, null);

    public static void Vminnm(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
        InstEmitNeonCommon.EmitVectorBinaryF(context, rd, rn, rm, sz, q, context.Arm64Assembler.FminnmSingleAndDouble, context.Arm64Assembler.FminnmHalf);

    public static void VminF(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
        InstEmitNeonCommon.EmitVectorBinaryF(context, rd, rn, rm, sz, q, context.Arm64Assembler.FminSingleAndDouble, context.Arm64Assembler.FminHalf);

    public static void VminI(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size, uint q) =>
        InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, size, q, u ? context.Arm64Assembler.Umin : context.Arm64Assembler.Smin, null);

    public static void VmlaF(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
        InstEmitNeonCommon.EmitVectorTernaryMulNegRdF(context, rd, rn, rm, sz, q, negProduct: false);

    public static void VmlaI(CodeGenContext context, uint rd, uint rn, uint rm, uint size, uint q) =>
        InstEmitNeonCommon.EmitVectorTernaryRd(context, rd, rn, rm, size, q, context.Arm64Assembler.MlaVec);

    public static void VmlaS(CodeGenContext context, uint rd, uint rn, uint rm, bool f, uint size, uint q)
    {
        if (!f)
        {
            InstEmitNeonCommon.EmitVectorTernaryRdByScalar(context, rd, rn, rm, size, q, context.Arm64Assembler.MlaElt);
            return;
        }

        InstEmitNeonCommon.EmitVectorTernaryMulNegRdByScalarAnyF(context, rd, rn, rm, size, q, negProduct: false);
    }

    public static void VmlalI(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size) =>
        InstEmitNeonCommon.EmitVectorTernaryRdLong(context, rd, rn, rm, size, u ? context.Arm64Assembler.UmlalVec : context.Arm64Assembler.SmlalVec);

    public static void VmlalS(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size) =>
        InstEmitNeonCommon.EmitVectorTernaryRdLongByScalar(context, rd, rn, rm, size, u ? context.Arm64Assembler.UmlalElt : context.Arm64Assembler.SmlalElt);

    public static void VmlsF(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
        InstEmitNeonCommon.EmitVectorTernaryMulNegRdF(context, rd, rn, rm, sz, q, negProduct: true);

    public static void VmlsI(CodeGenContext context, uint rd, uint rn, uint rm, uint size, uint q) =>
        InstEmitNeonCommon.EmitVectorTernaryRd(context, rd, rn, rm, size, q, context.Arm64Assembler.MlsVec);

    public static void VmlsS(CodeGenContext context, uint rd, uint rn, uint rm, bool f, uint size, uint q)
    {
        if (!f)
        {
            InstEmitNeonCommon.EmitVectorTernaryRdByScalar(context, rd, rn, rm, size, q, context.Arm64Assembler.MlsElt);
            return;
        }

        InstEmitNeonCommon.EmitVectorTernaryMulNegRdByScalarAnyF(context, rd, rn, rm, size, q, negProduct: true);
    }

    public static void VmlslI(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size) =>
        InstEmitNeonCommon.EmitVectorTernaryRdLong(context, rd, rn, rm, size, u ? context.Arm64Assembler.UmlslVec : context.Arm64Assembler.SmlslVec);

    public static void VmlslS(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size) =>
        InstEmitNeonCommon.EmitVectorTernaryRdLongByScalar(context, rd, rn, rm, size, u ? context.Arm64Assembler.UmlslElt : context.Arm64Assembler.SmlslElt);

    public static void VmulF(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
        InstEmitNeonCommon.EmitVectorBinaryF(context, rd, rn, rm, sz, q, context.Arm64Assembler.FmulVecSingleAndDouble, context.Arm64Assembler.FmulVecHalf);

    public static void VmulI(CodeGenContext context, uint rd, uint rn, uint rm, bool op, uint size, uint q)
    {
        if (!op)
        {
            InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, size, q, context.Arm64Assembler.MulVec, null);
            return;
        }

        // Polynomial multiply. TODO: Feature check, emulation if not supported.
        InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, size, q, context.Arm64Assembler.Pmul, null);
    }

    public static void VmulS(CodeGenContext context, uint rd, uint rn, uint rm, bool f, uint size, uint q)
    {
        if (!f)
        {
            InstEmitNeonCommon.EmitVectorBinaryByScalar(context, rd, rn, rm, size, q, context.Arm64Assembler.MulElt);
            return;
        }

        InstEmitNeonCommon.EmitVectorBinaryByScalarAnyF(context, rd, rn, rm, size, q, context.Arm64Assembler.FmulElt2regElementSingleAndDouble, context.Arm64Assembler.FmulElt2regElementHalf);
    }

    public static void VmullI(CodeGenContext context, uint rd, uint rn, uint rm, bool op, bool u, uint size)
    {
        if (!op)
        {
            InstEmitNeonCommon.EmitVectorBinaryLong(context, rd, rn, rm, size, u ? context.Arm64Assembler.UmullVec : context.Arm64Assembler.SmullVec);
            return;
        }

        // Polynomial multiply long; size 2 is remapped to 3 to match the A64
        // PMULL encoding. TODO: Feature check, emulation if not supported.
        InstEmitNeonCommon.EmitVectorBinaryLong(context, rd, rn, rm, size == 2 ? 3 : size, context.Arm64Assembler.Pmull);
    }

    public static void VmullS(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size) =>
        InstEmitNeonCommon.EmitVectorBinaryLongByScalar(context, rd, rn, rm, size, u ? context.Arm64Assembler.UmullElt : context.Arm64Assembler.SmullElt);

    public static void Vneg(CodeGenContext context, uint rd, uint rm, bool f, uint size, uint q)
    {
        if (!f)
        {
            InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, size, q, context.Arm64Assembler.NegV);
            return;
        }

        InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.FnegSingleAndDouble, context.Arm64Assembler.FnegHalf);
    }

    public static void Vpadal(CodeGenContext context, uint rd, uint rm, bool op, uint size, uint q) =>
        InstEmitNeonCommon.EmitVectorBinaryRd(context, rd, rm, size, q, op ? context.Arm64Assembler.Uadalp : context.Arm64Assembler.Sadalp);

    public static void VpaddF(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
        InstEmitNeonCommon.EmitVectorBinaryF(context, rd, rn, rm, sz, q, context.Arm64Assembler.FaddpVecSingleAndDouble, context.Arm64Assembler.FaddpVecHalf);

    public static void VpaddI(CodeGenContext context, uint rd, uint rn, uint rm, uint size, uint q) =>
        InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, size, q, context.Arm64Assembler.AddpVec, null);

    public static void Vpaddl(CodeGenContext context, uint rd, uint rm, bool op, uint size, uint q) =>
        InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, size, q, op ? context.Arm64Assembler.Uaddlp : context.Arm64Assembler.Saddlp);

    public static void VpmaxF(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
        InstEmitNeonCommon.EmitVectorBinaryF(context, rd, rn, rm, sz, q, context.Arm64Assembler.FmaxpVecSingleAndDouble, context.Arm64Assembler.FmaxpVecHalf);

    public static void VpmaxI(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size, uint q) =>
        InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, size, q, u ? context.Arm64Assembler.Umaxp : context.Arm64Assembler.Smaxp, null);

    public static void VpminF(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
        InstEmitNeonCommon.EmitVectorBinaryF(context, rd, rn, rm, sz, q, context.Arm64Assembler.FminpVecSingleAndDouble, context.Arm64Assembler.FminpVecHalf);

    public static void VpminI(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size, uint q) =>
        InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, size, q, u ? context.Arm64Assembler.Uminp : context.Arm64Assembler.Sminp, null);

    public static void Vrecpe(CodeGenContext context, uint rd, uint rm, bool f, uint size, uint q)
    {
        if (!f)
        {
            // Integer form not implemented yet.
            throw new NotImplementedException();
        }

        InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.FrecpeV, context.Arm64Assembler.FrecpeVH);
    }

    public static void Vrecps(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
        InstEmitNeonCommon.EmitVectorBinaryF(context, rd, rn, rm, sz, q, context.Arm64Assembler.FrecpsV, context.Arm64Assembler.FrecpsVH);

    public static void Vrsqrte(CodeGenContext context, uint rd, uint rm, bool f, uint size, uint q)
    {
        if (!f)
        {
            // Integer form not implemented yet.
            throw new NotImplementedException();
        }

        InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.FrsqrteV, context.Arm64Assembler.FrsqrteVH);
    }

    public static void Vrsqrts(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
        InstEmitNeonCommon.EmitVectorBinaryF(context, rd, rn, rm, sz, q, context.Arm64Assembler.FrsqrtsV, context.Arm64Assembler.FrsqrtsVH);

    public static void VsubF(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
        InstEmitNeonCommon.EmitVectorBinaryF(context, rd, rn, rm, sz, q, context.Arm64Assembler.FsubSingleAndDouble, context.Arm64Assembler.FsubHalf);

    public static void VsubI(CodeGenContext context, uint rd, uint rn, uint rm, uint size, uint q) =>
        InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, size, q, context.Arm64Assembler.SubV, context.Arm64Assembler.SubS);

    public static void Vsubhn(CodeGenContext context, uint rd, uint rn, uint rm, uint size) =>
        InstEmitNeonCommon.EmitVectorBinaryNarrow(context, rd, rn, rm, size, context.Arm64Assembler.Subhn);

    public static void Vsubl(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size) =>
        InstEmitNeonCommon.EmitVectorBinaryLong(context, rd, rn, rm, size, u ? context.Arm64Assembler.Usubl : context.Arm64Assembler.Ssubl);

    public static void Vsubw(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size) =>
        InstEmitNeonCommon.EmitVectorBinaryWide(context, rd, rn, rm, size, u ? context.Arm64Assembler.Usubw : context.Arm64Assembler.Ssubw);
}
}

View File

@@ -0,0 +1,35 @@
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
// Emits A64 code for the A32 NEON bit-counting and element-reversal
// instructions; each one maps onto the A64 vector instruction of the
// same name.
static class InstEmitNeonBit
{
    public static void Vcls(CodeGenContext context, uint rd, uint rm, uint size, uint q) =>
        InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, size, q, context.Arm64Assembler.Cls);

    public static void Vclz(CodeGenContext context, uint rd, uint rm, uint size, uint q) =>
        InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, size, q, context.Arm64Assembler.Clz);

    public static void Vcnt(CodeGenContext context, uint rd, uint rm, uint size, uint q) =>
        InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, size, q, context.Arm64Assembler.Cnt);

    public static void Vrev16(CodeGenContext context, uint rd, uint rm, uint size, uint q) =>
        InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, size, q, context.Arm64Assembler.Rev16);

    public static void Vrev32(CodeGenContext context, uint rd, uint rm, uint size, uint q) =>
        InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, size, q, context.Arm64Assembler.Rev32);

    public static void Vrev64(CodeGenContext context, uint rd, uint rm, uint size, uint q) =>
        InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, size, q, context.Arm64Assembler.Rev64);
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,126 @@
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
// Emits A64 code for the A32 NEON compare instructions. Register compares
// with a "u" field select the unsigned (CMHS/CMHI) or signed (CMGE/CMGT)
// A64 form; the *I variants compare each element against zero.
static class InstEmitNeonCompare
{
    public static void Vacge(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
        InstEmitNeonCommon.EmitVectorBinaryF(context, rd, rn, rm, sz, q, context.Arm64Assembler.FacgeV, context.Arm64Assembler.FacgeVH);

    public static void Vacgt(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
        InstEmitNeonCommon.EmitVectorBinaryF(context, rd, rn, rm, sz, q, context.Arm64Assembler.FacgtV, context.Arm64Assembler.FacgtVH);

    public static void VceqI(CodeGenContext context, uint rd, uint rm, bool f, uint size, uint q)
    {
        if (!f)
        {
            InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, size, q, context.Arm64Assembler.CmeqZeroV);
            return;
        }

        InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.FcmeqZeroV, context.Arm64Assembler.FcmeqZeroVH);
    }

    public static void VceqR(CodeGenContext context, uint rd, uint rn, uint rm, uint size, uint q) =>
        InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, size, q, context.Arm64Assembler.CmeqRegV, context.Arm64Assembler.CmeqRegS);

    public static void VceqFR(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
        InstEmitNeonCommon.EmitVectorBinaryF(context, rd, rn, rm, sz, q, context.Arm64Assembler.FcmeqRegV, context.Arm64Assembler.FcmeqRegVH);

    public static void VcgeI(CodeGenContext context, uint rd, uint rm, bool f, uint size, uint q)
    {
        if (!f)
        {
            InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, size, q, context.Arm64Assembler.CmgeZeroV);
            return;
        }

        InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.FcmgeZeroV, context.Arm64Assembler.FcmgeZeroVH);
    }

    public static void VcgeR(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size, uint q) =>
        InstEmitNeonCommon.EmitVectorBinary(
            context, rd, rn, rm, size, q,
            u ? context.Arm64Assembler.CmhsV : context.Arm64Assembler.CmgeRegV,
            u ? context.Arm64Assembler.CmhsS : context.Arm64Assembler.CmgeRegS);

    public static void VcgeFR(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
        InstEmitNeonCommon.EmitVectorBinaryF(context, rd, rn, rm, sz, q, context.Arm64Assembler.FcmgeRegV, context.Arm64Assembler.FcmgeRegVH);

    public static void VcgtI(CodeGenContext context, uint rd, uint rm, bool f, uint size, uint q)
    {
        if (!f)
        {
            InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, size, q, context.Arm64Assembler.CmgtZeroV);
            return;
        }

        InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.FcmgtZeroV, context.Arm64Assembler.FcmgtZeroVH);
    }

    public static void VcgtR(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size, uint q) =>
        InstEmitNeonCommon.EmitVectorBinary(
            context, rd, rn, rm, size, q,
            u ? context.Arm64Assembler.CmhiV : context.Arm64Assembler.CmgtRegV,
            u ? context.Arm64Assembler.CmhiS : context.Arm64Assembler.CmgtRegS);

    public static void VcgtFR(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
        InstEmitNeonCommon.EmitVectorBinaryF(context, rd, rn, rm, sz, q, context.Arm64Assembler.FcmgtRegV, context.Arm64Assembler.FcmgtRegVH);

    public static void VcleI(CodeGenContext context, uint rd, uint rm, bool f, uint size, uint q)
    {
        if (!f)
        {
            InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, size, q, context.Arm64Assembler.CmleV);
            return;
        }

        InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.FcmleV, context.Arm64Assembler.FcmleVH);
    }

    public static void VcltI(CodeGenContext context, uint rd, uint rm, bool f, uint size, uint q)
    {
        if (!f)
        {
            InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, size, q, context.Arm64Assembler.CmltV);
            return;
        }

        InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.FcmltV, context.Arm64Assembler.FcmltVH);
    }

    public static void Vtst(CodeGenContext context, uint rd, uint rn, uint rm, uint size, uint q) =>
        InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, size, q, context.Arm64Assembler.CmtstV, context.Arm64Assembler.CmtstS);
}
}

View File

@@ -0,0 +1,137 @@
using System;
using System.Diagnostics;
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
// Emits A64 code for the A32 NEON float conversion instructions. The
// rounding-mode variants (VCVTA/M/N/P) pick the unsigned or signed A64
// FCVT* opcode from the "op" bit.
static class InstEmitNeonConvert
{
    public static void Vcvta(CodeGenContext context, uint rd, uint rm, bool op, uint size, uint q) =>
        InstEmitNeonCommon.EmitVectorUnaryAnyF(
            context, rd, rm, size, q,
            op ? context.Arm64Assembler.FcvtauV : context.Arm64Assembler.FcvtasV,
            op ? context.Arm64Assembler.FcvtauVH : context.Arm64Assembler.FcvtasVH);

    public static void Vcvtm(CodeGenContext context, uint rd, uint rm, bool op, uint size, uint q) =>
        InstEmitNeonCommon.EmitVectorUnaryAnyF(
            context, rd, rm, size, q,
            op ? context.Arm64Assembler.FcvtmuV : context.Arm64Assembler.FcvtmsV,
            op ? context.Arm64Assembler.FcvtmuVH : context.Arm64Assembler.FcvtmsVH);

    public static void Vcvtn(CodeGenContext context, uint rd, uint rm, bool op, uint size, uint q) =>
        InstEmitNeonCommon.EmitVectorUnaryAnyF(
            context, rd, rm, size, q,
            op ? context.Arm64Assembler.FcvtnuV : context.Arm64Assembler.FcvtnsV,
            op ? context.Arm64Assembler.FcvtnuVH : context.Arm64Assembler.FcvtnsVH);

    public static void Vcvtp(CodeGenContext context, uint rd, uint rm, bool op, uint size, uint q) =>
        InstEmitNeonCommon.EmitVectorUnaryAnyF(
            context, rd, rm, size, q,
            op ? context.Arm64Assembler.FcvtpuV : context.Arm64Assembler.FcvtpsV,
            op ? context.Arm64Assembler.FcvtpuVH : context.Arm64Assembler.FcvtpsVH);

    public static void VcvtHs(CodeGenContext context, uint rd, uint rm, bool op)
    {
        if (op)
        {
            // Widen: half precision to single precision.
            InstEmitNeonCommon.EmitVectorUnaryLong(context, rd, rm, 0, context.Arm64Assembler.Fcvtl);
        }
        else
        {
            // Narrow: single precision to half precision.
            InstEmitNeonCommon.EmitVectorUnaryNarrow(context, rd, rm, 0, context.Arm64Assembler.Fcvtn);
        }
    }

    public static void VcvtIs(CodeGenContext context, uint rd, uint rm, uint op, uint size, uint q)
    {
        Debug.Assert(op >> 2 == 0);

        bool unsigned = (op & 1) != 0;

        if ((op >> 1) != 0)
        {
            // Float to integer, round toward zero.
            InstEmitNeonCommon.EmitVectorUnaryAnyF(
                context, rd, rm, size, q,
                unsigned ? context.Arm64Assembler.FcvtzuIntV : context.Arm64Assembler.FcvtzsIntV,
                unsigned ? context.Arm64Assembler.FcvtzuIntVH : context.Arm64Assembler.FcvtzsIntVH);
        }
        else
        {
            // Integer to float.
            InstEmitNeonCommon.EmitVectorUnaryAnyF(
                context, rd, rm, size, q,
                unsigned ? context.Arm64Assembler.UcvtfIntV : context.Arm64Assembler.ScvtfIntV,
                unsigned ? context.Arm64Assembler.UcvtfIntVH : context.Arm64Assembler.ScvtfIntVH);
        }
    }

    public static void VcvtXs(CodeGenContext context, uint rd, uint rm, uint imm6, uint op, bool u, uint q)
    {
        Debug.Assert(op >> 2 == 0);

        uint size = 1 + (op >> 1);

        // Fraction bits count, limited to the element width.
        uint fbits = Math.Clamp(64u - imm6, 1, 8u << (int)size);

        if ((op & 1) != 0)
        {
            // Float to fixed-point, round toward zero.
            InstEmitNeonCommon.EmitVectorUnaryFixedAnyF(context, rd, rm, fbits, size, q, u ? context.Arm64Assembler.FcvtzuFixV : context.Arm64Assembler.FcvtzsFixV);
        }
        else
        {
            // Fixed-point to float.
            InstEmitNeonCommon.EmitVectorUnaryFixedAnyF(context, rd, rm, fbits, size, q, u ? context.Arm64Assembler.UcvtfFixV : context.Arm64Assembler.ScvtfFixV);
        }
    }
}
}

View File

@@ -0,0 +1,43 @@
using System.Diagnostics;
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
// Emits the A32 AES instructions using the A64 crypto extension
// instructions of the same name.
// TODO: Feature check, emulation if not supported.
static class InstEmitNeonCrypto
{
    public static void Aesd(CodeGenContext context, uint rd, uint rm, uint size)
    {
        // AES instructions are only encoded for the byte element size.
        Debug.Assert(size == 0);

        InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, context.Arm64Assembler.Aesd);
    }

    public static void Aese(CodeGenContext context, uint rd, uint rm, uint size)
    {
        // AES instructions are only encoded for the byte element size.
        Debug.Assert(size == 0);

        InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, context.Arm64Assembler.Aese);
    }

    public static void Aesimc(CodeGenContext context, uint rd, uint rm, uint size)
    {
        // AES instructions are only encoded for the byte element size.
        Debug.Assert(size == 0);

        InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, context.Arm64Assembler.Aesimc);
    }

    public static void Aesmc(CodeGenContext context, uint rd, uint rm, uint size)
    {
        // AES instructions are only encoded for the byte element size.
        Debug.Assert(size == 0);

        InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, context.Arm64Assembler.Aesmc);
    }
}
}

View File

@@ -0,0 +1,97 @@
using System.Diagnostics;
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
// Emits the A32 SHA1/SHA256 instructions using the A64 crypto extension
// instructions of the same name.
// TODO: Feature check, emulation if not supported.
static class InstEmitNeonHash
{
    public static void Sha1c(CodeGenContext context, uint rd, uint rn, uint rm, uint q)
    {
        Debug.Assert(q == 1);

        InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, context.Arm64Assembler.Sha1c);
    }

    public static void Sha1h(CodeGenContext context, uint rd, uint rm, uint size)
    {
        Debug.Assert(size == 2);

        InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, context.Arm64Assembler.Sha1h);
    }

    public static void Sha1m(CodeGenContext context, uint rd, uint rn, uint rm, uint q)
    {
        Debug.Assert(q == 1);

        InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, context.Arm64Assembler.Sha1m);
    }

    public static void Sha1p(CodeGenContext context, uint rd, uint rn, uint rm, uint q)
    {
        Debug.Assert(q == 1);

        InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, context.Arm64Assembler.Sha1p);
    }

    public static void Sha1su0(CodeGenContext context, uint rd, uint rn, uint rm, uint q)
    {
        Debug.Assert(q == 1);

        InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, context.Arm64Assembler.Sha1su0);
    }

    public static void Sha1su1(CodeGenContext context, uint rd, uint rm, uint size)
    {
        Debug.Assert(size == 2);

        InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, context.Arm64Assembler.Sha1su1);
    }

    public static void Sha256h(CodeGenContext context, uint rd, uint rn, uint rm, uint q)
    {
        Debug.Assert(q == 1);

        InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, context.Arm64Assembler.Sha256h);
    }

    public static void Sha256h2(CodeGenContext context, uint rd, uint rn, uint rm, uint q)
    {
        Debug.Assert(q == 1);

        InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, context.Arm64Assembler.Sha256h2);
    }

    public static void Sha256su0(CodeGenContext context, uint rd, uint rm, uint size)
    {
        Debug.Assert(size == 2);

        InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, context.Arm64Assembler.Sha256su0);
    }

    public static void Sha256su1(CodeGenContext context, uint rd, uint rn, uint rm, uint q)
    {
        Debug.Assert(q == 1);

        InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, context.Arm64Assembler.Sha256su1);
    }
}
}

View File

@@ -0,0 +1,79 @@
using Ryujinx.Cpu.LightningJit.CodeGen;
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
// Emits A64 code for the A32 NEON bitwise logical instructions. Register
// forms map directly onto the A64 instruction of the same name; the
// immediate forms (VBIC/VORR) are emitted through MOVI with the matching
// "op" bit.
static class InstEmitNeonLogical
{
    public static void VandR(CodeGenContext context, uint rd, uint rn, uint rm, uint q) =>
        InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, q, context.Arm64Assembler.And);

    public static void VbicI(CodeGenContext context, uint rd, uint cmode, uint imm8, uint q) =>
        EmitMovi(context, rd, cmode, imm8, 1, q);

    public static void VbicR(CodeGenContext context, uint rd, uint rn, uint rm, uint q) =>
        InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, q, context.Arm64Assembler.BicReg);

    public static void VbifR(CodeGenContext context, uint rd, uint rn, uint rm, uint q) =>
        InstEmitNeonCommon.EmitVectorTernaryRd(context, rd, rn, rm, q, context.Arm64Assembler.Bif);

    public static void VbitR(CodeGenContext context, uint rd, uint rn, uint rm, uint q) =>
        InstEmitNeonCommon.EmitVectorTernaryRd(context, rd, rn, rm, q, context.Arm64Assembler.Bit);

    public static void VbslR(CodeGenContext context, uint rd, uint rn, uint rm, uint q) =>
        InstEmitNeonCommon.EmitVectorTernaryRd(context, rd, rn, rm, q, context.Arm64Assembler.Bsl);

    public static void VeorR(CodeGenContext context, uint rd, uint rn, uint rm, uint q) =>
        InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, q, context.Arm64Assembler.Eor);

    public static void VornR(CodeGenContext context, uint rd, uint rn, uint rm, uint q) =>
        InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, q, context.Arm64Assembler.Orn);

    public static void VorrI(CodeGenContext context, uint rd, uint cmode, uint imm8, uint q) =>
        EmitMovi(context, rd, cmode, imm8, 0, q);

    public static void VorrR(CodeGenContext context, uint rd, uint rn, uint rm, uint q) =>
        InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, q, context.Arm64Assembler.OrrReg);

    // Emits a MOVI-based immediate operation (op selects the A64 variant).
    private static void EmitMovi(CodeGenContext context, uint rd, uint cmode, uint imm8, uint op, uint q)
    {
        (uint a, uint b, uint c, uint d, uint e, uint f, uint g, uint h) = InstEmitNeonMove.Split(imm8);

        if (q != 0)
        {
            // Full 128-bit register: operate on the remapped register directly.
            Operand target = context.RegisterAllocator.RemapSimdRegister((int)(rd >> 1));
            context.Arm64Assembler.Movi(target, h, g, f, e, d, cmode, c, b, a, op, q);
        }
        else
        {
            // 64-bit destination: work on a temporary, then insert the result
            // back into the correct half of the full register.
            using ScopedRegister temp = context.RegisterAllocator.AllocateTempSimdRegisterScoped();

            InstEmitNeonCommon.MoveScalarToSide(context, temp.Operand, rd, false);
            context.Arm64Assembler.Movi(temp.Operand, h, g, f, e, d, cmode, c, b, a, op, q);
            InstEmitNeonCommon.InsertResult(context, temp.Operand, rd, false);
        }
    }
}
}

View File

@@ -0,0 +1,797 @@
using Ryujinx.Cpu.LightningJit.CodeGen;
using Ryujinx.Cpu.LightningJit.CodeGen.Arm64;
using System;
using System.Diagnostics;
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
// Emits AArch64 code for the AArch32 NEON/VFP memory instructions:
// VLD1-VLD4/VST1-VST4 (multiple and single structure forms), VLDM/VSTM
// and VLDR/VSTR.
//
// AArch32 D registers are kept packed in pairs inside AArch64 Q registers
// (see RegisterAllocator.RemapSimdRegister usage below), so most paths here
// shuffle doublewords between the remapped Q registers and sequential
// temporary vector registers around the actual AArch64 structure load/store.
static class InstEmitNeonMemory
{
    // VLD1 (single element to one lane).
    public static void Vld11(CodeGenContext context, uint rd, uint rn, uint rm, uint indexAlign, uint size)
    {
        // The lane index is encoded above the alignment bits of index_align.
        uint index = indexAlign >> ((int)size + 1);

        EmitMemory1234InstructionCore(context, rn, rm, 1 << (int)size, (address) =>
        {
            EmitMemoryLoad1234SingleInstruction(context, address, rd, index, size, 1, 1, context.Arm64Assembler.Ld1SnglAsNoPostIndex);
        });
    }

    // VLD1 (single element to all lanes).
    // NOTE(review): the alignment hint `a` is not used; alignment is not enforced.
    public static void Vld1A(CodeGenContext context, uint rd, uint rn, uint rm, uint a, uint t, uint size)
    {
        EmitMemory1234InstructionCore(context, rn, rm, 1 << (int)size, (address) =>
        {
            EmitMemoryLoad1SingleReplicateInstruction(context, address, rd, size, t + 1, 1, context.Arm64Assembler.Ld1rAsNoPostIndex);
        });
    }

    // VLD1 (multiple single elements), loading 1-4 consecutive D registers.
    public static void Vld1M(CodeGenContext context, uint rd, uint rn, uint rm, uint registersCount, uint align, uint size)
    {
        EmitMemory1234InstructionCore(context, rn, rm, 8 * (int)registersCount, (address) =>
        {
            EmitMemoryLoad1234MultipleInstruction(context, address, rd, size, registersCount, 1, context.Arm64Assembler.Ld1MultAsNoPostIndex);
        });
    }

    // VLD2 (single 2-element structure to one lane).
    public static void Vld21(CodeGenContext context, uint rd, uint rn, uint rm, uint indexAlign, uint size)
    {
        uint index = indexAlign >> ((int)size + 1);
        // Spacing bit of index_align: a step of 2 selects every other D register.
        uint step = size > 0 && (indexAlign & (1u << (int)size)) != 0 ? 2u : 1u;

        EmitMemory1234InstructionCore(context, rn, rm, 2 * (1 << (int)size), (address) =>
        {
            EmitMemoryLoad1234SingleInstruction(context, address, rd, index, size, 2, step, context.Arm64Assembler.Ld2SnglAsNoPostIndex);
        });
    }

    // VLD2 (single 2-element structure to all lanes).
    public static void Vld2A(CodeGenContext context, uint rd, uint rn, uint rm, uint a, uint t, uint size)
    {
        EmitMemory1234InstructionCore(context, rn, rm, 2 * (1 << (int)size), (address) =>
        {
            EmitMemoryLoad234SingleReplicateInstruction(context, address, rd, size, 2, t + 1, context.Arm64Assembler.Ld2rAsNoPostIndex);
        });
    }

    // VLD2 (multiple 2-element structures), single-spaced or double-spaced
    // registers depending on the low bit of `type`.
    public static void Vld2M(CodeGenContext context, uint rd, uint rn, uint rm, uint type, uint align, uint size)
    {
        uint step = (type & 1) + 1;

        EmitMemory1234InstructionCore(context, rn, rm, 16, (address) =>
        {
            EmitMemoryLoad1234MultipleInstruction(context, address, rd, size, 2, step, context.Arm64Assembler.Ld2MultAsNoPostIndex);
        });
    }

    // VLD2 (multiple 2-element structures), 4-register (2x2) form.
    public static void Vld2M(CodeGenContext context, uint rd, uint rn, uint rm, uint align, uint size)
    {
        EmitMemory1234InstructionCore(context, rn, rm, 32, (address) =>
        {
            EmitMemoryLoad1234Multiple2x2Instruction(context, address, rd, size, context.Arm64Assembler.Ld2MultAsNoPostIndex);
        });
    }

    // VLD3 (single 3-element structure to one lane).
    public static void Vld31(CodeGenContext context, uint rd, uint rn, uint rm, uint indexAlign, uint size)
    {
        uint index = indexAlign >> ((int)size + 1);
        uint step = size > 0 && (indexAlign & (1u << (int)size)) != 0 ? 2u : 1u;

        EmitMemory1234InstructionCore(context, rn, rm, 3 * (1 << (int)size), (address) =>
        {
            EmitMemoryLoad1234SingleInstruction(context, address, rd, index, size, 3, step, context.Arm64Assembler.Ld3SnglAsNoPostIndex);
        });
    }

    // VLD3 (single 3-element structure to all lanes).
    public static void Vld3A(CodeGenContext context, uint rd, uint rn, uint rm, uint a, uint t, uint size)
    {
        EmitMemory1234InstructionCore(context, rn, rm, 3 * (1 << (int)size), (address) =>
        {
            EmitMemoryLoad234SingleReplicateInstruction(context, address, rd, size, 3, t + 1, context.Arm64Assembler.Ld3rAsNoPostIndex);
        });
    }

    // VLD3 (multiple 3-element structures).
    public static void Vld3M(CodeGenContext context, uint rd, uint rn, uint rm, uint type, uint align, uint size)
    {
        uint step = (type & 1) + 1;

        EmitMemory1234InstructionCore(context, rn, rm, 24, (address) =>
        {
            EmitMemoryLoad1234MultipleInstruction(context, address, rd, size, 3, step, context.Arm64Assembler.Ld3MultAsNoPostIndex);
        });
    }

    // VLD4 (single 4-element structure to one lane).
    public static void Vld41(CodeGenContext context, uint rd, uint rn, uint rm, uint indexAlign, uint size)
    {
        uint index = indexAlign >> ((int)size + 1);
        uint step = size > 0 && (indexAlign & (1u << (int)size)) != 0 ? 2u : 1u;

        EmitMemory1234InstructionCore(context, rn, rm, 4 * (1 << (int)size), (address) =>
        {
            EmitMemoryLoad1234SingleInstruction(context, address, rd, index, size, 4, step, context.Arm64Assembler.Ld4SnglAsNoPostIndex);
        });
    }

    // VLD4 (single 4-element structure to all lanes).
    public static void Vld4A(CodeGenContext context, uint rd, uint rn, uint rm, uint a, uint t, uint size)
    {
        EmitMemory1234InstructionCore(context, rn, rm, 4 * (1 << (int)size), (address) =>
        {
            EmitMemoryLoad234SingleReplicateInstruction(context, address, rd, size, 4, t + 1, context.Arm64Assembler.Ld4rAsNoPostIndex);
        });
    }

    // VLD4 (multiple 4-element structures).
    public static void Vld4M(CodeGenContext context, uint rd, uint rn, uint rm, uint type, uint align, uint size)
    {
        uint step = (type & 1) + 1;

        EmitMemory1234InstructionCore(context, rn, rm, 32, (address) =>
        {
            EmitMemoryLoad1234MultipleInstruction(context, address, rd, size, 4, step, context.Arm64Assembler.Ld4MultAsNoPostIndex);
        });
    }

    // VLDM: load multiple consecutive S or D registers.
    public static void Vldm(CodeGenContext context, uint rd, uint rn, uint registerCount, bool u, bool w, bool singleRegs)
    {
        EmitMemoryMultipleInstruction(context, rd, rn, registerCount, u, w, singleRegs, isStore: false);
    }

    // VLDR: load a single S or D register with an immediate offset.
    public static void Vldr(CodeGenContext context, uint rd, uint rn, uint imm8, bool u, uint size)
    {
        EmitMemoryInstruction(context, rd, rn, imm8, u, size, isStore: false);
    }

    // VST1 (single element from one lane).
    public static void Vst11(CodeGenContext context, uint rd, uint rn, uint rm, uint indexAlign, uint size)
    {
        uint index = indexAlign >> ((int)size + 1);

        EmitMemory1234InstructionCore(context, rn, rm, 1 << (int)size, (address) =>
        {
            EmitMemoryStore1234SingleInstruction(context, address, rd, index, size, 1, 1, context.Arm64Assembler.St1SnglAsNoPostIndex);
        });
    }

    // VST1 (multiple single elements), storing 1-4 consecutive D registers.
    public static void Vst1M(CodeGenContext context, uint rd, uint rn, uint rm, uint registersCount, uint align, uint size)
    {
        EmitMemory1234InstructionCore(context, rn, rm, 8 * (int)registersCount, (address) =>
        {
            EmitMemoryStore1234MultipleInstruction(context, address, rd, size, registersCount, 1, context.Arm64Assembler.St1MultAsNoPostIndex);
        });
    }

    // VST2 (single 2-element structure from one lane).
    public static void Vst21(CodeGenContext context, uint rd, uint rn, uint rm, uint indexAlign, uint size)
    {
        uint index = indexAlign >> ((int)size + 1);
        uint step = size > 0 && (indexAlign & (1u << (int)size)) != 0 ? 2u : 1u;

        EmitMemory1234InstructionCore(context, rn, rm, 2 * (1 << (int)size), (address) =>
        {
            EmitMemoryStore1234SingleInstruction(context, address, rd, index, size, 2, step, context.Arm64Assembler.St2SnglAsNoPostIndex);
        });
    }

    // VST2 (multiple 2-element structures).
    public static void Vst2M(CodeGenContext context, uint rd, uint rn, uint rm, uint type, uint align, uint size)
    {
        uint step = (type & 1) + 1;

        EmitMemory1234InstructionCore(context, rn, rm, 16, (address) =>
        {
            EmitMemoryStore1234MultipleInstruction(context, address, rd, size, 2, step, context.Arm64Assembler.St2MultAsNoPostIndex);
        });
    }

    // VST2 (multiple 2-element structures), 4-register (2x2) form.
    public static void Vst2M(CodeGenContext context, uint rd, uint rn, uint rm, uint align, uint size)
    {
        EmitMemory1234InstructionCore(context, rn, rm, 32, (address) =>
        {
            EmitMemoryStore1234Multiple2x2Instruction(context, address, rd, size, context.Arm64Assembler.St2MultAsNoPostIndex);
        });
    }

    // VST3 (single 3-element structure from one lane).
    public static void Vst31(CodeGenContext context, uint rd, uint rn, uint rm, uint indexAlign, uint size)
    {
        uint index = indexAlign >> ((int)size + 1);
        uint step = size > 0 && (indexAlign & (1u << (int)size)) != 0 ? 2u : 1u;

        EmitMemory1234InstructionCore(context, rn, rm, 3 * (1 << (int)size), (address) =>
        {
            EmitMemoryStore1234SingleInstruction(context, address, rd, index, size, 3, step, context.Arm64Assembler.St3SnglAsNoPostIndex);
        });
    }

    // VST3 (multiple 3-element structures).
    public static void Vst3M(CodeGenContext context, uint rd, uint rn, uint rm, uint type, uint align, uint size)
    {
        uint step = (type & 1) + 1;

        EmitMemory1234InstructionCore(context, rn, rm, 24, (address) =>
        {
            EmitMemoryStore1234MultipleInstruction(context, address, rd, size, 3, step, context.Arm64Assembler.St3MultAsNoPostIndex);
        });
    }

    // VST4 (single 4-element structure from one lane).
    public static void Vst41(CodeGenContext context, uint rd, uint rn, uint rm, uint indexAlign, uint size)
    {
        uint index = indexAlign >> ((int)size + 1);
        uint step = size > 0 && (indexAlign & (1u << (int)size)) != 0 ? 2u : 1u;

        EmitMemory1234InstructionCore(context, rn, rm, 4 * (1 << (int)size), (address) =>
        {
            EmitMemoryStore1234SingleInstruction(context, address, rd, index, size, 4, step, context.Arm64Assembler.St4SnglAsNoPostIndex);
        });
    }

    // VST4 (multiple 4-element structures).
    public static void Vst4M(CodeGenContext context, uint rd, uint rn, uint rm, uint type, uint align, uint size)
    {
        uint step = (type & 1) + 1;

        EmitMemory1234InstructionCore(context, rn, rm, 32, (address) =>
        {
            EmitMemoryStore1234MultipleInstruction(context, address, rd, size, 4, step, context.Arm64Assembler.St4MultAsNoPostIndex);
        });
    }

    // VSTM: store multiple consecutive S or D registers.
    public static void Vstm(CodeGenContext context, uint rd, uint rn, uint registerCount, bool u, bool w, bool singleRegs)
    {
        EmitMemoryMultipleInstruction(context, rd, rn, registerCount, u, w, singleRegs, isStore: true);
    }

    // VSTR: store a single S or D register with an immediate offset.
    public static void Vstr(CodeGenContext context, uint rd, uint rn, uint imm8, bool u, uint size)
    {
        EmitMemoryInstruction(context, rd, rn, imm8, u, size, isStore: true);
    }

    // Shared VLDM/VSTM implementation. Computes the (optionally written-back)
    // base address, translates it to a host address, then transfers the
    // registers. For the decrement form (add == false) the base is adjusted
    // before the transfer; for the increment form write-back happens after.
    private static void EmitMemoryMultipleInstruction(
        CodeGenContext context,
        uint rd,
        uint rn,
        uint registerCount,
        bool add,
        bool wBack,
        bool singleRegs,
        bool isStore)
    {
        Operand baseAddress = InstEmitCommon.GetInputGpr(context, rn);

        using ScopedRegister tempRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        // Total transfer size in bytes (4 per S register, 8 per D register).
        Operand offset = InstEmitCommon.Const((int)registerCount * (singleRegs ? 4 : 8));

        if (!add)
        {
            if (wBack)
            {
                // Decrement-before with write-back: update Rn, then translate it.
                InstEmitMemory.WriteAddShiftOffset(context.Arm64Assembler, baseAddress, baseAddress, offset, false, ArmShiftType.Lsl, 0);
                InstEmitMemory.WriteAddressTranslation(context.MemoryManagerType, context.RegisterAllocator, context.Arm64Assembler, tempRegister.Operand, baseAddress);
            }
            else
            {
                // Decrement without write-back: compute the lowered address in a temp.
                InstEmitMemory.WriteAddShiftOffset(context.Arm64Assembler, tempRegister.Operand, baseAddress, offset, false, ArmShiftType.Lsl, 0);
                InstEmitMemory.WriteAddressTranslation(context.MemoryManagerType, context.RegisterAllocator, context.Arm64Assembler, tempRegister.Operand, tempRegister.Operand);
            }
        }
        else
        {
            InstEmitMemory.WriteAddressTranslation(context.MemoryManagerType, context.RegisterAllocator, context.Arm64Assembler, tempRegister.Operand, baseAddress);
        }

        EmitMemoryMultipleInstructionCore(context, tempRegister.Operand, rd, registerCount, singleRegs, isStore);

        if (add && wBack)
        {
            // Increment form: write the advanced address back to Rn after the transfer.
            context.Arm64Assembler.Add(baseAddress, baseAddress, offset);
        }
    }

    // Transfers the register range for VLDM/VSTM. Whole 128-bit host vectors
    // are moved with LDR/STR (or LDP/STP pairs) where the AArch32 registers
    // line up with Q register boundaries; the misaligned head and tail are
    // moved one S/D register at a time.
    private static void EmitMemoryMultipleInstructionCore(CodeGenContext context, Operand baseAddress, uint rd, uint registerCount, bool singleRegs, bool isStore)
    {
        int offs = 0;
        uint r = rd;
        uint upperBound = Math.Min(rd + registerCount, 32u);
        // 4 S registers or 2 D registers per host Q register.
        uint regMask = singleRegs ? 3u : 1u;

        // Read/write misaligned elements first.
        for (; (r & regMask) != 0 && r < upperBound; r++)
        {
            EmitMemoryInstruction(context, baseAddress, r, offs, singleRegs, isStore);

            offs += singleRegs ? 4 : 8;
        }

        // Read/write aligned, full vectors.
        while (upperBound - r >= (singleRegs ? 4 : 2))
        {
            int qIndex = (int)(r >> (singleRegs ? 2 : 1));

            Operand rtOperand = context.RegisterAllocator.RemapSimdRegister(qIndex);

            if (upperBound - r >= (singleRegs ? 8 : 4) && (offs & 0xf) == 0)
            {
                // Two full vectors left and a 16-byte aligned offset: use a pair transfer.
                Operand rt2Operand = context.RegisterAllocator.RemapSimdRegister(qIndex + 1);

                if (isStore)
                {
                    context.Arm64Assembler.StpRiUn(rtOperand, rt2Operand, baseAddress, offs);
                }
                else
                {
                    context.Arm64Assembler.LdpRiUn(rtOperand, rt2Operand, baseAddress, offs);
                }

                r += singleRegs ? 8u : 4u;
                offs += 32;
            }
            else
            {
                if ((offs & 0xf) == 0)
                {
                    // Aligned offset: unsigned scaled immediate form.
                    if (isStore)
                    {
                        context.Arm64Assembler.StrRiUn(rtOperand, baseAddress, offs);
                    }
                    else
                    {
                        context.Arm64Assembler.LdrRiUn(rtOperand, baseAddress, offs);
                    }
                }
                else
                {
                    // Unaligned offset: unscaled (LDUR/STUR) form.
                    if (isStore)
                    {
                        context.Arm64Assembler.Stur(rtOperand, baseAddress, offs);
                    }
                    else
                    {
                        context.Arm64Assembler.Ldur(rtOperand, baseAddress, offs);
                    }
                }

                r += singleRegs ? 4u : 2u;
                offs += 16;
            }
        }

        // Read/write last misaligned elements.
        for (; r < upperBound; r++)
        {
            EmitMemoryInstruction(context, baseAddress, r, offs, singleRegs, isStore);

            offs += singleRegs ? 4 : 8;
        }
    }

    // Transfers a single S or D register at baseAddress + offs, staging it
    // through a scalar temporary so only the selected doubleword/word of the
    // packed host Q register is touched.
    private static void EmitMemoryInstruction(CodeGenContext context, Operand baseAddress, uint r, int offs, bool singleRegs, bool isStore)
    {
        if (isStore)
        {
            using ScopedRegister tempRegister = InstEmitNeonCommon.MoveScalarToSide(context, r, singleRegs);

            context.Arm64Assembler.StrRiUn(tempRegister.Operand, baseAddress, offs);
        }
        else
        {
            using ScopedRegister tempRegister = context.RegisterAllocator.AllocateTempFpRegisterScoped(singleRegs);

            context.Arm64Assembler.LdrRiUn(tempRegister.Operand, baseAddress, offs);

            InstEmitNeonCommon.InsertResult(context, tempRegister.Operand, r, singleRegs);
        }
    }

    // Shared VLDR/VSTR implementation (immediate offset addressing, with
    // PC-relative support for literal loads).
    private static void EmitMemoryInstruction(CodeGenContext context, uint rd, uint rn, uint imm8, bool add, uint size, bool isStore)
    {
        bool singleRegs = size != 3;
        int offs = (int)imm8;

        // imm8 is scaled by the access size: x2 for half-precision, x4 otherwise.
        if (size == 1)
        {
            offs <<= 1;
        }
        else
        {
            offs <<= 2;
        }

        using ScopedRegister address = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        if (rn == RegisterUtils.PcRegister)
        {
            if (!add)
            {
                offs = -offs;
            }

            // PC-relative (literal) access: the base is the word-aligned PC.
            context.Arm64Assembler.Mov(address.Operand, (context.Pc & ~3u) + (uint)offs);

            InstEmitMemory.WriteAddressTranslation(context.MemoryManagerType, context.RegisterAllocator, context.Arm64Assembler, address.Operand, address.Operand);

            offs = 0;
        }
        else
        {
            Operand rnOperand = context.RegisterAllocator.RemapGprRegister((int)rn);

            if (InstEmitMemory.CanFoldOffset(context.MemoryManagerType, add ? offs : -offs, (int)size, true, out _))
            {
                // Offset fits in the load/store encoding: translate the base only.
                InstEmitMemory.WriteAddressTranslation(context.MemoryManagerType, context.RegisterAllocator, context.Arm64Assembler, address.Operand, rnOperand);

                if (!add)
                {
                    offs = -offs;
                }
            }
            else
            {
                // Offset too large to fold: add it to the base up front.
                InstEmitMemory.WriteAddShiftOffset(context.Arm64Assembler, address.Operand, rnOperand, InstEmitCommon.Const(offs), add, ArmShiftType.Lsl, 0);
                InstEmitMemory.WriteAddressTranslation(context.MemoryManagerType, context.RegisterAllocator, context.Arm64Assembler, address.Operand, address.Operand);

                offs = 0;
            }
        }

        if ((size == 3 && (offs & 7) != 0) || offs < 0)
        {
            // Negative or (for doubles) misaligned offset: use the unscaled forms.
            if (isStore)
            {
                using ScopedRegister tempRegister = InstEmitNeonCommon.MoveScalarToSide(context, rd, singleRegs);

                context.Arm64Assembler.Stur(tempRegister.Operand, address.Operand, offs, size);
            }
            else
            {
                using ScopedRegister tempRegister = context.RegisterAllocator.AllocateTempFpRegisterScoped(singleRegs);

                context.Arm64Assembler.Ldur(tempRegister.Operand, address.Operand, offs, size);

                InstEmitNeonCommon.InsertResult(context, tempRegister.Operand, rd, singleRegs);
            }
        }
        else
        {
            if (isStore)
            {
                using ScopedRegister tempRegister = InstEmitNeonCommon.MoveScalarToSide(context, rd, singleRegs);

                context.Arm64Assembler.StrRiUn(tempRegister.Operand, address.Operand, offs, size);
            }
            else
            {
                using ScopedRegister tempRegister = context.RegisterAllocator.AllocateTempFpRegisterScoped(singleRegs);

                context.Arm64Assembler.LdrRiUn(tempRegister.Operand, address.Operand, offs, size);

                InstEmitNeonCommon.InsertResult(context, tempRegister.Operand, rd, singleRegs);
            }
        }
    }

    // Shared wrapper for all VLDn/VSTn forms: translates the base address,
    // invokes the transfer callback, then performs write-back. rm == PC means
    // no write-back; rm == SP means write-back by the transfer size in bytes;
    // any other rm means write-back by that register's value.
    private static void EmitMemory1234InstructionCore(CodeGenContext context, uint rn, uint rm, int bytes, Action<Operand> callback)
    {
        bool wBack = rm != RegisterUtils.PcRegister;
        bool registerIndex = rm != RegisterUtils.PcRegister && rm != RegisterUtils.SpRegister;

        Operand rnOperand = InstEmitCommon.GetInputGpr(context, rn);

        using ScopedRegister address = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        InstEmitMemory.WriteAddressTranslation(context.MemoryManagerType, context.RegisterAllocator, context.Arm64Assembler, address.Operand, rnOperand);

        callback(address.Operand);

        if (wBack)
        {
            if (registerIndex)
            {
                Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);

                context.Arm64Assembler.Add(rnOperand, rnOperand, rmOperand);
            }
            else
            {
                context.Arm64Assembler.Add(rnOperand, rnOperand, InstEmitCommon.Const(bytes));
            }
        }
    }

    // LDn (single structure, one lane): loads into the low doublewords of
    // sequential temporaries, after first filling them with the current D
    // register values so the untouched lanes are preserved.
    private static void EmitMemoryLoad1234SingleInstruction(
        CodeGenContext context,
        Operand baseAddress,
        uint rd,
        uint index,
        uint size,
        uint registerCount,
        uint step,
        Action<Operand, Operand, uint, uint> action)
    {
        ScopedRegister[] tempRegisters = AllocateSequentialRegisters(context, (int)registerCount);

        MoveDoublewordsToQuadwordsLower(context, rd, registerCount, step, tempRegisters);

        action(tempRegisters[0].Operand, baseAddress, index, size);

        MoveQuadwordsLowerToDoublewords(context, rd, registerCount, step, tempRegisters);

        FreeSequentialRegisters(tempRegisters);
    }

    // LD1R (load one element and replicate to all lanes of 1 or 2 D registers).
    private static void EmitMemoryLoad1SingleReplicateInstruction(
        CodeGenContext context,
        Operand baseAddress,
        uint rd,
        uint size,
        uint registerCount,
        uint step,
        Action<Operand, Operand, uint, uint> action)
    {
        if ((rd & 1) == 0 && registerCount == 2)
        {
            // The D register pair maps exactly onto one host Q register:
            // replicate directly into the destination.
            action(context.RegisterAllocator.RemapSimdRegister((int)(rd >> 1)), baseAddress, size, 1);
        }
        else
        {
            uint vecsCount = (registerCount + 1) >> 1;

            ScopedRegister[] tempRegisters = AllocateSequentialRegisters(context, (int)vecsCount);

            action(tempRegisters[0].Operand, baseAddress, size, registerCount > 1 ? 1u : 0u);

            MoveQuadwordsToDoublewords(context, rd, registerCount, step, tempRegisters);

            FreeSequentialRegisters(tempRegisters);
        }
    }

    // LD2R/LD3R/LD4R (load one structure and replicate to all lanes).
    private static void EmitMemoryLoad234SingleReplicateInstruction(
        CodeGenContext context,
        Operand baseAddress,
        uint rd,
        uint size,
        uint registerCount,
        uint step,
        Action<Operand, Operand, uint, uint> action)
    {
        ScopedRegister[] tempRegisters = AllocateSequentialRegisters(context, (int)registerCount);

        action(tempRegisters[0].Operand, baseAddress, size, 0u);

        MoveQuadwordsLowerToDoublewords(context, rd, registerCount, step, tempRegisters);

        FreeSequentialRegisters(tempRegisters);
    }

    // LDn (multiple structures) into the low doublewords of sequential
    // temporaries, then scattered to the destination D registers.
    private static void EmitMemoryLoad1234MultipleInstruction(
        CodeGenContext context,
        Operand baseAddress,
        uint rd,
        uint size,
        uint registerCount,
        uint step,
        Action<Operand, Operand, uint, uint> action)
    {
        ScopedRegister[] tempRegisters = AllocateSequentialRegisters(context, (int)registerCount);

        action(tempRegisters[0].Operand, baseAddress, size, 0);

        MoveQuadwordsLowerToDoublewords(context, rd, registerCount, step, tempRegisters);

        FreeSequentialRegisters(tempRegisters);
    }

    // Overload for assembler callbacks that also take the register count
    // (LD1 with a variable number of registers).
    private static void EmitMemoryLoad1234MultipleInstruction(
        CodeGenContext context,
        Operand baseAddress,
        uint rd,
        uint size,
        uint registerCount,
        uint step,
        Action<Operand, Operand, uint, uint, uint> action)
    {
        ScopedRegister[] tempRegisters = AllocateSequentialRegisters(context, (int)registerCount);

        action(tempRegisters[0].Operand, baseAddress, registerCount, size, 0);

        MoveQuadwordsLowerToDoublewords(context, rd, registerCount, step, tempRegisters);

        FreeSequentialRegisters(tempRegisters);
    }

    // 4-register (2x2) LD2: full 128-bit transfer. When rd is even, both
    // destination Q registers line up with the host registers and can be
    // loaded directly; otherwise stage through temporaries.
    private static void EmitMemoryLoad1234Multiple2x2Instruction(
        CodeGenContext context,
        Operand baseAddress,
        uint rd,
        uint size,
        Action<Operand, Operand, uint, uint> action)
    {
        if ((rd & 1) == 0)
        {
            action(context.RegisterAllocator.RemapSimdRegister((int)(rd >> 1), 2), baseAddress, size, 1);
        }
        else
        {
            ScopedRegister[] tempRegisters = AllocateSequentialRegisters(context, 2);

            action(tempRegisters[0].Operand, baseAddress, size, 1);

            MoveQuadwordsToDoublewords2x2(context, rd, tempRegisters);

            FreeSequentialRegisters(tempRegisters);
        }
    }

    // STn (single structure, one lane): gathers the source D registers into
    // sequential temporaries, then stores the selected lane.
    private static void EmitMemoryStore1234SingleInstruction(
        CodeGenContext context,
        Operand baseAddress,
        uint rd,
        uint index,
        uint size,
        uint registerCount,
        uint step,
        Action<Operand, Operand, uint, uint> action)
    {
        ScopedRegister[] tempRegisters = AllocateSequentialRegisters(context, (int)registerCount);

        MoveDoublewordsToQuadwordsLower(context, rd, registerCount, step, tempRegisters);

        action(tempRegisters[0].Operand, baseAddress, index, size);

        FreeSequentialRegisters(tempRegisters);
    }

    // STn (multiple structures) from the low doublewords of sequential temporaries.
    private static void EmitMemoryStore1234MultipleInstruction(
        CodeGenContext context,
        Operand baseAddress,
        uint rd,
        uint size,
        uint registerCount,
        uint step,
        Action<Operand, Operand, uint, uint> action)
    {
        ScopedRegister[] tempRegisters = AllocateSequentialRegisters(context, (int)registerCount);

        MoveDoublewordsToQuadwordsLower(context, rd, registerCount, step, tempRegisters);

        action(tempRegisters[0].Operand, baseAddress, size, 0);

        FreeSequentialRegisters(tempRegisters);
    }

    // Overload for assembler callbacks that also take the register count
    // (ST1 with a variable number of registers).
    private static void EmitMemoryStore1234MultipleInstruction(
        CodeGenContext context,
        Operand baseAddress,
        uint rd,
        uint size,
        uint registerCount,
        uint step,
        Action<Operand, Operand, uint, uint, uint> action)
    {
        ScopedRegister[] tempRegisters = AllocateSequentialRegisters(context, (int)registerCount);

        MoveDoublewordsToQuadwordsLower(context, rd, registerCount, step, tempRegisters);

        action(tempRegisters[0].Operand, baseAddress, registerCount, size, 0);

        FreeSequentialRegisters(tempRegisters);
    }

    // 4-register (2x2) ST2: full 128-bit transfer, direct when rd is even.
    private static void EmitMemoryStore1234Multiple2x2Instruction(
        CodeGenContext context,
        Operand baseAddress,
        uint rd,
        uint size,
        Action<Operand, Operand, uint, uint> action)
    {
        if ((rd & 1) == 0)
        {
            action(context.RegisterAllocator.RemapSimdRegister((int)(rd >> 1), 2), baseAddress, size, 1);
        }
        else
        {
            ScopedRegister[] tempRegisters = AllocateSequentialRegisters(context, 2);

            MoveDoublewordsToQuadwords2x2(context, rd, tempRegisters);

            action(tempRegisters[0].Operand, baseAddress, size, 1);

            FreeSequentialRegisters(tempRegisters);
        }
    }

    // Allocates `count` SIMD temporaries. The structure load/store instructions
    // require consecutive registers; this is only asserted in debug builds,
    // relying on the allocator handing them out in order.
    private static ScopedRegister[] AllocateSequentialRegisters(CodeGenContext context, int count)
    {
        ScopedRegister[] registers = new ScopedRegister[count];

        for (int index = 0; index < count; index++)
        {
            registers[index] = context.RegisterAllocator.AllocateTempSimdRegisterScoped();
        }

        AssertSequentialRegisters(registers);

        return registers;
    }

    // Releases registers obtained from AllocateSequentialRegisters.
    private static void FreeSequentialRegisters(ReadOnlySpan<ScopedRegister> registers)
    {
        for (int index = 0; index < registers.Length; index++)
        {
            registers[index].Dispose();
        }
    }

    // Debug-only check that the allocated temporaries are consecutive,
    // as required by the AArch64 structure load/store encodings.
    [Conditional("DEBUG")]
    private static void AssertSequentialRegisters(ReadOnlySpan<ScopedRegister> registers)
    {
        for (int index = 1; index < registers.Length; index++)
        {
            Debug.Assert(registers[index].Operand.GetRegister().Index == registers[0].Operand.GetRegister().Index + index);
        }
    }

    // Copies the low doubleword of each temporary into the corresponding
    // D register (element 0 or 1 of the remapped host Q register).
    private static void MoveQuadwordsLowerToDoublewords(CodeGenContext context, uint rd, uint registerCount, uint step, ReadOnlySpan<ScopedRegister> registers)
    {
        for (int index = 0; index < registerCount; index++)
        {
            uint r = rd + (uint)index * step;

            Operand rdOperand = context.RegisterAllocator.RemapSimdRegister((int)(r >> 1));

            uint imm5 = InstEmitNeonCommon.GetImm5ForElementIndex(r & 1u, false);

            context.Arm64Assembler.InsElt(rdOperand, registers[index].Operand, 0, imm5);
        }
    }

    // Copies each source D register into the low doubleword of its temporary.
    private static void MoveDoublewordsToQuadwordsLower(CodeGenContext context, uint rd, uint registerCount, uint step, ReadOnlySpan<ScopedRegister> registers)
    {
        for (int index = 0; index < registerCount; index++)
        {
            uint r = rd + (uint)index * step;

            InstEmitNeonCommon.MoveScalarToSide(context, registers[index].Operand, r, false);
        }
    }

    // Gathers two D register pairs (rd, rd+1) and (rd+2, rd+3) into two full
    // temporary Q registers, for the odd-rd 2x2 store path.
    private static void MoveDoublewordsToQuadwords2x2(CodeGenContext context, uint rd, ReadOnlySpan<ScopedRegister> registers)
    {
        for (int index = 0; index < 2; index++)
        {
            uint r = rd + (uint)index * 2;
            uint r2 = r + 1;

            Operand rdOperand = context.RegisterAllocator.RemapSimdRegister((int)(r >> 1));

            uint imm5 = InstEmitNeonCommon.GetImm5ForElementIndex(0, false);

            context.Arm64Assembler.InsElt(registers[index].Operand, rdOperand, (r & 1u) << 3, imm5);

            rdOperand = context.RegisterAllocator.RemapSimdRegister((int)(r2 >> 1));

            imm5 = InstEmitNeonCommon.GetImm5ForElementIndex(1, false);

            context.Arm64Assembler.InsElt(registers[index].Operand, rdOperand, (r2 & 1u) << 3, imm5);
        }
    }

    // Scatters both doublewords of the temporaries into consecutive D
    // registers (two D registers consumed per temporary Q register).
    private static void MoveQuadwordsToDoublewords(CodeGenContext context, uint rd, uint registerCount, uint step, ReadOnlySpan<ScopedRegister> registers)
    {
        for (int index = 0; index < registerCount; index++)
        {
            uint r = rd + (uint)index * step;

            Operand rdOperand = context.RegisterAllocator.RemapSimdRegister((int)(r >> 1));

            uint imm5 = InstEmitNeonCommon.GetImm5ForElementIndex(r & 1u, false);

            context.Arm64Assembler.InsElt(rdOperand, registers[index >> 1].Operand, ((uint)index & 1u) << 3, imm5);
        }
    }

    // Scatters two full temporary Q registers back into the D register pairs
    // (rd, rd+1) and (rd+2, rd+3), for the odd-rd 2x2 load path.
    private static void MoveQuadwordsToDoublewords2x2(CodeGenContext context, uint rd, ReadOnlySpan<ScopedRegister> registers)
    {
        for (int index = 0; index < 2; index++)
        {
            uint r = rd + (uint)index * 2;
            uint r2 = r + 1;

            Operand rdOperand = context.RegisterAllocator.RemapSimdRegister((int)(r >> 1));

            uint imm5 = InstEmitNeonCommon.GetImm5ForElementIndex(r & 1u, false);

            context.Arm64Assembler.InsElt(rdOperand, registers[index].Operand, 0, imm5);

            rdOperand = context.RegisterAllocator.RemapSimdRegister((int)(r2 >> 1));

            imm5 = InstEmitNeonCommon.GetImm5ForElementIndex(r2 & 1u, false);

            context.Arm64Assembler.InsElt(rdOperand, registers[index].Operand, 1u << 3, imm5);
        }
    }
}
}

View File

@@ -0,0 +1,665 @@
using Ryujinx.Cpu.LightningJit.CodeGen;
using System;
using System.Diagnostics;
using System.Numerics;
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
static class InstEmitNeonMove
{
// VDUP (general-purpose register): broadcasts the low 8/16/32 bits of the
// source GPR to every lane of the destination D or Q register.
public static void VdupR(CodeGenContext context, uint rd, uint rt, uint b, uint e, uint q)
{
    // b:e encode the element size; 2 - (b:e) yields the AArch64 size code.
    uint elemSize = 2 - (e | (b << 1));

    Debug.Assert(elemSize < 3);

    Operand source = InstEmitCommon.GetInputGpr(context, rt);

    uint imm5 = InstEmitNeonCommon.GetImm5ForElementIndex(0, elemSize);

    if (q != 0)
    {
        Debug.Assert((rd & 1) == 0);

        // Full 128-bit destination: duplicate straight into the remapped register.
        Operand destination = context.RegisterAllocator.RemapSimdRegister((int)(rd >> 1));

        context.Arm64Assembler.DupGen(destination, source, imm5, q);
    }
    else
    {
        // 64-bit destination: duplicate into a scratch register, then insert
        // only the affected doubleword so its neighbor is preserved.
        using ScopedRegister scratch = context.RegisterAllocator.AllocateTempSimdRegisterScoped();

        context.Arm64Assembler.DupGen(scratch.Operand, source, imm5, q);

        InstEmitNeonCommon.InsertResult(context, scratch.Operand, rd, false);
    }
}
// VDUP (scalar): broadcasts one element of a source vector register to every
// lane of the destination D or Q register.
public static void VdupS(CodeGenContext context, uint rd, uint rm, uint imm4, uint q)
{
    // The lowest set bit of imm4 encodes the element size; the bits above it
    // encode the source element index.
    uint elemSize = (uint)BitOperations.TrailingZeroCount(imm4);

    Debug.Assert(elemSize < 3);

    uint srcIndex = imm4 >> (int)(elemSize + 1);

    Operand source = context.RegisterAllocator.RemapSimdRegister((int)(rm >> 1));

    // Fold the odd/even D register selection into the host element index.
    uint imm5 = InstEmitNeonCommon.GetImm5ForElementIndex(srcIndex | ((rm & 1) << (int)(3 - elemSize)), elemSize);

    if (q != 0)
    {
        Debug.Assert((rd & 1) == 0);

        Operand destination = context.RegisterAllocator.RemapSimdRegister((int)(rd >> 1));

        context.Arm64Assembler.DupEltVectorFromElement(destination, source, imm5, q);
    }
    else
    {
        // 64-bit destination: stage through a scratch register so the other
        // doubleword of the containing host register is preserved.
        using ScopedRegister scratch = context.RegisterAllocator.AllocateTempSimdRegisterScoped();

        context.Arm64Assembler.DupEltVectorFromElement(scratch.Operand, source, imm5, q);

        InstEmitNeonCommon.InsertResult(context, scratch.Operand, rd, false);
    }
}
// VEXT: extracts a byte-shifted window from the concatenation of Rn:Rm,
// mapped onto the AArch64 EXT instruction with start offset imm4.
public static void Vext(CodeGenContext context, uint rd, uint rn, uint rm, uint imm4, uint q)
{
    if (q == 0)
    {
        // 64-bit form: move both source D registers into scratch registers,
        // then write the result back into the destination doubleword only.
        using ScopedRegister rnReg = InstEmitNeonCommon.MoveScalarToSide(context, rn, false);
        using ScopedRegister rmReg = InstEmitNeonCommon.MoveScalarToSide(context, rm, false);

        // Reuse one of the source scratch registers as the destination.
        using ScopedRegister tempRegister = InstEmitNeonCommon.PickSimdRegister(context.RegisterAllocator, rnReg, rmReg);

        context.Arm64Assembler.Ext(tempRegister.Operand, rnReg.Operand, imm4, rmReg.Operand, q);

        InstEmitNeonCommon.InsertResult(context, tempRegister.Operand, rd, false);
    }
    else
    {
        // 128-bit form: all operands must be even D registers, mapping
        // directly onto whole host vector registers.
        Debug.Assert(((rd | rn | rm) & 1) == 0);

        Operand rdOperand = context.RegisterAllocator.RemapSimdRegister((int)(rd >> 1));
        Operand rnOperand = context.RegisterAllocator.RemapSimdRegister((int)(rn >> 1));
        Operand rmOperand = context.RegisterAllocator.RemapSimdRegister((int)(rm >> 1));

        context.Arm64Assembler.Ext(rdOperand, rnOperand, imm4, rmOperand, q);
    }
}
// VMOVL: widens each element of the source doubleword vector, using
// AArch64 USHLL/SSHLL with a shift amount of zero. The lowest set bit of
// imm3h encodes the source element size; `u` selects unsigned widening.
public static void Vmovl(CodeGenContext context, uint rd, uint rm, bool u, uint imm3h)
{
    uint size = (uint)BitOperations.TrailingZeroCount(imm3h);

    Debug.Assert(size < 3);

    InstEmitNeonCommon.EmitVectorBinaryLongShift(
        context,
        rd,
        rm,
        0,
        size,
        isShl: true,
        u ? context.Arm64Assembler.Ushll : context.Arm64Assembler.Sshll);
}
// VMOVN: narrows each element of the source vector to half width,
// mapped onto the AArch64 XTN instruction.
public static void Vmovn(CodeGenContext context, uint rd, uint rm, uint size)
{
    Debug.Assert(size < 3);

    InstEmitNeonCommon.EmitVectorUnaryNarrow(context, rd, rm, size, context.Arm64Assembler.Xtn);
}
// VMOVX: moves the upper half-precision value of a single-precision register
// to the bottom, implemented as an unsigned 16-bit right shift (USHR #16) of
// the 32-bit scalar.
public static void Vmovx(CodeGenContext context, uint rd, uint rm)
{
    InstEmitNeonCommon.EmitScalarBinaryShift(context, rd, rm, 16, 2, isShl: false, context.Arm64Assembler.UshrS);
}
// VMOV (between two GPRs and a doubleword register).
// op == true: Dm -> Rt:Rt2; op == false: Rt:Rt2 -> Dm.
// `top` selects which doubleword of the host 128-bit register holds Dm;
// ftype = top + 1 picks the matching FMOV variant.
public static void VmovD(CodeGenContext context, uint rt, uint rt2, uint rm, bool op)
{
    Operand rmReg = context.RegisterAllocator.RemapSimdRegister((int)(rm >> 1));

    uint top = rm & 1;
    uint ftype = top + 1;

    if (op)
    {
        Operand rtOperand = InstEmitCommon.GetOutputGpr(context, rt);
        Operand rt2Operand = InstEmitCommon.GetOutputGpr(context, rt2);

        // 64-bit views of the two 32-bit output registers.
        Operand rtOperand64 = new(OperandKind.Register, OperandType.I64, rtOperand.Value);
        Operand rt2Operand64 = new(OperandKind.Register, OperandType.I64, rt2Operand.Value);

        // Move the whole doubleword into Rt, split the high word into Rt2,
        // then truncate Rt back to 32 bits.
        context.Arm64Assembler.FmovFloatGen(rtOperand64, rmReg, ftype, 1, 0, top);
        context.Arm64Assembler.Lsr(rt2Operand64, rtOperand64, InstEmitCommon.Const(32));
        context.Arm64Assembler.Mov(rtOperand, rtOperand); // Zero-extend.
    }
    else
    {
        Operand rtOperand = InstEmitCommon.GetInputGpr(context, rt);
        Operand rt2Operand = InstEmitCommon.GetInputGpr(context, rt2);

        using ScopedRegister tempRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        Operand tempRegister64 = new(OperandKind.Register, OperandType.I64, tempRegister.Operand.Value);

        // Pack Rt (low word) and Rt2 (high word) into one 64-bit value.
        context.Arm64Assembler.Lsl(tempRegister64, rt2Operand, InstEmitCommon.Const(32));
        context.Arm64Assembler.Orr(tempRegister64, tempRegister64, rtOperand);

        if (top == 0)
        {
            // Doing FMOV on Rm directly would clear the high bits if we are moving to the bottom.
            using ScopedRegister tempRegister2 = context.RegisterAllocator.AllocateTempSimdRegisterScoped();

            context.Arm64Assembler.FmovFloatGen(tempRegister2.Operand, tempRegister64, ftype, 1, 1, top);

            InstEmitNeonCommon.InsertResult(context, tempRegister2.Operand, rm, false);
        }
        else
        {
            context.Arm64Assembler.FmovFloatGen(rmReg, tempRegister64, ftype, 1, 1, top);
        }
    }
}
// VMOV (between a GPR and a half-precision register, FMOV with ftype 3).
// op == true: Sn[15:0] -> Rt; op == false: Rt[15:0] -> Sn.
public static void VmovH(CodeGenContext context, uint rt, uint rn, bool op)
{
    if (op)
    {
        Operand rtOperand = InstEmitCommon.GetOutputGpr(context, rt);

        // Extract the scalar from its packed host register, then move to the GPR.
        using ScopedRegister tempRegister = InstEmitNeonCommon.MoveScalarToSide(context, rn, true);

        context.Arm64Assembler.FmovFloatGen(rtOperand, tempRegister.Operand, 3, 0, 0, 0);
    }
    else
    {
        Operand rtOperand = InstEmitCommon.GetInputGpr(context, rt);

        // Move the GPR into a scratch register, then insert only the
        // affected word of the packed destination register.
        using ScopedRegister tempRegister = context.RegisterAllocator.AllocateTempSimdRegisterScoped();

        context.Arm64Assembler.FmovFloatGen(tempRegister.Operand, rtOperand, 3, 0, 1, 0);

        InstEmitNeonCommon.InsertResult(context, tempRegister.Operand, rn, true);
    }
}
// VMOV (immediate, vector): emits an AArch64 MOVI-family instruction,
// splitting imm8 into the individual encoding bits.
public static void VmovI(CodeGenContext context, uint rd, uint op, uint cmode, uint imm8, uint q)
{
    (uint a, uint b, uint c, uint d, uint e, uint f, uint g, uint h) = Split(imm8);

    if (q != 0)
    {
        // Full 128-bit destination: write the remapped register directly.
        Operand destination = context.RegisterAllocator.RemapSimdRegister((int)(rd >> 1));

        context.Arm64Assembler.Movi(destination, h, g, f, e, d, cmode, c, b, a, op, q);
    }
    else
    {
        // 64-bit destination: build the value in a scratch register, then
        // insert only the affected doubleword so its neighbor is preserved.
        using ScopedRegister scratch = context.RegisterAllocator.AllocateTempSimdRegisterScoped();

        context.Arm64Assembler.Movi(scratch.Operand, h, g, f, e, d, cmode, c, b, a, op, q);

        InstEmitNeonCommon.InsertResult(context, scratch.Operand, rd, false);
    }
}
// VMOV (immediate, floating-point scalar): materializes the encoded FP
// immediate with AArch64 FMOV, then inserts only the destination scalar.
// NOTE(review): `size ^ 2u` maps the AArch32 size field to the AArch64
// ftype field — confirm against FmovFloatImm's encoding.
public static void VmovFI(CodeGenContext context, uint rd, uint imm8, uint size)
{
    using ScopedRegister tempRegister = context.RegisterAllocator.AllocateTempSimdRegisterScoped();

    context.Arm64Assembler.FmovFloatImm(tempRegister.Operand, imm8, size ^ 2u);

    // size == 3 is the double-precision form; anything else is single.
    InstEmitNeonCommon.InsertResult(context, tempRegister.Operand, rd, size != 3);
}
// VMOV (register, scalar): copies one S (size == 2) or D register to another
// via an AArch64 INS (element) on the packed host vector registers.
public static void VmovR(CodeGenContext context, uint rd, uint rm, uint size)
{
    bool singleRegister = size == 2;

    // 4 S registers or 2 D registers are packed per host register.
    int shift = singleRegister ? 2 : 1;
    uint mask = singleRegister ? 3u : 1u;

    // Element positions of the source and destination within their host registers.
    uint dstElt = rd & mask;
    uint srcElt = rm & mask;

    uint imm4 = srcElt << (singleRegister ? 2 : 3);
    uint imm5 = InstEmitNeonCommon.GetImm5ForElementIndex(dstElt, singleRegister);

    Operand rdOperand = context.RegisterAllocator.RemapSimdRegister((int)(rd >> shift));
    Operand rmOperand = context.RegisterAllocator.RemapSimdRegister((int)(rm >> shift));

    context.Arm64Assembler.InsElt(rdOperand, rmOperand, imm4, imm5);
}
// VMOV (general-purpose register to scalar): insert a GPR into one vector element.
// opc1/opc2 jointly encode the element size and index (AArch32 encoding).
public static void VmovRs(CodeGenContext context, uint rd, uint rt, uint opc1, uint opc2)
{
    uint elt;
    uint eltSize;

    if ((opc1 & 2u) != 0)
    {
        // Byte element.
        elt = opc2 | ((opc1 & 1u) << 2);
        eltSize = 0;
    }
    else if ((opc2 & 1u) != 0)
    {
        // Halfword element.
        elt = (opc2 >> 1) | ((opc1 & 1u) << 1);
        eltSize = 1;
    }
    else
    {
        // Word element.
        Debug.Assert(opc1 == 0 || opc1 == 1);
        Debug.Assert(opc2 == 0);

        elt = opc1 & 1u;
        eltSize = 2;
    }

    // Fold in whether Rd is the low or high D half of the mapped Q register.
    elt |= (rd & 1u) << (int)(3 - eltSize);

    Operand source = InstEmitCommon.GetInputGpr(context, rt);
    Operand dest = context.RegisterAllocator.RemapSimdRegister((int)(rd >> 1));

    context.Arm64Assembler.InsGen(dest, source, InstEmitNeonCommon.GetImm5ForElementIndex(elt, eltSize));
}
// VMOV (single-precision scalar <-> general-purpose register).
// op == true reads the FP scalar into Rt; op == false writes Rt into the FP scalar.
public static void VmovS(CodeGenContext context, uint rt, uint rn, bool op)
{
    if (!op)
    {
        // GPR -> scalar: FMOV into a temporary, then insert into the target register.
        Operand gpr = InstEmitCommon.GetInputGpr(context, rt);

        using ScopedRegister scratch = context.RegisterAllocator.AllocateTempSimdRegisterScoped();

        context.Arm64Assembler.FmovFloatGen(scratch.Operand, gpr, 0, 0, 1, 0);
        InstEmitNeonCommon.InsertResult(context, scratch.Operand, rn, true);
    }
    else
    {
        // Scalar -> GPR: bring the scalar to the low lane, then FMOV to the GPR.
        Operand gpr = InstEmitCommon.GetOutputGpr(context, rt);

        using ScopedRegister scalar = InstEmitNeonCommon.MoveScalarToSide(context, rn, true);

        context.Arm64Assembler.FmovFloatGen(gpr, scalar.Operand, 0, 0, 0, 0);
    }
}
// VMOV (scalar to general-purpose register): extract one vector element into a GPR,
// sign- or zero-extending according to the 'u' flag and the element size.
public static void VmovSr(CodeGenContext context, uint rt, uint rn, bool u, uint opc1, uint opc2)
{
    uint elt;
    uint eltSize;

    if ((opc1 & 2u) != 0)
    {
        // Byte element.
        elt = opc2 | ((opc1 & 1u) << 2);
        eltSize = 0;
    }
    else if ((opc2 & 1u) != 0)
    {
        // Halfword element.
        elt = (opc2 >> 1) | ((opc1 & 1u) << 1);
        eltSize = 1;
    }
    else
    {
        // Word element; this encoding has no unsigned variant.
        Debug.Assert(opc1 == 0 || opc1 == 1);
        Debug.Assert(opc2 == 0);
        Debug.Assert(!u);

        elt = opc1 & 1u;
        eltSize = 2;
    }

    // Fold in whether Rn is the low or high D half of the mapped Q register.
    elt |= (rn & 1u) << (int)(3 - eltSize);

    Operand dest = InstEmitCommon.GetOutputGpr(context, rt);
    Operand source = context.RegisterAllocator.RemapSimdRegister((int)(rn >> 1));

    if (u || eltSize > 1)
    {
        context.Arm64Assembler.Umov(dest, source, (int)elt, (int)eltSize);
    }
    else
    {
        context.Arm64Assembler.Smov(dest, source, (int)elt, (int)eltSize);
    }
}
// VMOV (two single-precision registers <-> two general-purpose registers).
public static void VmovSs(CodeGenContext context, uint rt, uint rt2, uint rm, bool op)
{
    if ((rm & 1) == 0)
    {
        // If we are moving an aligned pair of single-precision registers,
        // we can just move a single double-precision register.
        VmovD(context, rt, rt2, rm >> 1, op);

        return;
    }

    if (op)
    {
        // FP -> GPRs: move each scalar to the low lane, then FMOV out.
        Operand gpr1 = InstEmitCommon.GetOutputGpr(context, rt);
        Operand gpr2 = InstEmitCommon.GetOutputGpr(context, rt2);

        using ScopedRegister lo = InstEmitNeonCommon.MoveScalarToSide(context, rm, true);
        using ScopedRegister hi = InstEmitNeonCommon.MoveScalarToSide(context, rm + 1, true);

        context.Arm64Assembler.FmovFloatGen(gpr1, lo.Operand, 0, 0, 0, 0);
        context.Arm64Assembler.FmovFloatGen(gpr2, hi.Operand, 0, 0, 0, 0);
    }
    else
    {
        // GPRs -> FP: one FMOV + insert per source register, reusing one temporary.
        Operand gpr1 = InstEmitCommon.GetInputGpr(context, rt);
        Operand gpr2 = InstEmitCommon.GetInputGpr(context, rt2);

        using ScopedRegister scratch = context.RegisterAllocator.AllocateTempSimdRegisterScoped();

        context.Arm64Assembler.FmovFloatGen(scratch.Operand, gpr1, 0, 0, 1, 0);
        InstEmitNeonCommon.InsertResult(context, scratch.Operand, rm, true);

        context.Arm64Assembler.FmovFloatGen(scratch.Operand, gpr2, 0, 0, 1, 0);
        InstEmitNeonCommon.InsertResult(context, scratch.Operand, rm + 1, true);
    }
}
// VMVN (immediate, vector): materialize the bitwise NOT of a MOVI-encoded immediate into Rd.
public static void VmvnI(CodeGenContext context, uint rd, uint cmode, uint imm8, uint q)
{
    (uint a, uint b, uint c, uint d, uint e, uint f, uint g, uint h) = Split(imm8);

    if (q != 0)
    {
        // 128-bit destination: write the remapped Q register directly.
        Operand dest = context.RegisterAllocator.RemapSimdRegister((int)(rd >> 1));

        context.Arm64Assembler.Mvni(dest, h, g, f, e, d, cmode, c, b, a, q);
    }
    else
    {
        // 64-bit destination: build the value in a temporary, then insert the D half.
        using ScopedRegister scratch = context.RegisterAllocator.AllocateTempSimdRegisterScoped();

        context.Arm64Assembler.Mvni(scratch.Operand, h, g, f, e, d, cmode, c, b, a, q);
        InstEmitNeonCommon.InsertResult(context, scratch.Operand, rd, false);
    }
}
// VMVN (register, vector): bitwise NOT of Rm into Rd.
// 'size' is unused: NOT is a pure bitwise operation, so the element size is irrelevant.
public static void VmvnR(CodeGenContext context, uint rd, uint rm, uint size, uint q)
{
    InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, q, context.Arm64Assembler.Not);
}
// VSWP: exchange the contents of two vector registers.
public static void Vswp(CodeGenContext context, uint rd, uint rm, uint q)
{
    using ScopedRegister scratch = context.RegisterAllocator.AllocateTempSimdRegisterScoped();

    if (q != 0)
    {
        // Full Q registers: classic three-move swap, using ORR as a register move.
        Operand dest = context.RegisterAllocator.RemapSimdRegister((int)(rd >> 1));
        Operand src = context.RegisterAllocator.RemapSimdRegister((int)(rm >> 1));

        context.Arm64Assembler.Orr(scratch.Operand, dest, dest); // scratch = Rd
        context.Arm64Assembler.Orr(dest, src, src); // Rd = Rm
        context.Arm64Assembler.Orr(src, scratch.Operand, scratch.Operand); // Rm = scratch
    }
    else
    {
        // D registers: stash Rd in the scratch register, then cross-insert both halves.
        InstEmitNeonCommon.MoveScalarToSide(context, scratch.Operand, rd, false);

        using ScopedRegister src = InstEmitNeonCommon.MoveScalarToSide(context, rm, false);

        InstEmitNeonCommon.InsertResult(context, src.Operand, rd, false);
        InstEmitNeonCommon.InsertResult(context, scratch.Operand, rm, false);
    }
}
// VTBL/VTBX: table lookup with per-byte indices. op == false is TBL (out of range
// indices yield zero), op == true is TBX (out of range indices leave Rd unchanged).
// len encodes the table register count minus one (1 to 4 consecutive D registers).
public static void Vtbl(CodeGenContext context, uint rd, uint rn, uint rm, bool op, uint len)
{
    // On AArch64, TBL/TBX works with 128-bit vectors, while on AArch32 it works with 64-bit vectors.
    // We must combine the 64-bit vectors into a larger 128-bit one in some cases.
    // TODO: Peephole optimization to combine adjacent TBL instructions?
    Debug.Assert(len <= 3);
    bool isTbl = !op;
    // Clamp so the table never runs past the last D register.
    len = Math.Min(len, 31 - rn);
    // TBX with an odd table register count (len even => len + 1 registers) packs an
    // unused doubleword into the combined AArch64 vector, which changes the out of
    // range threshold; those indices must be fixed up below.
    bool rangeMismatch = !isTbl && (len & 1) == 0;
    using ScopedRegister indicesReg = InstEmitNeonCommon.MoveScalarToSide(context, rm, false, rangeMismatch);
    if (rangeMismatch)
    {
        // Force any index >= 8 * regs to be the maximum value, since on AArch64 we are working with a full vector,
        // and the out of range value is 16 * regs, not 8 * regs.
        Debug.Assert(indicesReg.IsAllocated);
        using ScopedRegister tempRegister2 = context.RegisterAllocator.AllocateTempSimdRegisterScoped();
        if (len == 0)
        {
            // Single table register: bytes with any of the top 5 bits set are out of range.
            (uint immb, uint immh) = InstEmitNeonCommon.GetImmbImmhForShift(3, 0, isShl: false);
            context.Arm64Assembler.UshrV(tempRegister2.Operand, indicesReg.Operand, immb, immh, 0);
            context.Arm64Assembler.CmeqZeroV(tempRegister2.Operand, tempRegister2.Operand, 0, 0);
            context.Arm64Assembler.Orn(indicesReg.Operand, indicesReg.Operand, tempRegister2.Operand, 0);
        }
        else
        {
            // Compare each index against 8 * (len + 1) and saturate the out of range ones.
            (uint a, uint b, uint c, uint d, uint e, uint f, uint g, uint h) = Split(8u * (len + 1));
            context.Arm64Assembler.Movi(tempRegister2.Operand, h, g, f, e, d, 0xe, c, b, a, 0, 0);
            context.Arm64Assembler.CmgeRegV(tempRegister2.Operand, indicesReg.Operand, tempRegister2.Operand, 0, 0);
            context.Arm64Assembler.OrrReg(indicesReg.Operand, indicesReg.Operand, tempRegister2.Operand, 0);
        }
    }
    // Build one or two 128-bit table vectors out of the AArch32 64-bit table registers.
    ScopedRegister tableReg1 = default;
    ScopedRegister tableReg2 = default;
    switch (len)
    {
        case 0:
            tableReg1 = MoveHalfToSideZeroUpper(context, rn);
            break;
        case 1:
            tableReg1 = MoveDoublewords(context, rn, rn + 1);
            break;
        case 2:
            tableReg1 = MoveDoublewords(context, rn, rn + 1, isOdd: true);
            tableReg2 = MoveHalfToSideZeroUpper(context, rn + 2);
            break;
        case 3:
            tableReg1 = MoveDoublewords(context, rn, rn + 1);
            tableReg2 = MoveDoublewords(context, rn + 2, rn + 3);
            break;
    }
    // TBL works with consecutive registers, it is assumed that two consecutive calls to the register allocator
    // will return consecutive registers.
    Debug.Assert(len < 2 || tableReg1.Operand.GetRegister().Index + 1 == tableReg2.Operand.GetRegister().Index);
    using ScopedRegister tempRegister = context.RegisterAllocator.AllocateTempSimdRegisterScoped();
    if (isTbl)
    {
        context.Arm64Assembler.Tbl(tempRegister.Operand, tableReg1.Operand, len >> 1, indicesReg.Operand, 0);
    }
    else
    {
        // TBX reads the destination for out of range indices, so seed the temp with Rd.
        InstEmitNeonCommon.MoveScalarToSide(context, tempRegister.Operand, rd, false);
        context.Arm64Assembler.Tbx(tempRegister.Operand, tableReg1.Operand, len >> 1, indicesReg.Operand, 0);
    }
    InstEmitNeonCommon.InsertResult(context, tempRegister.Operand, rd, false);
    tableReg1.Dispose();
    if (len > 1)
    {
        tableReg2.Dispose();
    }
}
// VTRN: transpose element pairs between Rd and Rm (TRN1 produces Rd, TRN2 produces Rm).
public static void Vtrn(CodeGenContext context, uint rd, uint rm, uint size, uint q)
{
    EmitVectorBinaryInterleavedTrn(context, rd, rm, size, q, context.Arm64Assembler.Trn1, context.Arm64Assembler.Trn2);
}
// VUZP: de-interleave (unzip) elements (UZP1 produces Rd, UZP2 produces Rm).
public static void Vuzp(CodeGenContext context, uint rd, uint rm, uint size, uint q)
{
    EmitVectorBinaryInterleaved(context, rd, rm, size, q, context.Arm64Assembler.Uzp1, context.Arm64Assembler.Uzp2);
}
// VZIP: interleave (zip) elements (ZIP1 produces Rd, ZIP2 produces Rm).
public static void Vzip(CodeGenContext context, uint rd, uint rm, uint size, uint q)
{
    EmitVectorBinaryInterleaved(context, rd, rm, size, q, context.Arm64Assembler.Zip1, context.Arm64Assembler.Zip2);
}
// Splits an 8-bit immediate into its individual bits, most significant first
// (first item = bit 7 ... last item = bit 0), matching the a:b:c:d:e:f:g:h
// operand order used by the MOVI/MVNI encoders.
public static (uint, uint, uint, uint, uint, uint, uint, uint) Split(uint imm8)
{
    static uint Bit(uint value, int index) => (value >> index) & 1u;

    return (Bit(imm8, 7), Bit(imm8, 6), Bit(imm8, 5), Bit(imm8, 4), Bit(imm8, 3), Bit(imm8, 2), Bit(imm8, 1), Bit(imm8, 0));
}
// Copies one 64-bit half of the mapped Q register into a freshly allocated
// temporary, leaving the upper half of the temporary cleared.
private static ScopedRegister MoveHalfToSideZeroUpper(CodeGenContext context, uint srcReg)
{
    uint half = srcReg & 1u;
    Operand source = context.RegisterAllocator.RemapSimdRegister((int)(srcReg >> 1));

    ScopedRegister result = context.RegisterAllocator.AllocateTempFpRegisterScoped(false);

    // DUP (element, scalar form) selects the wanted doubleword.
    context.Arm64Assembler.DupEltScalarFromElement(result.Operand, source, InstEmitNeonCommon.GetImm5ForElementIndex(half, false));

    return result;
}
// Builds a 128-bit vector with 'lowerReg' in the low doubleword and 'upperReg' in the
// high doubleword. When the pair already forms an aligned Q register (and isOdd is not
// forced), the existing mapped register is returned wrapped with 'false' — presumably
// marking it as not owned so it is not freed on dispose (TODO: confirm ScopedRegister
// ctor semantics).
private static ScopedRegister MoveDoublewords(CodeGenContext context, uint lowerReg, uint upperReg, bool isOdd = false)
{
    if ((lowerReg & 1) == 0 && upperReg == lowerReg + 1 && !isOdd)
    {
        return new ScopedRegister(context.RegisterAllocator, context.RegisterAllocator.RemapSimdRegister((int)(lowerReg >> 1)), false);
    }
    Operand lowerSrc = context.RegisterAllocator.RemapSimdRegister((int)(lowerReg >> 1));
    Operand upperSrc = context.RegisterAllocator.RemapSimdRegister((int)(upperReg >> 1));
    ScopedRegister tempRegister = context.RegisterAllocator.AllocateTempFpRegisterScoped(false);
    // Copy the lower doubleword first (DUP scalar form), then insert the upper one.
    uint imm5 = InstEmitNeonCommon.GetImm5ForElementIndex(lowerReg & 1u, false);
    context.Arm64Assembler.DupEltScalarFromElement(tempRegister.Operand, lowerSrc, imm5);
    imm5 = InstEmitNeonCommon.GetImm5ForElementIndex(1, false);
    context.Arm64Assembler.InsElt(tempRegister.Operand, upperSrc, (upperReg & 1u) << 3, imm5);
    return tempRegister;
}
// VTRN helper: like EmitVectorBinaryInterleaved, except that rd == rm gets a dedicated
// sequence so both interleave results are computed from the unmodified inputs.
private static void EmitVectorBinaryInterleavedTrn(
    CodeGenContext context,
    uint rd,
    uint rm,
    uint size,
    uint q,
    Action<Operand, Operand, Operand, uint, uint> action1,
    Action<Operand, Operand, Operand, uint, uint> action2)
{
    if (rd == rm)
    {
        // The behaviour when the registers are the same is "unpredictable" according to the manual.
        if (q == 0)
        {
            using ScopedRegister rdReg = InstEmitNeonCommon.MoveScalarToSide(context, rd, false);
            using ScopedRegister rmReg = InstEmitNeonCommon.MoveScalarToSide(context, rm, false);
            using ScopedRegister tempRegister1 = context.RegisterAllocator.AllocateTempSimdRegisterScoped();
            using ScopedRegister tempRegister2 = InstEmitNeonCommon.PickSimdRegister(context.RegisterAllocator, rdReg, rmReg);
            // Feed the first result into the second operation, writing a single output.
            action1(tempRegister1.Operand, rdReg.Operand, rmReg.Operand, size, q);
            action2(tempRegister2.Operand, rdReg.Operand, tempRegister1.Operand, size, q);
            InstEmitNeonCommon.InsertResult(context, tempRegister2.Operand, rd, false);
        }
        else
        {
            Operand rdOperand = context.RegisterAllocator.RemapSimdRegister((int)(rd >> 1));
            Operand rmOperand = context.RegisterAllocator.RemapSimdRegister((int)(rm >> 1));
            using ScopedRegister tempRegister = context.RegisterAllocator.AllocateTempSimdRegisterScoped();
            action1(tempRegister.Operand, rdOperand, rmOperand, size, q);
            action2(rmOperand, rdOperand, tempRegister.Operand, size, q);
        }
    }
    else
    {
        EmitVectorBinaryInterleaved(context, rd, rm, size, q, action1, action2);
    }
}
// Emits a two-output interleaved operation (TRN/UZP/ZIP pairs): action1 computes the
// value for Rd and action2 the value for Rm, both from the original Rd/Rm inputs.
// Temporaries are used so neither input is clobbered before both results exist.
private static void EmitVectorBinaryInterleaved(
    CodeGenContext context,
    uint rd,
    uint rm,
    uint size,
    uint q,
    Action<Operand, Operand, Operand, uint, uint> action1,
    Action<Operand, Operand, Operand, uint, uint> action2)
{
    if (q == 0)
    {
        using ScopedRegister rdReg = InstEmitNeonCommon.MoveScalarToSide(context, rd, false);
        using ScopedRegister rmReg = InstEmitNeonCommon.MoveScalarToSide(context, rm, false);
        using ScopedRegister tempRegister1 = context.RegisterAllocator.AllocateTempSimdRegisterScoped();
        using ScopedRegister tempRegister2 = InstEmitNeonCommon.PickSimdRegister(context.RegisterAllocator, rdReg, rmReg);
        action1(tempRegister1.Operand, rdReg.Operand, rmReg.Operand, size, q);
        action2(tempRegister2.Operand, rdReg.Operand, rmReg.Operand, size, q);
        // When rd == rm only the second result is architecturally visible.
        if (rd != rm)
        {
            InstEmitNeonCommon.InsertResult(context, tempRegister1.Operand, rd, false);
        }
        InstEmitNeonCommon.InsertResult(context, tempRegister2.Operand, rm, false);
    }
    else
    {
        Operand rdOperand = context.RegisterAllocator.RemapSimdRegister((int)(rd >> 1));
        Operand rmOperand = context.RegisterAllocator.RemapSimdRegister((int)(rm >> 1));
        using ScopedRegister tempRegister = context.RegisterAllocator.AllocateTempSimdRegisterScoped();
        // First result goes through a temporary; the second can write Rm directly.
        action1(tempRegister.Operand, rdOperand, rmOperand, size, q);
        action2(rmOperand, rdOperand, rmOperand, size, q);
        if (rd != rm)
        {
            context.Arm64Assembler.OrrReg(rdOperand, tempRegister.Operand, tempRegister.Operand, 1);
        }
    }
}
}
}

View File

@@ -0,0 +1,105 @@
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
// Emitters for the AArch32 Advanced SIMD rounding instructions, translated to
// the corresponding AArch64 instructions via InstEmitNeonCommon helpers.
static class InstEmitNeonRound
{
    // VRADDHN: rounding add, returning the narrowed high half of each result.
    public static void Vraddhn(CodeGenContext context, uint rd, uint rn, uint rm, uint size)
        => InstEmitNeonCommon.EmitVectorBinaryNarrow(context, rd, rn, rm, size, context.Arm64Assembler.Raddhn);

    // VRHADD: rounding halving addition.
    public static void Vrhadd(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size, uint q)
        => InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, size, q, u ? context.Arm64Assembler.Urhadd : context.Arm64Assembler.Srhadd, null);

    // VRSHL: rounding shift left by a per-element register amount.
    // Note the swapped rm/rn order: AArch64 SRSHL/URSHL takes the shift amount as the second source.
    public static void Vrshl(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size, uint q)
        => InstEmitNeonCommon.EmitVectorBinary(
            context, rd, rm, rn, size, q,
            u ? context.Arm64Assembler.UrshlV : context.Arm64Assembler.SrshlV,
            u ? context.Arm64Assembler.UrshlS : context.Arm64Assembler.SrshlS);

    // VRSHR: rounding shift right by immediate.
    public static void Vrshr(CodeGenContext context, uint rd, uint rm, bool u, uint l, uint imm6, uint q)
    {
        uint size = InstEmitNeonCommon.GetSizeFromImm7(imm6 | (l << 6));
        uint shift = InstEmitNeonShift.GetShiftRight(imm6, size);

        InstEmitNeonCommon.EmitVectorBinaryShift(
            context, rd, rm, shift, size, q, isShl: false,
            u ? context.Arm64Assembler.UrshrV : context.Arm64Assembler.SrshrV,
            u ? context.Arm64Assembler.UrshrS : context.Arm64Assembler.SrshrS);
    }

    // VRSHRN: rounding shift right by immediate and narrow.
    public static void Vrshrn(CodeGenContext context, uint rd, uint rm, uint imm6)
    {
        uint size = InstEmitNeonCommon.GetSizeFromImm6(imm6);
        uint shift = InstEmitNeonShift.GetShiftRight(imm6, size);

        InstEmitNeonCommon.EmitVectorBinaryNarrowShift(context, rd, rm, shift, size, isShl: false, context.Arm64Assembler.Rshrn);
    }

    // VRSRA: rounding shift right by immediate and accumulate into Rd.
    public static void Vrsra(CodeGenContext context, uint rd, uint rm, bool u, uint l, uint imm6, uint q)
    {
        uint size = InstEmitNeonCommon.GetSizeFromImm7(imm6 | (l << 6));
        uint shift = InstEmitNeonShift.GetShiftRight(imm6, size);

        InstEmitNeonCommon.EmitVectorTernaryRdShift(
            context, rd, rm, shift, size, q, isShl: false,
            u ? context.Arm64Assembler.UrsraV : context.Arm64Assembler.SrsraV,
            u ? context.Arm64Assembler.UrsraS : context.Arm64Assembler.SrsraS);
    }

    // VRSUBHN: rounding subtract, returning the narrowed high half of each result.
    public static void Vrsubhn(CodeGenContext context, uint rd, uint rn, uint rm, uint size)
        => InstEmitNeonCommon.EmitVectorBinaryNarrow(context, rd, rn, rm, size, context.Arm64Assembler.Rsubhn);

    // VRINTA: round to integral, ties away from zero.
    public static void Vrinta(CodeGenContext context, uint rd, uint rm, uint size, uint q)
        => InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.FrintaSingleAndDouble, context.Arm64Assembler.FrintaHalf);

    // VRINTM: round to integral, towards minus infinity.
    public static void Vrintm(CodeGenContext context, uint rd, uint rm, uint size, uint q)
        => InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.FrintmSingleAndDouble, context.Arm64Assembler.FrintmHalf);

    // VRINTN: round to integral, ties to even.
    public static void Vrintn(CodeGenContext context, uint rd, uint rm, uint size, uint q)
        => InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.FrintnSingleAndDouble, context.Arm64Assembler.FrintnHalf);

    // VRINTP: round to integral, towards plus infinity.
    public static void Vrintp(CodeGenContext context, uint rd, uint rm, uint size, uint q)
        => InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.FrintpSingleAndDouble, context.Arm64Assembler.FrintpHalf);

    // VRINTX: round to integral, using the current rounding mode (exact variant).
    public static void Vrintx(CodeGenContext context, uint rd, uint rm, uint size, uint q)
        => InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.FrintxSingleAndDouble, context.Arm64Assembler.FrintxHalf);

    // VRINTZ: round to integral, towards zero.
    public static void Vrintz(CodeGenContext context, uint rd, uint rm, uint size, uint q)
        => InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.FrintzSingleAndDouble, context.Arm64Assembler.FrintzHalf);
}
}

View File

@@ -0,0 +1,205 @@
using System.Diagnostics;
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
// Emitters for the AArch32 Advanced SIMD saturating instructions, translated to
// the corresponding AArch64 instructions via InstEmitNeonCommon helpers.
static class InstEmitNeonSaturate
{
    // VQABS: saturating absolute value.
    public static void Vqabs(CodeGenContext context, uint rd, uint rm, uint size, uint q)
        => InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, size, q, context.Arm64Assembler.SqabsV);

    // VQADD: saturating addition.
    public static void Vqadd(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size, uint q)
        => InstEmitNeonCommon.EmitVectorBinary(
            context, rd, rn, rm, size, q,
            u ? context.Arm64Assembler.UqaddV : context.Arm64Assembler.SqaddV,
            u ? context.Arm64Assembler.UqaddS : context.Arm64Assembler.SqaddS);

    // VQDMLAL: saturating doubling multiply-accumulate long.
    public static void Vqdmlal(CodeGenContext context, uint rd, uint rn, uint rm, uint size)
        => InstEmitNeonCommon.EmitVectorBinaryLong(context, rd, rn, rm, size, context.Arm64Assembler.SqdmlalVecV);

    // VQDMLAL (by scalar).
    public static void VqdmlalS(CodeGenContext context, uint rd, uint rn, uint rm, uint size)
        => InstEmitNeonCommon.EmitVectorBinaryLongByScalar(context, rd, rn, rm, size, context.Arm64Assembler.SqdmlalElt2regElement);

    // VQDMLSL: saturating doubling multiply-subtract long.
    public static void Vqdmlsl(CodeGenContext context, uint rd, uint rn, uint rm, uint size)
        => InstEmitNeonCommon.EmitVectorBinaryLong(context, rd, rn, rm, size, context.Arm64Assembler.SqdmlslVecV);

    // VQDMLSL (by scalar).
    public static void VqdmlslS(CodeGenContext context, uint rd, uint rn, uint rm, uint size)
        => InstEmitNeonCommon.EmitVectorBinaryLongByScalar(context, rd, rn, rm, size, context.Arm64Assembler.SqdmlslElt2regElement);

    // VQDMULH: saturating doubling multiply, returning the high half.
    public static void Vqdmulh(CodeGenContext context, uint rd, uint rn, uint rm, uint size, uint q)
        => InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, size, q, context.Arm64Assembler.SqdmulhVecV, context.Arm64Assembler.SqdmulhVecS);

    // VQDMULH (by scalar).
    public static void VqdmulhS(CodeGenContext context, uint rd, uint rn, uint rm, uint size, uint q)
        => InstEmitNeonCommon.EmitVectorBinaryByScalar(context, rd, rn, rm, size, q, context.Arm64Assembler.SqdmulhElt2regElement);

    // VQDMULL: saturating doubling multiply long.
    public static void Vqdmull(CodeGenContext context, uint rd, uint rn, uint rm, uint size)
        => InstEmitNeonCommon.EmitVectorBinaryLong(context, rd, rn, rm, size, context.Arm64Assembler.SqdmullVecV);

    // VQDMULL (by scalar).
    public static void VqdmullS(CodeGenContext context, uint rd, uint rn, uint rm, uint size)
        => InstEmitNeonCommon.EmitVectorBinaryLongByScalar(context, rd, rn, rm, size, context.Arm64Assembler.SqdmullElt2regElement);

    // VQMOVN/VQMOVUN: saturating narrow; 'op' selects the signedness combination.
    public static void Vqmovn(CodeGenContext context, uint rd, uint rm, uint op, uint size)
    {
        if (op == 3)
        {
            // Unsigned source, unsigned result.
            InstEmitNeonCommon.EmitVectorUnaryNarrow(context, rd, rm, size, context.Arm64Assembler.UqxtnV);

            return;
        }

        // Signed source; op == 1 saturates to an unsigned result (VQMOVUN).
        InstEmitNeonCommon.EmitVectorUnaryNarrow(context, rd, rm, size, op == 1 ? context.Arm64Assembler.SqxtunV : context.Arm64Assembler.SqxtnV);
    }

    // VQNEG: saturating negation.
    public static void Vqneg(CodeGenContext context, uint rd, uint rm, uint size, uint q)
        => InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, size, q, context.Arm64Assembler.SqnegV);

    // VQRDMLAH: saturating rounding doubling multiply-accumulate, high half.
    public static void Vqrdmlah(CodeGenContext context, uint rd, uint rn, uint rm, uint size, uint q)
        => InstEmitNeonCommon.EmitVectorTernaryRd(context, rd, rn, rm, size, q, context.Arm64Assembler.SqrdmlahVecV);

    // VQRDMLAH (by scalar).
    public static void VqrdmlahS(CodeGenContext context, uint rd, uint rn, uint rm, uint size, uint q)
        => InstEmitNeonCommon.EmitVectorTernaryRdByScalar(context, rd, rn, rm, size, q, context.Arm64Assembler.SqrdmlahElt2regElement);

    // VQRDMLSH: saturating rounding doubling multiply-subtract, high half.
    public static void Vqrdmlsh(CodeGenContext context, uint rd, uint rn, uint rm, uint size, uint q)
        => InstEmitNeonCommon.EmitVectorTernaryRd(context, rd, rn, rm, size, q, context.Arm64Assembler.SqrdmlshVecV);

    // VQRDMLSH (by scalar).
    public static void VqrdmlshS(CodeGenContext context, uint rd, uint rn, uint rm, uint size, uint q)
        => InstEmitNeonCommon.EmitVectorTernaryRdByScalar(context, rd, rn, rm, size, q, context.Arm64Assembler.SqrdmlshElt2regElement);

    // VQRDMULH: saturating rounding doubling multiply, high half.
    public static void Vqrdmulh(CodeGenContext context, uint rd, uint rn, uint rm, uint size, uint q)
        => InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, size, q, context.Arm64Assembler.SqrdmulhVecV, context.Arm64Assembler.SqrdmulhVecS);

    // VQRDMULH (by scalar).
    public static void VqrdmulhS(CodeGenContext context, uint rd, uint rn, uint rm, uint size, uint q)
        => InstEmitNeonCommon.EmitVectorBinaryByScalar(context, rd, rn, rm, size, q, context.Arm64Assembler.SqrdmulhElt2regElement);

    // VQRSHL: saturating rounding shift left by register.
    // Note the swapped rm/rn order: AArch64 SQRSHL takes the shift amount as the second source.
    public static void Vqrshl(CodeGenContext context, uint rd, uint rn, uint rm, uint size, uint q)
        => InstEmitNeonCommon.EmitVectorBinary(context, rd, rm, rn, size, q, context.Arm64Assembler.SqrshlV, context.Arm64Assembler.SqrshlS);

    // VQRSHRN/VQRSHRUN: saturating rounding shift right by immediate and narrow.
    public static void Vqrshrn(CodeGenContext context, uint rd, uint rm, bool u, uint op, uint imm6)
    {
        uint size = InstEmitNeonCommon.GetSizeFromImm6(imm6);
        uint shift = InstEmitNeonShift.GetShiftRight(imm6, size);

        switch (u, op)
        {
            case (true, 0):
                InstEmitNeonCommon.EmitVectorBinaryNarrowShift(context, rd, rm, shift, size, isShl: false, context.Arm64Assembler.SqrshrunV);
                break;
            case (false, 1):
                InstEmitNeonCommon.EmitVectorBinaryNarrowShift(context, rd, rm, shift, size, isShl: false, context.Arm64Assembler.SqrshrnV);
                break;
            default:
                Debug.Assert(u && op == 1); // !u && op == 0 is the encoding for another instruction.
                InstEmitNeonCommon.EmitVectorBinaryNarrowShift(context, rd, rm, shift, size, isShl: false, context.Arm64Assembler.UqrshrnV);
                break;
        }
    }

    // VQSHL/VQSHLU (immediate).
    public static void VqshlI(CodeGenContext context, uint rd, uint rm, bool u, uint op, uint l, uint imm6, uint q)
    {
        uint size = InstEmitNeonCommon.GetSizeFromImm7(imm6 | (l << 6));
        uint shift = InstEmitNeonShift.GetShiftLeft(imm6, size);

        switch (u, op)
        {
            case (true, 0):
                InstEmitNeonCommon.EmitVectorBinaryShift(context, rd, rm, shift, size, q, isShl: true, context.Arm64Assembler.SqshluV, context.Arm64Assembler.SqshluS);
                break;
            case (false, 1):
                InstEmitNeonCommon.EmitVectorBinaryShift(context, rd, rm, shift, size, q, isShl: true, context.Arm64Assembler.SqshlImmV, context.Arm64Assembler.SqshlImmS);
                break;
            default:
                Debug.Assert(u && op == 1); // !u && op == 0 is the encoding for another instruction.
                InstEmitNeonCommon.EmitVectorBinaryShift(context, rd, rm, shift, size, q, isShl: true, context.Arm64Assembler.UqshlImmV, context.Arm64Assembler.UqshlImmS);
                break;
        }
    }

    // VQSHL (register).
    // Note the swapped rm/rn order: AArch64 SQSHL/UQSHL takes the shift amount as the second source.
    public static void VqshlR(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size, uint q)
        => InstEmitNeonCommon.EmitVectorBinary(
            context, rd, rm, rn, size, q,
            u ? context.Arm64Assembler.UqshlRegV : context.Arm64Assembler.SqshlRegV,
            u ? context.Arm64Assembler.UqshlRegS : context.Arm64Assembler.SqshlRegS);

    // VQSHRN/VQSHRUN: saturating shift right by immediate and narrow.
    public static void Vqshrn(CodeGenContext context, uint rd, uint rm, bool u, uint op, uint imm6)
    {
        uint size = InstEmitNeonCommon.GetSizeFromImm6(imm6);
        uint shift = InstEmitNeonShift.GetShiftRight(imm6, size);

        switch (u, op)
        {
            case (true, 0):
                InstEmitNeonCommon.EmitVectorBinaryNarrowShift(context, rd, rm, shift, size, isShl: false, context.Arm64Assembler.SqshrunV);
                break;
            case (false, 1):
                InstEmitNeonCommon.EmitVectorBinaryNarrowShift(context, rd, rm, shift, size, isShl: false, context.Arm64Assembler.SqshrnV);
                break;
            default:
                Debug.Assert(u && op == 1); // !u && op == 0 is the encoding for another instruction.
                InstEmitNeonCommon.EmitVectorBinaryNarrowShift(context, rd, rm, shift, size, isShl: false, context.Arm64Assembler.UqshrnV);
                break;
        }
    }

    // VQSUB: saturating subtraction.
    public static void Vqsub(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size, uint q)
        => InstEmitNeonCommon.EmitVectorBinary(
            context, rd, rn, rm, size, q,
            u ? context.Arm64Assembler.UqsubV : context.Arm64Assembler.SqsubV,
            u ? context.Arm64Assembler.UqsubS : context.Arm64Assembler.SqsubS);
}
}

View File

@@ -0,0 +1,123 @@
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
// Emitters for the AArch32 Advanced SIMD shift instructions, translated to the
// corresponding AArch64 instructions via InstEmitNeonCommon helpers.
static class InstEmitNeonShift
{
    // VSHLL: shift left long by immediate (each element is widened).
    public static void Vshll(CodeGenContext context, uint rd, uint rm, uint imm6, bool u)
    {
        uint size = InstEmitNeonCommon.GetSizeFromImm7(imm6);
        uint shift = GetShiftLeft(imm6, size);

        InstEmitNeonCommon.EmitVectorBinaryLongShift(context, rd, rm, shift, size, isShl: true, u ? context.Arm64Assembler.Ushll : context.Arm64Assembler.Sshll);
    }

    // VSHLL variant where the shift amount equals the element size.
    public static void Vshll2(CodeGenContext context, uint rd, uint rm, uint size)
    {
        // Shift can't be encoded, so shift by value - 1 first, then shift again by 1.
        // Doesn't matter if we do a signed or unsigned shift in this case since all sign bits will be shifted out.
        uint shift = 8u << (int)size;

        InstEmitNeonCommon.EmitVectorBinaryLongShift(context, rd, rm, shift - 1, size, isShl: true, context.Arm64Assembler.Sshll);
        InstEmitNeonCommon.EmitVectorBinaryLongShift(context, rd, rd, 1, size, isShl: true, context.Arm64Assembler.Sshll);
    }

    // VSHL (immediate).
    public static void VshlI(CodeGenContext context, uint rd, uint rm, uint l, uint imm6, uint q)
    {
        uint size = InstEmitNeonCommon.GetSizeFromImm7(imm6 | (l << 6));
        uint shift = GetShiftLeft(imm6, size);

        InstEmitNeonCommon.EmitVectorBinaryShift(context, rd, rm, shift, size, q, isShl: true, context.Arm64Assembler.ShlV, context.Arm64Assembler.ShlS);
    }

    // VSHL (register). Note the swapped rm/rn order: AArch64 SSHL/USHL takes
    // the shift amount as the second source operand.
    public static void VshlR(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size, uint q)
    {
        InstEmitNeonCommon.EmitVectorBinary(
            context,
            rd,
            rm,
            rn,
            size,
            q,
            u ? context.Arm64Assembler.UshlV : context.Arm64Assembler.SshlV,
            u ? context.Arm64Assembler.UshlS : context.Arm64Assembler.SshlS);
    }

    // VSHR: shift right by immediate.
    public static void Vshr(CodeGenContext context, uint rd, uint rm, bool u, uint l, uint imm6, uint q)
    {
        uint size = InstEmitNeonCommon.GetSizeFromImm7(imm6 | (l << 6));
        uint shift = GetShiftRight(imm6, size);

        InstEmitNeonCommon.EmitVectorBinaryShift(
            context,
            rd,
            rm,
            shift,
            size,
            q,
            isShl: false,
            u ? context.Arm64Assembler.UshrV : context.Arm64Assembler.SshrV,
            u ? context.Arm64Assembler.UshrS : context.Arm64Assembler.SshrS);
    }

    // VSHRN: shift right by immediate and narrow.
    public static void Vshrn(CodeGenContext context, uint rd, uint rm, uint imm6)
    {
        uint size = InstEmitNeonCommon.GetSizeFromImm6(imm6);
        uint shift = GetShiftRight(imm6, size);

        InstEmitNeonCommon.EmitVectorBinaryNarrowShift(context, rd, rm, shift, size, isShl: false, context.Arm64Assembler.Shrn);
    }

    // VSLI: shift left and insert (bits below the shift amount are preserved).
    public static void Vsli(CodeGenContext context, uint rd, uint rm, uint l, uint imm6, uint q)
    {
        uint size = InstEmitNeonCommon.GetSizeFromImm7(imm6 | (l << 6));
        uint shift = GetShiftLeft(imm6, size);

        InstEmitNeonCommon.EmitVectorBinaryShift(
            context,
            rd,
            rm,
            shift,
            size,
            q,
            isShl: true,
            context.Arm64Assembler.SliV,
            context.Arm64Assembler.SliS);
    }

    // VSRA: shift right by immediate and accumulate into Rd.
    public static void Vsra(CodeGenContext context, uint rd, uint rm, bool u, uint l, uint imm6, uint q)
    {
        uint size = InstEmitNeonCommon.GetSizeFromImm7(imm6 | (l << 6));
        uint shift = GetShiftRight(imm6, size);

        InstEmitNeonCommon.EmitVectorTernaryRdShift(
            context,
            rd,
            rm,
            shift,
            size,
            q,
            isShl: false,
            u ? context.Arm64Assembler.UsraV : context.Arm64Assembler.SsraV,
            u ? context.Arm64Assembler.UsraS : context.Arm64Assembler.SsraS);
    }

    // VSRI: shift right and insert (bits above the shift amount are preserved).
    public static void Vsri(CodeGenContext context, uint rd, uint rm, uint l, uint imm6, uint q)
    {
        uint size = InstEmitNeonCommon.GetSizeFromImm7(imm6 | (l << 6));
        uint shift = GetShiftRight(imm6, size);

        InstEmitNeonCommon.EmitVectorBinaryShift(context, rd, rm, shift, size, q, isShl: false, context.Arm64Assembler.SriV, context.Arm64Assembler.SriS);
    }

    // Decodes the left shift amount from the imm6 field: for sizes below 3 the
    // element size in bits (8 << size) is the encoding bias; 64-bit elements use imm6 as-is.
    public static uint GetShiftLeft(uint imm6, uint size)
    {
        return size < 3 ? imm6 - (8u << (int)size) : imm6;
    }

    // Decodes the right shift amount from the imm6 field: twice the element size
    // in bits (or 64 for 64-bit elements) minus imm6.
    public static uint GetShiftRight(uint imm6, uint size)
    {
        return (size == 3 ? 64u : (16u << (int)size)) - imm6;
    }
}
}

View File

@@ -0,0 +1,77 @@
using Ryujinx.Cpu.LightningJit.CodeGen;
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
// Emitters for the AArch32 floating-point system register moves (VMRS/VMSR).
static class InstEmitNeonSystem
{
    // VMRS: read a floating-point system register into Rt.
    // Rt == PC selects the "VMRS APSR_nzcv, FPSCR" form, which copies FPSCR.NZCV
    // into the PSTATE flags instead of writing a GPR.
    public static void Vmrs(CodeGenContext context, uint rt, uint reg)
    {
        if (context.ConsumeSkipNextInstruction())
        {
            // This case means that we managed to combine a VCMP and VMRS instruction,
            // so we have nothing to do here as FCMP/FCMPE already set PSTATE.NZCV.
            context.SetNzcvModified();
            return;
        }
        if (reg == 1)
        {
            // FPSCR
            Operand ctx = InstEmitSystem.Register(context.RegisterAllocator.FixedContextRegister);
            if (rt == RegisterUtils.PcRegister)
            {
                // Emulated FPSCR flags live in the native context; bits 31:28 are NZCV.
                using ScopedRegister fpsrRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();
                context.Arm64Assembler.LdrRiUn(fpsrRegister.Operand, ctx, NativeContextOffsets.FpFlagsBaseOffset);
                context.Arm64Assembler.Lsr(fpsrRegister.Operand, fpsrRegister.Operand, InstEmitCommon.Const(28));
                InstEmitCommon.RestoreNzcvFlags(context, fpsrRegister.Operand);
                context.SetNzcvModified();
            }
            else
            {
                // FPSCR is a combination of the FPCR and FPSR registers.
                // We also need to set the FPSR NZCV bits that no longer exist on AArch64.
                using ScopedRegister tempRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();
                Operand rtOperand = InstEmitCommon.GetOutputGpr(context, rt);
                context.Arm64Assembler.MrsFpsr(rtOperand);
                context.Arm64Assembler.MrsFpcr(tempRegister.Operand);
                context.Arm64Assembler.Orr(rtOperand, rtOperand, tempRegister.Operand);
                // Merge in the emulated flag bits (31:28) stored in the native context.
                context.Arm64Assembler.LdrRiUn(tempRegister.Operand, ctx, NativeContextOffsets.FpFlagsBaseOffset);
                context.Arm64Assembler.Bfc(tempRegister.Operand, 0, 28);
                context.Arm64Assembler.Orr(rtOperand, rtOperand, tempRegister.Operand);
            }
        }
        else
        {
            // Other system registers are not implemented; they read as zero.
            Operand rtOperand = InstEmitCommon.GetOutputGpr(context, rt);
            context.Arm64Assembler.Mov(rtOperand, 0u);
        }
    }
    // VMSR: write Rt to a floating-point system register.
    // Only FPSCR (reg == 1) is handled; writes to other registers are ignored.
    public static void Vmsr(CodeGenContext context, uint rt, uint reg)
    {
        if (reg == 1)
        {
            // FPSCR
            // TODO: Do not set bits related to features that are not supported (like FP16)?
            Operand ctx = InstEmitSystem.Register(context.RegisterAllocator.FixedContextRegister);
            Operand rtOperand = InstEmitCommon.GetInputGpr(context, rt);
            // Mirror the value into FPCR, FPSR and the emulated flags in the native context.
            context.Arm64Assembler.MsrFpcr(rtOperand);
            context.Arm64Assembler.MsrFpsr(rtOperand);
            context.Arm64Assembler.StrRiUn(rtOperand, ctx, NativeContextOffsets.FpFlagsBaseOffset);
        }
    }
}
}

View File

@@ -0,0 +1,452 @@
using Ryujinx.Cpu.LightningJit.CodeGen;
using Ryujinx.Cpu.LightningJit.CodeGen.Arm64;
using System.Diagnostics;
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
static class InstEmitSaturate
{
// QADD: signed saturating 32-bit addition.
public static void Qadd(CodeGenContext context, uint rd, uint rn, uint rm)
{
    EmitAddSubSaturate(context, rd, rn, rm, doubling: false, add: true);
}
// QADD16: signed saturating addition on each of the two 16-bit halves.
public static void Qadd16(CodeGenContext context, uint rd, uint rn, uint rm)
{
    InstEmitCommon.EmitSigned16BitPair(context, rd, rn, rm, (d, n, m) =>
    {
        context.Arm64Assembler.Add(d, n, m);
        // Clamp back into signed 16-bit range; this instruction does not set Q.
        EmitSaturateRange(context, d, d, 16, unsigned: false, setQ: false);
    });
}
// QADD8: signed saturating addition on each of the four 8-bit lanes.
public static void Qadd8(CodeGenContext context, uint rd, uint rn, uint rm)
{
    InstEmitCommon.EmitSigned8BitPair(context, rd, rn, rm, (d, n, m) =>
    {
        context.Arm64Assembler.Add(d, n, m);
        // Clamp back into signed 8-bit range; this instruction does not set Q.
        EmitSaturateRange(context, d, d, 8, unsigned: false, setQ: false);
    });
}
// QASX: signed saturating add/subtract with exchange on 16-bit halves
// (e is the result lane index: lane 0 is the difference, lane 1 the sum).
public static void Qasx(CodeGenContext context, uint rd, uint rn, uint rm)
{
    InstEmitCommon.EmitSigned16BitXPair(context, rd, rn, rm, (d, n, m, e) =>
    {
        if (e == 0)
        {
            context.Arm64Assembler.Sub(d, n, m);
        }
        else
        {
            context.Arm64Assembler.Add(d, n, m);
        }
        // Clamp back into signed 16-bit range; this instruction does not set Q.
        EmitSaturateRange(context, d, d, 16, unsigned: false, setQ: false);
    });
}
public static void Qdadd(CodeGenContext context, uint rd, uint rn, uint rm)
{
EmitAddSubSaturate(context, rd, rn, rm, doubling: true, add: true);
}
public static void Qdsub(CodeGenContext context, uint rd, uint rn, uint rm)
{
EmitAddSubSaturate(context, rd, rn, rm, doubling: true, add: false);
}
public static void Qsax(CodeGenContext context, uint rd, uint rn, uint rm)
{
InstEmitCommon.EmitSigned16BitXPair(context, rd, rn, rm, (d, n, m, e) =>
{
if (e == 0)
{
context.Arm64Assembler.Add(d, n, m);
}
else
{
context.Arm64Assembler.Sub(d, n, m);
}
EmitSaturateRange(context, d, d, 16, unsigned: false, setQ: false);
});
}
public static void Qsub(CodeGenContext context, uint rd, uint rn, uint rm)
{
EmitAddSubSaturate(context, rd, rn, rm, doubling: false, add: false);
}
public static void Qsub16(CodeGenContext context, uint rd, uint rn, uint rm)
{
InstEmitCommon.EmitSigned16BitPair(context, rd, rn, rm, (d, n, m) =>
{
context.Arm64Assembler.Sub(d, n, m);
EmitSaturateRange(context, d, d, 16, unsigned: false, setQ: false);
});
}
public static void Qsub8(CodeGenContext context, uint rd, uint rn, uint rm)
{
InstEmitCommon.EmitSigned8BitPair(context, rd, rn, rm, (d, n, m) =>
{
context.Arm64Assembler.Sub(d, n, m);
EmitSaturateRange(context, d, d, 8, unsigned: false, setQ: false);
});
}
public static void Ssat(CodeGenContext context, uint rd, uint imm, uint rn, bool sh, uint shift)
{
EmitSaturate(context, rd, imm + 1, rn, sh, shift, unsigned: false);
}
public static void Ssat16(CodeGenContext context, uint rd, uint imm, uint rn)
{
InstEmitCommon.EmitSigned16BitPair(context, rd, rn, (d, n) =>
{
EmitSaturateRange(context, d, n, imm + 1, unsigned: false);
});
}
public static void Uqadd16(CodeGenContext context, uint rd, uint rn, uint rm)
{
InstEmitCommon.EmitUnsigned16BitPair(context, rd, rn, rm, (d, n, m) =>
{
context.Arm64Assembler.Add(d, n, m);
EmitSaturateUnsignedRange(context, d, 16);
});
}
public static void Uqadd8(CodeGenContext context, uint rd, uint rn, uint rm)
{
InstEmitCommon.EmitUnsigned8BitPair(context, rd, rn, rm, (d, n, m) =>
{
context.Arm64Assembler.Add(d, n, m);
EmitSaturateUnsignedRange(context, d, 8);
});
}
public static void Uqasx(CodeGenContext context, uint rd, uint rn, uint rm)
{
InstEmitCommon.EmitUnsigned16BitXPair(context, rd, rn, rm, (d, n, m, e) =>
{
if (e == 0)
{
context.Arm64Assembler.Sub(d, n, m);
}
else
{
context.Arm64Assembler.Add(d, n, m);
}
EmitSaturateUnsignedRange(context, d, 16);
});
}
public static void Uqsax(CodeGenContext context, uint rd, uint rn, uint rm)
{
InstEmitCommon.EmitUnsigned16BitXPair(context, rd, rn, rm, (d, n, m, e) =>
{
if (e == 0)
{
context.Arm64Assembler.Add(d, n, m);
}
else
{
context.Arm64Assembler.Sub(d, n, m);
}
EmitSaturateUnsignedRange(context, d, 16);
});
}
public static void Uqsub16(CodeGenContext context, uint rd, uint rn, uint rm)
{
InstEmitCommon.EmitSigned16BitPair(context, rd, rn, rm, (d, n, m) =>
{
context.Arm64Assembler.Sub(d, n, m);
EmitSaturateUnsignedRange(context, d, 16);
});
}
public static void Uqsub8(CodeGenContext context, uint rd, uint rn, uint rm)
{
InstEmitCommon.EmitSigned8BitPair(context, rd, rn, rm, (d, n, m) =>
{
context.Arm64Assembler.Sub(d, n, m);
EmitSaturateUnsignedRange(context, d, 8);
});
}
public static void Usat(CodeGenContext context, uint rd, uint imm, uint rn, bool sh, uint shift)
{
EmitSaturate(context, rd, imm, rn, sh, shift, unsigned: true);
}
public static void Usat16(CodeGenContext context, uint rd, uint imm, uint rn)
{
InstEmitCommon.EmitSigned16BitPair(context, rd, rn, (d, n) =>
{
EmitSaturateRange(context, d, n, imm, unsigned: true);
});
}
private static void EmitAddSubSaturate(CodeGenContext context, uint rd, uint rn, uint rm, bool doubling, bool add)
{
Operand rdOperand = InstEmitCommon.GetOutputGpr(context, rd);
Operand rnOperand = InstEmitCommon.GetInputGpr(context, rn);
Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);
using ScopedRegister tempN = context.RegisterAllocator.AllocateTempGprRegisterScoped();
using ScopedRegister tempM = context.RegisterAllocator.AllocateTempGprRegisterScoped();
Operand tempN64 = new(OperandKind.Register, OperandType.I64, tempN.Operand.Value);
Operand tempM64 = new(OperandKind.Register, OperandType.I64, tempM.Operand.Value);
context.Arm64Assembler.Sxtw(tempN64, rnOperand);
context.Arm64Assembler.Sxtw(tempM64, rmOperand);
if (doubling)
{
context.Arm64Assembler.Lsl(tempN64, tempN64, InstEmitCommon.Const(1));
EmitSaturateLongToInt(context, tempN64, tempN64);
}
if (add)
{
context.Arm64Assembler.Add(tempN64, tempN64, tempM64);
}
else
{
context.Arm64Assembler.Sub(tempN64, tempN64, tempM64);
}
EmitSaturateLongToInt(context, rdOperand, tempN64);
}
private static void EmitSaturate(CodeGenContext context, uint rd, uint imm, uint rn, bool sh, uint shift, bool unsigned)
{
Operand rdOperand = InstEmitCommon.GetOutputGpr(context, rd);
Operand rnOperand = InstEmitCommon.GetInputGpr(context, rn);
if (sh && shift == 0)
{
shift = 31;
}
if (shift != 0)
{
using ScopedRegister tempRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();
if (sh)
{
context.Arm64Assembler.Asr(tempRegister.Operand, rnOperand, InstEmitCommon.Const((int)shift));
}
else
{
context.Arm64Assembler.Lsl(tempRegister.Operand, rnOperand, InstEmitCommon.Const((int)shift));
}
EmitSaturateRange(context, rdOperand, tempRegister.Operand, imm, unsigned);
}
else
{
EmitSaturateRange(context, rdOperand, rnOperand, imm, unsigned);
}
}
private static void EmitSaturateRange(CodeGenContext context, Operand result, Operand value, uint saturateTo, bool unsigned, bool setQ = true)
{
Debug.Assert(saturateTo <= 32);
Debug.Assert(!unsigned || saturateTo < 32);
using ScopedRegister tempRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();
ScopedRegister tempValue = default;
bool resultValueOverlap = result.Value == value.Value;
if (!unsigned && saturateTo == 32)
{
// No saturation possible for this case.
if (!resultValueOverlap)
{
context.Arm64Assembler.Mov(result, value);
}
return;
}
else if (saturateTo == 0)
{
// Result is always zero if we saturate 0 bits.
context.Arm64Assembler.Mov(result, 0u);
return;
}
if (resultValueOverlap)
{
tempValue = context.RegisterAllocator.AllocateTempGprRegisterScoped();
context.Arm64Assembler.Mov(tempValue.Operand, value);
value = tempValue.Operand;
}
if (unsigned)
{
// Negative values always saturate (to zero).
// So we must always ignore the sign bit when masking, so that the truncated value will differ from the original one.
context.Arm64Assembler.And(result, value, InstEmitCommon.Const((int)(uint.MaxValue >> (32 - (int)saturateTo))));
}
else
{
context.Arm64Assembler.Sbfx(result, value, 0, (int)saturateTo);
}
context.Arm64Assembler.Sub(tempRegister.Operand, value, result);
int branchIndex = context.CodeWriter.InstructionPointer;
// If the result is 0, the values are equal and we don't need saturation.
context.Arm64Assembler.Cbz(tempRegister.Operand, 0);
// Saturate and set Q flag.
if (unsigned)
{
if (saturateTo == 31)
{
// Only saturation case possible when going from 32 bits signed to 32 or 31 bits unsigned
// is when the signed input is negative, as all positive values are representable on a 31 bits range.
context.Arm64Assembler.Mov(result, 0u);
}
else
{
context.Arm64Assembler.Asr(result, value, InstEmitCommon.Const(31));
context.Arm64Assembler.Mvn(result, result);
context.Arm64Assembler.Lsr(result, result, InstEmitCommon.Const(32 - (int)saturateTo));
}
}
else
{
if (saturateTo == 1)
{
context.Arm64Assembler.Asr(result, value, InstEmitCommon.Const(31));
}
else
{
context.Arm64Assembler.Mov(result, uint.MaxValue >> (33 - (int)saturateTo));
context.Arm64Assembler.Eor(result, result, value, ArmShiftType.Asr, 31);
}
}
if (setQ)
{
SetQFlag(context);
}
int delta = context.CodeWriter.InstructionPointer - branchIndex;
context.CodeWriter.WriteInstructionAt(branchIndex, context.CodeWriter.ReadInstructionAt(branchIndex) | (uint)((delta & 0x7ffff) << 5));
if (resultValueOverlap)
{
tempValue.Dispose();
}
}
private static void EmitSaturateUnsignedRange(CodeGenContext context, Operand value, uint saturateTo)
{
Debug.Assert(saturateTo <= 32);
using ScopedRegister tempRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();
if (saturateTo == 32)
{
// No saturation possible for this case.
return;
}
else if (saturateTo == 0)
{
// Result is always zero if we saturate 0 bits.
context.Arm64Assembler.Mov(value, 0u);
return;
}
context.Arm64Assembler.Lsr(tempRegister.Operand, value, InstEmitCommon.Const(32 - (int)saturateTo));
int branchIndex = context.CodeWriter.InstructionPointer;
// If the result is 0, the values are equal and we don't need saturation.
context.Arm64Assembler.Cbz(tempRegister.Operand, 0);
// Saturate.
context.Arm64Assembler.Mov(value, uint.MaxValue >> (32 - (int)saturateTo));
int delta = context.CodeWriter.InstructionPointer - branchIndex;
context.CodeWriter.WriteInstructionAt(branchIndex, context.CodeWriter.ReadInstructionAt(branchIndex) | (uint)((delta & 0x7ffff) << 5));
}
private static void EmitSaturateLongToInt(CodeGenContext context, Operand result, Operand value)
{
using ScopedRegister tempRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();
ScopedRegister tempValue = default;
bool resultValueOverlap = result.Value == value.Value;
if (resultValueOverlap)
{
tempValue = context.RegisterAllocator.AllocateTempGprRegisterScoped();
Operand tempValue64 = new(OperandKind.Register, OperandType.I64, tempValue.Operand.Value);
context.Arm64Assembler.Mov(tempValue64, value);
value = tempValue64;
}
Operand temp64 = new(OperandKind.Register, OperandType.I64, tempRegister.Operand.Value);
Operand result64 = new(OperandKind.Register, OperandType.I64, result.Value);
context.Arm64Assembler.Sxtw(result64, value);
context.Arm64Assembler.Sub(temp64, value, result64);
int branchIndex = context.CodeWriter.InstructionPointer;
// If the result is 0, the values are equal and we don't need saturation.
context.Arm64Assembler.Cbz(temp64, 0);
// Saturate and set Q flag.
context.Arm64Assembler.Mov(result, uint.MaxValue >> 1);
context.Arm64Assembler.Eor(result64, result64, value, ArmShiftType.Asr, 63);
SetQFlag(context);
int delta = context.CodeWriter.InstructionPointer - branchIndex;
context.CodeWriter.WriteInstructionAt(branchIndex, context.CodeWriter.ReadInstructionAt(branchIndex) | (uint)((delta & 0x7ffff) << 5));
context.Arm64Assembler.Mov(result, result); // Zero-extend.
if (resultValueOverlap)
{
tempValue.Dispose();
}
}
public static void SetQFlag(CodeGenContext context)
{
Operand ctx = InstEmitSystem.Register(context.RegisterAllocator.FixedContextRegister);
using ScopedRegister tempRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();
context.Arm64Assembler.LdrRiUn(tempRegister.Operand, ctx, NativeContextOffsets.FlagsBaseOffset);
context.Arm64Assembler.Orr(tempRegister.Operand, tempRegister.Operand, InstEmitCommon.Const(1 << 27));
context.Arm64Assembler.StrRiUn(tempRegister.Operand, ctx, NativeContextOffsets.FlagsBaseOffset);
}
}
}

View File

@@ -0,0 +1,648 @@
using Ryujinx.Cpu.LightningJit.CodeGen;
using Ryujinx.Cpu.LightningJit.CodeGen.Arm64;
using System;
using System.Diagnostics;
using System.Numerics;
using System.Runtime.InteropServices;
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
// Emits AArch64 code for the AArch32 system instructions (MRS/MSR, MRC/MCR,
// SVC/BKPT/UDF, SETEND, ...), plus the low-level helpers used by the
// translator to call back into managed code (spill/fill, sync points).
static class InstEmitSystem
{
    // Managed callbacks invoked from generated code.
    private delegate void SoftwareInterruptHandler(ulong address, int imm);
    private delegate ulong Get64();
    private delegate bool GetBool();

    // AArch64 stack pointer register index, used for spill/fill addressing.
    private const int SpIndex = 31;

    // BKPT: deferred to a pending handler; the branch placeholder is patched later.
    public static void Bkpt(CodeGenContext context, uint imm)
    {
        context.AddPendingBkpt(imm);

        context.Arm64Assembler.B(0);
    }

    public static void Cps(CodeGenContext context, uint imod, uint m, uint a, uint i, uint f, uint mode)
    {
        // NOP in user mode.
    }

    public static void Dbg(CodeGenContext context, uint option)
    {
        // NOP in ARMv8.
    }

    // HLT: treated as a no-op.
    public static void Hlt(CodeGenContext context, uint imm)
    {
    }

    // MCR: coprocessor register write. Only CP15 c13 (thread ID registers) is
    // supported; anything else is either undefined or silently ignored.
    public static void Mcr(CodeGenContext context, uint encoding, uint coproc, uint opc1, uint rt, uint crn, uint crm, uint opc2)
    {
        if (coproc != 15 || opc1 != 0)
        {
            Udf(context, encoding, 0);

            return;
        }

        Operand ctx = Register(context.RegisterAllocator.FixedContextRegister);
        Operand rtOperand = InstEmitCommon.GetInputGpr(context, rt);

        switch (crn)
        {
            case 13: // Process and Thread Info.
                if (crm == 0)
                {
                    switch (opc2)
                    {
                        case 2:
                            // TPIDRURW write.
                            context.Arm64Assembler.StrRiUn(rtOperand, ctx, NativeContextOffsets.TpidrEl0Offset);
                            return;
                    }
                }
                break;
        }
    }

    // MCRR: 64-bit coprocessor register write; no supported targets.
    public static void Mcrr(CodeGenContext context, uint encoding, uint coproc, uint opc1, uint rt, uint crm)
    {
        if (coproc != 15 || opc1 != 0)
        {
            Udf(context, encoding, 0);

            return;
        }

        // We don't have any system register that needs to be modified using a 64-bit value.
    }

    // MRC: coprocessor register read. Supports the CP15 c13 thread ID
    // registers; unknown registers read as zero. Rt == 15 transfers the top
    // bits of the value into the NZCV flags instead of writing a GPR.
    public static void Mrc(CodeGenContext context, uint encoding, uint coproc, uint opc1, uint rt, uint crn, uint crm, uint opc2)
    {
        if (coproc != 15 || opc1 != 0)
        {
            Udf(context, encoding, 0);

            return;
        }

        Operand ctx = Register(context.RegisterAllocator.FixedContextRegister);
        Operand rtOperand = InstEmitCommon.GetInputGpr(context, rt);

        bool hasValue = false;

        using ScopedRegister tempRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        Operand dest = rt == RegisterUtils.PcRegister ? tempRegister.Operand : rtOperand;

        switch (crn)
        {
            case 13: // Process and Thread Info.
                if (crm == 0)
                {
                    switch (opc2)
                    {
                        case 2:
                            // TPIDRURW read.
                            context.Arm64Assembler.LdrRiUn(dest, ctx, NativeContextOffsets.TpidrEl0Offset);
                            hasValue = true;
                            break;
                        case 3:
                            // TPIDRURO read.
                            context.Arm64Assembler.LdrRiUn(dest, ctx, NativeContextOffsets.TpidrroEl0Offset);
                            hasValue = true;
                            break;
                    }
                }
                break;
        }

        if (rt == RegisterUtils.PcRegister)
        {
            context.Arm64Assembler.MsrNzcv(dest);
            context.SetNzcvModified();
        }
        else if (!hasValue)
        {
            context.Arm64Assembler.Mov(dest, 0u);
        }
    }

    // MRRC: 64-bit coprocessor register read. CNTPCT (CRm = 14, opc1 = 0) is
    // handled through a deferred pending handler; anything else reads zero.
    public static void Mrrc(CodeGenContext context, uint encoding, uint coproc, uint opc1, uint rt, uint rt2, uint crm)
    {
        if (coproc != 15)
        {
            Udf(context, encoding, 0);

            return;
        }

        switch (crm)
        {
            case 14:
                switch (opc1)
                {
                    case 0:
                        context.AddPendingReadCntpct(rt, rt2);

                        context.Arm64Assembler.B(0);
                        return;
                }
                break;
        }

        // Unsupported system register.
        context.Arm64Assembler.Mov(InstEmitCommon.GetOutputGpr(context, rt), 0u);
        context.Arm64Assembler.Mov(InstEmitCommon.GetOutputGpr(context, rt2), 0u);
    }

    // MRS: reads the APSR (or SPSR, which is unpredictable in user mode and
    // reads as zero). Builds the value from the stored flags word plus the
    // live host NZCV.
    public static void Mrs(CodeGenContext context, uint rd, bool r)
    {
        Operand rdOperand = InstEmitCommon.GetOutputGpr(context, rd);

        if (r)
        {
            // Reads SPSR, unpredictable in user mode.

            context.Arm64Assembler.Mov(rdOperand, 0u);
        }
        else
        {
            Operand ctx = Register(context.RegisterAllocator.FixedContextRegister);

            using ScopedRegister tempRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();

            context.Arm64Assembler.LdrRiUn(tempRegister.Operand, ctx, NativeContextOffsets.FlagsBaseOffset);

            // Copy GE flags to destination register.
            // NOTE(review): UBFX places the extracted GE bits at Rd[3:0],
            // while APSR defines GE at bits [19:16] (where MsrI/MsrR store
            // them) — confirm this placement is intended.
            context.Arm64Assembler.Ubfx(rdOperand, tempRegister.Operand, 16, 4);

            // Insert Q flag.
            context.Arm64Assembler.And(tempRegister.Operand, tempRegister.Operand, InstEmitCommon.Const(1 << 27));
            context.Arm64Assembler.Orr(rdOperand, rdOperand, tempRegister.Operand);

            // Insert NZCV flags.
            context.Arm64Assembler.MrsNzcv(tempRegister.Operand);
            context.Arm64Assembler.Orr(rdOperand, rdOperand, tempRegister.Operand);

            // All other flags can't be accessed in user mode or have "unknown" values.
        }
    }

    public static void MrsBr(CodeGenContext context, uint rd, uint m1, bool r)
    {
        Operand rdOperand = InstEmitCommon.GetOutputGpr(context, rd);

        // Reads banked register, unpredictable in user mode.
        context.Arm64Assembler.Mov(rdOperand, 0u);
    }

    public static void MsrBr(CodeGenContext context, uint rn, uint m1, bool r)
    {
        // Writes banked register, unpredictable in user mode.
    }

    // MSR (immediate): updates the selected APSR fields (E, GE, NZCVQ) from
    // an immediate. mask selects which fields are written.
    public static void MsrI(CodeGenContext context, uint imm, uint mask, bool r)
    {
        if (r)
        {
            // Writes SPSR, unpredictable in user mode.
        }
        else
        {
            Operand ctx = Register(context.RegisterAllocator.FixedContextRegister);

            using ScopedRegister tempRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();
            using ScopedRegister tempRegister2 = context.RegisterAllocator.AllocateTempGprRegisterScoped();

            context.Arm64Assembler.LdrRiUn(tempRegister.Operand, ctx, NativeContextOffsets.FlagsBaseOffset);

            // NOTE(review): tempRegister is updated below but never stored
            // back to FlagsBaseOffset (compare Setend, which does) — confirm
            // whether a StrRiUn is missing here.

            if ((mask & 2) != 0)
            {
                // Endian flag.
                context.Arm64Assembler.Mov(tempRegister2.Operand, (imm >> 9) & 1);
                context.Arm64Assembler.Bfi(tempRegister.Operand, tempRegister2.Operand, 9, 1);
            }

            if ((mask & 4) != 0)
            {
                // GE flags.
                context.Arm64Assembler.Mov(tempRegister2.Operand, (imm >> 16) & 0xf);
                context.Arm64Assembler.Bfi(tempRegister.Operand, tempRegister2.Operand, 16, 4);
            }

            if ((mask & 8) != 0)
            {
                // NZCVQ flags.
                context.Arm64Assembler.Mov(tempRegister2.Operand, (imm >> 27) & 0x1f);
                context.Arm64Assembler.Bfi(tempRegister.Operand, tempRegister2.Operand, 27, 5);

                // Also load NZCV into the live host flags.
                context.Arm64Assembler.Mov(tempRegister2.Operand, (imm >> 28) & 0xf);
                InstEmitCommon.RestoreNzcvFlags(context, tempRegister2.Operand);
                context.SetNzcvModified();
            }
        }
    }

    // MSR (register): same as MsrI but the fields come from a GPR.
    public static void MsrR(CodeGenContext context, uint rn, uint mask, bool r)
    {
        Operand rnOperand = InstEmitCommon.GetInputGpr(context, rn);

        if (r)
        {
            // Writes SPSR, unpredictable in user mode.
        }
        else
        {
            Operand ctx = Register(context.RegisterAllocator.FixedContextRegister);

            using ScopedRegister tempRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();
            using ScopedRegister tempRegister2 = context.RegisterAllocator.AllocateTempGprRegisterScoped();

            context.Arm64Assembler.LdrRiUn(tempRegister.Operand, ctx, NativeContextOffsets.FlagsBaseOffset);

            // NOTE(review): as in MsrI, the updated flags word is never
            // stored back to memory — confirm.

            if ((mask & 2) != 0)
            {
                // Endian flag.
                context.Arm64Assembler.Lsr(tempRegister2.Operand, rnOperand, InstEmitCommon.Const(9));
                context.Arm64Assembler.Bfi(tempRegister.Operand, tempRegister2.Operand, 9, 1);
            }

            if ((mask & 4) != 0)
            {
                // GE flags.
                context.Arm64Assembler.Lsr(tempRegister2.Operand, rnOperand, InstEmitCommon.Const(16));
                context.Arm64Assembler.Bfi(tempRegister.Operand, tempRegister2.Operand, 16, 4);
            }

            if ((mask & 8) != 0)
            {
                // NZCVQ flags.
                context.Arm64Assembler.Lsr(tempRegister2.Operand, rnOperand, InstEmitCommon.Const(27));
                context.Arm64Assembler.Bfi(tempRegister.Operand, tempRegister2.Operand, 27, 5);

                // Also load NZCV into the live host flags.
                context.Arm64Assembler.Lsr(tempRegister2.Operand, rnOperand, InstEmitCommon.Const(28));
                InstEmitCommon.RestoreNzcvFlags(context, tempRegister2.Operand);
                context.SetNzcvModified();
            }
        }
    }

    // SETEND: updates the E bit (bit 9) of the stored flags word.
    public static void Setend(CodeGenContext context, bool e)
    {
        Operand ctx = Register(context.RegisterAllocator.FixedContextRegister);

        using ScopedRegister tempRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        context.Arm64Assembler.LdrRiUn(tempRegister.Operand, ctx, NativeContextOffsets.FlagsBaseOffset);

        if (e)
        {
            context.Arm64Assembler.Orr(tempRegister.Operand, tempRegister.Operand, InstEmitCommon.Const(1 << 9));
        }
        else
        {
            context.Arm64Assembler.Bfc(tempRegister.Operand, 9, 1);
        }

        context.Arm64Assembler.StrRiUn(tempRegister.Operand, ctx, NativeContextOffsets.FlagsBaseOffset);
    }

    // SVC: deferred to a pending handler; branch placeholder patched later.
    public static void Svc(CodeGenContext context, uint imm)
    {
        context.AddPendingSvc(imm);

        context.Arm64Assembler.B(0);
    }

    // UDF: deferred to a pending handler; branch placeholder patched later.
    public static void Udf(CodeGenContext context, uint encoding, uint imm)
    {
        context.AddPendingUdf(encoding);

        context.Arm64Assembler.B(0);
    }

    // Privileged instructions behave as undefined in user mode.
    public static void PrivilegedInstruction(CodeGenContext context, uint encoding)
    {
        Udf(context, encoding, 0);
    }

    private static IntPtr GetBkptHandlerPtr()
    {
        return Marshal.GetFunctionPointerForDelegate<SoftwareInterruptHandler>(NativeInterface.Break);
    }

    private static IntPtr GetSvcHandlerPtr()
    {
        return Marshal.GetFunctionPointerForDelegate<SoftwareInterruptHandler>(NativeInterface.SupervisorCall);
    }

    private static IntPtr GetUdfHandlerPtr()
    {
        return Marshal.GetFunctionPointerForDelegate<SoftwareInterruptHandler>(NativeInterface.Undefined);
    }

    private static IntPtr GetCntpctEl0Ptr()
    {
        return Marshal.GetFunctionPointerForDelegate<Get64>(NativeInterface.GetCntpctEl0);
    }

    private static IntPtr CheckSynchronizationPtr()
    {
        return Marshal.GetFunctionPointerForDelegate<GetBool>(NativeInterface.CheckSynchronization);
    }

    // Returns true when the instruction may call back into managed code.
    public static bool NeedsCall(InstName name)
    {
        // All instructions that might do a host call should be included here.
        // That is required to reserve space on the stack for caller saved registers.

        switch (name)
        {
            case InstName.Mcr:
            case InstName.Mrc:
            case InstName.Mrrc:
            case InstName.Svc:
            case InstName.Udf:
                return true;
        }

        return false;
    }

    // Emits the out-of-line BKPT handler call followed by a sync point.
    public static void WriteBkpt(CodeWriter writer, RegisterAllocator regAlloc, TailMerger tailMerger, int spillBaseOffset, uint pc, uint imm)
    {
        Assembler asm = new(writer);

        WriteCall(ref asm, regAlloc, GetBkptHandlerPtr(), skipContext: true, spillBaseOffset, null, pc, imm);
        WriteSyncPoint(writer, ref asm, regAlloc, tailMerger, skipContext: true, spillBaseOffset);
    }

    // Emits the out-of-line SVC handler call followed by a sync point.
    public static void WriteSvc(CodeWriter writer, RegisterAllocator regAlloc, TailMerger tailMerger, int spillBaseOffset, uint pc, uint svcId)
    {
        Assembler asm = new(writer);

        WriteCall(ref asm, regAlloc, GetSvcHandlerPtr(), skipContext: true, spillBaseOffset, null, pc, svcId);
        WriteSyncPoint(writer, ref asm, regAlloc, tailMerger, skipContext: true, spillBaseOffset);
    }

    // Emits the out-of-line UDF handler call followed by a sync point.
    public static void WriteUdf(CodeWriter writer, RegisterAllocator regAlloc, TailMerger tailMerger, int spillBaseOffset, uint pc, uint imm)
    {
        Assembler asm = new(writer);

        WriteCall(ref asm, regAlloc, GetUdfHandlerPtr(), skipContext: true, spillBaseOffset, null, pc, imm);
        WriteSyncPoint(writer, ref asm, regAlloc, tailMerger, skipContext: true, spillBaseOffset);
    }

    // Emits the out-of-line CNTPCT read: calls the managed getter and splits
    // the 64-bit result into the two destination registers.
    public static void WriteReadCntpct(CodeWriter writer, RegisterAllocator regAlloc, int spillBaseOffset, int rt, int rt2)
    {
        Assembler asm = new(writer);

        uint resultMask = (1u << rt) | (1u << rt2);
        int tempRegister = 0;

        // Pick the lowest register that is not a destination as the scratch.
        while ((resultMask & (1u << tempRegister)) != 0 && tempRegister < 32)
        {
            tempRegister++;
        }

        Debug.Assert(tempRegister < 32);

        WriteSpill(ref asm, regAlloc, resultMask, skipContext: false, spillBaseOffset, tempRegister);

        Operand rn = Register(tempRegister);

        asm.Mov(rn, (ulong)GetCntpctEl0Ptr());
        asm.Blr(rn);

        if (rt != rt2)
        {
            // High half into rt2, low half into rt.
            // NOTE(review): if rt2 == 0 this LSR clobbers X0 before the low
            // half is copied out below — confirm the allocator never maps a
            // destination onto register 0 here.
            asm.Lsr(Register(rt2), Register(0), InstEmitCommon.Const(32));
        }

        asm.Mov(Register(rt, OperandType.I32), Register(0, OperandType.I32)); // Zero-extend.

        WriteFill(ref asm, regAlloc, resultMask, skipContext: false, spillBaseOffset, tempRegister);
    }

    public static void WriteSyncPoint(CodeWriter writer, RegisterAllocator regAlloc, TailMerger tailMerger, int spillBaseOffset)
    {
        Assembler asm = new(writer);

        WriteSyncPoint(writer, ref asm, regAlloc, tailMerger, skipContext: false, spillBaseOffset);
    }

    // Emits a sync point: decrements the counter, and when it has reached
    // zero, calls CheckSynchronization (returning to the dispatcher if it
    // requests an exit).
    private static void WriteSyncPoint(CodeWriter writer, ref Assembler asm, RegisterAllocator regAlloc, TailMerger tailMerger, bool skipContext, int spillBaseOffset)
    {
        int tempRegister = regAlloc.AllocateTempGprRegister();

        Operand rt = Register(tempRegister, OperandType.I32);

        asm.LdrRiUn(rt, Register(regAlloc.FixedContextRegister), NativeContextOffsets.CounterOffset);

        int branchIndex = writer.InstructionPointer;

        // Counter still positive: skip the synchronization call (CBNZ is
        // patched below to land on the decrement).
        asm.Cbnz(rt, 0);

        WriteSpill(ref asm, regAlloc, 1u << tempRegister, skipContext, spillBaseOffset, tempRegister);

        Operand rn = Register(tempRegister == 0 ? 1 : 0);

        asm.Mov(rn, (ulong)CheckSynchronizationPtr());
        asm.Blr(rn);

        // A false return value requests an exit back to the dispatcher.
        tailMerger.AddConditionalZeroReturn(writer, asm, Register(0, OperandType.I32));

        WriteFill(ref asm, regAlloc, 1u << tempRegister, skipContext, spillBaseOffset, tempRegister);

        // Reload the counter, since the call may have changed it.
        asm.LdrRiUn(rt, Register(regAlloc.FixedContextRegister), NativeContextOffsets.CounterOffset);

        // Patch the CBNZ imm19 (bits [23:5]) to branch to the decrement below.
        uint branchInst = writer.ReadInstructionAt(branchIndex);
        writer.WriteInstructionAt(branchIndex, branchInst | (((uint)(writer.InstructionPointer - branchIndex) & 0x7ffff) << 5));

        asm.Sub(rt, rt, new Operand(OperandKind.Constant, OperandType.I32, 1));
        asm.StrRiUn(rt, Register(regAlloc.FixedContextRegister), NativeContextOffsets.CounterOffset);

        regAlloc.FreeTempGprRegister(tempRegister);
    }

    // Spills live caller-saved registers, loads the integer arguments,
    // calls funcPtr, moves the result if requested, then fills back.
    private static void WriteCall(
        ref Assembler asm,
        RegisterAllocator regAlloc,
        IntPtr funcPtr,
        bool skipContext,
        int spillBaseOffset,
        int? resultRegister,
        params ulong[] callArgs)
    {
        uint resultMask = 0u;

        if (resultRegister.HasValue)
        {
            resultMask = 1u << resultRegister.Value;
        }

        // Pick the first register after the argument registers (skipping the
        // result register) to hold the function address.
        int tempRegister = callArgs.Length;

        if (resultRegister.HasValue && tempRegister == resultRegister.Value)
        {
            tempRegister++;
        }

        WriteSpill(ref asm, regAlloc, resultMask, skipContext, spillBaseOffset, tempRegister);

        // We only support up to 7 arguments right now.
        // ABI defines the first 8 integer arguments to be passed on registers X0-X7.
        // We need at least one register to put the function address on, so that reduces the number of
        // registers we can use for that by one.

        Debug.Assert(callArgs.Length < 8);

        for (int index = 0; index < callArgs.Length; index++)
        {
            asm.Mov(Register(index), callArgs[index]);
        }

        Operand rn = Register(tempRegister);

        asm.Mov(rn, (ulong)funcPtr);
        asm.Blr(rn);

        if (resultRegister.HasValue && resultRegister.Value != 0)
        {
            asm.Mov(Register(resultRegister.Value), Register(0));
        }

        WriteFill(ref asm, regAlloc, resultMask, skipContext, spillBaseOffset, tempRegister);
    }

    private static void WriteSpill(ref Assembler asm, RegisterAllocator regAlloc, uint exceptMask, bool skipContext, int spillOffset, int tempRegister)
    {
        WriteSpillOrFill(ref asm, regAlloc, skipContext, exceptMask, spillOffset, tempRegister, spill: true);
    }

    private static void WriteFill(ref Assembler asm, RegisterAllocator regAlloc, uint exceptMask, bool skipContext, int spillOffset, int tempRegister)
    {
        WriteSpillOrFill(ref asm, regAlloc, skipContext, exceptMask, spillOffset, tempRegister, spill: false);
    }

    // Saves (spill) or restores (fill) the in-use caller-saved GPRs, the
    // NZCV flags and the in-use FP/SIMD registers to/from the stack frame
    // starting at spillOffset. Stack layout: GPRs, then one 8-byte NZCV slot,
    // then 16-byte-aligned vector registers.
    private static void WriteSpillOrFill(
        ref Assembler asm,
        RegisterAllocator regAlloc,
        bool skipContext,
        uint exceptMask,
        int spillOffset,
        int tempRegister,
        bool spill)
    {
        uint gprMask = regAlloc.UsedGprsMask & ~(AbiConstants.GprCalleeSavedRegsMask | exceptMask);

        if (skipContext)
        {
            // Guest-context registers are already in sync, so they don't
            // need to be preserved across the call.
            gprMask &= ~Compiler.UsableGprsMask;
        }

        if (!spill)
        {
            // We must reload the status register before reloading the GPRs,
            // since we might otherwise trash one of them by using it as temp register.
            // The NZCV slot sits right after all the spilled GPRs.
            Operand rt = Register(tempRegister, OperandType.I32);

            asm.LdrRiUn(rt, Register(SpIndex), spillOffset + BitOperations.PopCount(gprMask) * 8);
            asm.MsrNzcv(rt);
        }

        while (gprMask != 0)
        {
            int reg = BitOperations.TrailingZeroCount(gprMask);

            // Use STP/LDP when the next register is also live and the offset
            // is still encodable for the pair form.
            if (reg < 31 && (gprMask & (2u << reg)) != 0 && spillOffset < RegisterSaveRestore.Encodable9BitsOffsetLimit)
            {
                if (spill)
                {
                    asm.StpRiUn(Register(reg), Register(reg + 1), Register(SpIndex), spillOffset);
                }
                else
                {
                    asm.LdpRiUn(Register(reg), Register(reg + 1), Register(SpIndex), spillOffset);
                }

                gprMask &= ~(3u << reg);
                spillOffset += 16;
            }
            else
            {
                if (spill)
                {
                    asm.StrRiUn(Register(reg), Register(SpIndex), spillOffset);
                }
                else
                {
                    asm.LdrRiUn(Register(reg), Register(SpIndex), spillOffset);
                }

                gprMask &= ~(1u << reg);
                spillOffset += 8;
            }
        }

        if (spill)
        {
            // Save NZCV into the slot right after the GPRs (the fill path
            // reads it back before the GPR loop above).
            Operand rt = Register(tempRegister, OperandType.I32);

            asm.MrsNzcv(rt);
            asm.StrRiUn(rt, Register(SpIndex), spillOffset);
        }

        spillOffset += 8;

        // Align to 16 bytes for the vector register stores.
        if ((spillOffset & 8) != 0)
        {
            spillOffset += 8;
        }

        uint fpSimdMask = regAlloc.UsedFpSimdMask;

        if (skipContext)
        {
            fpSimdMask &= ~Compiler.UsableFpSimdMask;
        }

        while (fpSimdMask != 0)
        {
            int reg = BitOperations.TrailingZeroCount(fpSimdMask);

            if (reg < 31 && (fpSimdMask & (2u << reg)) != 0 && spillOffset < RegisterSaveRestore.Encodable9BitsOffsetLimit)
            {
                if (spill)
                {
                    asm.StpRiUn(Register(reg, OperandType.V128), Register(reg + 1, OperandType.V128), Register(SpIndex), spillOffset);
                }
                else
                {
                    asm.LdpRiUn(Register(reg, OperandType.V128), Register(reg + 1, OperandType.V128), Register(SpIndex), spillOffset);
                }

                fpSimdMask &= ~(3u << reg);
                spillOffset += 32;
            }
            else
            {
                if (spill)
                {
                    asm.StrRiUn(Register(reg, OperandType.V128), Register(SpIndex), spillOffset);
                }
                else
                {
                    asm.LdrRiUn(Register(reg, OperandType.V128), Register(SpIndex), spillOffset);
                }

                fpSimdMask &= ~(1u << reg);
                spillOffset += 16;
            }
        }
    }

    // Builds an integer register operand (defaults to the 64-bit view).
    public static Operand Register(int register, OperandType type = OperandType.I64)
    {
        return new Operand(register, RegisterType.Integer, type);
    }
}
}

View File

@@ -0,0 +1,95 @@
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
// Emits AArch64 scalar floating-point code for the AArch32 VFP arithmetic
// instructions. Every emitter delegates to an InstEmitNeonCommon helper,
// passing the matching AArch64 scalar instruction as the emit callback.
static class InstEmitVfpArithmetic
{
    // VABS: floating-point absolute value.
    public static void VabsF(CodeGenContext context, uint rd, uint rm, uint size) =>
        InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.FabsFloat);

    // VADD: floating-point add.
    public static void VaddF(CodeGenContext context, uint rd, uint rn, uint rm, uint size) =>
        InstEmitNeonCommon.EmitScalarBinaryF(context, rd, rn, rm, size, context.Arm64Assembler.FaddFloat);

    // VDIV: floating-point divide.
    public static void VdivF(CodeGenContext context, uint rd, uint rn, uint rm, uint size) =>
        InstEmitNeonCommon.EmitScalarBinaryF(context, rd, rn, rm, size, context.Arm64Assembler.FdivFloat);

    // VFMA: fused multiply-add (Rd + Rn * Rm).
    public static void VfmaF(CodeGenContext context, uint rd, uint rn, uint rm, uint size) =>
        InstEmitNeonCommon.EmitScalarTernaryRdF(context, rd, rn, rm, size, context.Arm64Assembler.FmaddFloat);

    // VFMS: fused multiply-subtract (Rd - Rn * Rm).
    public static void VfmsF(CodeGenContext context, uint rd, uint rn, uint rm, uint size) =>
        InstEmitNeonCommon.EmitScalarTernaryRdF(context, rd, rn, rm, size, context.Arm64Assembler.FmsubFloat);

    // VFNMA: fused negated multiply-add.
    public static void VfnmaF(CodeGenContext context, uint rd, uint rn, uint rm, uint size) =>
        InstEmitNeonCommon.EmitScalarTernaryRdF(context, rd, rn, rm, size, context.Arm64Assembler.FnmaddFloat);

    // VFNMS: fused negated multiply-subtract.
    public static void VfnmsF(CodeGenContext context, uint rd, uint rn, uint rm, uint size) =>
        InstEmitNeonCommon.EmitScalarTernaryRdF(context, rd, rn, rm, size, context.Arm64Assembler.FnmsubFloat);

    // VMAXNM: IEEE 754-2008 maxNum.
    public static void Vmaxnm(CodeGenContext context, uint rd, uint rn, uint rm, uint size) =>
        InstEmitNeonCommon.EmitScalarBinaryF(context, rd, rn, rm, size, context.Arm64Assembler.FmaxnmFloat);

    // VMINNM: IEEE 754-2008 minNum.
    public static void Vminnm(CodeGenContext context, uint rd, uint rn, uint rm, uint size) =>
        InstEmitNeonCommon.EmitScalarBinaryF(context, rd, rn, rm, size, context.Arm64Assembler.FminnmFloat);

    // VMLA: multiply-accumulate (non-fused).
    public static void VmlaF(CodeGenContext context, uint rd, uint rn, uint rm, uint size) =>
        InstEmitNeonCommon.EmitScalarTernaryMulNegRdF(context, rd, rn, rm, size, negD: false, negProduct: false);

    // VMLS: multiply-subtract (non-fused, negated product).
    public static void VmlsF(CodeGenContext context, uint rd, uint rn, uint rm, uint size) =>
        InstEmitNeonCommon.EmitScalarTernaryMulNegRdF(context, rd, rn, rm, size, negD: false, negProduct: true);

    // VMUL: floating-point multiply.
    public static void VmulF(CodeGenContext context, uint rd, uint rn, uint rm, uint size) =>
        InstEmitNeonCommon.EmitScalarBinaryF(context, rd, rn, rm, size, context.Arm64Assembler.FmulFloat);

    // VNEG: floating-point negate.
    public static void VnegF(CodeGenContext context, uint rd, uint rm, uint size) =>
        InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.FnegFloat);

    // VNMLA: negated multiply-accumulate (both accumulator and product negated).
    public static void VnmlaF(CodeGenContext context, uint rd, uint rn, uint rm, uint size) =>
        InstEmitNeonCommon.EmitScalarTernaryMulNegRdF(context, rd, rn, rm, size, negD: true, negProduct: true);

    // VNMLS: negated multiply-subtract (accumulator negated only).
    public static void VnmlsF(CodeGenContext context, uint rd, uint rn, uint rm, uint size) =>
        InstEmitNeonCommon.EmitScalarTernaryMulNegRdF(context, rd, rn, rm, size, negD: true, negProduct: false);

    // VNMUL: negated floating-point multiply.
    public static void VnmulF(CodeGenContext context, uint rd, uint rn, uint rm, uint size) =>
        InstEmitNeonCommon.EmitScalarBinaryF(context, rd, rn, rm, size, context.Arm64Assembler.FnmulFloat);

    // VSQRT: floating-point square root.
    public static void VsqrtF(CodeGenContext context, uint rd, uint rm, uint size) =>
        InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.FsqrtFloat);

    // VSUB: floating-point subtract.
    public static void VsubF(CodeGenContext context, uint rd, uint rn, uint rm, uint size) =>
        InstEmitNeonCommon.EmitScalarBinaryF(context, rd, rn, rm, size, context.Arm64Assembler.FsubFloat);
}
}

View File

@@ -0,0 +1,133 @@
using Ryujinx.Cpu.LightningJit.CodeGen;
using System;
using System.Diagnostics;
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
static class InstEmitVfpCompare
{
// VCMP (immediate form): non-signaling compare of the Rd scalar against +0.0.
public static void VcmpI(CodeGenContext context, uint cond, uint rd, uint size) =>
    EmitVcmpVcmpe(context, cond, rd, 0, size, zero: true, e: false);
// VCMP (register form): non-signaling compare of the Rd and Rm scalars.
public static void VcmpR(CodeGenContext context, uint cond, uint rd, uint rm, uint size) =>
    EmitVcmpVcmpe(context, cond, rd, rm, size, zero: false, e: false);
// VCMPE (immediate form): signaling compare of the Rd scalar against +0.0.
public static void VcmpeI(CodeGenContext context, uint cond, uint rd, uint size) =>
    EmitVcmpVcmpe(context, cond, rd, 0, size, zero: true, e: true);
public static void VcmpeR(CodeGenContext context, uint cond, uint rd, uint rm, uint size)
{
EmitVcmpVcmpe(context, cond, rd, rm, size, zero: false, e: true);
}
private static void EmitVcmpVcmpe(CodeGenContext context, uint cond, uint rd, uint rm, uint size, bool zero, bool e)
{
Debug.Assert(size == 1 || size == 2 || size == 3);
bool singleRegs = size != 3;
uint ftype = size ^ 2u;
uint opc = zero ? 1u : 0u;
using ScopedRegister rdReg = InstEmitNeonCommon.MoveScalarToSide(context, rd, singleRegs);
ScopedRegister rmReg;
Operand rmOrZero;
if (zero)
{
rmReg = default;
rmOrZero = new Operand(0, RegisterType.Vector, OperandType.V128);
}
else
{
rmReg = InstEmitNeonCommon.MoveScalarToSide(context, rm, singleRegs);
rmOrZero = rmReg.Operand;
}
using ScopedRegister oldFlags = context.RegisterAllocator.AllocateTempGprRegisterScoped();
bool canPeepholeOptimize = CanFuseVcmpVmrs(context, cond);
if (!canPeepholeOptimize)
{
InstEmitCommon.GetCurrentFlags(context, oldFlags.Operand);
}
if (e)
{
context.Arm64Assembler.FcmpeFloat(rdReg.Operand, rmOrZero, opc, ftype);
}
else
{
context.Arm64Assembler.FcmpFloat(rdReg.Operand, rmOrZero, opc, ftype);
}
// Save result flags from the FCMP operation on FPSCR register, then restore the old flags if needed.
WriteUpdateFpsrNzcv(context);
if (!canPeepholeOptimize)
{
InstEmitCommon.RestoreNzcvFlags(context, oldFlags.Operand);
}
if (!zero)
{
rmReg.Dispose();
}
}
private static void WriteUpdateFpsrNzcv(CodeGenContext context)
{
using ScopedRegister fpsrRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();
using ScopedRegister flagsRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();
Operand ctx = InstEmitSystem.Register(context.RegisterAllocator.FixedContextRegister);
context.Arm64Assembler.LdrRiUn(fpsrRegister.Operand, ctx, NativeContextOffsets.FpFlagsBaseOffset);
InstEmitCommon.GetCurrentFlags(context, flagsRegister.Operand);
context.Arm64Assembler.Bfi(fpsrRegister.Operand, flagsRegister.Operand, 28, 4);
context.Arm64Assembler.StrRiUn(fpsrRegister.Operand, ctx, NativeContextOffsets.FpFlagsBaseOffset);
}
private static bool CanFuseVcmpVmrs(CodeGenContext context, uint vcmpCond)
{
// Conditions might be different for the VCMP and VMRS instructions if they are inside a IT block,
// we don't bother to check right now, so just always skip if inside an IT block.
if (context.InITBlock)
{
return false;
}
InstInfo nextInfo = context.PeekNextInstruction();
// We're looking for a VMRS instructions.
if (nextInfo.Name != InstName.Vmrs)
{
return false;
}
// Conditions must match.
if (vcmpCond != (nextInfo.Encoding >> 28))
{
return false;
}
// Reg must be 1, Rt must be PC indicating VMRS to PSTATE.NZCV.
if (((nextInfo.Encoding >> 16) & 0xf) != 1 || ((nextInfo.Encoding >> 12) & 0xf) != RegisterUtils.PcRegister)
{
return false;
}
context.SetSkipNextInstruction();
return true;
}
}
}

View File

@@ -0,0 +1,305 @@
using System;
using System.Diagnostics;
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
/// <summary>
/// Emits AArch64 code for the A32 VFP conversion instructions (VCVT family), mapping them
/// to the corresponding AArch64 FCVT*/SCVTF/UCVTF scalar instructions.
/// </summary>
static class InstEmitVfpConvert
{
    /// <summary>
    /// VCVTA: float to integer, rounding to nearest with ties away from zero (AArch64 FCVTAS/FCVTAU).
    /// "op" selects a signed (true) or unsigned (false) destination.
    /// </summary>
    public static void Vcvta(CodeGenContext context, uint rd, uint rm, bool op, uint size)
    {
        if (size != 3)
        {
            // Half and single precision are converted directly on the SIMD side.
            if (op)
            {
                InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.FcvtasS, context.Arm64Assembler.FcvtasSH);
            }
            else
            {
                InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.FcvtauS, context.Arm64Assembler.FcvtauSH);
            }
        }
        else if (op)
        {
            // F64 -> S32/U32 conversion on SIMD is not supported, so we convert it to a GPR, then insert it back into the SIMD register.
            InstEmitNeonCommon.EmitScalarUnaryToGprTempF(context, rd, rm, size, 0, context.Arm64Assembler.FcvtasFloat);
        }
        else
        {
            InstEmitNeonCommon.EmitScalarUnaryToGprTempF(context, rd, rm, size, 0, context.Arm64Assembler.FcvtauFloat);
        }
    }

    /// <summary>VCVTB: converts to/from the bottom (low) half-precision element.</summary>
    public static void Vcvtb(CodeGenContext context, uint rd, uint rm, uint sz, uint op) =>
        EmitVcvtbVcvtt(context, rd, rm, sz, op, top: false);

    /// <summary>
    /// VCVTM: float to integer, rounding toward minus infinity (AArch64 FCVTMS/FCVTMU).
    /// "op" selects a signed (true) or unsigned (false) destination.
    /// </summary>
    public static void Vcvtm(CodeGenContext context, uint rd, uint rm, bool op, uint size)
    {
        if (size != 3)
        {
            // Half and single precision are converted directly on the SIMD side.
            if (op)
            {
                InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.FcvtmsS, context.Arm64Assembler.FcvtmsSH);
            }
            else
            {
                InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.FcvtmuS, context.Arm64Assembler.FcvtmuSH);
            }
        }
        else if (op)
        {
            // F64 -> S32/U32 conversion on SIMD is not supported, so we convert it to a GPR, then insert it back into the SIMD register.
            InstEmitNeonCommon.EmitScalarUnaryToGprTempF(context, rd, rm, size, 0, context.Arm64Assembler.FcvtmsFloat);
        }
        else
        {
            InstEmitNeonCommon.EmitScalarUnaryToGprTempF(context, rd, rm, size, 0, context.Arm64Assembler.FcvtmuFloat);
        }
    }

    /// <summary>
    /// VCVTN: float to integer, rounding to nearest with ties to even (AArch64 FCVTNS/FCVTNU).
    /// "op" selects a signed (true) or unsigned (false) destination.
    /// </summary>
    public static void Vcvtn(CodeGenContext context, uint rd, uint rm, bool op, uint size)
    {
        if (size != 3)
        {
            // Half and single precision are converted directly on the SIMD side.
            if (op)
            {
                InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.FcvtnsS, context.Arm64Assembler.FcvtnsSH);
            }
            else
            {
                InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.FcvtnuS, context.Arm64Assembler.FcvtnuSH);
            }
        }
        else if (op)
        {
            // F64 -> S32/U32 conversion on SIMD is not supported, so we convert it to a GPR, then insert it back into the SIMD register.
            InstEmitNeonCommon.EmitScalarUnaryToGprTempF(context, rd, rm, size, 0, context.Arm64Assembler.FcvtnsFloat);
        }
        else
        {
            InstEmitNeonCommon.EmitScalarUnaryToGprTempF(context, rd, rm, size, 0, context.Arm64Assembler.FcvtnuFloat);
        }
    }

    /// <summary>
    /// VCVTP: float to integer, rounding toward plus infinity (AArch64 FCVTPS/FCVTPU).
    /// "op" selects a signed (true) or unsigned (false) destination.
    /// </summary>
    public static void Vcvtp(CodeGenContext context, uint rd, uint rm, bool op, uint size)
    {
        if (size != 3)
        {
            // Half and single precision are converted directly on the SIMD side.
            if (op)
            {
                InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.FcvtpsS, context.Arm64Assembler.FcvtpsSH);
            }
            else
            {
                InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.FcvtpuS, context.Arm64Assembler.FcvtpuSH);
            }
        }
        else if (op)
        {
            // F64 -> S32/U32 conversion on SIMD is not supported, so we convert it to a GPR, then insert it back into the SIMD register.
            InstEmitNeonCommon.EmitScalarUnaryToGprTempF(context, rd, rm, size, 0, context.Arm64Assembler.FcvtpsFloat);
        }
        else
        {
            InstEmitNeonCommon.EmitScalarUnaryToGprTempF(context, rd, rm, size, 0, context.Arm64Assembler.FcvtpuFloat);
        }
    }

    /// <summary>
    /// VCVT between double and single precision. size == 3 means double to single,
    /// otherwise single to double.
    /// </summary>
    public static void VcvtDs(CodeGenContext context, uint rd, uint rm, uint size)
    {
        bool fromDouble = size == 3;

        using ScopedRegister resultReg = context.RegisterAllocator.AllocateTempSimdRegisterScoped();
        using ScopedRegister sourceReg = InstEmitNeonCommon.MoveScalarToSide(context, rm, !fromDouble);

        // FCVT encodes the destination type in the third argument and the source type
        // in the fourth (0 = single, 1 = double).
        context.Arm64Assembler.FcvtFloat(resultReg.Operand, sourceReg.Operand, fromDouble ? 0u : 1u, fromDouble ? 1u : 0u);

        InstEmitNeonCommon.InsertResult(context, resultReg.Operand, rd, fromDouble);
    }

    /// <summary>
    /// VCVT float to integer, rounding toward zero (AArch64 FCVTZS/FCVTZU).
    /// </summary>
    public static void VcvtIv(CodeGenContext context, uint rd, uint rm, bool unsigned, uint size)
    {
        if (size != 3)
        {
            // Half and single precision are converted directly on the SIMD side.
            if (unsigned)
            {
                InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.FcvtzuIntS, context.Arm64Assembler.FcvtzuIntSH);
            }
            else
            {
                InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.FcvtzsIntS, context.Arm64Assembler.FcvtzsIntSH);
            }
        }
        else if (unsigned)
        {
            // F64 -> S32/U32 conversion on SIMD is not supported, so we convert it to a GPR, then insert it back into the SIMD register.
            InstEmitNeonCommon.EmitScalarUnaryToGprTempF(context, rd, rm, size, 0, context.Arm64Assembler.FcvtzuFloatInt);
        }
        else
        {
            InstEmitNeonCommon.EmitScalarUnaryToGprTempF(context, rd, rm, size, 0, context.Arm64Assembler.FcvtzsFloatInt);
        }
    }

    /// <summary>
    /// VCVT integer to float (AArch64 SCVTF/UCVTF).
    /// </summary>
    public static void VcvtVi(CodeGenContext context, uint rd, uint rm, bool unsigned, uint size)
    {
        if (size != 3)
        {
            // Half and single precision are converted directly on the SIMD side.
            if (unsigned)
            {
                InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.UcvtfIntS, context.Arm64Assembler.UcvtfIntSH);
            }
            else
            {
                InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.ScvtfIntS, context.Arm64Assembler.ScvtfIntSH);
            }
        }
        else if (unsigned)
        {
            // S32/U32 -> F64 conversion on SIMD is not supported, so we convert it to a GPR, then insert it back into the SIMD register.
            InstEmitNeonCommon.EmitScalarUnaryFromGprTempF(context, rd, rm, size, 0, context.Arm64Assembler.UcvtfFloatInt);
        }
        else
        {
            InstEmitNeonCommon.EmitScalarUnaryFromGprTempF(context, rd, rm, size, 0, context.Arm64Assembler.ScvtfFloatInt);
        }
    }

    /// <summary>
    /// VCVT between floating-point and fixed-point. op == 1 converts float to fixed,
    /// op == 0 converts fixed to float; "u" selects the unsigned variant.
    /// </summary>
    public static void VcvtXv(CodeGenContext context, uint rd, uint imm5, bool sx, uint sf, uint op, bool u)
    {
        Debug.Assert(op >> 1 == 0);

        uint size = sf;

        // Number of fraction bits, clamped to the valid range for the element size.
        uint fbits = Math.Clamp((sx ? 32u : 16u) - imm5, 1, 8u << (int)size);

        if (u)
        {
            if (op == 1)
            {
                InstEmitNeonCommon.EmitScalarUnaryFixedF(context, rd, rd, fbits, size, is16Bit: false, context.Arm64Assembler.FcvtzuFixS);
            }
            else
            {
                InstEmitNeonCommon.EmitScalarUnaryFixedF(context, rd, rd, fbits, size, is16Bit: !sx, context.Arm64Assembler.UcvtfFixS);
            }
        }
        else
        {
            if (op == 1)
            {
                InstEmitNeonCommon.EmitScalarUnaryFixedF(context, rd, rd, fbits, size, is16Bit: false, context.Arm64Assembler.FcvtzsFixS);
            }
            else
            {
                InstEmitNeonCommon.EmitScalarUnaryFixedF(context, rd, rd, fbits, size, is16Bit: !sx, context.Arm64Assembler.ScvtfFixS);
            }
        }
    }

    /// <summary>
    /// VCVTR: float to integer using the rounding mode from FPSCR. The low bit of "op"
    /// clear selects the unsigned variant.
    /// </summary>
    public static void VcvtrIv(CodeGenContext context, uint rd, uint rm, uint op, uint size)
    {
        bool unsigned = (op & 1) == 0;

        Debug.Assert(size == 1 || size == 2 || size == 3);

        bool singleRegs = size != 3;

        using ScopedRegister sourceReg = InstEmitNeonCommon.MoveScalarToSide(context, rm, singleRegs);
        using ScopedRegister convReg = InstEmitNeonCommon.PickSimdRegister(context.RegisterAllocator, sourceReg);

        // Round using the FPCR rounding mode first, since the FCVTZ instructions will use the round to zero mode.
        context.Arm64Assembler.FrintiFloat(convReg.Operand, sourceReg.Operand, size ^ 2u);

        if (size == 1)
        {
            if (unsigned)
            {
                context.Arm64Assembler.FcvtzuIntSH(convReg.Operand, convReg.Operand);
            }
            else
            {
                context.Arm64Assembler.FcvtzsIntSH(convReg.Operand, convReg.Operand);
            }
        }
        else if (unsigned)
        {
            context.Arm64Assembler.FcvtzuIntS(convReg.Operand, convReg.Operand, size & 1);
        }
        else
        {
            context.Arm64Assembler.FcvtzsIntS(convReg.Operand, convReg.Operand, size & 1);
        }

        InstEmitNeonCommon.InsertResult(context, convReg.Operand, rd, singleRegs);
    }

    /// <summary>VCVTT: converts to/from the top (high) half-precision element.</summary>
    public static void Vcvtt(CodeGenContext context, uint rd, uint rm, uint sz, uint op) =>
        EmitVcvtbVcvtt(context, rd, rm, sz, op, top: true);

    /// <summary>
    /// Shared implementation for VCVTB/VCVTT: conversion between half precision and
    /// single/double precision, reading/writing the bottom or top 16-bit element.
    /// </summary>
    /// <param name="sz">1 to use double precision, 0 for single precision.</param>
    /// <param name="op">0 to convert from half precision, non-zero to convert to half precision.</param>
    /// <param name="top">True to access the top 16-bit element, false for the bottom one.</param>
    public static void EmitVcvtbVcvtt(CodeGenContext context, uint rd, uint rm, uint sz, uint op, bool top)
    {
        bool isDouble = sz == 1;
        bool fromHalf = op == 0;

        using ScopedRegister resultReg = context.RegisterAllocator.AllocateTempSimdRegisterScoped();

        if (fromHalf)
        {
            // Half to single/double.
            using ScopedRegister sourceReg = InstEmitNeonCommon.Move16BitScalarToSide(context, rm, top);

            context.Arm64Assembler.FcvtFloat(resultReg.Operand, sourceReg.Operand, isDouble ? 1u : 0u, 3u);

            InstEmitNeonCommon.InsertResult(context, resultReg.Operand, rd, !isDouble);
        }
        else
        {
            // Single/double to half.
            using ScopedRegister sourceReg = InstEmitNeonCommon.MoveScalarToSide(context, rm, !isDouble);

            context.Arm64Assembler.FcvtFloat(resultReg.Operand, sourceReg.Operand, 3u, isDouble ? 1u : 0u);

            InstEmitNeonCommon.Insert16BitResult(context, resultReg.Operand, rd, top);
        }
    }
}
}

View File

@@ -0,0 +1,22 @@
using Ryujinx.Cpu.LightningJit.CodeGen;
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
/// <summary>
/// Emits AArch64 code for the A32 VFP select instruction.
/// </summary>
static class InstEmitVfpMove
{
    /// <summary>
    /// Emits code for the A32 VSEL instruction, using the AArch64 FCSEL instruction:
    /// Rd = cond ? Rn : Rm.
    /// </summary>
    public static void Vsel(CodeGenContext context, uint rd, uint rn, uint rm, uint cc, uint size)
    {
        bool singleRegs = size != 3;

        // Expand the 2-bit A32 condition field into the 4-bit AArch64 condition code:
        // the low bit is the XOR of the two cc bits.
        uint cond = (cc << 2) | ((cc & 2) ^ ((cc << 1) & 2));

        using ScopedRegister firstReg = InstEmitNeonCommon.MoveScalarToSide(context, rn, singleRegs);
        using ScopedRegister secondReg = InstEmitNeonCommon.MoveScalarToSide(context, rm, singleRegs);
        using ScopedRegister destReg = InstEmitNeonCommon.PickSimdRegister(context.RegisterAllocator, firstReg, secondReg);

        context.Arm64Assembler.FcselFloat(destReg.Operand, firstReg.Operand, cond, secondReg.Operand, size ^ 2u);

        InstEmitNeonCommon.InsertResult(context, destReg.Operand, rd, singleRegs);
    }
}
}

View File

@@ -0,0 +1,40 @@
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
/// <summary>
/// Emits AArch64 code for the A32 VFP round-to-integral instructions (VRINT family),
/// each mapping directly to the corresponding AArch64 FRINT* scalar instruction.
/// </summary>
static class InstEmitVfpRound
{
    /// <summary>VRINTA: round to nearest with ties away from zero (FRINTA).</summary>
    public static void Vrinta(CodeGenContext context, uint rd, uint rm, uint size) =>
        InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.FrintaFloat);

    /// <summary>VRINTM: round toward minus infinity (FRINTM).</summary>
    public static void Vrintm(CodeGenContext context, uint rd, uint rm, uint size) =>
        InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.FrintmFloat);

    /// <summary>VRINTN: round to nearest with ties to even (FRINTN).</summary>
    public static void Vrintn(CodeGenContext context, uint rd, uint rm, uint size) =>
        InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.FrintnFloat);

    /// <summary>VRINTP: round toward plus infinity (FRINTP).</summary>
    public static void Vrintp(CodeGenContext context, uint rd, uint rm, uint size) =>
        InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.FrintpFloat);

    /// <summary>VRINTR: round using the current FPSCR rounding mode (FRINTI).</summary>
    public static void Vrintr(CodeGenContext context, uint rd, uint rm, uint size) =>
        InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.FrintiFloat);

    /// <summary>VRINTX: round using the current rounding mode, raising Inexact (FRINTX).</summary>
    public static void Vrintx(CodeGenContext context, uint rd, uint rm, uint size) =>
        InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.FrintxFloat);

    /// <summary>VRINTZ: round toward zero (FRINTZ).</summary>
    public static void Vrintz(CodeGenContext context, uint rd, uint rm, uint size) =>
        InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.FrintzFloat);
}
}