-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[TargetInstrInfo] enable foldMemoryOperand for InlineAsm #70743
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-ir Author: Nick Desaulniers (nickdesaulniers) Changes: [TargetInstrInfo] enable foldMemoryOperand for InlineAsm. foldMemoryOperand looks at pairs of instructions (generally a load to a virt reg then use of the virtreg, or def of a virtreg then a store) and attempts to combine them. This can reduce register pressure. A prior commit added the ability to mark such a MachineOperand as foldable. Thanks to @topperc who suggested this at last year's LLVM US Dev Meeting and @qcolombet who confirmed this was the right approach. Full diff: https://github.com/llvm/llvm-project/pull/70743.diff 5 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h
index 4877f43e8578d1c..93e8ff389d65673 100644
--- a/llvm/include/llvm/CodeGen/MachineInstr.h
+++ b/llvm/include/llvm/CodeGen/MachineInstr.h
@@ -1364,6 +1364,9 @@ class MachineInstr
return getOpcode() == TargetOpcode::INLINEASM ||
getOpcode() == TargetOpcode::INLINEASM_BR;
}
+ /// Returns true if the memory operand can be folded. Does so by checking the
+ /// InlineAsm::Flag immediate operand at OpId - 1.
+ bool mayFoldInlineAsmMemOp(unsigned OpId) const;
bool isStackAligningInlineAsm() const;
InlineAsm::AsmDialect getInlineAsmDialect() const;
diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index 8e7499ac626a747..ba98f52c27bbd2e 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -2186,6 +2186,16 @@ class TargetInstrInfo : public MCInstrInfo {
// Get the call frame size just before MI.
unsigned getCallFrameSizeAt(MachineInstr &MI) const;
+ /// Fills in the necessary MachineOperands to refer to a frame index.
+ /// The best way to understand this is to print `asm(""::"m"(x));` after
+ /// finalize-isel. Example:
+ /// INLINEASM ... 262190 /* mem:m */, %stack.0.x.addr, 1, $noreg, 0, $noreg ...
+ /// we would add placeholders for: ^ ^ ^ ^
+ virtual void
+ getFrameIndexOperands(SmallVectorImpl<MachineOperand> &Ops) const {
+ llvm_unreachable("unknown number of operands necessary");
+ }
+
private:
mutable std::unique_ptr<MIRFormatter> Formatter;
unsigned CallFrameSetupOpcode, CallFrameDestroyOpcode;
diff --git a/llvm/include/llvm/IR/InlineAsm.h b/llvm/include/llvm/IR/InlineAsm.h
index 969ad42816a7e52..2d395a53608b0b7 100644
--- a/llvm/include/llvm/IR/InlineAsm.h
+++ b/llvm/include/llvm/IR/InlineAsm.h
@@ -291,18 +291,23 @@ class InlineAsm final : public Value {
// Bits 30-16 - A ConstraintCode:: value indicating the original
// constraint code. (MemConstraintCode)
// Else:
- // Bits 30-16 - The register class ID to use for the operand. (RegClass)
+ // Bits 29-16 - The register class ID to use for the operand. (RegClass)
+ // Bit 30 - If the register is permitted to be spilled.
+ // (RegMayBeSpilled)
+ // Defaults to false "r", may be set for constraints like
+ // "rm" (or "g").
//
- // As such, MatchedOperandNo, MemConstraintCode, and RegClass are views of
- // the same slice of bits, but are mutually exclusive depending on the
- // fields IsMatched then KindField.
+ // As such, MatchedOperandNo, MemConstraintCode, and
+ // (RegClass+RegMayBeSpilled) are views of the same slice of bits, but are
+ // mutually exclusive depending on the fields IsMatched then KindField.
class Flag {
uint32_t Storage;
using KindField = Bitfield::Element<Kind, 0, 3, Kind::Func>;
using NumOperands = Bitfield::Element<unsigned, 3, 13>;
using MatchedOperandNo = Bitfield::Element<unsigned, 16, 15>;
using MemConstraintCode = Bitfield::Element<ConstraintCode, 16, 15, ConstraintCode::Max>;
- using RegClass = Bitfield::Element<unsigned, 16, 15>;
+ using RegClass = Bitfield::Element<unsigned, 16, 14>;
+ using RegMayBeSpilled = Bitfield::Element<bool, 30, 1>;
using IsMatched = Bitfield::Element<bool, 31, 1>;
@@ -413,6 +418,26 @@ class InlineAsm final : public Value {
"Flag is not a memory or function constraint!");
Bitfield::set<MemConstraintCode>(Storage, ConstraintCode::Unknown);
}
+
+ /// Set a bit to denote that while this operand is some kind of register
+ /// (use, def, ...), a memory flag did appear in the original constraint
+ /// list. This is set by the instruction selection framework, and consumed
+ /// by the register allocator. While the register allocator is generally
+ /// responsible for spilling registers, we need to be able to distinguish
+ /// between registers that the register allocator has permission to spill
+ /// ("rm") vs ones it does not ("r"). This is because the inline asm may use
+ /// instructions which don't support memory addressing modes for that
+ /// operand.
+ void setRegMayBeSpilled(bool B) {
+ assert((isRegDefKind() || isRegDefEarlyClobberKind() || isRegUseKind()) &&
+ "Must be reg");
+ Bitfield::set<RegMayBeSpilled>(Storage, B);
+ }
+ bool getRegMayBeSpilled() const {
+ assert((isRegDefKind() || isRegDefEarlyClobberKind() || isRegUseKind()) &&
+ "Must be reg");
+ return Bitfield::get<RegMayBeSpilled>(Storage);
+ }
};
static std::vector<StringRef> getExtraInfoNames(unsigned ExtraInfo) {
diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp
index 048563cc2bcc4e4..92c789e85a205b4 100644
--- a/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/llvm/lib/CodeGen/MachineInstr.cpp
@@ -1792,6 +1792,12 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
if (F.isUseOperandTiedToDef(TiedTo))
OS << " tiedto:$" << TiedTo;
+ if ((F.isRegDefKind() || F.isRegDefEarlyClobberKind() ||
+ F.isRegUseKind()) &&
+ F.getRegMayBeSpilled()) {
+ OS << " spillable";
+ }
+
OS << ']';
// Compute the index of the next operand descriptor.
@@ -2526,3 +2532,20 @@ void MachineInstr::insert(mop_iterator InsertBefore,
tieOperands(Tie1, Tie2);
}
}
+
+bool MachineInstr::mayFoldInlineAsmMemOp(unsigned OpId) const {
+ assert(OpId && "expected non-zero operand id");
+ assert(isInlineAsm() && "should only be used on inline asm");
+
+ if (!getOperand(OpId).isReg())
+ return false;
+
+ const MachineOperand &MD = getOperand(OpId - 1);
+ if (!MD.isImm())
+ return false;
+
+ InlineAsm::Flag F(MD.getImm());
+ if (F.isRegUseKind() || F.isRegDefKind() || F.isRegDefEarlyClobberKind())
+ return F.getRegMayBeSpilled();
+ return false;
+}
diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp
index fe7efb73a2dce83..5252eeaadb2aeb2 100644
--- a/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -565,6 +565,64 @@ static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr &MI,
return NewMI;
}
+static void foldInlineAsmMemOperand(MachineInstr *MI, unsigned OpNo, int FI,
+ const TargetInstrInfo &TII) {
+ MachineOperand &MO = MI->getOperand(OpNo);
+ const VirtRegInfo &RI = AnalyzeVirtRegInBundle(*MI, MO.getReg());
+
+ // If the machine operand is tied, untie it first.
+ if (MO.isTied()) {
+ unsigned TiedTo = MI->findTiedOperandIdx(OpNo);
+ MI->untieRegOperand(OpNo);
+ // Intentional recursion!
+ foldInlineAsmMemOperand(MI, TiedTo, FI, TII);
+ }
+
+ // Change the operand from a register to a frame index.
+ MO.ChangeToFrameIndex(FI, MO.getTargetFlags());
+
+ SmallVector<MachineOperand, 4> NewOps;
+ TII.getFrameIndexOperands(NewOps);
+ assert(!NewOps.empty() && "getFrameIndexOperands didn't create any operands");
+ MI->insert(MI->operands_begin() + OpNo + 1, NewOps);
+
+ // Change the previous operand to a MemKind InlineAsm::Flag. The second param
+ // is the per-target number of operands that represent the memory operand
+ // excluding this one (MD). This includes MO.
+ InlineAsm::Flag F(InlineAsm::Kind::Mem, NewOps.size() + 1);
+ F.setMemConstraint(InlineAsm::ConstraintCode::m);
+ MachineOperand &MD = MI->getOperand(OpNo - 1);
+ MD.setImm(F);
+
+ // Update mayload/maystore metadata.
+ MachineOperand &ExtraMO = MI->getOperand(InlineAsm::MIOp_ExtraInfo);
+ if (RI.Reads)
+ ExtraMO.setImm(ExtraMO.getImm() | InlineAsm::Extra_MayLoad);
+ if (RI.Writes)
+ ExtraMO.setImm(ExtraMO.getImm() | InlineAsm::Extra_MayStore);
+}
+
+// Returns nullptr if not possible to fold.
+static MachineInstr *foldInlineAsmMemOperand(MachineInstr &MI,
+ ArrayRef<unsigned> Ops, int FI,
+ const TargetInstrInfo &TII) {
+ assert(MI.isInlineAsm() && "wrong opcode");
+ if (Ops.size() > 1)
+ return nullptr;
+ unsigned Op = Ops[0];
+ assert(Op && "should never be first operand");
+ assert(MI.getOperand(Op).isReg() && "shouldn't be folding non-reg operands");
+
+ if (!MI.mayFoldInlineAsmMemOp(Op))
+ return nullptr;
+
+ MachineInstr &NewMI = TII.duplicate(*MI.getParent(), MI.getIterator(), MI);
+
+ foldInlineAsmMemOperand(&NewMI, Op, FI, TII);
+
+ return &NewMI;
+}
+
MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
ArrayRef<unsigned> Ops, int FI,
LiveIntervals *LIS,
@@ -612,6 +670,8 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
NewMI = foldPatchpoint(MF, MI, Ops, FI, *this);
if (NewMI)
MBB->insert(MI, NewMI);
+ } else if (MI.isInlineAsm()) {
+ NewMI = foldInlineAsmMemOperand(MI, Ops, FI, *this);
} else {
// Ask the target to do the actual folding.
NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, FI, LIS, VRM);
@@ -683,6 +743,8 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
NewMI = foldPatchpoint(MF, MI, Ops, FrameIndex, *this);
if (NewMI)
NewMI = &*MBB.insert(MI, NewMI);
+ } else if (MI.isInlineAsm() && isLoadFromStackSlot(LoadMI, FrameIndex)) {
+ NewMI = foldInlineAsmMemOperand(MI, Ops, FrameIndex, *this);
} else {
// Ask the target to do the actual folding.
NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, LoadMI, LIS);
@@ -1639,6 +1701,10 @@ std::string TargetInstrInfo::createMIROperandComment(
if (F.isUseOperandTiedToDef(TiedTo))
OS << " tiedto:$" << TiedTo;
+ if ((F.isRegDefKind() || F.isRegDefEarlyClobberKind() || F.isRegUseKind()) &&
+ F.getRegMayBeSpilled())
+ OS << " spillable";
+
return OS.str();
}
|
[note to reviewers: this is a stacked commit. https://github.com//pull/70738 is the parent. Only https://github.com//pull/70743/commits/90474cc62a81e8273bcf3b985917c93d6c96db6e should be reviewed in this PR]. Marking it as a draft as such, but feel free to comment. |
✅ With the latest revision this PR passed the C/C++ code formatter. |
// If the machine operand is tied, untie it first. | ||
if (MO.isTied()) { | ||
unsigned TiedTo = MI->findTiedOperandIdx(OpNo); | ||
MI->untieRegOperand(OpNo); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is it generally correct to untie an operand?
I don't know how the tie
constraints are set for inline asm so sorry if the question is dumb!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Only registers may be tied. Note how MachineInstr::untieRegOperand()
does nothing if !MachineOperand::isReg()
and calls MachineOperand::isTied()
which asserts if !isReg()
.
Because we are folding register MachineOperand
s into Frame Indices, we must untie them, otherwise we will assert in MachineOperand::ChangeToFrameIndex
(line 582). Many of the MachineOperand::ChangeTo*
methods assert if the input is a tied register and you try to change them (without manually untieing them first).
This is also the reason why I had to "remember" which operands were tied when splicing new ones in a41b149 ; if you splice new operands in the middle, you need to remember the existing ties, untie them, move them, insert new operands, then manually retie them.
Additional context on what even is a tied operand in the context of inline asm.
For inline asm, you can have inputs, outputs, in/outs, or 2 variables where one is an input, one is an output, but for the sake of register allocation the location chosen for the inline asm is one and the same.
asm ("# %0 %1":"=r"(x):"0"(y));
x
is an output, y
is an input. But they will be allocated the same storage location for the duration of the inline asm; %0
will be equivalent to %1
.
I don't know when that's ever preferable to just using one variable.
asm ("# %0":"+r"(x));
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Only registers may be tied.
Yes, what I'm saying is tied operands and untied operands don't have the same semantic and I was wondering if this code could break that.
What I was thinking is, with tied operand, we'll have something like:
a = a + ...
When we untie and fold, we'll end up with:
a = @a + ...
At this point, a
and @a
don't hold the same value, so we broke the tied operand semantic.
Now, I think this is not a problem for regular code because if @a
is used somewhere else, regalloc should have produced the correct store operation, i.e.,
a = @a + ...
store a, @a <-- values properly tied again
My concern is around inline asm that regalloc cannot unbox.
What happens if the inline asm statement is a sequence of instructions:
a = a + ...
... = a
That we turn into:
a = @a
... = @a <-- @a != a
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Note: A safe temporary solution could be to disallow the folding (i.e., use the m
variant only) for tied operands.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I just realized that we call this recursively on the tied operand, so we would produce:
@a = @a + ...
So in theory this is fine.
In practice, I don't believe this works all the time because IIRC, for instance, x86 cannot have a memory operand both as definition and as argument of the same instruction.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Note: A safe temporary solution could be to disallow the folding (i.e., use the m variant only) for tied operands.
No. Support for tied operands is table stakes for resolving #20571 because clang will lower "+" modifiers ("in+out" variables) to tied operands. We need to support:
- inputs
- outputs
- in/outs
so 3 is table stakes.
In practice, I don't believe this works all the time because IIRC, for instance, x86 cannot have a memory operand both as definition and as argument of the same instruction.
If you have tied operands for inline asm, you may have 2 distinct variables (one as input, one as output), but for the duration of the inline asm blob, they have 1 (the same) storage location (register or stack slot). So in the inline asm, you'd only refer to the 1 storage location (be it register or memory) and not both values. So it doesn't matter if x86 cannot have a memory def and use as operands on the same instr. My example above wasn't clear in that regard, perhaps another example:
long foo (long x) {
long y;
asm (
"mov %0, r8\n\t"
"mov r8, %0"
:"=r"(y):"0"(x):"r8");
return y;
}
in that case, %1
is not referenced in the inline asm, only %0
is. But if it were, it would have the same value as %0
because that's what tying is (at least my understanding of it, which may be wrong).
what I'm saying is tied operands and untied operands don't have the same semantic
That is correct but I think only so for register operands. If you have 2 memory operands which refer to the same stack slot, they are in effect tied. IIUC, tied operands only matter for the purpose of register allocation. If an input value is tied to an output value, tieing them just means "put them in the same place (be that a register or memory location)."
So in MIR if we have:
INLINEASM &"# $0 $1", 0 /* attdialect */, 1076101130 /* regdef:GR32 spillable */, def %0, 2147483657 /* reguse tiedto:$0 */, %1(tied-def 3)
(i.e. %0
is tied to %1
) then that means the developer is asking the register allocator "please assign %0
and %1
to the same storage location (be that register or stack slot) for the duration of the INLINEASM blob.
Given that example as input, after my change (this change), we first notice that %0
is tied, so we untie it, and transform the above (the recursive call) to:
INLINEASM &"# $0 $1", 0 /* attdialect */, 1076101130 /* regdef:GR32 spillable */, def %0, 262190 /* mem:m */, %stack.0, 1, $noreg, 0, $noreg
which replaces %1
(a tied register def) with a frame index. Then upon return, we continue transforming %0
replacing that with the same frame index (in this case %stack.0
):
INLINEASM &"# $0 $1", 0 /* attdialect */, 262190 /* mem:m */, %stack.0, 1, $noreg, 0, $noreg, 262190 /* mem:m */, %stack.0, 1, $noreg, 0, $noreg
(this is better reflected in the test added in the child patch in #70832)
I just realized that we call this recursively on the tied operand, so we would produce:
@A = @A + ...
Yes! Essentially if the output is tied to the input, then @a = @a + ...
is produced. If it's not tied, then you'd get a = @a + ...
which is maybe what you want (for an input or output, but not an in/out).
It's still possible that I'm completely misunderstanding something about tied operands; I hope the above cleared up some questions?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
TL;DR Sounds fine to me, we're not creating new problems.
Agree on all what you're saying but my point is when we replace operands with memory locations, the resulting inline assembly may not be correct or put differently, I don't see where we check that the resulting asm is correct.
I actually realize now that this is not specific to tied operands.
E.g., consider the following inline asm constraints on x86 (with someop
being something that makes sense)
asm (
"someop %0, %1, %2"
: "=rm"(dst) : "rm"(a1), "rm"(a2));
The resulting code will be fine as long as regalloc spills at most one operand. If it spills two or more, we'll have more than one memory location on someop
. Assuming someop is a single x86 operation, I believe this will produce something invalid.
With tied operands, I believe we just make this more likely to happen, but then considering that previously we were just using the m
constraint in these cases, we already had this issue anyway.
Therefore, nothing to worry about, I guess this is the responsibility of the developers to set something that make sense.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The resulting code will be fine as long as regalloc spills at most one operand. If it spills two or more, we'll have more than one memory location on someop. Assuming someop is a single x86 operation, I believe this will produce something invalid.
Correct, but for that case, I would say that the inline asm is invalid and should only use m
on one or two of the three operands:
asm (
"someop %0, %1, %2"
- : "=rm"(dst) : "rm"(a1), "rm"(a2));
+ : "=rm"(dst) : "r"(a1), "r"(a2));
That's just another instance of "the constraints were wrongly specified, but you didn't notice until there was register pressure (and the compiler was forced to choose m rather than r)." And that's not specific to clang or GCC.
With tied operands, I believe we just make this more likely to happen
A lot of the cases you're thinking of seem to be related to any given single x86 instruction within inline asm likely not supporting more than 1 (or is it 2) memory operands. But for tied operands, you likely have more than one instruction in your inline asm, so that's less likely to be problematic.
asm (
"mov %1, %r8\n\t"
"mov %r8, %0"
: "=rm"(output) : "0"(input));
so if we can't allocate output
to a register, and input
is tied, the single stack slot chosen for the both of them is not an issue, since they are used in different instructions (which happen to support register or memory for those operands).
My point is that the use of tied operands is likely to be with inline asm w/ more than one instruction, so the limitations of most modern CISC ISAs (is x86 modern...well, relative to other CISC) around how many memory operands they can have I think is not a concern. (If it is, because only one instruction is being used, then it's probably a mistake for the programmer to specify "rm" rather than "r").
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Correct, but for that case, I would say that the inline asm is invalid [...]
Thanks for the confirmation.
Agree on everything you said :).
(And already approved the PR)
90474cc
to
efd5ebf
Compare
rebased on top of 778a484; ready for review |
foldMemoryOperand looks at pairs of instructions (generally a load to virt reg then use of the virtreg, or def of a virtreg then a store) and attempts to combine them. This can reduce register pressure. A prior commit added the ability to mark such a MachineOperand as spillable. In terms of INLINEASM, this means that "rm" was used (rather than just "r") to denote that the INLINEASM may use a memory operand rather than a register operand. This effectively undoes decisions made by the instruction selection framework. Callers will be added in the register allocation frameworks. This has been tested with all of the above (which will come as follow up patches). Thanks to @topperc who suggested this at last years LLVM US Dev Meeting and @qcolombet who confirmed this was the right approach.
efd5ebf
to
4d736be
Compare
foldMemoryOperand looks at pairs of instructions (generally a load to virt reg then use of the virtreg, or def of a virtreg then a store) and attempts to combine them. This can reduce register pressure. A prior commit added the ability to mark such a MachineOperand as foldable. In terms of INLINEASM, this means that "rm" was used (rather than just "r") to denote that the INLINEASM may use a memory operand rather than a register operand. This effectively undoes decisions made by the instruction selection framework. Callers will be added in the register allocation frameworks. This has been tested with all of the above (which will come as follow up patches). Thanks to @topperc who suggested this at last years LLVM US Dev Meeting and @qcolombet who confirmed this was the right approach. Link: llvm#20571
…m#70743)" This reverts commit 99ee2db. It's causing ICEs in the ARM tests. See the comment here: llvm@99ee2db
foldMemoryOperand looks at pairs of instructions (generally a load to virt reg then use of the virtreg, or def of a virtreg then a store) and attempts to combine them. This can reduce register pressure. A prior commit added the ability to mark such a MachineOperand as foldable. In terms of INLINEASM, this means that "rm" was used (rather than just "r") to denote that the INLINEASM may use a memory operand rather than a register operand. This effectively undoes decisions made by the instruction selection framework. Callers will be added in the register allocation frameworks. This has been tested with all of the above (which will come as follow up patches). Thanks to @topperc who suggested this at last years LLVM US Dev Meeting and @qcolombet who confirmed this was the right approach. Link: llvm#20571
…m#70743)" This reverts commit 99ee2db. It's causing ICEs in the ARM tests. See the comment here: llvm@99ee2db
@bwendling reverted this in 42204c9 99ee2db#commitcomment-132962097. Trying to figure out what happened ATM. |
[TargetInstrInfo] enable foldMemoryOperand for InlineAsm
foldMemoryOperand looks at pairs of instructions (generally a load to
virt reg then use of the virtreg, or def of a virtreg then a store) and
attempts to combine them. This can reduce register pressure.
A prior commit added the ability to mark such a MachineOperand as
foldable. In terms of INLINEASM, this means that "rm" was used (rather
than just "r") to denote that the INLINEASM may use a memory operand
rather than a register operand. This effectively undoes decisions made
by the instruction selection framework. Callers will be added in the
register allocation frameworks. This has been tested with all of the
above (which will come as follow up patches).
Thanks to @topperc who suggested this at last year's LLVM US Dev Meeting
and @qcolombet who confirmed this was the right approach.
Link: #20571