-
Notifications
You must be signed in to change notification settings - Fork 14.3k
SplitKit: Take register class directly from instruction definition #129727
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
SplitKit: Take register class directly from instruction definition #129727
Conversation
This stack of pull requests is managed by Graphite. Learn more about stacking. |
@llvm/pr-subscribers-backend-arm @llvm/pr-subscribers-llvm-regalloc Author: Matt Arsenault (arsenm) ChangesThis fixes an expensive chesk failure after 8476a5d. The issue Also directly query the constraint from the def instruction, with a So far I've been unable to reproduce this with a standalone MIR test. In the Patch is 20.68 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/129727.diff 6 Files Affected:
diff --git a/llvm/lib/CodeGen/SplitKit.cpp b/llvm/lib/CodeGen/SplitKit.cpp
index 63ca45b2e69ad..332c42abfea3f 100644
--- a/llvm/lib/CodeGen/SplitKit.cpp
+++ b/llvm/lib/CodeGen/SplitKit.cpp
@@ -598,25 +598,27 @@ bool SplitEditor::rematWillIncreaseRestriction(const MachineInstr *DefMI,
if (!UseMI)
return false;
- Register Reg = Edit->getReg();
- const TargetRegisterClass *RC = MRI.getRegClass(Reg);
+ // Currently code assumes rematerialization only happens for a def at 0.
+ const unsigned DefOperandIdx = 0;
+ // We want to compute the static register class constraint for the instruction
+ // def. If it is a smaller subclass than getLargestLegalSuperClass at the use
+ // site, then rematerializing it will increase the constraints.
+ const TargetRegisterClass *DefConstrainRC =
+ DefMI->getRegClassConstraint(DefOperandIdx, &TII, &TRI);
+ if (!DefConstrainRC)
+ return false;
+
+ const TargetRegisterClass *RC = MRI.getRegClass(Edit->getReg());
// We want to find the register class that can be inflated to after the split
// occurs in recomputeRegClass
const TargetRegisterClass *SuperRC =
TRI.getLargestLegalSuperClass(RC, *MBB.getParent());
- // We want to compute the static register class constraint for the instruction
- // def. If it is a smaller subclass than getLargestLegalSuperClass at the use
- // site, then rematerializing it will increase the constraints.
- const TargetRegisterClass *DefConstrainRC =
- DefMI->getRegClassConstraintEffectForVReg(Reg, SuperRC, &TII, &TRI,
- /*ExploreBundle=*/true);
-
+ Register DefReg = DefMI->getOperand(DefOperandIdx).getReg();
const TargetRegisterClass *UseConstrainRC =
- UseMI->getRegClassConstraintEffectForVReg(Reg, SuperRC, &TII, &TRI,
+ UseMI->getRegClassConstraintEffectForVReg(DefReg, SuperRC, &TII, &TRI,
/*ExploreBundle=*/true);
-
return UseConstrainRC->hasSubClass(DefConstrainRC);
}
diff --git a/llvm/test/CodeGen/ARM/splitkit-remat-regclass-constraint-regression.ll b/llvm/test/CodeGen/ARM/splitkit-remat-regclass-constraint-regression.ll
new file mode 100644
index 0000000000000..1ef6322bb48c8
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/splitkit-remat-regclass-constraint-regression.ll
@@ -0,0 +1,39 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -verify-machineinstrs -stress-regalloc=2 -o - %s | FileCheck %s
+
+; Check that no register class constraint error is produced during
+; rematerialization
+
+target triple = "thumbv7-apple-ios"
+
+declare ptr @_Znwm()
+
+define void @func() {
+; CHECK-LABEL: func:
+; CHECK: @ %bb.0: @ %bb14
+; CHECK-NEXT: str lr, [sp, #-4]!
+; CHECK-NEXT: movs r0, #0
+; CHECK-NEXT: movs r1, #4
+; CHECK-NEXT: str r0, [r1]
+; CHECK-NEXT: movs r1, #8
+; CHECK-NEXT: str r0, [r1]
+; CHECK-NEXT: str r0, [r0]
+; CHECK-NEXT: bl __Znwm
+; CHECK-NEXT: movs r1, #0
+; CHECK-NEXT: movs r0, #4
+; CHECK-NEXT: str r1, [r0]
+; CHECK-NEXT: movs r0, #8
+; CHECK-NEXT: str r1, [r0]
+; CHECK-NEXT: str r1, [r1]
+; CHECK-NEXT: ldr lr, [sp], #4
+; CHECK-NEXT: bx lr
+bb14:
+ call void @llvm.memset.p0.i32(ptr null, i8 0, i32 12, i1 false)
+ %tmp34 = call ptr @_Znwm()
+ call void @llvm.memset.p0.i32(ptr null, i8 0, i32 12, i1 false)
+ ret void
+}
+
+declare void @llvm.memset.p0.i32(ptr writeonly captures(none), i8, i32, i1 immarg) #0
+
+attributes #0 = { nocallback nofree nounwind willreturn memory(argmem: write) }
diff --git a/llvm/test/CodeGen/ARM/splitkit.ll b/llvm/test/CodeGen/ARM/splitkit.ll
index d9db9f7cf4151..6bf3a64b3ed3e 100644
--- a/llvm/test/CodeGen/ARM/splitkit.ll
+++ b/llvm/test/CodeGen/ARM/splitkit.ll
@@ -94,7 +94,7 @@ declare ptr @bar(ptr returned)
declare i32 @__cxa_atexit(ptr, ptr, ptr)
-declare ptr @wobble(ptr returned, ptr )
+declare ptr @wobble(ptr returned, ptr )
declare i32 @quux(...)
@@ -236,6 +236,20 @@ bbunwind:
resume { ptr, i32 } undef
}
+; CHECK-LABEL: func_reduced_remat_regclass_error:
+define void @func_reduced_remat_regclass_error(ptr %global.10, ptr %global.15) {
+bb14:
+ store i32 999, ptr %global.10, align 4
+ call void @llvm.memset.p0.i32(ptr null, i8 0, i32 12, i1 false)
+ call void @llvm.memcpy.p0.p0.i32(ptr null, ptr null, i32 60, i1 false)
+ %tmp34 = call ptr @_Znwm()
+ store i32 999, ptr %global.15, align 4
+ call void @llvm.memcpy.p0.p0.i32(ptr %global.10, ptr null, i32 52, i1 false)
+ call void @llvm.memset.p0.i32(ptr null, i8 0, i32 12, i1 false)
+ call void @llvm.memset.p0.i32(ptr null, i8 0, i32 12, i1 false)
+ ret void
+}
+
declare void @llvm.trap()
declare void @llvm.memcpy.p0.p0.i32(ptr , ptr , i32, i1)
diff --git a/llvm/test/CodeGen/X86/fptosi-sat-vector-128.ll b/llvm/test/CodeGen/X86/fptosi-sat-vector-128.ll
index d2faed51bc502..536a1ae3b918d 100644
--- a/llvm/test/CodeGen/X86/fptosi-sat-vector-128.ll
+++ b/llvm/test/CodeGen/X86/fptosi-sat-vector-128.ll
@@ -241,21 +241,21 @@ define <4 x i128> @test_signed_v4i128_v4f32(<4 x float> %f) nounwind {
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: callq __fixsfti@PLT
; CHECK-NEXT: movq %rdx, %r15
-; CHECK-NEXT: xorl %edx, %edx
+; CHECK-NEXT: xorl %r14d, %r14d
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmovbq %rdx, %rax
-; CHECK-NEXT: movabsq $-9223372036854775808, %r14 # imm = 0x8000000000000000
-; CHECK-NEXT: cmovbq %r14, %r15
+; CHECK-NEXT: cmovbq %r14, %rax
+; CHECK-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
+; CHECK-NEXT: cmovbq %rcx, %r15
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: movabsq $9223372036854775807, %rbp # imm = 0x7FFFFFFFFFFFFFFF
; CHECK-NEXT: cmovaq %rbp, %r15
; CHECK-NEXT: movq $-1, %rcx
; CHECK-NEXT: cmovaq %rcx, %rax
; CHECK-NEXT: ucomiss %xmm0, %xmm0
-; CHECK-NEXT: cmovpq %rdx, %rax
+; CHECK-NEXT: cmovpq %r14, %rax
; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: cmovpq %rdx, %r15
+; CHECK-NEXT: cmovpq %r14, %r15
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -264,16 +264,16 @@ define <4 x i128> @test_signed_v4i128_v4f32(<4 x float> %f) nounwind {
; CHECK-NEXT: movq %rdx, %r13
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: movl $0, %eax
-; CHECK-NEXT: cmovbq %rax, %r12
-; CHECK-NEXT: cmovbq %r14, %r13
+; CHECK-NEXT: cmovbq %r14, %r12
+; CHECK-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
+; CHECK-NEXT: cmovbq %rax, %r13
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovaq %rbp, %r13
-; CHECK-NEXT: movq $-1, %rcx
-; CHECK-NEXT: cmovaq %rcx, %r12
+; CHECK-NEXT: movq $-1, %rax
+; CHECK-NEXT: cmovaq %rax, %r12
; CHECK-NEXT: ucomiss %xmm0, %xmm0
-; CHECK-NEXT: cmovpq %rax, %r12
-; CHECK-NEXT: cmovpq %rax, %r13
+; CHECK-NEXT: cmovpq %r14, %r12
+; CHECK-NEXT: cmovpq %r14, %r13
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -1187,14 +1187,14 @@ define <8 x i128> @test_signed_v8i128_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbq %r12, %rax
-; CHECK-NEXT: movabsq $-9223372036854775808, %r13 # imm = 0x8000000000000000
-; CHECK-NEXT: cmovbq %r13, %rdx
+; CHECK-NEXT: movabsq $-9223372036854775808, %rbp # imm = 0x8000000000000000
+; CHECK-NEXT: cmovbq %rbp, %rdx
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: movabsq $9223372036854775807, %r15 # imm = 0x7FFFFFFFFFFFFFFF
; CHECK-NEXT: cmovaq %r15, %rdx
; CHECK-NEXT: movq $-1, %rcx
; CHECK-NEXT: cmovaq %rcx, %rax
-; CHECK-NEXT: movq $-1, %rbp
+; CHECK-NEXT: movq $-1, %r13
; CHECK-NEXT: ucomiss %xmm0, %xmm0
; CHECK-NEXT: cmovpq %r12, %rax
; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
@@ -1209,10 +1209,10 @@ define <8 x i128> @test_signed_v8i128_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbq %r12, %rax
-; CHECK-NEXT: cmovbq %r13, %rdx
+; CHECK-NEXT: cmovbq %rbp, %rdx
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovaq %r15, %rdx
-; CHECK-NEXT: cmovaq %rbp, %rax
+; CHECK-NEXT: cmovaq %r13, %rax
; CHECK-NEXT: movq $-1, %r14
; CHECK-NEXT: ucomiss %xmm0, %xmm0
; CHECK-NEXT: cmovpq %r12, %rax
@@ -1228,7 +1228,7 @@ define <8 x i128> @test_signed_v8i128_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbq %r12, %rax
-; CHECK-NEXT: cmovbq %r13, %rdx
+; CHECK-NEXT: cmovbq %rbp, %rdx
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovaq %r15, %rdx
; CHECK-NEXT: cmovaq %r14, %rax
@@ -1247,7 +1247,8 @@ define <8 x i128> @test_signed_v8i128_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbq %r12, %rax
-; CHECK-NEXT: cmovbq %r13, %rdx
+; CHECK-NEXT: cmovbq %rbp, %rdx
+; CHECK-NEXT: movq %rbp, %r13
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovaq %r15, %rdx
; CHECK-NEXT: cmovaq %r14, %rax
@@ -1286,17 +1287,16 @@ define <8 x i128> @test_signed_v8i128_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: movl $0, %eax
-; CHECK-NEXT: cmovbq %rax, %r14
-; CHECK-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
-; CHECK-NEXT: cmovbq %rcx, %r15
+; CHECK-NEXT: cmovbq %r12, %r14
+; CHECK-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
+; CHECK-NEXT: cmovbq %rax, %r15
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovaq %r13, %r15
-; CHECK-NEXT: movq $-1, %rcx
-; CHECK-NEXT: cmovaq %rcx, %r14
+; CHECK-NEXT: movq $-1, %rax
+; CHECK-NEXT: cmovaq %rax, %r14
; CHECK-NEXT: ucomiss %xmm0, %xmm0
-; CHECK-NEXT: cmovpq %rax, %r14
-; CHECK-NEXT: cmovpq %rax, %r15
+; CHECK-NEXT: cmovpq %r12, %r14
+; CHECK-NEXT: cmovpq %r12, %r15
; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT: callq __extendhfsf2@PLT
diff --git a/llvm/test/CodeGen/X86/fptoui-sat-vector-128.ll b/llvm/test/CodeGen/X86/fptoui-sat-vector-128.ll
index 4d28ef7884954..4305886168abe 100644
--- a/llvm/test/CodeGen/X86/fptoui-sat-vector-128.ll
+++ b/llvm/test/CodeGen/X86/fptoui-sat-vector-128.ll
@@ -263,17 +263,17 @@ define <4 x i128> @test_unsigned_v4i128_v4f32(<4 x float> %f) nounwind {
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: callq __fixunssfti@PLT
; CHECK-NEXT: movq %rdx, %r15
-; CHECK-NEXT: xorl %ebp, %ebp
+; CHECK-NEXT: xorl %r14d, %r14d
; CHECK-NEXT: xorps %xmm0, %xmm0
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: ucomiss %xmm0, %xmm1
-; CHECK-NEXT: cmovbq %rbp, %r15
-; CHECK-NEXT: cmovbq %rbp, %rax
+; CHECK-NEXT: cmovbq %r14, %r15
+; CHECK-NEXT: cmovbq %r14, %rax
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; CHECK-NEXT: movq $-1, %r14
-; CHECK-NEXT: cmovaq %r14, %rax
+; CHECK-NEXT: movq $-1, %rbp
+; CHECK-NEXT: cmovaq %rbp, %rax
; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: cmovaq %r14, %r15
+; CHECK-NEXT: cmovaq %rbp, %r15
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -282,11 +282,11 @@ define <4 x i128> @test_unsigned_v4i128_v4f32(<4 x float> %f) nounwind {
; CHECK-NEXT: movq %rdx, %r13
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmovbq %rbp, %r13
-; CHECK-NEXT: cmovbq %rbp, %r12
+; CHECK-NEXT: cmovbq %r14, %r13
+; CHECK-NEXT: cmovbq %r14, %r12
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmovaq %r14, %r12
-; CHECK-NEXT: cmovaq %r14, %r13
+; CHECK-NEXT: cmovaq %rbp, %r12
+; CHECK-NEXT: cmovaq %rbp, %r13
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -1149,18 +1149,18 @@ define <8 x i128> @test_unsigned_v8i128_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: callq __extendhfsf2@PLT
; CHECK-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
; CHECK-NEXT: callq __fixunssfti@PLT
-; CHECK-NEXT: xorl %r13d, %r13d
+; CHECK-NEXT: xorl %r12d, %r12d
; CHECK-NEXT: pxor %xmm0, %xmm0
; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT: ucomiss %xmm0, %xmm1
-; CHECK-NEXT: cmovbq %r13, %rdx
-; CHECK-NEXT: cmovbq %r13, %rax
+; CHECK-NEXT: cmovbq %r12, %rdx
+; CHECK-NEXT: cmovbq %r12, %rax
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; CHECK-NEXT: movq $-1, %r12
-; CHECK-NEXT: cmovaq %r12, %rax
+; CHECK-NEXT: movq $-1, %r13
+; CHECK-NEXT: cmovaq %r13, %rax
; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: cmovaq %r12, %rdx
+; CHECK-NEXT: cmovaq %r13, %rdx
; CHECK-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
@@ -1170,12 +1170,12 @@ define <8 x i128> @test_unsigned_v8i128_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmovbq %r13, %rdx
-; CHECK-NEXT: cmovbq %r13, %rax
+; CHECK-NEXT: cmovbq %r12, %rdx
+; CHECK-NEXT: cmovbq %r12, %rax
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmovaq %r12, %rax
+; CHECK-NEXT: cmovaq %r13, %rax
; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: cmovaq %r12, %rdx
+; CHECK-NEXT: cmovaq %r13, %rdx
; CHECK-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: psrlq $48, %xmm0
@@ -1185,12 +1185,12 @@ define <8 x i128> @test_unsigned_v8i128_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmovbq %r13, %rdx
-; CHECK-NEXT: cmovbq %r13, %rax
+; CHECK-NEXT: cmovbq %r12, %rdx
+; CHECK-NEXT: cmovbq %r12, %rax
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmovaq %r12, %rax
+; CHECK-NEXT: cmovaq %r13, %rax
; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: cmovaq %r12, %rdx
+; CHECK-NEXT: cmovaq %r13, %rdx
; CHECK-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
@@ -1200,12 +1200,12 @@ define <8 x i128> @test_unsigned_v8i128_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmovbq %r13, %rdx
-; CHECK-NEXT: cmovbq %r13, %rax
+; CHECK-NEXT: cmovbq %r12, %rdx
+; CHECK-NEXT: cmovbq %r12, %rax
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmovaq %r12, %rax
+; CHECK-NEXT: cmovaq %r13, %rax
; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: cmovaq %r12, %rdx
+; CHECK-NEXT: cmovaq %r13, %rdx
; CHECK-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
@@ -1216,12 +1216,12 @@ define <8 x i128> @test_unsigned_v8i128_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmovbq %r13, %rbp
-; CHECK-NEXT: cmovbq %r13, %rax
+; CHECK-NEXT: cmovbq %r12, %rbp
+; CHECK-NEXT: cmovbq %r12, %rax
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmovaq %r12, %rax
+; CHECK-NEXT: cmovaq %r13, %rax
; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: cmovaq %r12, %rbp
+; CHECK-NEXT: cmovaq %r13, %rbp
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; CHECK-NEXT: callq __extendhfsf2@PLT
@@ -1232,11 +1232,11 @@ define <8 x i128> @test_unsigned_v8i128_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmovbq %r13, %r15
-; CHECK-NEXT: cmovbq %r13, %r14
+; CHECK-NEXT: cmovbq %r12, %r15
+; CHECK-NEXT: cmovbq %r12, %r14
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmovaq %r12, %r14
-; CHECK-NEXT: cmovaq %r12, %r15
+; CHECK-NEXT: cmovaq %r13, %r14
+; CHECK-NEXT: cmovaq %r13, %r15
; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT: callq __extendhfsf2@PLT
diff --git a/llvm/test/CodeGen/X86/statepoint-invoke-ra-enter-at-end.mir b/llvm/test/CodeGen/X86/statepoint-invoke-ra-enter-at-end.mir
index fc0efd275ae25..5f05270729fde 100644
--- a/llvm/test/CodeGen/X86/statepoint-invoke-ra-enter-at-end.mir
+++ b/llvm/test/CodeGen/X86/statepoint-invoke-ra-enter-at-end.mir
@@ -343,9 +343,8 @@ body: |
; CHECK-NEXT: [[MOV64rm4:%[0-9]+]]:gr64 = NOT64r [[MOV64rm4]]
; CHECK-NEXT: CMP64rr [[MOV64rm4]], [[COPY7]], implicit-def $eflags
; CHECK-NEXT: undef [[MOV32ri1:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32ri 0
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:gr64 = COPY [[MOV32ri1]]
- ; CHECK-NEXT: [[MOV64rm4:%[0-9]+]]:gr64 = CMOV64rr [[MOV64rm4]], [[COPY8]], 4, implicit killed $eflags
- ; CHECK-NEXT: INLINEASM &"lock btsq $0,($1)", 1 /* sideeffect attdialect */, 4784137 /* reguse:GR64 */, [[COPY8]], 4784137 /* reguse:GR64 */, undef %56:gr64, 12 /* clobber */, implicit-def dead early-clobber $df, 12 /* clobber */, implicit-def early-clobber $fpsw, 12 /* clobber */, implicit-def dead early-clobber $eflags
+ ; CHECK-NEXT: [[MOV64rm4:%[0-9]+]]:gr64 = CMOV64rr [[MOV64rm4]], [[MOV32ri1]], 4, implicit killed $eflags
+ ; CHECK-NEXT: INLINEASM &"lock btsq $0,($1)", 1 /* side...
[truncated]
|
I've verified that this patch solves downstream problems I bisected to |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add -verify-machineinstrs to the x86 sat-vector tests?
I don't think it's worth it, EXPENSIVE_CHECKS is working as intended |
OK - can you see if you can create useful test coverage from #129740? |
This fixes an expensive chesk failure after 8476a5d. The issue was essentially that getRegClassConstraintEffectForVReg was not doing anything useful, sometimes. If the register passed to it is not present in the instruction, it is a no-op and returns the original classe. The Edit->getReg() register may not be the register as it appears in either the use or def instruction. It may be some split register, so take the register directly from the instruction being rematerialized. Also directly query the constraint from the def instruction, with a hardcoded operand index. This isn't ideal, but all the other rematerialize code makes the same assumption. So far I've been unable to reproduce this with a standalone MIR test. In the original case, stop-before=greedy and running the one pass is not working.
14aeafc
to
432fb2d
Compare
As I mentioned in
And this happens for our downstream target so I can't share a reproducer. A bit problematic. :/ |
FWIW I suspect we have other constraint bugs somewhere. In principle this patch is making the code more conservative. Does your problem go away if you add a getCommonSubClass check to the use and def rc? |
I'm happy to try something out but you'd need to be more specific about what and where in that case. I reduced the problem down to a smallish mir-reproducer and from "-debug" printouts (with added LIS->dump() in RAGreedy::selectOrSplit) for that I see that we go from
in the input to
which might still be correct, even if I don't understand why it has started using the register class "anh_rn" so much, but then after dealing with %30 we get
and this is where the verifer protests on the %46 instruction since the ldx instruction should produce an "anh_0"_7 register and not "anh_rn". Our machine has "a" and "r" registers and the "ldx" instruction can only write to "a" (i.e. "anh_0_7") registers and not to both "a" and "r" ("anh_rn"). (I've used "nop" instructions with implicit defs/uses as much as possible to get rid of our instruction set as much as I could in my reduced reproducer) I also attached the regalloc log (with added LIS->dump()) in case that is interesting. |
…lvm#129727) This fixes an expensive chesk failure after 8476a5d. The issue was essentially that getRegClassConstraintEffectForVReg was not doing anything useful, sometimes. If the register passed to it is not present in the instruction, it is a no-op and returns the original classe. The Edit->getReg() register may not be the register as it appears in either the use or def instruction. It may be some split register, so take the register directly from the instruction being rematerialized. Also directly query the constraint from the def instruction, with a hardcoded operand index. This isn't ideal, but all the other rematerialize code makes the same assumption. So far I've been unable to reproduce this with a standalone MIR test. In the original case, stop-before=greedy and running the one pass is not working.
This fixes an expensive chesk failure after 8476a5d. The issue
was essentially that getRegClassConstraintEffectForVReg was not doing
anything useful, sometimes. If the register passed to it is not present
in the instruction, it is a no-op and returns the original classe. The
Edit->getReg() register may not be the register as it appears in either
the use or def instruction. It may be some split register, so take
the register directly from the instruction being rematerialized.
Also directly query the constraint from the def instruction, with a
hardcoded operand index. This isn't ideal, but all the other rematerialize
code makes the same assumption.
So far I've been unable to reproduce this with a standalone MIR test. In the
original case, stop-before=greedy and running the one pass is not working.