Skip to content

Commit b21663c

Browse files
authored
SplitKit: Take register class directly from instruction definition (#129727)
This fixes an expensive-checks failure after 8476a5d. The issue was essentially that getRegClassConstraintEffectForVReg sometimes did nothing useful: if the register passed to it is not present in the instruction, it is a no-op and returns the original class. The Edit->getReg() register may not be the register as it appears in either the use or def instruction. It may be some split register, so take the register directly from the instruction being rematerialized. Also directly query the constraint from the def instruction, with a hardcoded operand index. This isn't ideal, but all the other rematerialization code makes the same assumption. So far I've been unable to reproduce this with a standalone MIR test. In the original case, using stop-before=greedy and then running just that one pass does not work.
1 parent 4022d78 commit b21663c

7 files changed

+178
-79
lines changed

llvm/lib/CodeGen/SplitKit.cpp

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -598,25 +598,27 @@ bool SplitEditor::rematWillIncreaseRestriction(const MachineInstr *DefMI,
598598
if (!UseMI)
599599
return false;
600600

601-
Register Reg = Edit->getReg();
602-
const TargetRegisterClass *RC = MRI.getRegClass(Reg);
601+
// Currently code assumes rematerialization only happens for a def at 0.
602+
const unsigned DefOperandIdx = 0;
603+
// We want to compute the static register class constraint for the instruction
604+
// def. If it is a smaller subclass than getLargestLegalSuperClass at the use
605+
// site, then rematerializing it will increase the constraints.
606+
const TargetRegisterClass *DefConstrainRC =
607+
DefMI->getRegClassConstraint(DefOperandIdx, &TII, &TRI);
608+
if (!DefConstrainRC)
609+
return false;
610+
611+
const TargetRegisterClass *RC = MRI.getRegClass(Edit->getReg());
603612

604613
// We want to find the register class that can be inflated to after the split
605614
// occurs in recomputeRegClass
606615
const TargetRegisterClass *SuperRC =
607616
TRI.getLargestLegalSuperClass(RC, *MBB.getParent());
608617

609-
// We want to compute the static register class constraint for the instruction
610-
// def. If it is a smaller subclass than getLargestLegalSuperClass at the use
611-
// site, then rematerializing it will increase the constraints.
612-
const TargetRegisterClass *DefConstrainRC =
613-
DefMI->getRegClassConstraintEffectForVReg(Reg, SuperRC, &TII, &TRI,
614-
/*ExploreBundle=*/true);
615-
618+
Register DefReg = DefMI->getOperand(DefOperandIdx).getReg();
616619
const TargetRegisterClass *UseConstrainRC =
617-
UseMI->getRegClassConstraintEffectForVReg(Reg, SuperRC, &TII, &TRI,
620+
UseMI->getRegClassConstraintEffectForVReg(DefReg, SuperRC, &TII, &TRI,
618621
/*ExploreBundle=*/true);
619-
620622
return UseConstrainRC->hasSubClass(DefConstrainRC);
621623
}
622624

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -verify-machineinstrs -stress-regalloc=2 -o - %s | FileCheck %s
3+
4+
; Check that no register class constraint error is produced during
5+
; rematerialization
6+
7+
target triple = "thumbv7-apple-ios"
8+
9+
declare ptr @_Znwm()
10+
11+
define void @func() {
12+
; CHECK-LABEL: func:
13+
; CHECK: @ %bb.0: @ %bb14
14+
; CHECK-NEXT: str lr, [sp, #-4]!
15+
; CHECK-NEXT: movs r0, #0
16+
; CHECK-NEXT: movs r1, #4
17+
; CHECK-NEXT: str r0, [r1]
18+
; CHECK-NEXT: movs r1, #8
19+
; CHECK-NEXT: str r0, [r1]
20+
; CHECK-NEXT: str r0, [r0]
21+
; CHECK-NEXT: bl __Znwm
22+
; CHECK-NEXT: movs r1, #0
23+
; CHECK-NEXT: movs r0, #4
24+
; CHECK-NEXT: str r1, [r0]
25+
; CHECK-NEXT: movs r0, #8
26+
; CHECK-NEXT: str r1, [r0]
27+
; CHECK-NEXT: str r1, [r1]
28+
; CHECK-NEXT: ldr lr, [sp], #4
29+
; CHECK-NEXT: bx lr
30+
bb14:
31+
call void @llvm.memset.p0.i32(ptr null, i8 0, i32 12, i1 false)
32+
%tmp34 = call ptr @_Znwm()
33+
call void @llvm.memset.p0.i32(ptr null, i8 0, i32 12, i1 false)
34+
ret void
35+
}
36+
37+
declare void @llvm.memset.p0.i32(ptr writeonly captures(none), i8, i32, i1 immarg) #0
38+
39+
attributes #0 = { nocallback nofree nounwind willreturn memory(argmem: write) }

llvm/test/CodeGen/ARM/splitkit.ll

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ declare ptr @bar(ptr returned)
9494

9595
declare i32 @__cxa_atexit(ptr, ptr, ptr)
9696

97-
declare ptr @wobble(ptr returned, ptr )
97+
declare ptr @wobble(ptr returned, ptr )
9898

9999
declare i32 @quux(...)
100100

@@ -236,6 +236,20 @@ bbunwind:
236236
resume { ptr, i32 } undef
237237
}
238238

239+
; CHECK-LABEL: func_reduced_remat_regclass_error:
240+
define void @func_reduced_remat_regclass_error(ptr %global.10, ptr %global.15) {
241+
bb14:
242+
store i32 999, ptr %global.10, align 4
243+
call void @llvm.memset.p0.i32(ptr null, i8 0, i32 12, i1 false)
244+
call void @llvm.memcpy.p0.p0.i32(ptr null, ptr null, i32 60, i1 false)
245+
%tmp34 = call ptr @_Znwm()
246+
store i32 999, ptr %global.15, align 4
247+
call void @llvm.memcpy.p0.p0.i32(ptr %global.10, ptr null, i32 52, i1 false)
248+
call void @llvm.memset.p0.i32(ptr null, i8 0, i32 12, i1 false)
249+
call void @llvm.memset.p0.i32(ptr null, i8 0, i32 12, i1 false)
250+
ret void
251+
}
252+
239253
declare void @llvm.trap()
240254

241255
declare void @llvm.memcpy.p0.p0.i32(ptr , ptr , i32, i1)

llvm/test/CodeGen/X86/fptosi-sat-vector-128.ll

Lines changed: 28 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -241,21 +241,21 @@ define <4 x i128> @test_signed_v4i128_v4f32(<4 x float> %f) nounwind {
241241
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
242242
; CHECK-NEXT: callq __fixsfti@PLT
243243
; CHECK-NEXT: movq %rdx, %r15
244-
; CHECK-NEXT: xorl %edx, %edx
244+
; CHECK-NEXT: xorl %r14d, %r14d
245245
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
246246
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
247-
; CHECK-NEXT: cmovbq %rdx, %rax
248-
; CHECK-NEXT: movabsq $-9223372036854775808, %r14 # imm = 0x8000000000000000
249-
; CHECK-NEXT: cmovbq %r14, %r15
247+
; CHECK-NEXT: cmovbq %r14, %rax
248+
; CHECK-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
249+
; CHECK-NEXT: cmovbq %rcx, %r15
250250
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
251251
; CHECK-NEXT: movabsq $9223372036854775807, %rbp # imm = 0x7FFFFFFFFFFFFFFF
252252
; CHECK-NEXT: cmovaq %rbp, %r15
253253
; CHECK-NEXT: movq $-1, %rcx
254254
; CHECK-NEXT: cmovaq %rcx, %rax
255255
; CHECK-NEXT: ucomiss %xmm0, %xmm0
256-
; CHECK-NEXT: cmovpq %rdx, %rax
256+
; CHECK-NEXT: cmovpq %r14, %rax
257257
; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
258-
; CHECK-NEXT: cmovpq %rdx, %r15
258+
; CHECK-NEXT: cmovpq %r14, %r15
259259
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
260260
; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
261261
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -264,16 +264,16 @@ define <4 x i128> @test_signed_v4i128_v4f32(<4 x float> %f) nounwind {
264264
; CHECK-NEXT: movq %rdx, %r13
265265
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
266266
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
267-
; CHECK-NEXT: movl $0, %eax
268-
; CHECK-NEXT: cmovbq %rax, %r12
269-
; CHECK-NEXT: cmovbq %r14, %r13
267+
; CHECK-NEXT: cmovbq %r14, %r12
268+
; CHECK-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
269+
; CHECK-NEXT: cmovbq %rax, %r13
270270
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
271271
; CHECK-NEXT: cmovaq %rbp, %r13
272-
; CHECK-NEXT: movq $-1, %rcx
273-
; CHECK-NEXT: cmovaq %rcx, %r12
272+
; CHECK-NEXT: movq $-1, %rax
273+
; CHECK-NEXT: cmovaq %rax, %r12
274274
; CHECK-NEXT: ucomiss %xmm0, %xmm0
275-
; CHECK-NEXT: cmovpq %rax, %r12
276-
; CHECK-NEXT: cmovpq %rax, %r13
275+
; CHECK-NEXT: cmovpq %r14, %r12
276+
; CHECK-NEXT: cmovpq %r14, %r13
277277
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
278278
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
279279
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -1187,14 +1187,14 @@ define <8 x i128> @test_signed_v8i128_v8f16(<8 x half> %f) nounwind {
11871187
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
11881188
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
11891189
; CHECK-NEXT: cmovbq %r12, %rax
1190-
; CHECK-NEXT: movabsq $-9223372036854775808, %r13 # imm = 0x8000000000000000
1191-
; CHECK-NEXT: cmovbq %r13, %rdx
1190+
; CHECK-NEXT: movabsq $-9223372036854775808, %rbp # imm = 0x8000000000000000
1191+
; CHECK-NEXT: cmovbq %rbp, %rdx
11921192
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
11931193
; CHECK-NEXT: movabsq $9223372036854775807, %r15 # imm = 0x7FFFFFFFFFFFFFFF
11941194
; CHECK-NEXT: cmovaq %r15, %rdx
11951195
; CHECK-NEXT: movq $-1, %rcx
11961196
; CHECK-NEXT: cmovaq %rcx, %rax
1197-
; CHECK-NEXT: movq $-1, %rbp
1197+
; CHECK-NEXT: movq $-1, %r13
11981198
; CHECK-NEXT: ucomiss %xmm0, %xmm0
11991199
; CHECK-NEXT: cmovpq %r12, %rax
12001200
; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
@@ -1209,10 +1209,10 @@ define <8 x i128> @test_signed_v8i128_v8f16(<8 x half> %f) nounwind {
12091209
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
12101210
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
12111211
; CHECK-NEXT: cmovbq %r12, %rax
1212-
; CHECK-NEXT: cmovbq %r13, %rdx
1212+
; CHECK-NEXT: cmovbq %rbp, %rdx
12131213
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
12141214
; CHECK-NEXT: cmovaq %r15, %rdx
1215-
; CHECK-NEXT: cmovaq %rbp, %rax
1215+
; CHECK-NEXT: cmovaq %r13, %rax
12161216
; CHECK-NEXT: movq $-1, %r14
12171217
; CHECK-NEXT: ucomiss %xmm0, %xmm0
12181218
; CHECK-NEXT: cmovpq %r12, %rax
@@ -1228,7 +1228,7 @@ define <8 x i128> @test_signed_v8i128_v8f16(<8 x half> %f) nounwind {
12281228
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
12291229
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
12301230
; CHECK-NEXT: cmovbq %r12, %rax
1231-
; CHECK-NEXT: cmovbq %r13, %rdx
1231+
; CHECK-NEXT: cmovbq %rbp, %rdx
12321232
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
12331233
; CHECK-NEXT: cmovaq %r15, %rdx
12341234
; CHECK-NEXT: cmovaq %r14, %rax
@@ -1247,7 +1247,8 @@ define <8 x i128> @test_signed_v8i128_v8f16(<8 x half> %f) nounwind {
12471247
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
12481248
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
12491249
; CHECK-NEXT: cmovbq %r12, %rax
1250-
; CHECK-NEXT: cmovbq %r13, %rdx
1250+
; CHECK-NEXT: cmovbq %rbp, %rdx
1251+
; CHECK-NEXT: movq %rbp, %r13
12511252
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
12521253
; CHECK-NEXT: cmovaq %r15, %rdx
12531254
; CHECK-NEXT: cmovaq %r14, %rax
@@ -1286,17 +1287,16 @@ define <8 x i128> @test_signed_v8i128_v8f16(<8 x half> %f) nounwind {
12861287
; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
12871288
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
12881289
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1289-
; CHECK-NEXT: movl $0, %eax
1290-
; CHECK-NEXT: cmovbq %rax, %r14
1291-
; CHECK-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
1292-
; CHECK-NEXT: cmovbq %rcx, %r15
1290+
; CHECK-NEXT: cmovbq %r12, %r14
1291+
; CHECK-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
1292+
; CHECK-NEXT: cmovbq %rax, %r15
12931293
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
12941294
; CHECK-NEXT: cmovaq %r13, %r15
1295-
; CHECK-NEXT: movq $-1, %rcx
1296-
; CHECK-NEXT: cmovaq %rcx, %r14
1295+
; CHECK-NEXT: movq $-1, %rax
1296+
; CHECK-NEXT: cmovaq %rax, %r14
12971297
; CHECK-NEXT: ucomiss %xmm0, %xmm0
1298-
; CHECK-NEXT: cmovpq %rax, %r14
1299-
; CHECK-NEXT: cmovpq %rax, %r15
1298+
; CHECK-NEXT: cmovpq %r12, %r14
1299+
; CHECK-NEXT: cmovpq %r12, %r15
13001300
; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
13011301
; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
13021302
; CHECK-NEXT: callq __extendhfsf2@PLT

llvm/test/CodeGen/X86/fptoui-sat-vector-128.ll

Lines changed: 36 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -263,17 +263,17 @@ define <4 x i128> @test_unsigned_v4i128_v4f32(<4 x float> %f) nounwind {
263263
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
264264
; CHECK-NEXT: callq __fixunssfti@PLT
265265
; CHECK-NEXT: movq %rdx, %r15
266-
; CHECK-NEXT: xorl %ebp, %ebp
266+
; CHECK-NEXT: xorl %r14d, %r14d
267267
; CHECK-NEXT: xorps %xmm0, %xmm0
268268
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
269269
; CHECK-NEXT: ucomiss %xmm0, %xmm1
270-
; CHECK-NEXT: cmovbq %rbp, %r15
271-
; CHECK-NEXT: cmovbq %rbp, %rax
270+
; CHECK-NEXT: cmovbq %r14, %r15
271+
; CHECK-NEXT: cmovbq %r14, %rax
272272
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
273-
; CHECK-NEXT: movq $-1, %r14
274-
; CHECK-NEXT: cmovaq %r14, %rax
273+
; CHECK-NEXT: movq $-1, %rbp
274+
; CHECK-NEXT: cmovaq %rbp, %rax
275275
; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
276-
; CHECK-NEXT: cmovaq %r14, %r15
276+
; CHECK-NEXT: cmovaq %rbp, %r15
277277
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
278278
; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
279279
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -282,11 +282,11 @@ define <4 x i128> @test_unsigned_v4i128_v4f32(<4 x float> %f) nounwind {
282282
; CHECK-NEXT: movq %rdx, %r13
283283
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
284284
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
285-
; CHECK-NEXT: cmovbq %rbp, %r13
286-
; CHECK-NEXT: cmovbq %rbp, %r12
285+
; CHECK-NEXT: cmovbq %r14, %r13
286+
; CHECK-NEXT: cmovbq %r14, %r12
287287
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
288-
; CHECK-NEXT: cmovaq %r14, %r12
289-
; CHECK-NEXT: cmovaq %r14, %r13
288+
; CHECK-NEXT: cmovaq %rbp, %r12
289+
; CHECK-NEXT: cmovaq %rbp, %r13
290290
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
291291
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
292292
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -1149,18 +1149,18 @@ define <8 x i128> @test_unsigned_v8i128_v8f16(<8 x half> %f) nounwind {
11491149
; CHECK-NEXT: callq __extendhfsf2@PLT
11501150
; CHECK-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
11511151
; CHECK-NEXT: callq __fixunssfti@PLT
1152-
; CHECK-NEXT: xorl %r13d, %r13d
1152+
; CHECK-NEXT: xorl %r12d, %r12d
11531153
; CHECK-NEXT: pxor %xmm0, %xmm0
11541154
; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
11551155
; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero
11561156
; CHECK-NEXT: ucomiss %xmm0, %xmm1
1157-
; CHECK-NEXT: cmovbq %r13, %rdx
1158-
; CHECK-NEXT: cmovbq %r13, %rax
1157+
; CHECK-NEXT: cmovbq %r12, %rdx
1158+
; CHECK-NEXT: cmovbq %r12, %rax
11591159
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1160-
; CHECK-NEXT: movq $-1, %r12
1161-
; CHECK-NEXT: cmovaq %r12, %rax
1160+
; CHECK-NEXT: movq $-1, %r13
1161+
; CHECK-NEXT: cmovaq %r13, %rax
11621162
; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1163-
; CHECK-NEXT: cmovaq %r12, %rdx
1163+
; CHECK-NEXT: cmovaq %r13, %rdx
11641164
; CHECK-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
11651165
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
11661166
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
@@ -1170,12 +1170,12 @@ define <8 x i128> @test_unsigned_v8i128_v8f16(<8 x half> %f) nounwind {
11701170
; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
11711171
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
11721172
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1173-
; CHECK-NEXT: cmovbq %r13, %rdx
1174-
; CHECK-NEXT: cmovbq %r13, %rax
1173+
; CHECK-NEXT: cmovbq %r12, %rdx
1174+
; CHECK-NEXT: cmovbq %r12, %rax
11751175
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1176-
; CHECK-NEXT: cmovaq %r12, %rax
1176+
; CHECK-NEXT: cmovaq %r13, %rax
11771177
; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1178-
; CHECK-NEXT: cmovaq %r12, %rdx
1178+
; CHECK-NEXT: cmovaq %r13, %rdx
11791179
; CHECK-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
11801180
; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
11811181
; CHECK-NEXT: psrlq $48, %xmm0
@@ -1185,12 +1185,12 @@ define <8 x i128> @test_unsigned_v8i128_v8f16(<8 x half> %f) nounwind {
11851185
; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
11861186
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
11871187
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1188-
; CHECK-NEXT: cmovbq %r13, %rdx
1189-
; CHECK-NEXT: cmovbq %r13, %rax
1188+
; CHECK-NEXT: cmovbq %r12, %rdx
1189+
; CHECK-NEXT: cmovbq %r12, %rax
11901190
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1191-
; CHECK-NEXT: cmovaq %r12, %rax
1191+
; CHECK-NEXT: cmovaq %r13, %rax
11921192
; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1193-
; CHECK-NEXT: cmovaq %r12, %rdx
1193+
; CHECK-NEXT: cmovaq %r13, %rdx
11941194
; CHECK-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
11951195
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
11961196
; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
@@ -1200,12 +1200,12 @@ define <8 x i128> @test_unsigned_v8i128_v8f16(<8 x half> %f) nounwind {
12001200
; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
12011201
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
12021202
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1203-
; CHECK-NEXT: cmovbq %r13, %rdx
1204-
; CHECK-NEXT: cmovbq %r13, %rax
1203+
; CHECK-NEXT: cmovbq %r12, %rdx
1204+
; CHECK-NEXT: cmovbq %r12, %rax
12051205
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1206-
; CHECK-NEXT: cmovaq %r12, %rax
1206+
; CHECK-NEXT: cmovaq %r13, %rax
12071207
; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1208-
; CHECK-NEXT: cmovaq %r12, %rdx
1208+
; CHECK-NEXT: cmovaq %r13, %rdx
12091209
; CHECK-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
12101210
; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
12111211
; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
@@ -1216,12 +1216,12 @@ define <8 x i128> @test_unsigned_v8i128_v8f16(<8 x half> %f) nounwind {
12161216
; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
12171217
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
12181218
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1219-
; CHECK-NEXT: cmovbq %r13, %rbp
1220-
; CHECK-NEXT: cmovbq %r13, %rax
1219+
; CHECK-NEXT: cmovbq %r12, %rbp
1220+
; CHECK-NEXT: cmovbq %r12, %rax
12211221
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1222-
; CHECK-NEXT: cmovaq %r12, %rax
1222+
; CHECK-NEXT: cmovaq %r13, %rax
12231223
; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1224-
; CHECK-NEXT: cmovaq %r12, %rbp
1224+
; CHECK-NEXT: cmovaq %r13, %rbp
12251225
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
12261226
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
12271227
; CHECK-NEXT: callq __extendhfsf2@PLT
@@ -1232,11 +1232,11 @@ define <8 x i128> @test_unsigned_v8i128_v8f16(<8 x half> %f) nounwind {
12321232
; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
12331233
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
12341234
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1235-
; CHECK-NEXT: cmovbq %r13, %r15
1236-
; CHECK-NEXT: cmovbq %r13, %r14
1235+
; CHECK-NEXT: cmovbq %r12, %r15
1236+
; CHECK-NEXT: cmovbq %r12, %r14
12371237
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1238-
; CHECK-NEXT: cmovaq %r12, %r14
1239-
; CHECK-NEXT: cmovaq %r12, %r15
1238+
; CHECK-NEXT: cmovaq %r13, %r14
1239+
; CHECK-NEXT: cmovaq %r13, %r15
12401240
; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
12411241
; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
12421242
; CHECK-NEXT: callq __extendhfsf2@PLT

0 commit comments

Comments
 (0)