Skip to content

SplitKit: Take register class directly from instruction definition #129727

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 13 additions & 11 deletions llvm/lib/CodeGen/SplitKit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -598,25 +598,27 @@ bool SplitEditor::rematWillIncreaseRestriction(const MachineInstr *DefMI,
if (!UseMI)
return false;

Register Reg = Edit->getReg();
const TargetRegisterClass *RC = MRI.getRegClass(Reg);
// Currently the code assumes that rematerialization only happens for a def at
// operand index 0.
const unsigned DefOperandIdx = 0;
// We want to compute the static register class constraint for the instruction
// def. If it is a smaller subclass than getLargestLegalSuperClass at the use
// site, then rematerializing it will increase the constraints.
const TargetRegisterClass *DefConstrainRC =
DefMI->getRegClassConstraint(DefOperandIdx, &TII, &TRI);
if (!DefConstrainRC)
return false;

const TargetRegisterClass *RC = MRI.getRegClass(Edit->getReg());

// We want to find the register class that can be inflated to after the split
// occurs in recomputeRegClass
const TargetRegisterClass *SuperRC =
TRI.getLargestLegalSuperClass(RC, *MBB.getParent());

// We want to compute the static register class constraint for the instruction
// def. If it is a smaller subclass than getLargestLegalSuperClass at the use
// site, then rematerializing it will increase the constraints.
const TargetRegisterClass *DefConstrainRC =
DefMI->getRegClassConstraintEffectForVReg(Reg, SuperRC, &TII, &TRI,
/*ExploreBundle=*/true);

Register DefReg = DefMI->getOperand(DefOperandIdx).getReg();
const TargetRegisterClass *UseConstrainRC =
UseMI->getRegClassConstraintEffectForVReg(Reg, SuperRC, &TII, &TRI,
UseMI->getRegClassConstraintEffectForVReg(DefReg, SuperRC, &TII, &TRI,
/*ExploreBundle=*/true);

return UseConstrainRC->hasSubClass(DefConstrainRC);
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -verify-machineinstrs -stress-regalloc=2 -o - %s | FileCheck %s

; Check that rematerialization does not produce a register class constraint
; error.

target triple = "thumbv7-apple-ios"

declare ptr @_Znwm()

; Reduced reproducer: two identical 12-byte zeroing memsets to a null
; destination, separated by a call to @_Znwm. In the expected assembly below,
; each memset is expanded inline into three str instructions (to addresses 4,
; 8 and 0), and the zero value and the #4/#8 address constants are
; materialized again with movs after the bl instead of being carried across
; the call.
; NOTE(review): the RUN line uses -stress-regalloc=2, which presumably is what
; forces the split/rematerialization path this test targets -- confirm.
define void @func() {
; CHECK-LABEL: func:
; CHECK: @ %bb.0: @ %bb14
; CHECK-NEXT: str lr, [sp, #-4]!
; CHECK-NEXT: movs r0, #0
; CHECK-NEXT: movs r1, #4
; CHECK-NEXT: str r0, [r1]
; CHECK-NEXT: movs r1, #8
; CHECK-NEXT: str r0, [r1]
; CHECK-NEXT: str r0, [r0]
; CHECK-NEXT: bl __Znwm
; CHECK-NEXT: movs r1, #0
; CHECK-NEXT: movs r0, #4
; CHECK-NEXT: str r1, [r0]
; CHECK-NEXT: movs r0, #8
; CHECK-NEXT: str r1, [r0]
; CHECK-NEXT: str r1, [r1]
; CHECK-NEXT: ldr lr, [sp], #4
; CHECK-NEXT: bx lr
bb14:
call void @llvm.memset.p0.i32(ptr null, i8 0, i32 12, i1 false)
%tmp34 = call ptr @_Znwm()
call void @llvm.memset.p0.i32(ptr null, i8 0, i32 12, i1 false)
ret void
}

declare void @llvm.memset.p0.i32(ptr writeonly captures(none), i8, i32, i1 immarg) #0

attributes #0 = { nocallback nofree nounwind willreturn memory(argmem: write) }
16 changes: 15 additions & 1 deletion llvm/test/CodeGen/ARM/splitkit.ll
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ declare ptr @bar(ptr returned)

declare i32 @__cxa_atexit(ptr, ptr, ptr)

declare ptr @wobble(ptr returned, ptr )
declare ptr @wobble(ptr returned, ptr )

declare i32 @quux(...)

Expand Down Expand Up @@ -236,6 +236,20 @@ bbunwind:
resume { ptr, i32 } undef
}

; Reduced IR that stores 999 through both pointer arguments and issues several
; memset/memcpy intrinsic calls on null pointers around a call to @_Znwm.
; Only the function label is matched, so no particular instruction sequence is
; pinned here.
; NOTE(review): judging by the function name, this presumably guards against a
; register class error during rematerialization -- confirm against the
; originating report.
; CHECK-LABEL: func_reduced_remat_regclass_error:
define void @func_reduced_remat_regclass_error(ptr %global.10, ptr %global.15) {
bb14:
store i32 999, ptr %global.10, align 4
call void @llvm.memset.p0.i32(ptr null, i8 0, i32 12, i1 false)
call void @llvm.memcpy.p0.p0.i32(ptr null, ptr null, i32 60, i1 false)
%tmp34 = call ptr @_Znwm()
store i32 999, ptr %global.15, align 4
call void @llvm.memcpy.p0.p0.i32(ptr %global.10, ptr null, i32 52, i1 false)
call void @llvm.memset.p0.i32(ptr null, i8 0, i32 12, i1 false)
call void @llvm.memset.p0.i32(ptr null, i8 0, i32 12, i1 false)
ret void
}

declare void @llvm.trap()

declare void @llvm.memcpy.p0.p0.i32(ptr , ptr , i32, i1)
Expand Down
56 changes: 28 additions & 28 deletions llvm/test/CodeGen/X86/fptosi-sat-vector-128.ll
Original file line number Diff line number Diff line change
Expand Up @@ -241,21 +241,21 @@ define <4 x i128> @test_signed_v4i128_v4f32(<4 x float> %f) nounwind {
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: callq __fixsfti@PLT
; CHECK-NEXT: movq %rdx, %r15
; CHECK-NEXT: xorl %edx, %edx
; CHECK-NEXT: xorl %r14d, %r14d
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbq %rdx, %rax
; CHECK-NEXT: movabsq $-9223372036854775808, %r14 # imm = 0x8000000000000000
; CHECK-NEXT: cmovbq %r14, %r15
; CHECK-NEXT: cmovbq %r14, %rax
; CHECK-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; CHECK-NEXT: cmovbq %rcx, %r15
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: movabsq $9223372036854775807, %rbp # imm = 0x7FFFFFFFFFFFFFFF
; CHECK-NEXT: cmovaq %rbp, %r15
; CHECK-NEXT: movq $-1, %rcx
; CHECK-NEXT: cmovaq %rcx, %rax
; CHECK-NEXT: ucomiss %xmm0, %xmm0
; CHECK-NEXT: cmovpq %rdx, %rax
; CHECK-NEXT: cmovpq %r14, %rax
; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: cmovpq %rdx, %r15
; CHECK-NEXT: cmovpq %r14, %r15
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
Expand All @@ -264,16 +264,16 @@ define <4 x i128> @test_signed_v4i128_v4f32(<4 x float> %f) nounwind {
; CHECK-NEXT: movq %rdx, %r13
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: movl $0, %eax
; CHECK-NEXT: cmovbq %rax, %r12
; CHECK-NEXT: cmovbq %r14, %r13
; CHECK-NEXT: cmovbq %r14, %r12
; CHECK-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; CHECK-NEXT: cmovbq %rax, %r13
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovaq %rbp, %r13
; CHECK-NEXT: movq $-1, %rcx
; CHECK-NEXT: cmovaq %rcx, %r12
; CHECK-NEXT: movq $-1, %rax
; CHECK-NEXT: cmovaq %rax, %r12
; CHECK-NEXT: ucomiss %xmm0, %xmm0
; CHECK-NEXT: cmovpq %rax, %r12
; CHECK-NEXT: cmovpq %rax, %r13
; CHECK-NEXT: cmovpq %r14, %r12
; CHECK-NEXT: cmovpq %r14, %r13
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
Expand Down Expand Up @@ -1187,14 +1187,14 @@ define <8 x i128> @test_signed_v8i128_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbq %r12, %rax
; CHECK-NEXT: movabsq $-9223372036854775808, %r13 # imm = 0x8000000000000000
; CHECK-NEXT: cmovbq %r13, %rdx
; CHECK-NEXT: movabsq $-9223372036854775808, %rbp # imm = 0x8000000000000000
; CHECK-NEXT: cmovbq %rbp, %rdx
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: movabsq $9223372036854775807, %r15 # imm = 0x7FFFFFFFFFFFFFFF
; CHECK-NEXT: cmovaq %r15, %rdx
; CHECK-NEXT: movq $-1, %rcx
; CHECK-NEXT: cmovaq %rcx, %rax
; CHECK-NEXT: movq $-1, %rbp
; CHECK-NEXT: movq $-1, %r13
; CHECK-NEXT: ucomiss %xmm0, %xmm0
; CHECK-NEXT: cmovpq %r12, %rax
; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
Expand All @@ -1209,10 +1209,10 @@ define <8 x i128> @test_signed_v8i128_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbq %r12, %rax
; CHECK-NEXT: cmovbq %r13, %rdx
; CHECK-NEXT: cmovbq %rbp, %rdx
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovaq %r15, %rdx
; CHECK-NEXT: cmovaq %rbp, %rax
; CHECK-NEXT: cmovaq %r13, %rax
; CHECK-NEXT: movq $-1, %r14
; CHECK-NEXT: ucomiss %xmm0, %xmm0
; CHECK-NEXT: cmovpq %r12, %rax
Expand All @@ -1228,7 +1228,7 @@ define <8 x i128> @test_signed_v8i128_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbq %r12, %rax
; CHECK-NEXT: cmovbq %r13, %rdx
; CHECK-NEXT: cmovbq %rbp, %rdx
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovaq %r15, %rdx
; CHECK-NEXT: cmovaq %r14, %rax
Expand All @@ -1247,7 +1247,8 @@ define <8 x i128> @test_signed_v8i128_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbq %r12, %rax
; CHECK-NEXT: cmovbq %r13, %rdx
; CHECK-NEXT: cmovbq %rbp, %rdx
; CHECK-NEXT: movq %rbp, %r13
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovaq %r15, %rdx
; CHECK-NEXT: cmovaq %r14, %rax
Expand Down Expand Up @@ -1286,17 +1287,16 @@ define <8 x i128> @test_signed_v8i128_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: movl $0, %eax
; CHECK-NEXT: cmovbq %rax, %r14
; CHECK-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; CHECK-NEXT: cmovbq %rcx, %r15
; CHECK-NEXT: cmovbq %r12, %r14
; CHECK-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; CHECK-NEXT: cmovbq %rax, %r15
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovaq %r13, %r15
; CHECK-NEXT: movq $-1, %rcx
; CHECK-NEXT: cmovaq %rcx, %r14
; CHECK-NEXT: movq $-1, %rax
; CHECK-NEXT: cmovaq %rax, %r14
; CHECK-NEXT: ucomiss %xmm0, %xmm0
; CHECK-NEXT: cmovpq %rax, %r14
; CHECK-NEXT: cmovpq %rax, %r15
; CHECK-NEXT: cmovpq %r12, %r14
; CHECK-NEXT: cmovpq %r12, %r15
; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT: callq __extendhfsf2@PLT
Expand Down
72 changes: 36 additions & 36 deletions llvm/test/CodeGen/X86/fptoui-sat-vector-128.ll
Original file line number Diff line number Diff line change
Expand Up @@ -263,17 +263,17 @@ define <4 x i128> @test_unsigned_v4i128_v4f32(<4 x float> %f) nounwind {
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: callq __fixunssfti@PLT
; CHECK-NEXT: movq %rdx, %r15
; CHECK-NEXT: xorl %ebp, %ebp
; CHECK-NEXT: xorl %r14d, %r14d
; CHECK-NEXT: xorps %xmm0, %xmm0
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: ucomiss %xmm0, %xmm1
; CHECK-NEXT: cmovbq %rbp, %r15
; CHECK-NEXT: cmovbq %rbp, %rax
; CHECK-NEXT: cmovbq %r14, %r15
; CHECK-NEXT: cmovbq %r14, %rax
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-NEXT: movq $-1, %r14
; CHECK-NEXT: cmovaq %r14, %rax
; CHECK-NEXT: movq $-1, %rbp
; CHECK-NEXT: cmovaq %rbp, %rax
; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: cmovaq %r14, %r15
; CHECK-NEXT: cmovaq %rbp, %r15
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
Expand All @@ -282,11 +282,11 @@ define <4 x i128> @test_unsigned_v4i128_v4f32(<4 x float> %f) nounwind {
; CHECK-NEXT: movq %rdx, %r13
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbq %rbp, %r13
; CHECK-NEXT: cmovbq %rbp, %r12
; CHECK-NEXT: cmovbq %r14, %r13
; CHECK-NEXT: cmovbq %r14, %r12
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovaq %r14, %r12
; CHECK-NEXT: cmovaq %r14, %r13
; CHECK-NEXT: cmovaq %rbp, %r12
; CHECK-NEXT: cmovaq %rbp, %r13
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
Expand Down Expand Up @@ -1149,18 +1149,18 @@ define <8 x i128> @test_unsigned_v8i128_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: callq __extendhfsf2@PLT
; CHECK-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
; CHECK-NEXT: callq __fixunssfti@PLT
; CHECK-NEXT: xorl %r13d, %r13d
; CHECK-NEXT: xorl %r12d, %r12d
; CHECK-NEXT: pxor %xmm0, %xmm0
; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT: ucomiss %xmm0, %xmm1
; CHECK-NEXT: cmovbq %r13, %rdx
; CHECK-NEXT: cmovbq %r13, %rax
; CHECK-NEXT: cmovbq %r12, %rdx
; CHECK-NEXT: cmovbq %r12, %rax
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-NEXT: movq $-1, %r12
; CHECK-NEXT: cmovaq %r12, %rax
; CHECK-NEXT: movq $-1, %r13
; CHECK-NEXT: cmovaq %r13, %rax
; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: cmovaq %r12, %rdx
; CHECK-NEXT: cmovaq %r13, %rdx
; CHECK-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
Expand All @@ -1170,12 +1170,12 @@ define <8 x i128> @test_unsigned_v8i128_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbq %r13, %rdx
; CHECK-NEXT: cmovbq %r13, %rax
; CHECK-NEXT: cmovbq %r12, %rdx
; CHECK-NEXT: cmovbq %r12, %rax
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovaq %r12, %rax
; CHECK-NEXT: cmovaq %r13, %rax
; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: cmovaq %r12, %rdx
; CHECK-NEXT: cmovaq %r13, %rdx
; CHECK-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: psrlq $48, %xmm0
Expand All @@ -1185,12 +1185,12 @@ define <8 x i128> @test_unsigned_v8i128_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbq %r13, %rdx
; CHECK-NEXT: cmovbq %r13, %rax
; CHECK-NEXT: cmovbq %r12, %rdx
; CHECK-NEXT: cmovbq %r12, %rax
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovaq %r12, %rax
; CHECK-NEXT: cmovaq %r13, %rax
; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: cmovaq %r12, %rdx
; CHECK-NEXT: cmovaq %r13, %rdx
; CHECK-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
Expand All @@ -1200,12 +1200,12 @@ define <8 x i128> @test_unsigned_v8i128_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbq %r13, %rdx
; CHECK-NEXT: cmovbq %r13, %rax
; CHECK-NEXT: cmovbq %r12, %rdx
; CHECK-NEXT: cmovbq %r12, %rax
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovaq %r12, %rax
; CHECK-NEXT: cmovaq %r13, %rax
; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: cmovaq %r12, %rdx
; CHECK-NEXT: cmovaq %r13, %rdx
; CHECK-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
Expand All @@ -1216,12 +1216,12 @@ define <8 x i128> @test_unsigned_v8i128_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbq %r13, %rbp
; CHECK-NEXT: cmovbq %r13, %rax
; CHECK-NEXT: cmovbq %r12, %rbp
; CHECK-NEXT: cmovbq %r12, %rax
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovaq %r12, %rax
; CHECK-NEXT: cmovaq %r13, %rax
; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: cmovaq %r12, %rbp
; CHECK-NEXT: cmovaq %r13, %rbp
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; CHECK-NEXT: callq __extendhfsf2@PLT
Expand All @@ -1232,11 +1232,11 @@ define <8 x i128> @test_unsigned_v8i128_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbq %r13, %r15
; CHECK-NEXT: cmovbq %r13, %r14
; CHECK-NEXT: cmovbq %r12, %r15
; CHECK-NEXT: cmovbq %r12, %r14
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovaq %r12, %r14
; CHECK-NEXT: cmovaq %r12, %r15
; CHECK-NEXT: cmovaq %r13, %r14
; CHECK-NEXT: cmovaq %r13, %r15
; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT: callq __extendhfsf2@PLT
Expand Down
Loading
Loading