Skip to content

Commit 30afb21

Browse files
committed
Revert "[MCP] Enhance MCP copy Instruction removal for special case (#70778)"
This reverts commit cae46f6. Reverted due to miscompiles. See #73512
1 parent 5f31dbd commit 30afb21

15 files changed

+63
-108
lines changed

llvm/lib/CodeGen/MachineCopyPropagation.cpp

Lines changed: 3 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -175,43 +175,8 @@ class CopyTracker {
175175
if (MachineInstr *MI = I->second.MI) {
176176
std::optional<DestSourcePair> CopyOperands =
177177
isCopyInstr(*MI, TII, UseCopyInstr);
178-
179-
MCRegister Def = CopyOperands->Destination->getReg().asMCReg();
180-
MCRegister Src = CopyOperands->Source->getReg().asMCReg();
181-
182-
markRegsUnavailable(Def, TRI);
183-
184-
// Since we clobber the destination of a copy, the semantic of Src's
185-
// "DefRegs" to contain Def is no longer effectual. We will also need
186-
// to remove the record from the copy maps that indicates Src defined
187-
// Def. Failing to do so might cause the target to miss some
188-
// opportunities to further eliminate redundant copy instructions.
189-
// Consider the following sequence during the
190-
// ForwardCopyPropagateBlock procedure:
191-
// L1: r0 = COPY r9 <- TrackMI
192-
// L2: r0 = COPY r8 <- TrackMI (Remove r9 defined r0 from tracker)
193-
// L3: use r0 <- Remove L2 from MaybeDeadCopies
194-
// L4: early-clobber r9 <- Clobber r9 (L2 is still valid in tracker)
195-
// L5: r0 = COPY r8 <- Remove NopCopy
196-
for (MCRegUnit SrcUnit : TRI.regunits(Src)) {
197-
auto SrcCopy = Copies.find(SrcUnit);
198-
if (SrcCopy != Copies.end() && SrcCopy->second.LastSeenUseInCopy) {
199-
// If SrcCopy defines multiple values, we only need
200-
// to erase the record for Def in DefRegs.
201-
for (auto itr = SrcCopy->second.DefRegs.begin();
202-
itr != SrcCopy->second.DefRegs.end(); itr++) {
203-
if (*itr == Def) {
204-
SrcCopy->second.DefRegs.erase(itr);
205-
// If DefReg becomes empty after removal, we can directly
206-
// remove SrcCopy from the tracker's copy maps.
207-
if (SrcCopy->second.DefRegs.empty()) {
208-
Copies.erase(SrcCopy);
209-
}
210-
break;
211-
}
212-
}
213-
}
214-
}
178+
markRegsUnavailable({CopyOperands->Destination->getReg().asMCReg()},
179+
TRI);
215180
}
216181
// Now we can erase the copy.
217182
Copies.erase(I);
@@ -820,7 +785,6 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
820785
// ...
821786
// %xmm2 = copy %xmm9
822787
Tracker.clobberRegister(Def, *TRI, *TII, UseCopyInstr);
823-
824788
for (const MachineOperand &MO : MI.implicit_operands()) {
825789
if (!MO.isReg() || !MO.isDef())
826790
continue;
@@ -831,6 +795,7 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
831795
}
832796

833797
Tracker.trackCopy(&MI, *TRI, *TII, UseCopyInstr);
798+
834799
continue;
835800
}
836801
}

llvm/test/CodeGen/PowerPC/mma-acc-spill.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,7 @@ define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i
3737
; CHECK-NEXT: std r30, 160(r1) # 8-byte Folded Spill
3838
; CHECK-NEXT: ld r30, 272(r1)
3939
; CHECK-NEXT: xxmtacc acc0
40-
; CHECK-NEXT: xvf16ger2pp acc0, v28, v30
40+
; CHECK-NEXT: xvf16ger2pp acc0, v2, v4
4141
; CHECK-NEXT: xxmfacc acc0
4242
; CHECK-NEXT: stxvp vsp0, 64(r1)
4343
; CHECK-NEXT: stxvp vsp2, 32(r1)
@@ -88,7 +88,7 @@ define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i
8888
; CHECK-BE-NEXT: std r30, 240(r1) # 8-byte Folded Spill
8989
; CHECK-BE-NEXT: ld r30, 368(r1)
9090
; CHECK-BE-NEXT: xxmtacc acc0
91-
; CHECK-BE-NEXT: xvf16ger2pp acc0, v28, v30
91+
; CHECK-BE-NEXT: xvf16ger2pp acc0, v2, v4
9292
; CHECK-BE-NEXT: xxmfacc acc0
9393
; CHECK-BE-NEXT: stxvp vsp0, 112(r1)
9494
; CHECK-BE-NEXT: stxvp vsp2, 144(r1)

llvm/test/CodeGen/PowerPC/mma-outer-product.ll

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -14,17 +14,17 @@ define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i
1414
; CHECK-NEXT: vmr v1, v4
1515
; CHECK-NEXT: vmr v4, v3
1616
; CHECK-NEXT: vmr v0, v2
17-
; CHECK-NEXT: vmr v3, v0
18-
; CHECK-NEXT: ld r3, 96(r1)
1917
; CHECK-NEXT: xxlor vs3, v5, v5
20-
; CHECK-NEXT: vmr v2, v5
18+
; CHECK-NEXT: ld r3, 96(r1)
2119
; CHECK-NEXT: xxlor vs0, v0, v0
2220
; CHECK-NEXT: xxlor vs1, v1, v1
2321
; CHECK-NEXT: xxlor vs2, v4, v4
2422
; CHECK-NEXT: xxmtacc acc0
25-
; CHECK-NEXT: xvi4ger8pp acc0, v0, v4
26-
; CHECK-NEXT: xvf16ger2pp acc0, v0, v1
27-
; CHECK-NEXT: pmxvf32gerpn acc0, v4, v5, 0, 0
23+
; CHECK-NEXT: xvi4ger8pp acc0, v2, v3
24+
; CHECK-NEXT: xvf16ger2pp acc0, v2, v1
25+
; CHECK-NEXT: pmxvf32gerpn acc0, v3, v5, 0, 0
26+
; CHECK-NEXT: vmr v3, v2
27+
; CHECK-NEXT: vmr v2, v5
2828
; CHECK-NEXT: pmxvf64gernp acc0, vsp34, v0, 0, 0
2929
; CHECK-NEXT: xxmfacc acc0
3030
; CHECK-NEXT: stxv vs0, 48(r3)
@@ -38,17 +38,17 @@ define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i
3838
; CHECK-BE-NEXT: vmr v1, v4
3939
; CHECK-BE-NEXT: vmr v4, v3
4040
; CHECK-BE-NEXT: vmr v0, v2
41-
; CHECK-BE-NEXT: vmr v3, v0
42-
; CHECK-BE-NEXT: ld r3, 112(r1)
4341
; CHECK-BE-NEXT: xxlor vs3, v5, v5
44-
; CHECK-BE-NEXT: vmr v2, v5
42+
; CHECK-BE-NEXT: ld r3, 112(r1)
4543
; CHECK-BE-NEXT: xxlor vs0, v0, v0
4644
; CHECK-BE-NEXT: xxlor vs1, v1, v1
4745
; CHECK-BE-NEXT: xxlor vs2, v4, v4
4846
; CHECK-BE-NEXT: xxmtacc acc0
49-
; CHECK-BE-NEXT: xvi4ger8pp acc0, v0, v4
50-
; CHECK-BE-NEXT: xvf16ger2pp acc0, v0, v1
51-
; CHECK-BE-NEXT: pmxvf32gerpn acc0, v4, v5, 0, 0
47+
; CHECK-BE-NEXT: xvi4ger8pp acc0, v2, v3
48+
; CHECK-BE-NEXT: xvf16ger2pp acc0, v2, v1
49+
; CHECK-BE-NEXT: pmxvf32gerpn acc0, v3, v5, 0, 0
50+
; CHECK-BE-NEXT: vmr v3, v2
51+
; CHECK-BE-NEXT: vmr v2, v5
5252
; CHECK-BE-NEXT: pmxvf64gernp acc0, vsp34, v0, 0, 0
5353
; CHECK-BE-NEXT: xxmfacc acc0
5454
; CHECK-BE-NEXT: stxv vs1, 16(r3)

llvm/test/CodeGen/RISCV/machine-cp.mir

Lines changed: 0 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -9,10 +9,6 @@
99
entry:
1010
ret void
1111
}
12-
define void @bar() {
13-
entry:
14-
ret void
15-
}
1612
...
1713
---
1814
name: foo
@@ -25,7 +21,6 @@ body: |
2521
; RV32-NEXT: renamable $v4_v5_v6_v7_v8_v9_v10_v11 = COPY killed renamable $v0_v1_v2_v3_v4_v5_v6_v7
2622
; RV32-NEXT: renamable $v28 = COPY renamable $v8, implicit killed $v28_v29_v30, implicit-def $v28_v29_v30
2723
; RV32-NEXT: PseudoRET implicit $v28
28-
;
2924
; RV64-LABEL: name: foo
3025
; RV64: liveins: $v28_v29_v30, $v8_v9, $v1
3126
; RV64-NEXT: {{ $}}
@@ -37,30 +32,3 @@ body: |
3732
renamable $v28 = COPY renamable $v8, implicit killed $v28_v29_v30, implicit-def $v28_v29_v30
3833
PseudoRET implicit $v28
3934
...
40-
---
41-
name: bar
42-
body: |
43-
bb.0.entry:
44-
liveins: $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17, $x28, $x29, $x30, $x31
45-
; RV32-LABEL: name: bar
46-
; RV32: liveins: $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17, $x28, $x29, $x30, $x31
47-
; RV32-NEXT: {{ $}}
48-
; RV32-NEXT: $v0 = COPY renamable $v8
49-
; RV32-NEXT: renamable $v14m2 = PseudoVLE32_V_M2_MASK undef renamable $v14m2, renamable $x15, $v0, -1, 5 /* e32 */, 1 /* ta, mu */, implicit $vl, implicit $vtype
50-
; RV32-NEXT: early-clobber renamable $v9 = PseudoVMSLE_VI_M2 killed renamable $v10m2, -1, -1, 5 /* e32 */, implicit $vl, implicit $vtype
51-
; RV32-NEXT: PseudoVSE32_V_M2_MASK killed renamable $v14m2, renamable $x9, $v0, -1, 5 /* e32 */, implicit $vl, implicit $vtype
52-
;
53-
; RV64-LABEL: name: bar
54-
; RV64: liveins: $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17, $x28, $x29, $x30, $x31
55-
; RV64-NEXT: {{ $}}
56-
; RV64-NEXT: $v0 = COPY renamable $v8
57-
; RV64-NEXT: renamable $v14m2 = PseudoVLE32_V_M2_MASK undef renamable $v14m2, renamable $x15, $v0, -1, 5 /* e32 */, 1 /* ta, mu */, implicit $vl, implicit $vtype
58-
; RV64-NEXT: early-clobber renamable $v9 = PseudoVMSLE_VI_M2 killed renamable $v10m2, -1, -1, 5 /* e32 */, implicit $vl, implicit $vtype
59-
; RV64-NEXT: PseudoVSE32_V_M2_MASK killed renamable $v14m2, renamable $x9, $v0, -1, 5 /* e32 */, implicit $vl, implicit $vtype
60-
$v0 = COPY killed renamable $v9
61-
$v0 = COPY renamable $v8
62-
renamable $v14m2 = PseudoVLE32_V_M2_MASK undef renamable $v14m2, renamable $x15, $v0, -1, 5, 1, implicit $vl, implicit $vtype
63-
early-clobber renamable $v9 = PseudoVMSLE_VI_M2 killed renamable $v10m2, -1, -1, 5, implicit $vl, implicit $vtype
64-
$v0 = COPY killed renamable $v8
65-
PseudoVSE32_V_M2_MASK killed renamable $v14m2, renamable $x9, $v0, -1, 5, implicit $vl, implicit $vtype
66-
...

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -637,6 +637,7 @@ define <32 x double> @vp_nearbyint_v32f64(<32 x double> %va, <32 x i1> %m, i32 z
637637
; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
638638
; CHECK-NEXT: vfabs.v v16, v24, v0.t
639639
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
640+
; CHECK-NEXT: vmv1r.v v0, v1
640641
; CHECK-NEXT: vmflt.vf v1, v16, fa5, v0.t
641642
; CHECK-NEXT: frflags a0
642643
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma

llvm/test/CodeGen/X86/shift-i128.ll

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -347,6 +347,7 @@ define void @test_lshr_v2i128(<2 x i128> %x, <2 x i128> %a, ptr nocapture %r) no
347347
; i686-NEXT: movl %edx, %ecx
348348
; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
349349
; i686-NEXT: shrdl %cl, %eax, (%esp) # 4-byte Folded Spill
350+
; i686-NEXT: movl %edx, %ecx
350351
; i686-NEXT: shrl %cl, %esi
351352
; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx
352353
; i686-NEXT: movl %esi, 28(%ecx)
@@ -488,6 +489,7 @@ define void @test_ashr_v2i128(<2 x i128> %x, <2 x i128> %a, ptr nocapture %r) no
488489
; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
489490
; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
490491
; i686-NEXT: shrdl %cl, %esi, %ebx
492+
; i686-NEXT: movl %edx, %ecx
491493
; i686-NEXT: sarl %cl, %ebp
492494
; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx
493495
; i686-NEXT: movl %ebp, 28(%ecx)
@@ -621,9 +623,11 @@ define void @test_shl_v2i128(<2 x i128> %x, <2 x i128> %a, ptr nocapture %r) nou
621623
; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
622624
; i686-NEXT: shll %cl, %edi
623625
; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
626+
; i686-NEXT: movl %ecx, %edi
624627
; i686-NEXT: shldl %cl, %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
625628
; i686-NEXT: negl %ebp
626629
; i686-NEXT: movl 64(%esp,%ebp), %esi
630+
; i686-NEXT: movl %edi, %ecx
627631
; i686-NEXT: # kill: def $cl killed $cl killed $ecx
628632
; i686-NEXT: movl (%esp), %edi # 4-byte Reload
629633
; i686-NEXT: shldl %cl, %edi, %esi

llvm/test/CodeGen/X86/shift-i256.ll

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -78,6 +78,7 @@ define void @shift1(i256 %x, i256 %a, ptr nocapture %r) nounwind readnone {
7878
; CHECK-NEXT: movl %eax, %ecx
7979
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
8080
; CHECK-NEXT: shrdl %cl, %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
81+
; CHECK-NEXT: movl %eax, %ecx
8182
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
8283
; CHECK-NEXT: shrdl %cl, %edx, (%esp) # 4-byte Folded Spill
8384
; CHECK-NEXT: movl 28(%esp,%ebp), %edx

llvm/test/CodeGen/X86/smulo-128-legalisation-lowering.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1201,7 +1201,7 @@ define zeroext i1 @smuloi256(i256 %v1, i256 %v2, ptr %res) {
12011201
; X86-NEXT: movl %edx, %ebp
12021202
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
12031203
; X86-NEXT: movl %eax, %ebx
1204-
; X86-NEXT: addl %edx, %ebx
1204+
; X86-NEXT: addl %ebp, %ebx
12051205
; X86-NEXT: adcl $0, %ebp
12061206
; X86-NEXT: movl %ecx, %eax
12071207
; X86-NEXT: movl %ecx, %esi

llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-7.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -14979,7 +14979,7 @@ define void @load_i16_stride7_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
1497914979
; AVX512DQ-SLOW-NEXT: vpblendw {{.*#+}} xmm0 = xmm13[0],xmm0[1],xmm13[2,3,4,5],xmm0[6],xmm13[7]
1498014980
; AVX512DQ-SLOW-NEXT: vmovdqa %ymm12, %ymm7
1498114981
; AVX512DQ-SLOW-NEXT: vpblendd {{.*#+}} ymm13 = ymm12[0,1],ymm8[2,3],ymm12[4,5],ymm8[6,7]
14982-
; AVX512DQ-SLOW-NEXT: vmovdqu64 %ymm16, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
14982+
; AVX512DQ-SLOW-NEXT: vmovdqu %ymm8, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
1498314983
; AVX512DQ-SLOW-NEXT: vpermq {{.*#+}} ymm14 = ymm12[0,1,0,1]
1498414984
; AVX512DQ-SLOW-NEXT: vmovdqu %ymm12, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
1498514985
; AVX512DQ-SLOW-NEXT: vpblendw {{.*#+}} ymm13 = ymm13[0,1,2],ymm14[3],ymm13[4,5,6,7,8,9,10],ymm14[11],ymm13[12,13,14,15]
@@ -15734,7 +15734,7 @@ define void @load_i16_stride7_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
1573415734
; AVX512DQ-FAST-NEXT: vextracti128 $1, %ymm0, %xmm12
1573515735
; AVX512DQ-FAST-NEXT: vpblendw {{.*#+}} xmm0 = xmm12[0],xmm0[1],xmm12[2,3,4,5],xmm0[6],xmm12[7]
1573615736
; AVX512DQ-FAST-NEXT: vpblendd {{.*#+}} ymm12 = ymm4[0],ymm15[1],ymm4[2,3],ymm15[4],ymm4[5,6,7]
15737-
; AVX512DQ-FAST-NEXT: vmovdqa64 %ymm16, %ymm17
15737+
; AVX512DQ-FAST-NEXT: vmovdqa64 %ymm4, %ymm17
1573815738
; AVX512DQ-FAST-NEXT: vmovdqa %ymm15, %ymm13
1573915739
; AVX512DQ-FAST-NEXT: vextracti128 $1, %ymm12, %xmm14
1574015740
; AVX512DQ-FAST-NEXT: vpblendw {{.*#+}} xmm12 = xmm12[0],xmm14[1],xmm12[2,3,4,5],xmm14[6],xmm12[7]

llvm/test/CodeGen/X86/vector-interleaved-load-i64-stride-7.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4642,7 +4642,7 @@ define void @load_i64_stride7_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
46424642
; AVX512BW-ONLY-NEXT: # zmm4 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
46434643
; AVX512BW-ONLY-NEXT: vpermt2q %zmm1, %zmm4, %zmm10
46444644
; AVX512BW-ONLY-NEXT: vmovdqa64 %zmm0, %zmm9
4645-
; AVX512BW-ONLY-NEXT: vpermt2q %zmm5, %zmm29, %zmm9
4645+
; AVX512BW-ONLY-NEXT: vpermt2q %zmm8, %zmm29, %zmm9
46464646
; AVX512BW-ONLY-NEXT: vpermt2q %zmm0, %zmm4, %zmm8
46474647
; AVX512BW-ONLY-NEXT: vmovdqa64 %zmm7, %zmm1
46484648
; AVX512BW-ONLY-NEXT: vpermt2q %zmm12, %zmm7, %zmm3
@@ -5087,7 +5087,7 @@ define void @load_i64_stride7_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
50875087
; AVX512DQBW-ONLY-NEXT: # zmm4 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
50885088
; AVX512DQBW-ONLY-NEXT: vpermt2q %zmm1, %zmm4, %zmm10
50895089
; AVX512DQBW-ONLY-NEXT: vmovdqa64 %zmm0, %zmm9
5090-
; AVX512DQBW-ONLY-NEXT: vpermt2q %zmm5, %zmm29, %zmm9
5090+
; AVX512DQBW-ONLY-NEXT: vpermt2q %zmm8, %zmm29, %zmm9
50915091
; AVX512DQBW-ONLY-NEXT: vpermt2q %zmm0, %zmm4, %zmm8
50925092
; AVX512DQBW-ONLY-NEXT: vmovdqa64 %zmm7, %zmm1
50935093
; AVX512DQBW-ONLY-NEXT: vpermt2q %zmm12, %zmm7, %zmm3

llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-5.ll

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2476,6 +2476,7 @@ define void @load_i8_stride5_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
24762476
; SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,7,7,7]
24772477
; SSE-NEXT: psllq $48, %xmm0
24782478
; SSE-NEXT: packuswb %xmm1, %xmm0
2479+
; SSE-NEXT: movdqa %xmm7, %xmm4
24792480
; SSE-NEXT: movdqa %xmm7, %xmm1
24802481
; SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm5 # 16-byte Reload
24812482
; SSE-NEXT: pandn %xmm5, %xmm1
@@ -2532,7 +2533,7 @@ define void @load_i8_stride5_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
25322533
; SSE-NEXT: pandn %xmm1, %xmm2
25332534
; SSE-NEXT: movdqa %xmm8, %xmm1
25342535
; SSE-NEXT: pandn {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
2535-
; SSE-NEXT: movdqa %xmm7, %xmm0
2536+
; SSE-NEXT: movdqa %xmm4, %xmm0
25362537
; SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm4 # 16-byte Reload
25372538
; SSE-NEXT: pandn %xmm4, %xmm0
25382539
; SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill

llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-7.ll

Lines changed: 14 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -1024,8 +1024,8 @@ define void @load_i8_stride7_vf16(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
10241024
; SSE-NEXT: movdqa {{.*#+}} xmm14 = [65535,0,65535,65535,0,65535,65535,65535]
10251025
; SSE-NEXT: movdqa %xmm9, %xmm7
10261026
; SSE-NEXT: pand %xmm14, %xmm7
1027-
; SSE-NEXT: movdqa %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1028-
; SSE-NEXT: movdqa %xmm5, %xmm15
1027+
; SSE-NEXT: movdqa %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1028+
; SSE-NEXT: movdqa %xmm6, %xmm15
10291029
; SSE-NEXT: pand %xmm14, %xmm15
10301030
; SSE-NEXT: movdqa %xmm11, %xmm3
10311031
; SSE-NEXT: pandn %xmm8, %xmm3
@@ -2148,6 +2148,7 @@ define void @load_i8_stride7_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
21482148
; SSE-NEXT: movdqa %xmm5, %xmm9
21492149
; SSE-NEXT: pand %xmm13, %xmm9
21502150
; SSE-NEXT: por %xmm0, %xmm9
2151+
; SSE-NEXT: movdqa %xmm6, %xmm3
21512152
; SSE-NEXT: movdqa %xmm6, %xmm0
21522153
; SSE-NEXT: pand %xmm13, %xmm0
21532154
; SSE-NEXT: pandn %xmm10, %xmm13
@@ -2184,7 +2185,7 @@ define void @load_i8_stride7_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
21842185
; SSE-NEXT: movdqa %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
21852186
; SSE-NEXT: movdqa %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
21862187
; SSE-NEXT: movdqa %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
2187-
; SSE-NEXT: pandn %xmm6, %xmm2
2188+
; SSE-NEXT: pandn %xmm3, %xmm2
21882189
; SSE-NEXT: por %xmm10, %xmm2
21892190
; SSE-NEXT: movdqa %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
21902191
; SSE-NEXT: movdqa {{.*#+}} xmm7 = [65535,0,65535,65535,65535,65535,65535,65535]
@@ -5450,19 +5451,19 @@ define void @load_i8_stride7_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
54505451
; SSE-NEXT: movdqa %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
54515452
; SSE-NEXT: pand %xmm14, %xmm6
54525453
; SSE-NEXT: movdqa %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
5453-
; SSE-NEXT: movdqa %xmm14, %xmm3
5454+
; SSE-NEXT: movdqa %xmm0, %xmm3
54545455
; SSE-NEXT: movdqa %xmm11, %xmm6
54555456
; SSE-NEXT: pandn %xmm11, %xmm3
54565457
; SSE-NEXT: movdqa %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
5457-
; SSE-NEXT: pand %xmm14, %xmm5
5458+
; SSE-NEXT: pand %xmm0, %xmm5
54585459
; SSE-NEXT: movdqa %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
54595460
; SSE-NEXT: movdqa %xmm2, %xmm3
5460-
; SSE-NEXT: pand %xmm14, %xmm3
5461+
; SSE-NEXT: pand %xmm0, %xmm3
54615462
; SSE-NEXT: movdqa %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
5462-
; SSE-NEXT: movdqa %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
5463-
; SSE-NEXT: movdqa %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
5464-
; SSE-NEXT: movdqa %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
5465-
; SSE-NEXT: movdqa %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
5463+
; SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
5464+
; SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
5465+
; SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
5466+
; SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
54665467
; SSE-NEXT: pandn %xmm1, %xmm0
54675468
; SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
54685469
; SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm3 # 16-byte Reload
@@ -9964,6 +9965,7 @@ define void @load_i8_stride7_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
99649965
; AVX512F-ONLY-FAST-NEXT: vpternlogq $248, %ymm23, %ymm3, %ymm6
99659966
; AVX512F-ONLY-FAST-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm13 # 16-byte Reload
99669967
; AVX512F-ONLY-FAST-NEXT: vpshufb %xmm9, %xmm13, %xmm3
9968+
; AVX512F-ONLY-FAST-NEXT: vmovdqa %xmm0, %xmm14
99679969
; AVX512F-ONLY-FAST-NEXT: vpshufb {{.*#+}} xmm11 = xmm0[2,9,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
99689970
; AVX512F-ONLY-FAST-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm3[0],xmm11[0],xmm3[1],xmm11[1],xmm3[2],xmm11[2],xmm3[3],xmm11[3]
99699971
; AVX512F-ONLY-FAST-NEXT: vinserti64x4 $1, %ymm3, %zmm8, %zmm8
@@ -9992,12 +9994,12 @@ define void @load_i8_stride7_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
99929994
; AVX512F-ONLY-FAST-NEXT: vinserti128 $1, %xmm5, %ymm0, %ymm5
99939995
; AVX512F-ONLY-FAST-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
99949996
; AVX512F-ONLY-FAST-NEXT: vpternlogq $184, %ymm5, %ymm25, %ymm2
9995-
; AVX512F-ONLY-FAST-NEXT: vpshufb {{.*#+}} xmm5 = xmm0[3,10,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
9997+
; AVX512F-ONLY-FAST-NEXT: vpshufb {{.*#+}} xmm5 = xmm14[3,10,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
99969998
; AVX512F-ONLY-FAST-NEXT: vpshufb {{.*#+}} xmm7 = xmm13[5,12,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
99979999
; AVX512F-ONLY-FAST-NEXT: vpunpcklwd {{.*#+}} xmm5 = xmm7[0],xmm5[0],xmm7[1],xmm5[1],xmm7[2],xmm5[2],xmm7[3],xmm5[3]
999810000
; AVX512F-ONLY-FAST-NEXT: vinserti64x4 $1, %ymm5, %zmm11, %zmm5
999910001
; AVX512F-ONLY-FAST-NEXT: vpternlogq $184, %zmm10, %zmm3, %zmm5
10000-
; AVX512F-ONLY-FAST-NEXT: vpshufb %xmm9, %xmm0, %xmm7
10002+
; AVX512F-ONLY-FAST-NEXT: vpshufb %xmm9, %xmm14, %xmm7
1000110003
; AVX512F-ONLY-FAST-NEXT: vpshufb {{.*#+}} xmm9 = xmm13[6,13,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
1000210004
; AVX512F-ONLY-FAST-NEXT: vpunpcklwd {{.*#+}} xmm7 = xmm9[0],xmm7[0],xmm9[1],xmm7[1],xmm9[2],xmm7[2],xmm9[3],xmm7[3]
1000310005
; AVX512F-ONLY-FAST-NEXT: vinserti64x4 $1, %ymm7, %zmm2, %zmm2

0 commit comments

Comments
 (0)