
Commit fa8038a

[X86] X86FixupVectorConstants - shrink vector load to movsd/movss/movd/movq 'zero upper' instructions
If we're loading a vector constant that is known to be zero in the upper elements, attempt to shrink the constant and scalar-load just the lower 32/64 bits. Prefer this over a broadcast load (even if the broadcast would have used a smaller constant), as scalar loads can sometimes be performed on more ports than broadcast loads.
1 parent (55a7bb0) · commit fa8038a
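As a worked illustration of the shrink test this commit adds: the rewrite only fires when every bit above the low 32 or 64 bits of the pooled constant is zero, which rebuildZeroUpperConstant decides with an APInt::countLeadingZeros comparison, trying the 32-bit form before the 64-bit one. The standalone C++ sketch below mirrors that check only; the helper name fitsInZeroUpperLoad and the sample constant are invented for illustration and are not part of the pass.

// Minimal sketch, not the pass itself: the "can this constant be loaded with a
// narrow scalar load that zeroes the upper elements?" test, mirroring the
// countLeadingZeros comparison in rebuildZeroUpperConstant.
#include "llvm/ADT/APInt.h"
#include <cstdio>

using namespace llvm;

// Hypothetical helper: true if the NumBits-wide constant only carries data in
// its lowest ScalarBitWidth bits, so a movd/movq/movss/movsd-style load
// (which implicitly zeroes the upper vector elements) can replace the full load.
static bool fitsInZeroUpperLoad(const APInt &Bits, unsigned ScalarBitWidth) {
  unsigned NumBits = Bits.getBitWidth();
  return NumBits > ScalarBitWidth &&
         Bits.countLeadingZeros() >= (NumBits - ScalarBitWidth);
}

int main() {
  // A 128-bit constant whose only non-zero element is the low 32 bits,
  // e.g. <4 x i32> <0xFFFFFFFF, 0, 0, 0>.
  APInt C(128, 0xFFFFFFFFULL);
  std::printf("32-bit zero-upper load ok: %d\n", fitsInZeroUpperLoad(C, 32)); // 1
  std::printf("64-bit zero-upper load ok: %d\n", fitsInZeroUpperLoad(C, 64)); // 1
  // The pass tries the narrower width first ({32, OpUpper32} before
  // {64, OpUpper64}), so this constant would become a 32-bit movd/movss load.
  return 0;
}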

File tree

102 files changed, +1735 −2266 lines changed


llvm/lib/Target/X86/X86FixupVectorConstants.cpp

Lines changed: 86 additions & 9 deletions
@@ -67,6 +67,9 @@ FunctionPass *llvm::createX86FixupVectorConstants() {
 static std::optional<APInt> extractConstantBits(const Constant *C) {
   unsigned NumBits = C->getType()->getPrimitiveSizeInBits();
 
+  if (auto *CUndef = dyn_cast<UndefValue>(C))
+    return APInt::getZero(NumBits);
+
   if (auto *CInt = dyn_cast<ConstantInt>(C))
     return CInt->getValue();
 
@@ -80,6 +83,18 @@ static std::optional<APInt> extractConstantBits(const Constant *C) {
         return APInt::getSplat(NumBits, *Bits);
       }
     }
+
+    APInt Bits = APInt::getZero(NumBits);
+    for (unsigned I = 0, E = CV->getNumOperands(); I != E; ++I) {
+      Constant *Elt = CV->getOperand(I);
+      std::optional<APInt> SubBits = extractConstantBits(Elt);
+      if (!SubBits)
+        return std::nullopt;
+      assert(NumBits == (E * SubBits->getBitWidth()) &&
+             "Illegal vector element size");
+      Bits.insertBits(*SubBits, I * SubBits->getBitWidth());
+    }
+    return Bits;
   }
 
   if (auto *CDS = dyn_cast<ConstantDataSequential>(C)) {
@@ -223,6 +238,33 @@ static Constant *rebuildSplatableConstant(const Constant *C,
   return rebuildConstant(OriginalType->getContext(), SclTy, *Splat, NumSclBits);
 }
 
+static Constant *rebuildZeroUpperConstant(const Constant *C,
+                                          unsigned ScalarBitWidth) {
+  Type *Ty = C->getType();
+  Type *SclTy = Ty->getScalarType();
+  unsigned NumBits = Ty->getPrimitiveSizeInBits();
+  unsigned NumSclBits = SclTy->getPrimitiveSizeInBits();
+  LLVMContext &Ctx = C->getContext();
+
+  if (NumBits > ScalarBitWidth) {
+    // Determine if the upper bits are all zero.
+    if (std::optional<APInt> Bits = extractConstantBits(C)) {
+      if (Bits->countLeadingZeros() >= (NumBits - ScalarBitWidth)) {
+        // If the original constant was made of smaller elements, try to retain
+        // those types.
+        if (ScalarBitWidth > NumSclBits && (ScalarBitWidth % NumSclBits) == 0)
+          return rebuildConstant(Ctx, SclTy, *Bits, NumSclBits);
+
+        // Fallback to raw integer bits.
+        APInt RawBits = Bits->zextOrTrunc(ScalarBitWidth);
+        return ConstantInt::get(Ctx, RawBits);
+      }
+    }
+  }
+
+  return nullptr;
+}
+
 bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
                                                      MachineBasicBlock &MBB,
                                                      MachineInstr &MI) {
@@ -263,6 +305,34 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
     return false;
   };
 
+  auto ConvertToZeroUpper = [&](unsigned OpUpper64, unsigned OpUpper32) {
+    unsigned OperandNo = 1;
+    assert(MI.getNumOperands() >= (OperandNo + X86::AddrNumOperands) &&
+           "Unexpected number of operands!");
+
+    if (auto *C = X86::getConstantFromPool(MI, OperandNo)) {
+      // Attempt to detect a suitable splat from increasing splat widths.
+      std::pair<unsigned, unsigned> ZeroUppers[] = {
+          {32, OpUpper32},
+          {64, OpUpper64},
+      };
+      for (auto [BitWidth, OpUpper] : ZeroUppers) {
+        if (OpUpper) {
+          // Construct a suitable splat constant and adjust the MI to
+          // use the new constant pool entry.
+          if (Constant *NewCst = rebuildZeroUpperConstant(C, BitWidth)) {
+            unsigned NewCPI =
+                CP->getConstantPoolIndex(NewCst, Align(BitWidth / 8));
+            MI.setDesc(TII->get(OpUpper));
+            MI.getOperand(OperandNo + X86::AddrDisp).setIndex(NewCPI);
+            return true;
+          }
+        }
+      }
+    }
+    return false;
+  };
+
   // Attempt to convert full width vector loads into broadcast loads.
   switch (Opc) {
   /* FP Loads */
@@ -271,12 +341,13 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
   case X86::MOVUPDrm:
   case X86::MOVUPSrm:
     // TODO: SSE3 MOVDDUP Handling
-    return false;
+    return ConvertToZeroUpper(X86::MOVSDrm, X86::MOVSSrm);
   case X86::VMOVAPDrm:
   case X86::VMOVAPSrm:
   case X86::VMOVUPDrm:
   case X86::VMOVUPSrm:
-    return ConvertToBroadcast(0, 0, X86::VMOVDDUPrm, X86::VBROADCASTSSrm, 0, 0,
+    return ConvertToZeroUpper(X86::VMOVSDrm, X86::VMOVSSrm) ||
+           ConvertToBroadcast(0, 0, X86::VMOVDDUPrm, X86::VBROADCASTSSrm, 0, 0,
                              1);
   case X86::VMOVAPDYrm:
   case X86::VMOVAPSYrm:
@@ -288,7 +359,8 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
   case X86::VMOVAPSZ128rm:
   case X86::VMOVUPDZ128rm:
   case X86::VMOVUPSZ128rm:
-    return ConvertToBroadcast(0, 0, X86::VMOVDDUPZ128rm,
+    return ConvertToZeroUpper(X86::VMOVSDZrm, X86::VMOVSSZrm) ||
+           ConvertToBroadcast(0, 0, X86::VMOVDDUPZ128rm,
                              X86::VBROADCASTSSZ128rm, 0, 0, 1);
   case X86::VMOVAPDZ256rm:
   case X86::VMOVAPSZ256rm:
@@ -305,13 +377,17 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
                              X86::VBROADCASTSDZrm, X86::VBROADCASTSSZrm, 0, 0,
                              1);
   /* Integer Loads */
+  case X86::MOVDQArm:
+  case X86::MOVDQUrm:
+    return ConvertToZeroUpper(X86::MOVQI2PQIrm, X86::MOVDI2PDIrm);
   case X86::VMOVDQArm:
   case X86::VMOVDQUrm:
-    return ConvertToBroadcast(
-        0, 0, HasAVX2 ? X86::VPBROADCASTQrm : X86::VMOVDDUPrm,
-        HasAVX2 ? X86::VPBROADCASTDrm : X86::VBROADCASTSSrm,
-        HasAVX2 ? X86::VPBROADCASTWrm : 0, HasAVX2 ? X86::VPBROADCASTBrm : 0,
-        1);
+    return ConvertToZeroUpper(X86::VMOVQI2PQIrm, X86::VMOVDI2PDIrm) ||
+           ConvertToBroadcast(
+               0, 0, HasAVX2 ? X86::VPBROADCASTQrm : X86::VMOVDDUPrm,
+               HasAVX2 ? X86::VPBROADCASTDrm : X86::VBROADCASTSSrm,
+               HasAVX2 ? X86::VPBROADCASTWrm : 0,
+               HasAVX2 ? X86::VPBROADCASTBrm : 0, 1);
   case X86::VMOVDQAYrm:
   case X86::VMOVDQUYrm:
     return ConvertToBroadcast(
@@ -324,7 +400,8 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
   case X86::VMOVDQA64Z128rm:
   case X86::VMOVDQU32Z128rm:
   case X86::VMOVDQU64Z128rm:
-    return ConvertToBroadcast(0, 0, X86::VPBROADCASTQZ128rm,
+    return ConvertToZeroUpper(X86::VMOVQI2PQIZrm, X86::VMOVDI2PDIZrm) ||
+           ConvertToBroadcast(0, 0, X86::VPBROADCASTQZ128rm,
                              X86::VPBROADCASTDZ128rm,
                              HasBWI ? X86::VPBROADCASTWZ128rm : 0,
                              HasBWI ? X86::VPBROADCASTBZ128rm : 0, 1);

llvm/test/CodeGen/X86/2011-20-21-zext-ui2fp.ll

Lines changed: 1 addition & 1 deletion
@@ -7,7 +7,7 @@
 define void @ui_to_fp_conv(ptr nocapture %aFOO, ptr nocapture %RET) nounwind {
 ; CHECK-LABEL: ui_to_fp_conv:
 ; CHECK: # %bb.0: # %allocas
-; CHECK-NEXT: movaps {{.*#+}} xmm0 = [1.0E+0,1.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.0E+0,1.0E+0,0.0E+0,0.0E+0]
 ; CHECK-NEXT: xorps %xmm1, %xmm1
 ; CHECK-NEXT: movups %xmm1, 16(%rsi)
 ; CHECK-NEXT: movups %xmm0, (%rsi)

llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll

Lines changed: 2 additions & 3 deletions
@@ -1053,7 +1053,7 @@ define void @vec256_i8_widen_to_i16_factor2_broadcast_to_v16i16_factor16(ptr %in
 ; SSE42-NEXT: paddb 48(%rsi), %xmm2
 ; SSE42-NEXT: paddb (%rsi), %xmm0
 ; SSE42-NEXT: paddb 32(%rsi), %xmm1
-; SSE42-NEXT: movdqa {{.*#+}} xmm3 = [1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u]
+; SSE42-NEXT: movq {{.*#+}} xmm3 = [1,3,5,7,9,11,13,15,0,0,0,0,0,0,0,0]
 ; SSE42-NEXT: pshufb %xmm3, %xmm1
 ; SSE42-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
 ; SSE42-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
@@ -1075,8 +1075,7 @@ define void @vec256_i8_widen_to_i16_factor2_broadcast_to_v16i16_factor16(ptr %in
 ; AVX-NEXT: vpaddb 48(%rsi), %xmm2, %xmm2
 ; AVX-NEXT: vpaddb (%rsi), %xmm0, %xmm0
 ; AVX-NEXT: vpaddb 32(%rsi), %xmm1, %xmm1
-; AVX-NEXT: vmovddup {{.*#+}} xmm3 = [1,3,5,7,9,11,13,15,1,3,5,7,9,11,13,15]
-; AVX-NEXT: # xmm3 = mem[0,0]
+; AVX-NEXT: vmovq {{.*#+}} xmm3 = [1,3,5,7,9,11,13,15,0,0,0,0,0,0,0,0]
 ; AVX-NEXT: vpshufb %xmm3, %xmm1, %xmm1
 ; AVX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
 ; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]

llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll

Lines changed: 2 additions & 3 deletions
@@ -875,7 +875,7 @@ define void @vec256_i8_widen_to_i16_factor2_broadcast_to_v16i16_factor16(ptr %in
 ; SSE42-NEXT: movdqa (%rdi), %xmm0
 ; SSE42-NEXT: movdqa 32(%rdi), %xmm1
 ; SSE42-NEXT: movdqa 48(%rdi), %xmm2
-; SSE42-NEXT: movdqa {{.*#+}} xmm3 = [1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u]
+; SSE42-NEXT: movq {{.*#+}} xmm3 = [1,3,5,7,9,11,13,15,0,0,0,0,0,0,0,0]
 ; SSE42-NEXT: pshufb %xmm3, %xmm1
 ; SSE42-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
 ; SSE42-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
@@ -894,8 +894,7 @@ define void @vec256_i8_widen_to_i16_factor2_broadcast_to_v16i16_factor16(ptr %in
 ; AVX-NEXT: vmovdqa (%rdi), %xmm0
 ; AVX-NEXT: vmovdqa 32(%rdi), %xmm1
 ; AVX-NEXT: vmovdqa 48(%rdi), %xmm2
-; AVX-NEXT: vmovddup {{.*#+}} xmm3 = [1,3,5,7,9,11,13,15,1,3,5,7,9,11,13,15]
-; AVX-NEXT: # xmm3 = mem[0,0]
+; AVX-NEXT: vmovq {{.*#+}} xmm3 = [1,3,5,7,9,11,13,15,0,0,0,0,0,0,0,0]
 ; AVX-NEXT: vpshufb %xmm3, %xmm1, %xmm1
 ; AVX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
 ; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]

llvm/test/CodeGen/X86/avx-load-store.ll

Lines changed: 1 addition & 1 deletion
@@ -220,7 +220,7 @@ define void @f_f() nounwind {
 ; CHECK-NEXT: testb %al, %al
 ; CHECK-NEXT: jne .LBB9_4
 ; CHECK-NEXT: # %bb.3: # %cif_mixed_test_all
-; CHECK-NEXT: vmovaps {{.*#+}} xmm0 = [4294967295,0,0,0]
+; CHECK-NEXT: vmovss {{.*#+}} xmm0 = [4294967295,0,0,0]
 ; CHECK-NEXT: vmaskmovps %ymm0, %ymm0, (%rax)
 ; CHECK-NEXT: .LBB9_4: # %cif_mixed_test_any_check
 ;

llvm/test/CodeGen/X86/avx2-arith.ll

Lines changed: 1 addition & 1 deletion
@@ -234,7 +234,7 @@ define <8 x i16> @mul_const8(<8 x i16> %x) {
 define <8 x i32> @mul_const9(<8 x i32> %x) {
 ; CHECK-LABEL: mul_const9:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vmovdqa {{.*#+}} xmm1 = [2,0,0,0]
+; CHECK-NEXT: vmovd {{.*#+}} xmm1 = [2,0,0,0]
 ; CHECK-NEXT: vpmulld %ymm1, %ymm0, %ymm0
 ; CHECK-NEXT: ret{{[l|q]}}
   %y = mul <8 x i32> %x, <i32 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>

llvm/test/CodeGen/X86/avx512-shuffles/shuffle-chained-bf16.ll

Lines changed: 1 addition & 1 deletion
@@ -13,7 +13,7 @@ define <2 x bfloat> @shuffle_chained_v32bf16_v2bf16(<32 x bfloat> %a) {
 ; CHECK-NEXT: .cfi_def_cfa_register %rbp
 ; CHECK-NEXT: andq $-64, %rsp
 ; CHECK-NEXT: subq $128, %rsp
-; CHECK-NEXT: vpbroadcastd {{.*#+}} xmm1 = [0,16,0,16,0,16,0,16]
+; CHECK-NEXT: vmovd {{.*#+}} xmm1 = [0,16,0,0,0,0,0,0]
 ; CHECK-NEXT: vpermw %zmm0, %zmm1, %zmm0
 ; CHECK-NEXT: vmovdqa64 %zmm0, (%rsp)
 ; CHECK-NEXT: vmovaps (%rsp), %xmm0

llvm/test/CodeGen/X86/bitreverse.ll

Lines changed: 3 additions & 3 deletions
@@ -587,17 +587,17 @@ define <2 x i16> @fold_v2i16() {
 ;
 ; X64-LABEL: fold_v2i16:
 ; X64: # %bb.0:
-; X64-NEXT: movaps {{.*#+}} xmm0 = [61440,240,u,u,u,u,u,u]
+; X64-NEXT: movss {{.*#+}} xmm0 = [61440,240,0,0,0,0,0,0]
 ; X64-NEXT: retq
 ;
 ; X86XOP-LABEL: fold_v2i16:
 ; X86XOP: # %bb.0:
-; X86XOP-NEXT: vbroadcastss {{.*#+}} xmm0 = [61440,240,61440,240,61440,240,61440,240]
+; X86XOP-NEXT: vmovss {{.*#+}} xmm0 = [61440,240,0,0,0,0,0,0]
 ; X86XOP-NEXT: retl
 ;
 ; GFNI-LABEL: fold_v2i16:
 ; GFNI: # %bb.0:
-; GFNI-NEXT: vbroadcastss {{.*#+}} xmm0 = [61440,240,61440,240,61440,240,61440,240]
+; GFNI-NEXT: vmovss {{.*#+}} xmm0 = [61440,240,0,0,0,0,0,0]
 ; GFNI-NEXT: retq
   %b = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> <i16 15, i16 3840>)
   ret <2 x i16> %b

llvm/test/CodeGen/X86/combine-srl.ll

Lines changed: 2 additions & 2 deletions
@@ -356,7 +356,7 @@ define <4 x i32> @combine_vec_lshr_lzcnt_bit1(<4 x i32> %x) {
 ; SSE-LABEL: combine_vec_lshr_lzcnt_bit1:
 ; SSE: # %bb.0:
 ; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE-NEXT: movdqa {{.*#+}} xmm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
+; SSE-NEXT: movq {{.*#+}} xmm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
 ; SSE-NEXT: movdqa %xmm1, %xmm2
 ; SSE-NEXT: pshufb %xmm0, %xmm2
 ; SSE-NEXT: psrlw $4, %xmm0
@@ -378,7 +378,7 @@ define <4 x i32> @combine_vec_lshr_lzcnt_bit1(<4 x i32> %x) {
 ; AVX-LABEL: combine_vec_lshr_lzcnt_bit1:
 ; AVX: # %bb.0:
 ; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
+; AVX-NEXT: vmovq {{.*#+}} xmm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
 ; AVX-NEXT: vpshufb %xmm0, %xmm1, %xmm2
 ; AVX-NEXT: vpsrlw $4, %xmm0, %xmm0
 ; AVX-NEXT: vpxor %xmm3, %xmm3, %xmm3

llvm/test/CodeGen/X86/combine-subo.ll

Lines changed: 2 additions & 2 deletions
@@ -217,13 +217,13 @@ define { <4 x i8>, <4 x i1> } @always_usub_const_vector() nounwind {
 define { <4 x i8>, <4 x i1> } @never_usub_const_vector() nounwind {
 ; SSE-LABEL: never_usub_const_vector:
 ; SSE: # %bb.0:
-; SSE-NEXT: movaps {{.*#+}} xmm0 = [127,255,0,254,u,u,u,u,u,u,u,u,u,u,u,u]
+; SSE-NEXT: movss {{.*#+}} xmm0 = [127,255,0,254,0,0,0,0,0,0,0,0,0,0,0,0]
 ; SSE-NEXT: xorps %xmm1, %xmm1
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: never_usub_const_vector:
 ; AVX: # %bb.0:
-; AVX-NEXT: vbroadcastss {{.*#+}} xmm0 = [127,255,0,254,127,255,0,254,127,255,0,254,127,255,0,254]
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = [127,255,0,254,0,0,0,0,0,0,0,0,0,0,0,0]
 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
 ; AVX-NEXT: retq
   %x = call { <4 x i8>, <4 x i1> } @llvm.usub.with.overflow.v4i8(<4 x i8> <i8 255, i8 255, i8 255, i8 255>, <4 x i8> <i8 128, i8 0, i8 255, i8 1>)

llvm/test/CodeGen/X86/constant-pool-sharing.ll

Lines changed: 2 additions & 2 deletions
@@ -77,7 +77,7 @@ define void @store_repeated_constants(ptr %lo, ptr %hi) {
 ; SSE-LINUX: # %bb.0:
 ; SSE-LINUX-NEXT: xorps %xmm0, %xmm0
 ; SSE-LINUX-NEXT: movaps %xmm0, 48(%rdi)
-; SSE-LINUX-NEXT: movaps {{.*#+}} xmm1 = [18446744073709551615,0]
+; SSE-LINUX-NEXT: movsd {{.*#+}} xmm1 = [18446744073709551615,0]
 ; SSE-LINUX-NEXT: movaps %xmm1, 32(%rdi)
 ; SSE-LINUX-NEXT: movaps %xmm1, 16(%rdi)
 ; SSE-LINUX-NEXT: movaps %xmm1, (%rdi)
@@ -92,7 +92,7 @@ define void @store_repeated_constants(ptr %lo, ptr %hi) {
 ; SSE-MSVC: # %bb.0:
 ; SSE-MSVC-NEXT: xorps %xmm0, %xmm0
 ; SSE-MSVC-NEXT: movaps %xmm0, 48(%rcx)
-; SSE-MSVC-NEXT: movaps {{.*#+}} xmm1 = [18446744073709551615,0]
+; SSE-MSVC-NEXT: movsd {{.*#+}} xmm1 = [18446744073709551615,0]
 ; SSE-MSVC-NEXT: movaps %xmm1, 32(%rcx)
 ; SSE-MSVC-NEXT: movaps %xmm1, 16(%rcx)
 ; SSE-MSVC-NEXT: movaps %xmm1, (%rcx)

llvm/test/CodeGen/X86/dpbusd.ll

Lines changed: 1 addition & 1 deletion
@@ -379,7 +379,7 @@ define i32 @vpdpbusd_2xi32(ptr%a, ptr%b, i32 %c, i32 %n) {
 ; AVX512VNNI-LABEL: vpdpbusd_2xi32:
 ; AVX512VNNI: # %bb.0: # %entry
 ; AVX512VNNI-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
-; AVX512VNNI-NEXT: vmovdqa {{.*#+}} xmm1 = [65535,0,0,0]
+; AVX512VNNI-NEXT: vmovd {{.*#+}} xmm1 = [65535,0,0,0]
 ; AVX512VNNI-NEXT: vpandq %zmm1, %zmm0, %zmm0
 ; AVX512VNNI-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
 ; AVX512VNNI-NEXT: vpandq %zmm1, %zmm2, %zmm1

llvm/test/CodeGen/X86/dpbusd_const.ll

Lines changed: 3 additions & 3 deletions
@@ -108,7 +108,7 @@ define i32 @mul_4xi8_cs(<4 x i8> %a, i32 %c) {
 ; AVXVNNI: # %bb.0: # %entry
 ; AVXVNNI-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; AVXVNNI-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
-; AVXVNNI-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,255,0,0,0,0,0,0,0,0,0,0,0,0]
+; AVXVNNI-NEXT: vmovd {{.*#+}} xmm2 = [0,1,2,255,0,0,0,0,0,0,0,0,0,0,0,0]
 ; AVXVNNI-NEXT: {vex} vpdpbusd %xmm0, %xmm2, %xmm1
 ; AVXVNNI-NEXT: vmovd %xmm1, %eax
 ; AVXVNNI-NEXT: addl %edi, %eax
@@ -118,7 +118,7 @@ define i32 @mul_4xi8_cs(<4 x i8> %a, i32 %c) {
 ; AVX512VNNI: # %bb.0: # %entry
 ; AVX512VNNI-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; AVX512VNNI-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
-; AVX512VNNI-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,255,0,0,0,0,0,0,0,0,0,0,0,0]
+; AVX512VNNI-NEXT: vmovd {{.*#+}} xmm1 = [0,1,2,255,0,0,0,0,0,0,0,0,0,0,0,0]
 ; AVX512VNNI-NEXT: vpxor %xmm2, %xmm2, %xmm2
 ; AVX512VNNI-NEXT: vpdpbusd %zmm0, %zmm1, %zmm2
 ; AVX512VNNI-NEXT: vmovd %xmm2, %eax
@@ -130,7 +130,7 @@ define i32 @mul_4xi8_cs(<4 x i8> %a, i32 %c) {
 ; AVX512VLVNNI: # %bb.0: # %entry
 ; AVX512VLVNNI-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; AVX512VLVNNI-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
-; AVX512VLVNNI-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,255,0,0,0,0,0,0,0,0,0,0,0,0]
+; AVX512VLVNNI-NEXT: vmovd {{.*#+}} xmm1 = [0,1,2,255,0,0,0,0,0,0,0,0,0,0,0,0]
 ; AVX512VLVNNI-NEXT: vpxor %xmm2, %xmm2, %xmm2
 ; AVX512VLVNNI-NEXT: vpdpbusd %xmm0, %xmm1, %xmm2
 ; AVX512VLVNNI-NEXT: vmovd %xmm2, %eax

llvm/test/CodeGen/X86/expand-vp-cast-intrinsics.ll

Lines changed: 1 addition & 1 deletion
@@ -532,7 +532,7 @@ define <2 x half> @vfptrunc_v2f16_v2f64(<2 x double> %a, <2 x i1> %m, i32 zeroex
 ; AVX512-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
 ; AVX512-NEXT: callq __truncdfhf2@PLT
 ; AVX512-NEXT: vpbroadcastw %xmm0, %xmm1
-; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [4,0,0,0]
+; AVX512-NEXT: vmovss {{.*#+}} xmm0 = [4,0,0,0]
 ; AVX512-NEXT: vpermi2ps (%rsp), %xmm1, %xmm0 # 16-byte Folded Reload
 ; AVX512-NEXT: addq $40, %rsp
 ; AVX512-NEXT: .cfi_def_cfa_offset 8

llvm/test/CodeGen/X86/fcmp-constant.ll

Lines changed: 1 addition & 1 deletion
@@ -92,7 +92,7 @@ define <2 x i64> @fcmp_ueq_v2f64_undef() {
 define <2 x i64> @fcmp_ueq_v2f64_undef_elt() {
 ; CHECK-LABEL: fcmp_ueq_v2f64_undef_elt:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: movaps {{.*#+}} xmm0 = [18446744073709551615,0]
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [18446744073709551615,0]
 ; CHECK-NEXT: retq
   %1 = fcmp ueq <2 x double> <double 0x3FF0000000000000, double 0xFFEFFFFFFFFFFFFF>, <double undef, double 0x3FF0000000000000>
   %2 = sext <2 x i1> %1 to <2 x i64>
