Commit bef25ae

[X86] X86FixupVectorConstants - use explicit register bitwidth for the loaded vector instead of using constant pool bitwidth
Fixes llvm#81136 - we might be loading from a constant pool entry wider than the destination register bitwidth, which throws off the vextload scale calculation. ConvertToBroadcastAVX512 doesn't yet set an explicit bitwidth (it defaults to the constant pool bitwidth) because of the difficulty of looking up the original register width through the fold tables. Since that path only uses rebuildSplatCst, this shouldn't cause any miscompilations, although it might prevent folding to a broadcast if only the lower bits match a splatable pattern.
1 parent af97edf · commit bef25ae
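The failure mode is easy to see with concrete numbers. Below is a minimal standalone sketch, not the pass's real code: the widths are hypothetical, chosen to mirror the pr81136 test further down, and the actual bookkeeping lives in FixupConstant and the rebuild*Cst helpers.

    #include <cstdio>

    int main() {
      // Hypothetical numbers mirroring llvm#81136: a 256-bit constant pool
      // entry is loaded into a 128-bit register, so deriving the extload
      // element width from the pool entry uses the wrong total width.
      unsigned PoolBitWidth = 256; // width of the constant pool entry
      unsigned RegBitWidth = 128;  // width of the destination register
      unsigned NumCstElts = 2;     // e.g. a {VPMOVZXBQrm, 2, 8, ...} fixup

      // Before this patch: scale derived from the pool width, yielding a
      // bogus 128-bit destination element.
      std::printf("pool-derived element width: %u\n", PoolBitWidth / NumCstElts);
      // After this patch: explicit register width, yielding the intended
      // 64-bit element of an i8 -> i64 zero-extending load (vpmovzxbq).
      std::printf("reg-derived element width:  %u\n", RegBitWidth / NumCstElts);
      return 0;
    }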

2 files changed (+21 -17 lines)

llvm/lib/Target/X86/X86FixupVectorConstants.cpp

Lines changed: 20 additions & 15 deletions
@@ -226,6 +226,7 @@ static Constant *rebuildConstant(LLVMContext &Ctx, Type *SclTy,
 // width, built up of potentially smaller scalar values.
 static Constant *rebuildSplatCst(const Constant *C, unsigned /*NumBits*/,
                                  unsigned /*NumElts*/, unsigned SplatBitWidth) {
+  // TODO: Truncate to NumBits once ConvertToBroadcastAVX512 support this.
   std::optional<APInt> Splat = getSplatableConstant(C, SplatBitWidth);
   if (!Splat)
     return nullptr;
@@ -328,7 +329,8 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
     std::function<Constant *(const Constant *, unsigned, unsigned, unsigned)>
         RebuildConstant;
   };
-  auto FixupConstant = [&](ArrayRef<FixupEntry> Fixups, unsigned OperandNo) {
+  auto FixupConstant = [&](ArrayRef<FixupEntry> Fixups, unsigned RegBitWidth,
+                           unsigned OperandNo) {
 #ifdef EXPENSIVE_CHECKS
     assert(llvm::is_sorted(Fixups,
                            [](const FixupEntry &A, const FixupEntry &B) {
@@ -340,7 +342,8 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
     assert(MI.getNumOperands() >= (OperandNo + X86::AddrNumOperands) &&
            "Unexpected number of operands!");
     if (auto *C = X86::getConstantFromPool(MI, OperandNo)) {
-      unsigned RegBitWidth = C->getType()->getPrimitiveSizeInBits();
+      RegBitWidth =
+          RegBitWidth ? RegBitWidth : C->getType()->getPrimitiveSizeInBits();
       for (const FixupEntry &Fixup : Fixups) {
         if (Fixup.Op) {
           // Construct a suitable constant and adjust the MI to use the new
@@ -377,7 +380,7 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
     // TODO: SSE3 MOVDDUP Handling
     return FixupConstant({{X86::MOVSSrm, 1, 32, rebuildZeroUpperCst},
                           {X86::MOVSDrm, 1, 64, rebuildZeroUpperCst}},
-                         1);
+                         128, 1);
   case X86::VMOVAPDrm:
   case X86::VMOVAPSrm:
   case X86::VMOVUPDrm:
@@ -386,15 +389,15 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
                          {X86::VBROADCASTSSrm, 1, 32, rebuildSplatCst},
                          {X86::VMOVSDrm, 1, 64, rebuildZeroUpperCst},
                          {X86::VMOVDDUPrm, 1, 64, rebuildSplatCst}},
-                         1);
+                         128, 1);
   case X86::VMOVAPDYrm:
   case X86::VMOVAPSYrm:
   case X86::VMOVUPDYrm:
   case X86::VMOVUPSYrm:
     return FixupConstant({{X86::VBROADCASTSSYrm, 1, 32, rebuildSplatCst},
                           {X86::VBROADCASTSDYrm, 1, 64, rebuildSplatCst},
                           {X86::VBROADCASTF128rm, 1, 128, rebuildSplatCst}},
-                         1);
+                         256, 1);
   case X86::VMOVAPDZ128rm:
   case X86::VMOVAPSZ128rm:
   case X86::VMOVUPDZ128rm:
@@ -403,7 +406,7 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
                          {X86::VBROADCASTSSZ128rm, 1, 32, rebuildSplatCst},
                          {X86::VMOVSDZrm, 1, 64, rebuildZeroUpperCst},
                          {X86::VMOVDDUPZ128rm, 1, 64, rebuildSplatCst}},
-                         1);
+                         128, 1);
   case X86::VMOVAPDZ256rm:
   case X86::VMOVAPSZ256rm:
   case X86::VMOVUPDZ256rm:
@@ -412,7 +415,7 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
         {{X86::VBROADCASTSSZ256rm, 1, 32, rebuildSplatCst},
          {X86::VBROADCASTSDZ256rm, 1, 64, rebuildSplatCst},
          {X86::VBROADCASTF32X4Z256rm, 1, 128, rebuildSplatCst}},
-        1);
+        256, 1);
   case X86::VMOVAPDZrm:
   case X86::VMOVAPSZrm:
   case X86::VMOVUPDZrm:
@@ -421,7 +424,7 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
                          {X86::VBROADCASTSDZrm, 1, 64, rebuildSplatCst},
                          {X86::VBROADCASTF32X4rm, 1, 128, rebuildSplatCst},
                          {X86::VBROADCASTF64X4rm, 1, 256, rebuildSplatCst}},
-                         1);
+                         512, 1);
   /* Integer Loads */
   case X86::MOVDQArm:
   case X86::MOVDQUrm: {
@@ -440,7 +443,7 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
         {HasSSE41 ? X86::PMOVZXWDrm : 0, 4, 16, rebuildZExtCst},
         {HasSSE41 ? X86::PMOVSXDQrm : 0, 2, 32, rebuildSExtCst},
         {HasSSE41 ? X86::PMOVZXDQrm : 0, 2, 32, rebuildZExtCst}};
-    return FixupConstant(Fixups, 1);
+    return FixupConstant(Fixups, 128, 1);
   }
   case X86::VMOVDQArm:
   case X86::VMOVDQUrm: {
@@ -465,7 +468,7 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
         {X86::VPMOVZXWDrm, 4, 16, rebuildZExtCst},
         {X86::VPMOVSXDQrm, 2, 32, rebuildSExtCst},
         {X86::VPMOVZXDQrm, 2, 32, rebuildZExtCst}};
-    return FixupConstant(Fixups, 1);
+    return FixupConstant(Fixups, 128, 1);
   }
   case X86::VMOVDQAYrm:
   case X86::VMOVDQUYrm: {
@@ -490,7 +493,7 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
         {HasAVX2 ? X86::VPMOVZXWDYrm : 0, 8, 16, rebuildZExtCst},
         {HasAVX2 ? X86::VPMOVSXDQYrm : 0, 4, 32, rebuildSExtCst},
         {HasAVX2 ? X86::VPMOVZXDQYrm : 0, 4, 32, rebuildZExtCst}};
-    return FixupConstant(Fixups, 1);
+    return FixupConstant(Fixups, 256, 1);
   }
   case X86::VMOVDQA32Z128rm:
   case X86::VMOVDQA64Z128rm:
@@ -515,7 +518,7 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
         {X86::VPMOVZXWDZ128rm, 4, 16, rebuildZExtCst},
         {X86::VPMOVSXDQZ128rm, 2, 32, rebuildSExtCst},
         {X86::VPMOVZXDQZ128rm, 2, 32, rebuildZExtCst}};
-    return FixupConstant(Fixups, 1);
+    return FixupConstant(Fixups, 128, 1);
   }
   case X86::VMOVDQA32Z256rm:
   case X86::VMOVDQA64Z256rm:
@@ -539,7 +542,7 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
         {X86::VPMOVZXWDZ256rm, 8, 16, rebuildZExtCst},
         {X86::VPMOVSXDQZ256rm, 4, 32, rebuildSExtCst},
         {X86::VPMOVZXDQZ256rm, 4, 32, rebuildZExtCst}};
-    return FixupConstant(Fixups, 1);
+    return FixupConstant(Fixups, 256, 1);
   }
   case X86::VMOVDQA32Zrm:
   case X86::VMOVDQA64Zrm:
@@ -564,7 +567,7 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
         {X86::VPMOVZXWDZrm, 16, 16, rebuildZExtCst},
         {X86::VPMOVSXDQZrm, 8, 32, rebuildSExtCst},
         {X86::VPMOVZXDQZrm, 8, 32, rebuildZExtCst}};
-    return FixupConstant(Fixups, 1);
+    return FixupConstant(Fixups, 512, 1);
   }
   }
 
@@ -592,7 +595,9 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
       unsigned OpNo = OpBcst32 == 0 ? OpNoBcst64 : OpNoBcst32;
       FixupEntry Fixups[] = {{(int)OpBcst32, 32, 32, rebuildSplatCst},
                              {(int)OpBcst64, 64, 64, rebuildSplatCst}};
-      return FixupConstant(Fixups, OpNo);
+      // TODO: Add support for RegBitWidth, but currently rebuildSplatCst
+      // doesn't require it (defaults to Constant::getPrimitiveSizeInBits).
+      return FixupConstant(Fixups, 0, OpNo);
     }
     return false;
   };
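
Note that the last hunk passes 0 for the new RegBitWidth parameter. A one-line distillation of the fallback semantics (hypothetical helper name, not the pass's actual interface):

    // RegBitWidth == 0 means "no explicit width known": fall back to the
    // constant pool entry's own width, exactly as before this patch.
    unsigned resolveRegBitWidth(unsigned RegBitWidth, unsigned PoolBitWidth) {
      return RegBitWidth ? RegBitWidth : PoolBitWidth;
    }

Because rebuildSplatCst never reads the register width, passing 0 on the ConvertToBroadcastAVX512 path risks at worst a missed broadcast fold, never a miscompile, which is exactly what the TODO comment records.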

llvm/test/CodeGen/X86/pr81136.ll

Lines changed: 1 addition & 2 deletions
@@ -1,15 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=btver2 | FileCheck %s
 
-; FIXME: Should be vpmovzxbq[128,1] instead of vpmovzxbd[128,1,0,0]
 define i64 @PR81136(i32 %a0, i32 %a1, ptr %a2) {
 ; CHECK-LABEL: PR81136:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vmovd %edi, %xmm0
 ; CHECK-NEXT: vmovd %esi, %xmm1
 ; CHECK-NEXT: vmovdqa (%rdx), %ymm2
 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; CHECK-NEXT: vpmovzxbd {{.*#+}} xmm4 = [128,1,0,0]
+; CHECK-NEXT: vpmovzxbq {{.*#+}} xmm4 = [128,1]
 ; CHECK-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm4
 ; CHECK-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
 ; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
