Skip to content

Commit a4124e4

Browse files
committed
[X86] When storing v1i1/v2i1/v4i1 to memory, make sure we store zeros in the rest of the byte
We can't store garbage in the unused bits. It possible that something like zextload from i1/i2/i4 is created to read the memory. Those zextloads would be legalized assuming the extra bits are 0. I'm not sure that the code in lowerStore is executed for the v1i1/v2i1/v4i1 case. It looks like the DAG combine in combineStore may have converted them to v8i1 first. And I think we're missing some cases to avoid going to the stack in the first place. But I don't have time to investigate those things at the moment so I wanted to focus on the correctness issue. Should fix PR48147. Reviewed By: RKSimon Differential Revision: https://reviews.llvm.org/D91294
1 parent 77efb73 commit a4124e4

13 files changed

+313
-106
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23870,17 +23870,22 @@ static SDValue LowerStore(SDValue Op, const X86Subtarget &Subtarget,
2387023870
// Without AVX512DQ, we need to use a scalar type for v2i1/v4i1/v8i1 stores.
2387123871
if (StoredVal.getValueType().isVector() &&
2387223872
StoredVal.getValueType().getVectorElementType() == MVT::i1) {
23873-
assert(StoredVal.getValueType().getVectorNumElements() <= 8 &&
23874-
"Unexpected VT");
23873+
unsigned NumElts = StoredVal.getValueType().getVectorNumElements();
23874+
assert(NumElts <= 8 && "Unexpected VT");
2387523875
assert(!St->isTruncatingStore() && "Expected non-truncating store");
2387623876
assert(Subtarget.hasAVX512() && !Subtarget.hasDQI() &&
2387723877
"Expected AVX512F without AVX512DQI");
2387823878

23879+
// We must pad with zeros to ensure we store zeroes to any unused bits.
2387923880
StoredVal = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1,
2388023881
DAG.getUNDEF(MVT::v16i1), StoredVal,
2388123882
DAG.getIntPtrConstant(0, dl));
2388223883
StoredVal = DAG.getBitcast(MVT::i16, StoredVal);
2388323884
StoredVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, StoredVal);
23885+
// Make sure we store zeros in the extra bits.
23886+
if (NumElts < 8)
23887+
StoredVal = DAG.getZeroExtendInReg(StoredVal, dl,
23888+
MVT::getIntegerVT(NumElts));
2388423889

2388523890
return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
2388623891
St->getPointerInfo(), St->getOriginalAlign(),
@@ -44971,17 +44976,21 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
4497144976
if (VT == MVT::v1i1 && VT == StVT && Subtarget.hasAVX512() &&
4497244977
StoredVal.getOpcode() == ISD::SCALAR_TO_VECTOR &&
4497344978
StoredVal.getOperand(0).getValueType() == MVT::i8) {
44974-
return DAG.getStore(St->getChain(), dl, StoredVal.getOperand(0),
44979+
SDValue Val = StoredVal.getOperand(0);
44980+
// We must store zeros to the unused bits.
44981+
Val = DAG.getZeroExtendInReg(Val, dl, MVT::i1);
44982+
return DAG.getStore(St->getChain(), dl, Val,
4497544983
St->getBasePtr(), St->getPointerInfo(),
4497644984
St->getOriginalAlign(),
4497744985
St->getMemOperand()->getFlags());
4497844986
}
4497944987

4498044988
// Widen v2i1/v4i1 stores to v8i1.
44981-
if ((VT == MVT::v2i1 || VT == MVT::v4i1) && VT == StVT &&
44989+
if ((VT == MVT::v1i1 || VT == MVT::v2i1 || VT == MVT::v4i1) && VT == StVT &&
4498244990
Subtarget.hasAVX512()) {
4498344991
unsigned NumConcats = 8 / VT.getVectorNumElements();
44984-
SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(VT));
44992+
// We must store zeros to the unused bits.
44993+
SmallVector<SDValue, 4> Ops(NumConcats, DAG.getConstant(0, dl, VT));
4498544994
Ops[0] = StoredVal;
4498644995
StoredVal = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i1, Ops);
4498744996
return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),

llvm/lib/Target/X86/X86InstrAVX512.td

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2921,9 +2921,6 @@ def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
29212921

29222922
// Load/store kreg
29232923
let Predicates = [HasDQI] in {
2924-
def : Pat<(store VK1:$src, addr:$dst),
2925-
(KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>;
2926-
29272924
def : Pat<(v1i1 (load addr:$src)),
29282925
(COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
29292926
def : Pat<(v2i1 (load addr:$src)),

llvm/test/CodeGen/X86/avx512-extract-subvector-load-store.ll

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -593,13 +593,17 @@ define void @load_v2i1_broadcast_1_v1i1_store(<2 x i1>* %a0,<1 x i1>* %a1) {
593593
; AVX512: # %bb.0:
594594
; AVX512-NEXT: kmovb (%rdi), %k0
595595
; AVX512-NEXT: kshiftrb $1, %k0, %k0
596+
; AVX512-NEXT: kshiftlb $7, %k0, %k0
597+
; AVX512-NEXT: kshiftrb $7, %k0, %k0
596598
; AVX512-NEXT: kmovb %k0, (%rsi)
597599
; AVX512-NEXT: retq
598600
;
599601
; AVX512NOTDQ-LABEL: load_v2i1_broadcast_1_v1i1_store:
600602
; AVX512NOTDQ: # %bb.0:
601603
; AVX512NOTDQ-NEXT: kmovw (%rdi), %k0
602604
; AVX512NOTDQ-NEXT: kshiftrw $1, %k0, %k0
605+
; AVX512NOTDQ-NEXT: kshiftlw $15, %k0, %k0
606+
; AVX512NOTDQ-NEXT: kshiftrw $15, %k0, %k0
603607
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
604608
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
605609
; AVX512NOTDQ-NEXT: retq
@@ -619,6 +623,8 @@ define void @load_v3i1_broadcast_1_v1i1_store(<3 x i1>* %a0,<1 x i1>* %a1) {
619623
; AVX512-NEXT: cmovel %ecx, %eax
620624
; AVX512-NEXT: kmovd %eax, %k0
621625
; AVX512-NEXT: kshiftrb $1, %k0, %k0
626+
; AVX512-NEXT: kshiftlb $7, %k0, %k0
627+
; AVX512-NEXT: kshiftrb $7, %k0, %k0
622628
; AVX512-NEXT: kmovb %k0, (%rsi)
623629
; AVX512-NEXT: retq
624630
;
@@ -632,6 +638,8 @@ define void @load_v3i1_broadcast_1_v1i1_store(<3 x i1>* %a0,<1 x i1>* %a1) {
632638
; AVX512NOTDQ-NEXT: cmovel %ecx, %eax
633639
; AVX512NOTDQ-NEXT: kmovd %eax, %k0
634640
; AVX512NOTDQ-NEXT: kshiftrw $1, %k0, %k0
641+
; AVX512NOTDQ-NEXT: kshiftlw $15, %k0, %k0
642+
; AVX512NOTDQ-NEXT: kshiftrw $15, %k0, %k0
635643
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
636644
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
637645
; AVX512NOTDQ-NEXT: retq
@@ -649,6 +657,8 @@ define void @load_v3i1_broadcast_2_v1i1_store(<3 x i1>* %a0,<1 x i1>* %a1) {
649657
; AVX512-NEXT: cmovel %eax, %ecx
650658
; AVX512-NEXT: kmovd %ecx, %k0
651659
; AVX512-NEXT: kshiftrb $2, %k0, %k0
660+
; AVX512-NEXT: kshiftlb $7, %k0, %k0
661+
; AVX512-NEXT: kshiftrb $7, %k0, %k0
652662
; AVX512-NEXT: kmovb %k0, (%rsi)
653663
; AVX512-NEXT: retq
654664
;
@@ -660,6 +670,8 @@ define void @load_v3i1_broadcast_2_v1i1_store(<3 x i1>* %a0,<1 x i1>* %a1) {
660670
; AVX512NOTDQ-NEXT: cmovel %eax, %ecx
661671
; AVX512NOTDQ-NEXT: kmovd %ecx, %k0
662672
; AVX512NOTDQ-NEXT: kshiftrw $2, %k0, %k0
673+
; AVX512NOTDQ-NEXT: kshiftlw $15, %k0, %k0
674+
; AVX512NOTDQ-NEXT: kshiftrw $15, %k0, %k0
663675
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
664676
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
665677
; AVX512NOTDQ-NEXT: retq
@@ -673,13 +685,17 @@ define void @load_v4i1_broadcast_2_v1i1_store(<4 x i1>* %a0,<1 x i1>* %a1) {
673685
; AVX512: # %bb.0:
674686
; AVX512-NEXT: kmovb (%rdi), %k0
675687
; AVX512-NEXT: kshiftrb $2, %k0, %k0
688+
; AVX512-NEXT: kshiftlb $7, %k0, %k0
689+
; AVX512-NEXT: kshiftrb $7, %k0, %k0
676690
; AVX512-NEXT: kmovb %k0, (%rsi)
677691
; AVX512-NEXT: retq
678692
;
679693
; AVX512NOTDQ-LABEL: load_v4i1_broadcast_2_v1i1_store:
680694
; AVX512NOTDQ: # %bb.0:
681695
; AVX512NOTDQ-NEXT: kmovw (%rdi), %k0
682696
; AVX512NOTDQ-NEXT: kshiftrw $2, %k0, %k0
697+
; AVX512NOTDQ-NEXT: kshiftlw $15, %k0, %k0
698+
; AVX512NOTDQ-NEXT: kshiftrw $15, %k0, %k0
683699
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
684700
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
685701
; AVX512NOTDQ-NEXT: retq
@@ -693,13 +709,17 @@ define void @load_v4i1_broadcast_3_v1i1_store(<4 x i1>* %a0,<1 x i1>* %a1) {
693709
; AVX512: # %bb.0:
694710
; AVX512-NEXT: kmovb (%rdi), %k0
695711
; AVX512-NEXT: kshiftrb $3, %k0, %k0
712+
; AVX512-NEXT: kshiftlb $7, %k0, %k0
713+
; AVX512-NEXT: kshiftrb $7, %k0, %k0
696714
; AVX512-NEXT: kmovb %k0, (%rsi)
697715
; AVX512-NEXT: retq
698716
;
699717
; AVX512NOTDQ-LABEL: load_v4i1_broadcast_3_v1i1_store:
700718
; AVX512NOTDQ: # %bb.0:
701719
; AVX512NOTDQ-NEXT: kmovw (%rdi), %k0
702720
; AVX512NOTDQ-NEXT: kshiftrw $3, %k0, %k0
721+
; AVX512NOTDQ-NEXT: kshiftlw $15, %k0, %k0
722+
; AVX512NOTDQ-NEXT: kshiftrw $15, %k0, %k0
703723
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
704724
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
705725
; AVX512NOTDQ-NEXT: retq
@@ -713,13 +733,17 @@ define void @load_v8i1_broadcast_4_v1i1_store(<8 x i1>* %a0,<1 x i1>* %a1) {
713733
; AVX512: # %bb.0:
714734
; AVX512-NEXT: kmovb (%rdi), %k0
715735
; AVX512-NEXT: kshiftrb $4, %k0, %k0
736+
; AVX512-NEXT: kshiftlb $7, %k0, %k0
737+
; AVX512-NEXT: kshiftrb $7, %k0, %k0
716738
; AVX512-NEXT: kmovb %k0, (%rsi)
717739
; AVX512-NEXT: retq
718740
;
719741
; AVX512NOTDQ-LABEL: load_v8i1_broadcast_4_v1i1_store:
720742
; AVX512NOTDQ: # %bb.0:
721743
; AVX512NOTDQ-NEXT: kmovw (%rdi), %k0
722744
; AVX512NOTDQ-NEXT: kshiftrw $4, %k0, %k0
745+
; AVX512NOTDQ-NEXT: kshiftlw $15, %k0, %k0
746+
; AVX512NOTDQ-NEXT: kshiftrw $15, %k0, %k0
723747
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
724748
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
725749
; AVX512NOTDQ-NEXT: retq
@@ -760,13 +784,17 @@ define void @load_v8i1_broadcast_7_v1i1_store(<8 x i1>* %a0,<1 x i1>* %a1) {
760784
; AVX512: # %bb.0:
761785
; AVX512-NEXT: kmovb (%rdi), %k0
762786
; AVX512-NEXT: kshiftrb $7, %k0, %k0
787+
; AVX512-NEXT: kshiftlb $7, %k0, %k0
788+
; AVX512-NEXT: kshiftrb $7, %k0, %k0
763789
; AVX512-NEXT: kmovb %k0, (%rsi)
764790
; AVX512-NEXT: retq
765791
;
766792
; AVX512NOTDQ-LABEL: load_v8i1_broadcast_7_v1i1_store:
767793
; AVX512NOTDQ: # %bb.0:
768794
; AVX512NOTDQ-NEXT: kmovw (%rdi), %k0
769795
; AVX512NOTDQ-NEXT: kshiftrw $7, %k0, %k0
796+
; AVX512NOTDQ-NEXT: kshiftlw $15, %k0, %k0
797+
; AVX512NOTDQ-NEXT: kshiftrw $15, %k0, %k0
770798
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
771799
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
772800
; AVX512NOTDQ-NEXT: retq
@@ -807,13 +835,17 @@ define void @load_v16i1_broadcast_8_v1i1_store(<16 x i1>* %a0,<1 x i1>* %a1) {
807835
; AVX512: # %bb.0:
808836
; AVX512-NEXT: kmovw (%rdi), %k0
809837
; AVX512-NEXT: kshiftrw $8, %k0, %k0
838+
; AVX512-NEXT: kshiftlb $7, %k0, %k0
839+
; AVX512-NEXT: kshiftrb $7, %k0, %k0
810840
; AVX512-NEXT: kmovb %k0, (%rsi)
811841
; AVX512-NEXT: retq
812842
;
813843
; AVX512NOTDQ-LABEL: load_v16i1_broadcast_8_v1i1_store:
814844
; AVX512NOTDQ: # %bb.0:
815845
; AVX512NOTDQ-NEXT: kmovw (%rdi), %k0
816846
; AVX512NOTDQ-NEXT: kshiftrw $8, %k0, %k0
847+
; AVX512NOTDQ-NEXT: kshiftlw $15, %k0, %k0
848+
; AVX512NOTDQ-NEXT: kshiftrw $15, %k0, %k0
817849
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
818850
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
819851
; AVX512NOTDQ-NEXT: retq
@@ -881,13 +913,17 @@ define void @load_v16i1_broadcast_15_v1i1_store(<16 x i1>* %a0,<1 x i1>* %a1) {
881913
; AVX512: # %bb.0:
882914
; AVX512-NEXT: kmovw (%rdi), %k0
883915
; AVX512-NEXT: kshiftrw $15, %k0, %k0
916+
; AVX512-NEXT: kshiftlb $7, %k0, %k0
917+
; AVX512-NEXT: kshiftrb $7, %k0, %k0
884918
; AVX512-NEXT: kmovb %k0, (%rsi)
885919
; AVX512-NEXT: retq
886920
;
887921
; AVX512NOTDQ-LABEL: load_v16i1_broadcast_15_v1i1_store:
888922
; AVX512NOTDQ: # %bb.0:
889923
; AVX512NOTDQ-NEXT: kmovw (%rdi), %k0
890924
; AVX512NOTDQ-NEXT: kshiftrw $15, %k0, %k0
925+
; AVX512NOTDQ-NEXT: kshiftlw $15, %k0, %k0
926+
; AVX512NOTDQ-NEXT: kshiftrw $15, %k0, %k0
891927
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
892928
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
893929
; AVX512NOTDQ-NEXT: retq
@@ -955,13 +991,17 @@ define void @load_v32i1_broadcast_16_v1i1_store(<32 x i1>* %a0,<1 x i1>* %a1) {
955991
; AVX512: # %bb.0:
956992
; AVX512-NEXT: kmovd (%rdi), %k0
957993
; AVX512-NEXT: kshiftrd $16, %k0, %k0
994+
; AVX512-NEXT: kshiftlb $7, %k0, %k0
995+
; AVX512-NEXT: kshiftrb $7, %k0, %k0
958996
; AVX512-NEXT: kmovb %k0, (%rsi)
959997
; AVX512-NEXT: retq
960998
;
961999
; AVX512NOTDQ-LABEL: load_v32i1_broadcast_16_v1i1_store:
9621000
; AVX512NOTDQ: # %bb.0:
9631001
; AVX512NOTDQ-NEXT: kmovd (%rdi), %k0
9641002
; AVX512NOTDQ-NEXT: kshiftrd $16, %k0, %k0
1003+
; AVX512NOTDQ-NEXT: kshiftlw $15, %k0, %k0
1004+
; AVX512NOTDQ-NEXT: kshiftrw $15, %k0, %k0
9651005
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
9661006
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
9671007
; AVX512NOTDQ-NEXT: retq
@@ -1056,13 +1096,17 @@ define void @load_v32i1_broadcast_31_v1i1_store(<32 x i1>* %a0,<1 x i1>* %a1) {
10561096
; AVX512: # %bb.0:
10571097
; AVX512-NEXT: kmovd (%rdi), %k0
10581098
; AVX512-NEXT: kshiftrd $31, %k0, %k0
1099+
; AVX512-NEXT: kshiftlb $7, %k0, %k0
1100+
; AVX512-NEXT: kshiftrb $7, %k0, %k0
10591101
; AVX512-NEXT: kmovb %k0, (%rsi)
10601102
; AVX512-NEXT: retq
10611103
;
10621104
; AVX512NOTDQ-LABEL: load_v32i1_broadcast_31_v1i1_store:
10631105
; AVX512NOTDQ: # %bb.0:
10641106
; AVX512NOTDQ-NEXT: kmovd (%rdi), %k0
10651107
; AVX512NOTDQ-NEXT: kshiftrd $31, %k0, %k0
1108+
; AVX512NOTDQ-NEXT: kshiftlw $15, %k0, %k0
1109+
; AVX512NOTDQ-NEXT: kshiftrw $15, %k0, %k0
10661110
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
10671111
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
10681112
; AVX512NOTDQ-NEXT: retq
@@ -1160,13 +1204,17 @@ define void @load_v64i1_broadcast_32_v1i1_store(<64 x i1>* %a0,<1 x i1>* %a1) {
11601204
; AVX512: # %bb.0:
11611205
; AVX512-NEXT: kmovq (%rdi), %k0
11621206
; AVX512-NEXT: kshiftrq $32, %k0, %k0
1207+
; AVX512-NEXT: kshiftlb $7, %k0, %k0
1208+
; AVX512-NEXT: kshiftrb $7, %k0, %k0
11631209
; AVX512-NEXT: kmovb %k0, (%rsi)
11641210
; AVX512-NEXT: retq
11651211
;
11661212
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_32_v1i1_store:
11671213
; AVX512NOTDQ: # %bb.0:
11681214
; AVX512NOTDQ-NEXT: kmovq (%rdi), %k0
11691215
; AVX512NOTDQ-NEXT: kshiftrq $32, %k0, %k0
1216+
; AVX512NOTDQ-NEXT: kshiftlw $15, %k0, %k0
1217+
; AVX512NOTDQ-NEXT: kshiftrw $15, %k0, %k0
11701218
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
11711219
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
11721220
; AVX512NOTDQ-NEXT: retq
@@ -1286,13 +1334,17 @@ define void @load_v64i1_broadcast_63_v1i1_store(<64 x i1>* %a0,<1 x i1>* %a1) {
12861334
; AVX512: # %bb.0:
12871335
; AVX512-NEXT: kmovq (%rdi), %k0
12881336
; AVX512-NEXT: kshiftrq $63, %k0, %k0
1337+
; AVX512-NEXT: kshiftlb $7, %k0, %k0
1338+
; AVX512-NEXT: kshiftrb $7, %k0, %k0
12891339
; AVX512-NEXT: kmovb %k0, (%rsi)
12901340
; AVX512-NEXT: retq
12911341
;
12921342
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_63_v1i1_store:
12931343
; AVX512NOTDQ: # %bb.0:
12941344
; AVX512NOTDQ-NEXT: kmovq (%rdi), %k0
12951345
; AVX512NOTDQ-NEXT: kshiftrq $63, %k0, %k0
1346+
; AVX512NOTDQ-NEXT: kshiftlw $15, %k0, %k0
1347+
; AVX512NOTDQ-NEXT: kshiftrw $15, %k0, %k0
12961348
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
12971349
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
12981350
; AVX512NOTDQ-NEXT: retq

0 commit comments

Comments
 (0)