Skip to content

Commit 534015e

Browse files
author
Elena Demikhovsky
committed
AVX-512: gather-scatter tests; added foldable instructions;
specified GATHER/SCATTER as heavy (high-latency) instructions. llvm-svn: 189736
1 parent 843657c commit 534015e

File tree

2 files changed

+61
-15
lines changed

2 files changed

+61
-15
lines changed

llvm/lib/Target/X86/X86InstrInfo.cpp

Lines changed: 45 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -375,7 +375,9 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
375375
{ X86::VMOVAPSYrr, X86::VMOVAPSYmr, TB_FOLDED_STORE | TB_ALIGN_32 },
376376
{ X86::VMOVDQAYrr, X86::VMOVDQAYmr, TB_FOLDED_STORE | TB_ALIGN_32 },
377377
{ X86::VMOVUPDYrr, X86::VMOVUPDYmr, TB_FOLDED_STORE },
378-
{ X86::VMOVUPSYrr, X86::VMOVUPSYmr, TB_FOLDED_STORE }
378+
{ X86::VMOVUPSYrr, X86::VMOVUPSYmr, TB_FOLDED_STORE },
379+
// AVX-512 foldable instructions
380+
{ X86::VMOVPDI2DIZrr,X86::VMOVPDI2DIZmr, TB_FOLDED_STORE }
379381
};
380382

381383
for (unsigned i = 0, e = array_lengthof(OpTbl0); i != e; ++i) {
@@ -581,6 +583,14 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
581583
{ X86::TZCNT16rr, X86::TZCNT16rm, 0 },
582584
{ X86::TZCNT32rr, X86::TZCNT32rm, 0 },
583585
{ X86::TZCNT64rr, X86::TZCNT64rm, 0 },
586+
587+
// AVX-512 foldable instructions
588+
{ X86::VMOV64toPQIZrr, X86::VMOVQI2PQIZrm, 0 },
589+
{ X86::VMOVDI2SSZrr, X86::VMOVDI2SSZrm, 0 },
590+
{ X86::VMOVDQA32rr, X86::VMOVDQA32rm, TB_ALIGN_64 },
591+
{ X86::VMOVDQA64rr, X86::VMOVDQA64rm, TB_ALIGN_64 },
592+
{ X86::VMOVDQU32rr, X86::VMOVDQU32rm, 0 },
593+
{ X86::VMOVDQU64rr, X86::VMOVDQU64rm, 0 },
584594
};
585595

586596
for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) {
@@ -1180,12 +1190,35 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
11801190
{ X86::PEXT64rr, X86::PEXT64rm, 0 },
11811191

11821192
// AVX-512 foldable instructions
1193+
{ X86::VPADDDZrr, X86::VPADDDZrm, 0 },
1194+
{ X86::VPADDQZrr, X86::VPADDQZrm, 0 },
1195+
{ X86::VADDPSZrr, X86::VADDPSZrm, 0 },
1196+
{ X86::VADDPDZrr, X86::VADDPDZrm, 0 },
1197+
{ X86::VSUBPSZrr, X86::VSUBPSZrm, 0 },
1198+
{ X86::VSUBPDZrr, X86::VSUBPDZrm, 0 },
1199+
{ X86::VMULPSZrr, X86::VMULPSZrm, 0 },
1200+
{ X86::VMULPDZrr, X86::VMULPDZrm, 0 },
1201+
{ X86::VDIVPSZrr, X86::VDIVPSZrm, 0 },
1202+
{ X86::VDIVPDZrr, X86::VDIVPDZrm, 0 },
1203+
{ X86::VMINPSZrr, X86::VMINPSZrm, 0 },
1204+
{ X86::VMINPDZrr, X86::VMINPDZrm, 0 },
1205+
{ X86::VMAXPSZrr, X86::VMAXPSZrm, 0 },
1206+
{ X86::VMAXPDZrr, X86::VMAXPDZrm, 0 },
11831207
{ X86::VPERMPDZri, X86::VPERMPDZmi, 0 },
11841208
{ X86::VPERMPSZrr, X86::VPERMPSZrm, 0 },
11851209
{ X86::VPERMI2Drr, X86::VPERMI2Drm, 0 },
11861210
{ X86::VPERMI2Qrr, X86::VPERMI2Qrm, 0 },
11871211
{ X86::VPERMI2PSrr, X86::VPERMI2PSrm, 0 },
11881212
{ X86::VPERMI2PDrr, X86::VPERMI2PDrm, 0 },
1213+
{ X86::VPSLLVDZrr, X86::VPSLLVDZrm, 0 },
1214+
{ X86::VPSLLVQZrr, X86::VPSLLVQZrm, 0 },
1215+
{ X86::VPSRAVDZrr, X86::VPSRAVDZrm, 0 },
1216+
{ X86::VPSRLVDZrr, X86::VPSRLVDZrm, 0 },
1217+
{ X86::VPSRLVQZrr, X86::VPSRLVQZrm, 0 },
1218+
{ X86::VSHUFPDZrri, X86::VSHUFPDZrmi, 0 },
1219+
{ X86::VSHUFPSZrri, X86::VSHUFPSZrmi, 0 },
1220+
{ X86::VALIGNQrri, X86::VALIGNQrmi, 0 },
1221+
{ X86::VALIGNDrri, X86::VALIGNDrmi, 0 },
11891222
};
11901223

11911224
for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) {
@@ -4010,6 +4043,8 @@ static bool hasPartialRegUpdate(unsigned Opcode) {
40104043
case X86::Int_VCVTSD2SSrr:
40114044
case X86::VCVTSS2SDrr:
40124045
case X86::Int_VCVTSS2SDrr:
4046+
case X86::VCVTSD2SSZrr:
4047+
case X86::VCVTSS2SDZrr:
40134048
case X86::VRCPSSr:
40144049
case X86::VROUNDSDr:
40154050
case X86::VROUNDSDr_Int:
@@ -5064,6 +5099,15 @@ bool X86InstrInfo::isHighLatencyDef(int opc) const {
50645099
case X86::VSQRTSSm:
50655100
case X86::VSQRTSSm_Int:
50665101
case X86::VSQRTSSr:
5102+
5103+
case X86::VGATHERQPSZrm:
5104+
case X86::VGATHERQPDZrm:
5105+
case X86::VGATHERDPDZrm:
5106+
case X86::VGATHERDPSZrm:
5107+
case X86::VPGATHERQDZrm:
5108+
case X86::VPGATHERQQZrm:
5109+
case X86::VPGATHERDDZrm:
5110+
case X86::VPGATHERDQZrm:
50675111
return true;
50685112
}
50695113
}

llvm/test/CodeGen/X86/avx512-gather-scatter-intrin.ll

Lines changed: 16 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@ declare void @llvm.x86.avx512.scatter.qps.mask.512 (i8*, i8, <8 x i64>, <8 x flo
1010
declare <8 x double> @llvm.x86.avx512.gather.qpd.mask.512 (<8 x double>, i8, <8 x i64>, i8*, i32)
1111
declare void @llvm.x86.avx512.scatter.qpd.mask.512 (i8*, i8, <8 x i64>, <8 x double>, i32)
1212

13-
;CHECK: gather_mask_dps
13+
;CHECK-LABEL: gather_mask_dps
1414
;CHECK: kmovw
1515
;CHECK: vgatherdps
1616
;CHECK: vpadd
@@ -23,7 +23,7 @@ define void @gather_mask_dps(<16 x i32> %ind, <16 x float> %src, i16 %mask, i8*
2323
ret void
2424
}
2525

26-
;CHECK: gather_mask_dpd
26+
;CHECK-LABEL: gather_mask_dpd
2727
;CHECK: kmovw
2828
;CHECK: vgatherdpd
2929
;CHECK: vpadd
@@ -36,7 +36,7 @@ define void @gather_mask_dpd(<8 x i32> %ind, <8 x double> %src, i8 %mask, i8* %b
3636
ret void
3737
}
3838

39-
;CHECK: gather_mask_qps
39+
;CHECK-LABEL: gather_mask_qps
4040
;CHECK: kmovw
4141
;CHECK: vgatherqps
4242
;CHECK: vpadd
@@ -49,7 +49,7 @@ define void @gather_mask_qps(<8 x i64> %ind, <8 x float> %src, i8 %mask, i8* %ba
4949
ret void
5050
}
5151

52-
;CHECK: gather_mask_qpd
52+
;CHECK-LABEL: gather_mask_qpd
5353
;CHECK: kmovw
5454
;CHECK: vgatherqpd
5555
;CHECK: vpadd
@@ -74,7 +74,7 @@ declare void @llvm.x86.avx512.scatter.qpi.mask.512 (i8*, i8, <8 x i64>, <8 x i32
7474
declare <8 x i64> @llvm.x86.avx512.gather.qpq.mask.512 (<8 x i64>, i8, <8 x i64>, i8*, i32)
7575
declare void @llvm.x86.avx512.scatter.qpq.mask.512 (i8*, i8, <8 x i64>, <8 x i64>, i32)
7676

77-
;CHECK: gather_mask_dd
77+
;CHECK-LABEL: gather_mask_dd
7878
;CHECK: kmovw
7979
;CHECK: vpgatherdd
8080
;CHECK: vpadd
@@ -87,7 +87,7 @@ define void @gather_mask_dd(<16 x i32> %ind, <16 x i32> %src, i16 %mask, i8* %ba
8787
ret void
8888
}
8989

90-
;CHECK: gather_mask_qd
90+
;CHECK-LABEL: gather_mask_qd
9191
;CHECK: kmovw
9292
;CHECK: vpgatherqd
9393
;CHECK: vpadd
@@ -100,7 +100,7 @@ define void @gather_mask_qd(<8 x i64> %ind, <8 x i32> %src, i8 %mask, i8* %base,
100100
ret void
101101
}
102102

103-
;CHECK: gather_mask_qq
103+
;CHECK-LABEL: gather_mask_qq
104104
;CHECK: kmovw
105105
;CHECK: vpgatherqq
106106
;CHECK: vpadd
@@ -113,7 +113,7 @@ define void @gather_mask_qq(<8 x i64> %ind, <8 x i64> %src, i8 %mask, i8* %base,
113113
ret void
114114
}
115115

116-
;CHECK: gather_mask_dq
116+
;CHECK-LABEL: gather_mask_dq
117117
;CHECK: kmovw
118118
;CHECK: vpgatherdq
119119
;CHECK: vpadd
@@ -135,7 +135,7 @@ declare void @llvm.x86.avx512.scatter.qps.512 (i8*, <8 x i64>, <8 x float>, i32)
135135
declare <8 x double> @llvm.x86.avx512.gather.qpd.512 (<8 x i64>, i8*, i32)
136136
declare void @llvm.x86.avx512.scatter.qpd.512 (i8*, <8 x i64>, <8 x double>, i32)
137137

138-
;CHECK: gather_dps
138+
;CHECK-LABEL: gather_dps
139139
;CHECK: kxnorw
140140
;CHECK: vgatherdps
141141
;CHECK: vscatterdps
@@ -147,7 +147,7 @@ define void @gather_dps(<16 x i32> %ind, i8* %base, i8* %stbuf) {
147147
ret void
148148
}
149149

150-
;CHECK: gather_qps
150+
;CHECK-LABEL: gather_qps
151151
;CHECK: kxnorw
152152
;CHECK: vgatherqps
153153
;CHECK: vscatterqps
@@ -159,7 +159,7 @@ define void @gather_qps(<8 x i64> %ind, i8* %base, i8* %stbuf) {
159159
ret void
160160
}
161161

162-
;CHECK: gather_qpd
162+
;CHECK-LABEL: gather_qpd
163163
;CHECK: kxnorw
164164
;CHECK: vgatherqpd
165165
;CHECK: vpadd
@@ -184,7 +184,7 @@ declare void @llvm.x86.avx512.scatter.qpi.512 (i8*, <8 x i64>, <8 x i32>, i32)
184184
declare <8 x i64> @llvm.x86.avx512.gather.qpq.512 (<8 x i64>, i8*, i32)
185185
declare void @llvm.x86.avx512.scatter.qpq.512 (i8*, <8 x i64>, <8 x i64>, i32)
186186

187-
;CHECK: gather_dpi
187+
;CHECK-LABEL: gather_dpi
188188
;CHECK: kxnorw
189189
;CHECK: vpgatherdd
190190
;CHECK: vpscatterdd
@@ -196,7 +196,8 @@ define void @gather_dpi(<16 x i32> %ind, i8* %base, i8* %stbuf) {
196196
ret void
197197
}
198198

199-
;CHECK: gather_qpq
199+
;CHECK-LABEL: gather_qpq
200+
;CHECK: vpxord %zmm
200201
;CHECK: kxnorw
201202
;CHECK: vpgatherqq
202203
;CHECK: vpadd
@@ -209,7 +210,8 @@ define void @gather_qpq(<8 x i64> %ind, i8* %base, i8* %stbuf) {
209210
ret void
210211
}
211212

212-
;CHECK: gather_qpi
213+
;CHECK-LABEL: gather_qpi
214+
;CHECK: vpxor %ymm
213215
;CHECK: kxnorw
214216
;CHECK: vpgatherqd
215217
;CHECK: vpadd

0 commit comments

Comments
 (0)