Commit 4262d53

[AVX-512] Add the vector down convert instructions to the store folding tables.
llvm-svn: 283687
1 parent 7abb95d commit 4262d53
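
Background: X86InstrInfo keeps "memory folding" tables that pair each register-form opcode with its equivalent memory-form opcode; entries tagged TB_FOLDED_STORE may be used when an instruction's register result is about to be stored (for example, spilled), letting the compiler emit the memory form directly instead of a register op plus a separate store. Below is a minimal, self-contained C++ sketch of that idea. The {RegOp, MemOp, Flags} shape mirrors the rows this commit adds, but the opcode values, the flag value, and the lookupFoldedStore() helper are illustrative stand-ins, not LLVM's actual API.

#include <cstdint>
#include <cstdio>

// Illustrative stand-ins; real opcode numbers come from LLVM's generated
// X86 instruction enum (X86::VPMOVDBZrr and friends).
enum Opcode : uint16_t {
  VPMOVDBZrr = 100, // vpmovdb zmm -> xmm   (register form)
  VPMOVDBZmr = 101, // vpmovdb zmm -> m128  (memory form)
};

// Illustrative flag value; LLVM defines TB_FOLDED_STORE alongside
// TB_FOLDED_LOAD, TB_ALIGN_16, TB_ALIGN_32, etc.
enum : uint16_t { TB_FOLDED_STORE = 1 << 0 };

struct MemoryFoldTableEntry {
  uint16_t RegOp; // register-to-register opcode
  uint16_t MemOp; // equivalent register-to-memory opcode
  uint16_t Flags; // how the fold may be used
};

// The commit adds VPMOV* down-convert rows shaped exactly like this one.
static const MemoryFoldTableEntry StoreFoldTable[] = {
    {VPMOVDBZrr, VPMOVDBZmr, TB_FOLDED_STORE},
};

// Hypothetical lookup: given a register-form opcode whose result is about
// to be stored, return the single-instruction memory form, or -1 if no
// store fold is available.
static int lookupFoldedStore(uint16_t RegOp) {
  for (const auto &E : StoreFoldTable)
    if (E.RegOp == RegOp && (E.Flags & TB_FOLDED_STORE))
      return E.MemOp;
  return -1;
}

int main() {
  std::printf("VPMOVDBZrr folds to opcode %d\n", lookupFoldedStore(VPMOVDBZrr));
  return 0;
}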

3 files changed, 239 insertions(+), 0 deletions(-)


llvm/lib/Target/X86/X86InstrInfo.cpp

Lines changed: 24 additions & 0 deletions
@@ -409,6 +409,21 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::VMOVDQU16Zrr,     X86::VMOVDQU16Zmr,     TB_FOLDED_STORE },
     { X86::VMOVDQU32Zrr,     X86::VMOVDQU32Zmr,     TB_FOLDED_STORE },
     { X86::VMOVDQU64Zrr,     X86::VMOVDQU64Zmr,     TB_FOLDED_STORE },
+    { X86::VPMOVDBZrr,       X86::VPMOVDBZmr,       TB_FOLDED_STORE },
+    { X86::VPMOVDWZrr,       X86::VPMOVDWZmr,       TB_FOLDED_STORE },
+    { X86::VPMOVQDZrr,       X86::VPMOVQDZmr,       TB_FOLDED_STORE },
+    { X86::VPMOVQWZrr,       X86::VPMOVQWZmr,       TB_FOLDED_STORE },
+    { X86::VPMOVWBZrr,       X86::VPMOVWBZmr,       TB_FOLDED_STORE },
+    { X86::VPMOVSDBZrr,      X86::VPMOVSDBZmr,      TB_FOLDED_STORE },
+    { X86::VPMOVSDWZrr,      X86::VPMOVSDWZmr,      TB_FOLDED_STORE },
+    { X86::VPMOVSQDZrr,      X86::VPMOVSQDZmr,      TB_FOLDED_STORE },
+    { X86::VPMOVSQWZrr,      X86::VPMOVSQWZmr,      TB_FOLDED_STORE },
+    { X86::VPMOVSWBZrr,      X86::VPMOVSWBZmr,      TB_FOLDED_STORE },
+    { X86::VPMOVUSDBZrr,     X86::VPMOVUSDBZmr,     TB_FOLDED_STORE },
+    { X86::VPMOVUSDWZrr,     X86::VPMOVUSDWZmr,     TB_FOLDED_STORE },
+    { X86::VPMOVUSQDZrr,     X86::VPMOVUSQDZmr,     TB_FOLDED_STORE },
+    { X86::VPMOVUSQWZrr,     X86::VPMOVUSQWZmr,     TB_FOLDED_STORE },
+    { X86::VPMOVUSWBZrr,     X86::VPMOVUSWBZmr,     TB_FOLDED_STORE },
 
     // AVX-512 foldable instructions (256-bit versions)
     { X86::VMOVAPDZ256rr,    X86::VMOVAPDZ256mr,    TB_FOLDED_STORE | TB_ALIGN_32 },
@@ -421,6 +436,15 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::VMOVDQU16Z256rr,  X86::VMOVDQU16Z256mr,  TB_FOLDED_STORE },
     { X86::VMOVDQU32Z256rr,  X86::VMOVDQU32Z256mr,  TB_FOLDED_STORE },
     { X86::VMOVDQU64Z256rr,  X86::VMOVDQU64Z256mr,  TB_FOLDED_STORE },
+    { X86::VPMOVDWZ256rr,    X86::VPMOVDWZ256mr,    TB_FOLDED_STORE },
+    { X86::VPMOVQDZ256rr,    X86::VPMOVQDZ256mr,    TB_FOLDED_STORE },
+    { X86::VPMOVWBZ256rr,    X86::VPMOVWBZ256mr,    TB_FOLDED_STORE },
+    { X86::VPMOVSDWZ256rr,   X86::VPMOVSDWZ256mr,   TB_FOLDED_STORE },
+    { X86::VPMOVSQDZ256rr,   X86::VPMOVSQDZ256mr,   TB_FOLDED_STORE },
+    { X86::VPMOVSWBZ256rr,   X86::VPMOVSWBZ256mr,   TB_FOLDED_STORE },
+    { X86::VPMOVUSDWZ256rr,  X86::VPMOVUSDWZ256mr,  TB_FOLDED_STORE },
+    { X86::VPMOVUSQDZ256rr,  X86::VPMOVUSQDZ256mr,  TB_FOLDED_STORE },
+    { X86::VPMOVUSWBZ256rr,  X86::VPMOVUSWBZ256mr,  TB_FOLDED_STORE },
 
     // AVX-512 foldable instructions (128-bit versions)
     { X86::VMOVAPDZ128rr,    X86::VMOVAPDZ128mr,    TB_FOLDED_STORE | TB_ALIGN_16 },

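With these rows in place, the x86 memory-fold hook (X86InstrInfo's foldMemoryOperandImpl) can turn "down-convert into a register, then store that register" into the single memory form of the instruction, e.g. one vpmovdb %zmm0, (%rsp) instead of vpmovdb %zmm0, %xmm1 followed by a separate vmovdqa %xmm1, (%rsp) (register choices here are illustrative). The tests below pin down exactly that folded-spill pattern.
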
llvm/test/CodeGen/X86/stack-folding-int-avx512.ll

Lines changed: 135 additions & 0 deletions
@@ -198,3 +198,138 @@ define <8 x i64> @stack_fold_ternlogq(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x
 }
 
 declare <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32, i8)
+
+define <16 x i8> @stack_fold_vpmovdb(<16 x i32> %a0) {
+  ;CHECK-LABEL: stack_fold_vpmovdb
+  ;CHECK: vpmovdb %zmm0, {{-?[0-9]*}}(%rsp) # 16-byte Folded Spill
+  %1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %a0, <16 x i8> undef, i16 -1)
+  %2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  ret <16 x i8> %1
+}
+declare <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32>, <16 x i8>, i16)
+
+define <16 x i16> @stack_fold_vpmovdw(<16 x i32> %a0) {
+  ;CHECK-LABEL: stack_fold_vpmovdw
+  ;CHECK: vpmovdw %zmm0, {{-?[0-9]*}}(%rsp) # 32-byte Folded Spill
+  %1 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %a0, <16 x i16> undef, i16 -1)
+  %2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  ret <16 x i16> %1
+}
+declare <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32>, <16 x i16>, i16)
+
+define <8 x i32> @stack_fold_vpmovqd(<8 x i64> %a0) {
+  ;CHECK-LABEL: stack_fold_vpmovqd
+  ;CHECK: vpmovqd %zmm0, {{-?[0-9]*}}(%rsp) # 32-byte Folded Spill
+  %1 = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %a0, <8 x i32> undef, i8 -1)
+  %2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  ret <8 x i32> %1
+}
+declare <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64>, <8 x i32>, i8)
+
+define <8 x i16> @stack_fold_vpmovqw(<8 x i64> %a0) {
+  ;CHECK-LABEL: stack_fold_vpmovqw
+  ;CHECK: vpmovqw %zmm0, {{-?[0-9]*}}(%rsp) # 16-byte Folded Spill
+  %1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %a0, <8 x i16> undef, i8 -1)
+  %2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  ret <8 x i16> %1
+}
+declare <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64>, <8 x i16>, i8)
+
+define <32 x i8> @stack_fold_vpmovwb(<32 x i16> %a0) {
+  ;CHECK-LABEL: stack_fold_vpmovwb
+  ;CHECK: vpmovwb %zmm0, {{-?[0-9]*}}(%rsp) # 32-byte Folded Spill
+  %1 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %a0, <32 x i8> undef, i32 -1)
+  %2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  ret <32 x i8> %1
+}
+declare <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16>, <32 x i8>, i32)
+
+define <16 x i8> @stack_fold_vpmovsdb(<16 x i32> %a0) {
+  ;CHECK-LABEL: stack_fold_vpmovsdb
+  ;CHECK: vpmovsdb %zmm0, {{-?[0-9]*}}(%rsp) # 16-byte Folded Spill
+  %1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %a0, <16 x i8> undef, i16 -1)
+  %2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  ret <16 x i8> %1
+}
+declare <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32>, <16 x i8>, i16)
+
+define <16 x i16> @stack_fold_vpmovsdw(<16 x i32> %a0) {
+  ;CHECK-LABEL: stack_fold_vpmovsdw
+  ;CHECK: vpmovsdw %zmm0, {{-?[0-9]*}}(%rsp) # 32-byte Folded Spill
+  %1 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %a0, <16 x i16> undef, i16 -1)
+  %2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  ret <16 x i16> %1
+}
+declare <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32>, <16 x i16>, i16)
+
+define <8 x i32> @stack_fold_vpmovsqd(<8 x i64> %a0) {
+  ;CHECK-LABEL: stack_fold_vpmovsqd
+  ;CHECK: vpmovsqd %zmm0, {{-?[0-9]*}}(%rsp) # 32-byte Folded Spill
+  %1 = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %a0, <8 x i32> undef, i8 -1)
+  %2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  ret <8 x i32> %1
+}
+declare <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64>, <8 x i32>, i8)
+
+define <8 x i16> @stack_fold_vpmovsqw(<8 x i64> %a0) {
+  ;CHECK-LABEL: stack_fold_vpmovsqw
+  ;CHECK: vpmovsqw %zmm0, {{-?[0-9]*}}(%rsp) # 16-byte Folded Spill
+  %1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %a0, <8 x i16> undef, i8 -1)
+  %2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  ret <8 x i16> %1
+}
+declare <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64>, <8 x i16>, i8)
+
+define <32 x i8> @stack_fold_vpmovswb(<32 x i16> %a0) {
+  ;CHECK-LABEL: stack_fold_vpmovswb
+  ;CHECK: vpmovswb %zmm0, {{-?[0-9]*}}(%rsp) # 32-byte Folded Spill
+  %1 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %a0, <32 x i8> undef, i32 -1)
+  %2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  ret <32 x i8> %1
+}
+declare <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16>, <32 x i8>, i32)
+
+define <16 x i8> @stack_fold_vpmovusdb(<16 x i32> %a0) {
+  ;CHECK-LABEL: stack_fold_vpmovusdb
+  ;CHECK: vpmovusdb %zmm0, {{-?[0-9]*}}(%rsp) # 16-byte Folded Spill
+  %1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %a0, <16 x i8> undef, i16 -1)
+  %2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  ret <16 x i8> %1
+}
+declare <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32>, <16 x i8>, i16)
+
+define <16 x i16> @stack_fold_vpmovusdw(<16 x i32> %a0) {
+  ;CHECK-LABEL: stack_fold_vpmovusdw
+  ;CHECK: vpmovusdw %zmm0, {{-?[0-9]*}}(%rsp) # 32-byte Folded Spill
+  %1 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %a0, <16 x i16> undef, i16 -1)
+  %2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  ret <16 x i16> %1
+}
+declare <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32>, <16 x i16>, i16)
+
+define <8 x i32> @stack_fold_vpmovusqd(<8 x i64> %a0) {
+  ;CHECK-LABEL: stack_fold_vpmovusqd
+  ;CHECK: vpmovusqd %zmm0, {{-?[0-9]*}}(%rsp) # 32-byte Folded Spill
+  %1 = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %a0, <8 x i32> undef, i8 -1)
+  %2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  ret <8 x i32> %1
+}
+declare <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64>, <8 x i32>, i8)
+
+define <8 x i16> @stack_fold_vpmovusqw(<8 x i64> %a0) {
+  ;CHECK-LABEL: stack_fold_vpmovusqw
+  ;CHECK: vpmovusqw %zmm0, {{-?[0-9]*}}(%rsp) # 16-byte Folded Spill
+  %1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %a0, <8 x i16> undef, i8 -1)
+  %2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  ret <8 x i16> %1
+}
+declare <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64>, <8 x i16>, i8)
+
+define <32 x i8> @stack_fold_vpmovuswb(<32 x i16> %a0) {
+  ;CHECK-LABEL: stack_fold_vpmovuswb
+  ;CHECK: vpmovuswb %zmm0, {{-?[0-9]*}}(%rsp) # 32-byte Folded Spill
+  %1 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %a0, <32 x i8> undef, i32 -1)
+  %2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  ret <32 x i8> %1
+}
+declare <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16>, <32 x i8>, i32)

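A note on the test pattern above: the inline asm "nop" declares xmm1 through xmm31 (and, by register aliasing, the overlapping ymm/zmm registers) as clobbered, so the intrinsic's result cannot stay in a register across the asm and must be spilled to the stack. The CHECK line then requires that the spill itself be the folded vpmov* store (hence the "Folded Spill" asm comment) rather than a down-convert into a register followed by a plain vector store. The 256-bit (VLX) tests below follow the same recipe with ymm sources.
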
llvm/test/CodeGen/X86/stack-folding-int-avx512vl.ll

Lines changed: 80 additions & 0 deletions
@@ -318,3 +318,83 @@ define <16 x i16> @stack_fold_psubw_ymm(<16 x i16> %a0, <16 x i16> %a1) {
   ret <16 x i16> %2
 }
 
+define <8 x i16> @stack_fold_vpmovdw(<8 x i32> %a0) {
+  ;CHECK-LABEL: stack_fold_vpmovdw
+  ;CHECK: vpmovdw %ymm0, {{-?[0-9]*}}(%rsp) # 16-byte Folded Spill
+  %1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32> %a0, <8 x i16> undef, i8 -1)
+  %2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  ret <8 x i16> %1
+}
+declare <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32>, <8 x i16>, i8)
+
+define <4 x i32> @stack_fold_vpmovqd(<4 x i64> %a0) {
+  ;CHECK-LABEL: stack_fold_vpmovqd
+  ;CHECK: vpmovqd %ymm0, {{-?[0-9]*}}(%rsp) # 16-byte Folded Spill
+  %1 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64> %a0, <4 x i32> undef, i8 -1)
+  %2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  ret <4 x i32> %1
+}
+declare <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64>, <4 x i32>, i8)
+
+define <16 x i8> @stack_fold_vpmovwb(<16 x i16> %a0) {
+  ;CHECK-LABEL: stack_fold_vpmovwb
+  ;CHECK: vpmovwb %ymm0, {{-?[0-9]*}}(%rsp) # 16-byte Folded Spill
+  %1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16> %a0, <16 x i8> undef, i16 -1)
+  %2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  ret <16 x i8> %1
+}
+declare <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16>, <16 x i8>, i16)
+
+define <8 x i16> @stack_fold_vpmovsdw(<8 x i32> %a0) {
+  ;CHECK-LABEL: stack_fold_vpmovsdw
+  ;CHECK: vpmovsdw %ymm0, {{-?[0-9]*}}(%rsp) # 16-byte Folded Spill
+  %1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.256(<8 x i32> %a0, <8 x i16> undef, i8 -1)
+  %2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  ret <8 x i16> %1
+}
+declare <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.256(<8 x i32>, <8 x i16>, i8)
+
+define <4 x i32> @stack_fold_vpmovsqd(<4 x i64> %a0) {
+  ;CHECK-LABEL: stack_fold_vpmovsqd
+  ;CHECK: vpmovsqd %ymm0, {{-?[0-9]*}}(%rsp) # 16-byte Folded Spill
+  %1 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.256(<4 x i64> %a0, <4 x i32> undef, i8 -1)
+  %2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  ret <4 x i32> %1
+}
+declare <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.256(<4 x i64>, <4 x i32>, i8)
+
+define <16 x i8> @stack_fold_vpmovswb(<16 x i16> %a0) {
+  ;CHECK-LABEL: stack_fold_vpmovswb
+  ;CHECK: vpmovswb %ymm0, {{-?[0-9]*}}(%rsp) # 16-byte Folded Spill
+  %1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16> %a0, <16 x i8> undef, i16 -1)
+  %2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  ret <16 x i8> %1
+}
+declare <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16>, <16 x i8>, i16)
+
+define <8 x i16> @stack_fold_vpmovusdw(<8 x i32> %a0) {
+  ;CHECK-LABEL: stack_fold_vpmovusdw
+  ;CHECK: vpmovusdw %ymm0, {{-?[0-9]*}}(%rsp) # 16-byte Folded Spill
+  %1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.256(<8 x i32> %a0, <8 x i16> undef, i8 -1)
+  %2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  ret <8 x i16> %1
+}
+declare <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.256(<8 x i32>, <8 x i16>, i8)
+
+define <4 x i32> @stack_fold_vpmovusqd(<4 x i64> %a0) {
+  ;CHECK-LABEL: stack_fold_vpmovusqd
+  ;CHECK: vpmovusqd %ymm0, {{-?[0-9]*}}(%rsp) # 16-byte Folded Spill
+  %1 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.256(<4 x i64> %a0, <4 x i32> undef, i8 -1)
+  %2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  ret <4 x i32> %1
+}
+declare <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.256(<4 x i64>, <4 x i32>, i8)
+
+define <16 x i8> @stack_fold_vpmovuswb(<16 x i16> %a0) {
+  ;CHECK-LABEL: stack_fold_vpmovuswb
+  ;CHECK: vpmovuswb %ymm0, {{-?[0-9]*}}(%rsp) # 16-byte Folded Spill
+  %1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16> %a0, <16 x i8> undef, i16 -1)
+  %2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  ret <16 x i8> %1
+}
+declare <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16>, <16 x i8>, i16)
