Skip to content

Commit 0468293

Browse files
committed
[X86] Use sse_load_f32/f64 and timm in patterns for memory form of vgetmantss/sd.
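Previously the memory form only matched a scalar_to_vector of a scalar load, but we should also be able to narrow a full vector load or match a vzload; using the sse_load_f32/f64 complex patterns allows this. The memory patterns also need to match timm (TargetConstant) instead of imm (Constant) for the immediate operand. The register patterns had already been updated this way, but the memory patterns had not. llvm-svn: 372458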
1 parent 4fa12ac commit 0468293

File tree

2 files changed

+5
-8
lines changed

2 files changed

+5
-8
lines changed

llvm/lib/Target/X86/X86InstrAVX512.td

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10280,12 +10280,11 @@ multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
1028010280
(i32 timm:$src3))>,
1028110281
Sched<[sched]>;
1028210282
defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
10283-
(ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
10283+
(ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
1028410284
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
1028510285
(OpNode (_.VT _.RC:$src1),
10286-
(_.VT (scalar_to_vector
10287-
(_.ScalarLdFrag addr:$src2))),
10288-
(i32 imm:$src3))>,
10286+
(_.VT _.ScalarIntMemCPat:$src2),
10287+
(i32 timm:$src3))>,
1028910288
Sched<[sched.Folded, sched.ReadAfterFold]>;
1029010289
}
1029110290
}

llvm/test/CodeGen/X86/avx512-intrinsics.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4801,15 +4801,13 @@ define <4 x float>@test_int_x86_avx512_mask_getmant_ss(<4 x float> %x0, <4 x flo
48014801
define <4 x float> @test_int_x86_avx512_mask_getmant_ss_load(<4 x float> %x0, <4 x float>* %x1p) {
48024802
; X64-LABEL: test_int_x86_avx512_mask_getmant_ss_load:
48034803
; X64: # %bb.0:
4804-
; X64-NEXT: vmovaps (%rdi), %xmm1
4805-
; X64-NEXT: vgetmantss $11, %xmm1, %xmm0, %xmm0
4804+
; X64-NEXT: vgetmantss $11, (%rdi), %xmm0, %xmm0
48064805
; X64-NEXT: retq
48074806
;
48084807
; X86-LABEL: test_int_x86_avx512_mask_getmant_ss_load:
48094808
; X86: # %bb.0:
48104809
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
4811-
; X86-NEXT: vmovaps (%eax), %xmm1
4812-
; X86-NEXT: vgetmantss $11, %xmm1, %xmm0, %xmm0
4810+
; X86-NEXT: vgetmantss $11, (%eax), %xmm0, %xmm0
48134811
; X86-NEXT: retl
48144812
%x1 = load <4 x float>, <4 x float>* %x1p
48154813
%res = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> undef, i8 -1, i32 4)

0 commit comments

Comments
 (0)