Skip to content

Commit a25bf0b

Browse files
committed
[X86][SSE] Non-temporal loads shouldn't be folded if it can be avoided (PR32743)
Differential Revision: https://reviews.llvm.org/D33728
llvm-svn: 304717
1 parent ada0435 commit a25bf0b

File tree

2 files changed

+172
-74
lines changed

2 files changed

+172
-74
lines changed

llvm/lib/Target/X86/X86InstrFragmentsSIMD.td

Lines changed: 24 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -641,22 +641,37 @@ def sdmem : Operand<v2f64> {
641641
// SSE pattern fragments
642642
//===----------------------------------------------------------------------===//
643643

644+
// Vector load wrappers to prevent folding of non-temporal aligned loads on
645+
// supporting targets.
646+
def vec128load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
647+
return !Subtarget->hasSSE41() || !cast<LoadSDNode>(N)->isNonTemporal() ||
648+
cast<LoadSDNode>(N)->getAlignment() < 16;
649+
}]>;
650+
def vec256load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
651+
return !Subtarget->hasAVX2() || !cast<LoadSDNode>(N)->isNonTemporal() ||
652+
cast<LoadSDNode>(N)->getAlignment() < 32;
653+
}]>;
654+
def vec512load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
655+
return !Subtarget->hasAVX512() || !cast<LoadSDNode>(N)->isNonTemporal() ||
656+
cast<LoadSDNode>(N)->getAlignment() < 64;
657+
}]>;
658+
644659
// 128-bit load pattern fragments
645660
// NOTE: all 128-bit integer vector loads are promoted to v2i64
646-
def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>;
647-
def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>;
648-
def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>;
661+
def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (vec128load node:$ptr))>;
662+
def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (vec128load node:$ptr))>;
663+
def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (vec128load node:$ptr))>;
649664

650665
// 256-bit load pattern fragments
651666
// NOTE: all 256-bit integer vector loads are promoted to v4i64
652-
def loadv8f32 : PatFrag<(ops node:$ptr), (v8f32 (load node:$ptr))>;
653-
def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (load node:$ptr))>;
654-
def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (load node:$ptr))>;
667+
def loadv8f32 : PatFrag<(ops node:$ptr), (v8f32 (vec256load node:$ptr))>;
668+
def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (vec256load node:$ptr))>;
669+
def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (vec256load node:$ptr))>;
655670

656671
// 512-bit load pattern fragments
657-
def loadv16f32 : PatFrag<(ops node:$ptr), (v16f32 (load node:$ptr))>;
658-
def loadv8f64 : PatFrag<(ops node:$ptr), (v8f64 (load node:$ptr))>;
659-
def loadv8i64 : PatFrag<(ops node:$ptr), (v8i64 (load node:$ptr))>;
672+
def loadv16f32 : PatFrag<(ops node:$ptr), (v16f32 (vec512load node:$ptr))>;
673+
def loadv8f64 : PatFrag<(ops node:$ptr), (v8f64 (vec512load node:$ptr))>;
674+
def loadv8i64 : PatFrag<(ops node:$ptr), (v8i64 (vec512load node:$ptr))>;
660675

661676
// 128-/256-/512-bit extload pattern fragments
662677
def extloadv2f32 : PatFrag<(ops node:$ptr), (v2f64 (extloadvf32 node:$ptr))>;

0 commit comments

Comments
 (0)