@@ -641,22 +641,37 @@ def sdmem : Operand<v2f64> {
 // SSE pattern fragments
 //===----------------------------------------------------------------------===//
 
+// Vector load wrappers to prevent folding of non-temporal aligned loads on
+// supporting targets.
+def vec128load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+  return !Subtarget->hasSSE41() || !cast<LoadSDNode>(N)->isNonTemporal() ||
+         cast<LoadSDNode>(N)->getAlignment() < 16;
+}]>;
+def vec256load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+  return !Subtarget->hasAVX2() || !cast<LoadSDNode>(N)->isNonTemporal() ||
+         cast<LoadSDNode>(N)->getAlignment() < 32;
+}]>;
+def vec512load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+  return !Subtarget->hasAVX512() || !cast<LoadSDNode>(N)->isNonTemporal() ||
+         cast<LoadSDNode>(N)->getAlignment() < 64;
+}]>;
+
 // 128-bit load pattern fragments
 // NOTE: all 128-bit integer vector loads are promoted to v2i64
-def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>;
-def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>;
-def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>;
+def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (vec128load node:$ptr))>;
+def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (vec128load node:$ptr))>;
+def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (vec128load node:$ptr))>;
 
 // 256-bit load pattern fragments
 // NOTE: all 256-bit integer vector loads are promoted to v4i64
-def loadv8f32 : PatFrag<(ops node:$ptr), (v8f32 (load node:$ptr))>;
-def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (load node:$ptr))>;
-def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (load node:$ptr))>;
+def loadv8f32 : PatFrag<(ops node:$ptr), (v8f32 (vec256load node:$ptr))>;
+def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (vec256load node:$ptr))>;
+def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (vec256load node:$ptr))>;
 
 // 512-bit load pattern fragments
-def loadv16f32 : PatFrag<(ops node:$ptr), (v16f32 (load node:$ptr))>;
-def loadv8f64 : PatFrag<(ops node:$ptr), (v8f64 (load node:$ptr))>;
-def loadv8i64 : PatFrag<(ops node:$ptr), (v8i64 (load node:$ptr))>;
+def loadv16f32 : PatFrag<(ops node:$ptr), (v16f32 (vec512load node:$ptr))>;
+def loadv8f64 : PatFrag<(ops node:$ptr), (v8f64 (vec512load node:$ptr))>;
+def loadv8i64 : PatFrag<(ops node:$ptr), (v8i64 (vec512load node:$ptr))>;
 
 // 128-/256-/512-bit extload pattern fragments
 def extloadv2f32 : PatFrag<(ops node:$ptr), (v2f64 (extloadvf32 node:$ptr))>;
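Each wrapper guards the plain `load` with the same three-way check: the load stays foldable unless the target has a non-temporal vector load at that width (SSE4.1 for 128-bit MOVNTDQA, AVX2 for 256-bit VMOVNTDQA, AVX-512 for 512-bit), the load is marked non-temporal, and it is aligned to the full vector width. A minimal C++ sketch of that shared predicate, where `mayFoldVectorLoad` is a hypothetical standalone helper standing in for the `Subtarget` and `LoadSDNode` context available inside the PatFrag body:

```cpp
#include <cassert>

// Hypothetical restatement of the PatFrag predicate above. Returns true
// when folding the load into another instruction is still acceptable,
// i.e. when there is no reason to keep it as a separate non-temporal load.
static bool mayFoldVectorLoad(bool HasNTLoadAtWidth, bool IsNonTemporal,
                              unsigned AlignBytes, unsigned VecWidthBytes) {
  return !HasNTLoadAtWidth || !IsNonTemporal || AlignBytes < VecWidthBytes;
}

int main() {
  // 128-bit case (vec128load): with SSE4.1, a non-temporal 16-byte-aligned
  // load must NOT be folded, so MOVNTDQA can be selected for it instead.
  assert(!mayFoldVectorLoad(/*SSE4.1*/ true, /*NT*/ true, 16, 16));
  // Under-aligned non-temporal load: folding remains allowed.
  assert(mayFoldVectorLoad(true, true, 8, 16));
  // No SSE4.1: there is no non-temporal load instruction, so fold as usual.
  assert(mayFoldVectorLoad(false, true, 16, 16));
  return 0;
}
```

Routing the `loadv*` fragments through these wrappers, rather than editing every instruction pattern, keeps the change local: any pattern that folds a vector load via `loadv4f32` etc. automatically stops matching the non-temporal aligned case.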