-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[LoongArch] Optimize vreplgr2vr + vinsgr2vr intrinsic sequence #115803
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[LoongArch] Optimize vreplgr2vr + vinsgr2vr intrinsic sequence #115803
Conversation
@llvm/pr-subscribers-backend-loongarch Author: ZhaoQi (zhaoqi5) Changes: Inspired by #101624. Full diff: https://github.com/llvm/llvm-project/pull/115803.diff 6 Files Affected:
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index afc016c84162e7..4d77912b9ed54f 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -4229,11 +4229,10 @@ performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
- case Intrinsic::loongarch_lasx_xvreplgr2vr_d: {
- EVT ResTy = N->getValueType(0);
- SmallVector<SDValue> Ops(ResTy.getVectorNumElements(), N->getOperand(1));
- return DAG.getBuildVector(ResTy, DL, Ops);
- }
+ case Intrinsic::loongarch_lasx_xvreplgr2vr_d:
+ return DAG.getNode(LoongArchISD::VREPLGR2VR, DL, N->getValueType(0),
+ DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
+ N->getOperand(1)));
case Intrinsic::loongarch_lsx_vreplve_b:
case Intrinsic::loongarch_lsx_vreplve_h:
case Intrinsic::loongarch_lsx_vreplve_w:
@@ -4710,6 +4709,7 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(VILVH)
NODE_NAME_CASE(VSHUF4I)
NODE_NAME_CASE(VREPLVEI)
+ NODE_NAME_CASE(VREPLGR2VR)
NODE_NAME_CASE(XVPERMI)
NODE_NAME_CASE(VPICK_SEXT_ELT)
NODE_NAME_CASE(VPICK_ZEXT_ELT)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index df6a55a2b83190..c10acc043c5006 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -129,6 +129,7 @@ enum NodeType : unsigned {
VILVH,
VSHUF4I,
VREPLVEI,
+ VREPLGR2VR,
XVPERMI,
// Extended vector element extraction
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index 3e39e2c10a617a..49ae440073f2e0 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -1574,6 +1574,15 @@ def : Pat<(lasxsplati16 GPR:$rj), (XVREPLGR2VR_H GPR:$rj)>;
def : Pat<(lasxsplati32 GPR:$rj), (XVREPLGR2VR_W GPR:$rj)>;
def : Pat<(lasxsplati64 GPR:$rj), (XVREPLGR2VR_D GPR:$rj)>;
+def : Pat<(v32i8 (loongarch_vreplgr2vr GRLenVT:$rj)),
+ (v32i8 (XVREPLGR2VR_B GRLenVT:$rj))>;
+def : Pat<(v16i16 (loongarch_vreplgr2vr GRLenVT:$rj)),
+ (v16i16 (XVREPLGR2VR_H GRLenVT:$rj))>;
+def : Pat<(v8i32 (loongarch_vreplgr2vr GRLenVT:$rj)),
+ (v8i32 (XVREPLGR2VR_W GRLenVT:$rj))>;
+def : Pat<(v4i64 (loongarch_vreplgr2vr GRLenVT:$rj)),
+ (v4i64 (XVREPLGR2VR_D GRLenVT:$rj))>;
+
// XVREPLVE_{B/H/W/D}
def : Pat<(loongarch_vreplve v32i8:$xj, GRLenVT:$rk),
(XVREPLVE_B v32i8:$xj, GRLenVT:$rk)>;
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index 25e70b4e6b35ae..250896cbbe5f7a 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -23,6 +23,7 @@ def SDT_LoongArchV2R : SDTypeProfile<1, 2, [SDTCisVec<0>,
SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>;
def SDT_LoongArchV1RUimm: SDTypeProfile<1, 2, [SDTCisVec<0>,
SDTCisSameAs<0,1>, SDTCisVT<2, i64>]>;
+def SDT_LoongArchVreplgr2vr : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<0>, SDTCisInt<1>]>;
def SDT_LoongArchVFRECIPE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
def SDT_LoongArchVFRSQRTE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
@@ -52,6 +53,8 @@ def loongarch_vilvh: SDNode<"LoongArchISD::VILVH", SDT_LoongArchV2R>;
def loongarch_vshuf4i: SDNode<"LoongArchISD::VSHUF4I", SDT_LoongArchV1RUimm>;
def loongarch_vreplvei: SDNode<"LoongArchISD::VREPLVEI", SDT_LoongArchV1RUimm>;
+def loongarch_vreplgr2vr: SDNode<"LoongArchISD::VREPLGR2VR", SDT_LoongArchVreplgr2vr>;
+
def loongarch_vfrecipe: SDNode<"LoongArchISD::FRECIPE", SDT_LoongArchVFRECIPE>;
def loongarch_vfrsqrte: SDNode<"LoongArchISD::FRSQRTE", SDT_LoongArchVFRSQRTE>;
@@ -1737,6 +1740,15 @@ def : Pat<(lsxsplati16 GPR:$rj), (VREPLGR2VR_H GPR:$rj)>;
def : Pat<(lsxsplati32 GPR:$rj), (VREPLGR2VR_W GPR:$rj)>;
def : Pat<(lsxsplati64 GPR:$rj), (VREPLGR2VR_D GPR:$rj)>;
+def : Pat<(v16i8 (loongarch_vreplgr2vr GRLenVT:$rj)),
+ (v16i8 (VREPLGR2VR_B GRLenVT:$rj))>;
+def : Pat<(v8i16 (loongarch_vreplgr2vr GRLenVT:$rj)),
+ (v8i16 (VREPLGR2VR_H GRLenVT:$rj))>;
+def : Pat<(v4i32 (loongarch_vreplgr2vr GRLenVT:$rj)),
+ (v4i32 (VREPLGR2VR_W GRLenVT:$rj))>;
+def : Pat<(v2i64 (loongarch_vreplgr2vr GRLenVT:$rj)),
+ (v2i64 (VREPLGR2VR_D GRLenVT:$rj))>;
+
// VREPLVE_{B/H/W/D}
def : Pat<(loongarch_vreplve v16i8:$vj, GRLenVT:$rk),
(VREPLVE_B v16i8:$vj, GRLenVT:$rk)>;
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl-ins-gr2vr.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl-ins-gr2vr.ll
index b3dcd373b60e08..2e538ed66b250e 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl-ins-gr2vr.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl-ins-gr2vr.ll
@@ -4,14 +4,8 @@
define <8 x i32> @xvrepl_ins_w(i32 %a, i32 %b) {
; CHECK-LABEL: xvrepl_ins_w:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 0
+; CHECK-NEXT: xvreplgr2vr.w $xr0, $a0
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 1
-; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 2
-; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 3
-; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 4
-; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 5
-; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 6
-; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 7
; CHECK-NEXT: ret
entry:
%0 = call <8 x i32> @llvm.loongarch.lasx.xvreplgr2vr.w(i32 %a)
@@ -22,10 +16,8 @@ entry:
define <4 x i64> @xvrepl_ins_d(i64 %a, i64 %b) {
; CHECK-LABEL: xvrepl_ins_d:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 0
+; CHECK-NEXT: xvreplgr2vr.d $xr0, $a0
; CHECK-NEXT: xvinsgr2vr.d $xr0, $a1, 1
-; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 2
-; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 3
; CHECK-NEXT: ret
entry:
%0 = call <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d(i64 %a)
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-repl-ins-gr2vr.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-repl-ins-gr2vr.ll
index 3eb06149010402..aee74929468299 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-repl-ins-gr2vr.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-repl-ins-gr2vr.ll
@@ -4,22 +4,8 @@
define <16 x i8> @vrepl_ins_b(i32 %a, i32 %b) {
; CHECK-LABEL: vrepl_ins_b:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 0
+; CHECK-NEXT: vreplgr2vr.b $vr0, $a0
; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 1
-; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 2
-; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 3
-; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 4
-; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 5
-; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 6
-; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 7
-; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 8
-; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 9
-; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 10
-; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 11
-; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 12
-; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 13
-; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 14
-; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 15
; CHECK-NEXT: ret
entry:
%0 = call <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b(i32 %a)
@@ -30,14 +16,8 @@ entry:
define <8 x i16> @vrepl_ins_h(i32 %a, i32 %b) {
; CHECK-LABEL: vrepl_ins_h:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 0
+; CHECK-NEXT: vreplgr2vr.h $vr0, $a0
; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 2
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 3
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 4
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 5
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 6
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 7
; CHECK-NEXT: ret
entry:
%0 = call <8 x i16> @llvm.loongarch.lsx.vreplgr2vr.h(i32 %a)
@@ -48,10 +28,8 @@ entry:
define <4 x i32> @vrepl_ins_w(i32 %a, i32 %b) {
; CHECK-LABEL: vrepl_ins_w:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
+; CHECK-NEXT: vreplgr2vr.w $vr0, $a0
; CHECK-NEXT: vinsgr2vr.w $vr0, $a1, 1
-; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 2
-; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 3
; CHECK-NEXT: ret
entry:
%0 = call <4 x i32> @llvm.loongarch.lsx.vreplgr2vr.w(i32 %a)
@@ -62,7 +40,7 @@ entry:
define <2 x i64> @vrepl_ins_d(i64 %a, i64 %b) {
; CHECK-LABEL: vrepl_ins_d:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; CHECK-NEXT: vreplgr2vr.d $vr0, $a0
; CHECK-NEXT: vinsgr2vr.d $vr0, $a1, 1
; CHECK-NEXT: ret
entry:
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks.
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/196/builds/901 Here is the relevant piece of the build log for reference:
|
Inspired by #101624.