Commit 8d6ab7d

Revert "Revert "[SVE] Add patterns for shift intrinsics with FalseLanesZero mode""

This reverts commit 32bd1f5.

1 parent: 31aa8ea
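
With these patterns restored, instruction selection can pick the zeroing shift-by-immediate pseudos whenever the shift's first operand has its inactive lanes explicitly zeroed by a select and the shift amount is a splatted immediate. A minimal LLVM IR sketch of that shape (the function name is illustrative; it mirrors the asr_i8 case in the new test below), with the codegen expected under -mattr=+sve,+use-experimental-zeroing-pseudos shown as comments:

define <vscale x 16 x i8> @asr_by_imm_zeroing_sketch(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg) {
; Expected (per the CHECK lines of the test added in this commit):
;   movprfx z0.b, p0/z, z0.b      ; zeroing prefix implements the select against zero
;   asr     z0.b, p0/m, z0.b, #8  ; shift folded to the immediate form
;   ret
  %zeroed = select <vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer
  %ins = insertelement <vscale x 16 x i8> poison, i8 8, i32 0
  %amount = shufflevector <vscale x 16 x i8> %ins, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %zeroed, <vscale x 16 x i8> %amount)
  ret <vscale x 16 x i8> %res
}

declare <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)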

File tree: 3 files changed (+200, −0)

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
Lines changed: 4 additions & 0 deletions

@@ -2060,6 +2060,10 @@ let Predicates = [HasSVEorSME, UseExperimentalZeroingPseudos] in {
   defm LSR_ZPZZ  : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_lsr>;
   defm LSL_ZPZZ  : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_lsl>;
   defm ASRD_ZPZI : sve_int_bin_pred_shift_imm_right_zeroing_bhsd<AArch64asrd_m1>;
+
+  defm ASR_ZPZI : sve_int_bin_pred_imm_zeroing_bhsd<int_aarch64_sve_asr, SVEShiftImmR8, SVEShiftImmR16, SVEShiftImmR32, SVEShiftImmR64>;
+  defm LSR_ZPZI : sve_int_bin_pred_imm_zeroing_bhsd<int_aarch64_sve_lsr, SVEShiftImmR8, SVEShiftImmR16, SVEShiftImmR32, SVEShiftImmR64>;
+  defm LSL_ZPZI : sve_int_bin_pred_imm_zeroing_bhsd<int_aarch64_sve_lsl, SVEShiftImmL8, SVEShiftImmL16, SVEShiftImmL32, SVEShiftImmL64>;
 } // End HasSVEorSME, UseExperimentalZeroingPseudos
 
 let Predicates = [HasSVEorSME] in {
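
For reference: the right-shift instantiations (ASR_ZPZI, LSR_ZPZI) take the SVEShiftImmR* operands, whose legal shift amounts run from 1 up to the element size, while LSL_ZPZI takes SVEShiftImmL*, which accepts 0 through element size minus 1; the new test below exercises the upper bound of each (for example #8 for asr/lsr on bytes, but #7 for lsl). A small sketch of the left-shift case (illustrative function name, mirroring lsl_i8 in the test):

define <vscale x 16 x i8> @lsl_by_imm_zeroing_sketch(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg) {
; Expected: movprfx z0.b, p0/z, z0.b
;           lsl z0.b, p0/m, z0.b, #7   ; 7 is the largest legal LSL immediate for byte elements
  %zeroed = select <vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer
  %ins = insertelement <vscale x 16 x i8> poison, i8 7, i32 0
  %amount = shufflevector <vscale x 16 x i8> %ins, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %zeroed, <vscale x 16 x i8> %amount)
  ret <vscale x 16 x i8> %res
}

declare <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)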

llvm/lib/Target/AArch64/SVEInstrFormats.td
Lines changed: 20 additions & 0 deletions

@@ -571,6 +571,12 @@ class SVE_Shift_DupImm_Any_Predicate_Pat<ValueType vt, SDPatternOperator op,
   : Pat<(vt (op (pt (SVEAnyPredicate)), vt:$Rn, (vt (splat_vector (it (cast i32:$imm)))))),
         (inst $Rn, i32:$imm)>;
 
+class SVE_2_Op_Imm_Pat_Zero<ValueType vt, SDPatternOperator op, ValueType pt,
+                            ValueType it, ComplexPattern cpx, Instruction inst>
+  : Pat<(vt (op pt:$Pg, (vselect pt:$Pg, vt:$Op1, (SVEDup0)),
+        (vt (splat_vector (it (cpx i32:$imm)))))),
+        (inst $Pg, $Op1, i32:$imm)>;
+
 class SVE_2_Op_Fp_Imm_Pat<ValueType vt, SDPatternOperator op,
                           ValueType pt, ValueType it,
                           FPImmLeaf immL, int imm,

@@ -5894,6 +5900,20 @@ multiclass sve_int_bin_pred_zeroing_bhsd<SDPatternOperator op> {
   def : SVE_3_Op_Pat_SelZero<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, !cast<Pseudo>(NAME # _ZERO_D)>;
 }
 
+multiclass sve_int_bin_pred_imm_zeroing_bhsd<SDPatternOperator op,
+                                             ComplexPattern imm_b, ComplexPattern imm_h,
+                                             ComplexPattern imm_s, ComplexPattern imm_d> {
+  def _ZERO_B : PredTwoOpImmPseudo<NAME # _B, ZPR8, Operand<i32>, FalseLanesZero>;
+  def _ZERO_H : PredTwoOpImmPseudo<NAME # _H, ZPR16, Operand<i32>, FalseLanesZero>;
+  def _ZERO_S : PredTwoOpImmPseudo<NAME # _S, ZPR32, Operand<i32>, FalseLanesZero>;
+  def _ZERO_D : PredTwoOpImmPseudo<NAME # _D, ZPR64, Operand<i32>, FalseLanesZero>;
+
+  def : SVE_2_Op_Imm_Pat_Zero<nxv16i8, op, nxv16i1, i32, imm_b, !cast<Pseudo>(NAME # _ZERO_B)>;
+  def : SVE_2_Op_Imm_Pat_Zero<nxv8i16, op, nxv8i1, i32, imm_h, !cast<Pseudo>(NAME # _ZERO_H)>;
+  def : SVE_2_Op_Imm_Pat_Zero<nxv4i32, op, nxv4i1, i32, imm_s, !cast<Pseudo>(NAME # _ZERO_S)>;
+  def : SVE_2_Op_Imm_Pat_Zero<nxv2i64, op, nxv2i1, i64, imm_d, !cast<Pseudo>(NAME # _ZERO_D)>;
+}
+
 multiclass sve_int_bin_pred_shift_wide<bits<3> opc, string asm,
                                        SDPatternOperator op> {
   def _B : sve_int_bin_pred_shift<0b00, 0b1, opc, asm, ZPR8, ZPR64>;
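
Putting the two hunks together: SVE_2_Op_Imm_Pat_Zero matches the shift intrinsic when its first data operand is a vselect of the operand against zero (SVEDup0) and its second operand is a splat of an immediate accepted by the per-element-size ComplexPattern, and it selects the corresponding _ZERO pseudo; the FalseLanesZero mode is what lets the later pseudo expansion use a zeroing MOVPRFX, as the CHECK lines in the new test show. Note that the 64-bit instantiation uses i64 as the immediate value type because the splatted scalar is an i64 there. A sketch of that 64-bit shape (illustrative function name, mirroring asr_i64 in the test):

define <vscale x 2 x i64> @asr_i64_by_imm_zeroing_sketch(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg) {
; Expected: movprfx z0.d, p0/z, z0.d
;           asr z0.d, p0/m, z0.d, #64
  %zeroed = select <vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> zeroinitializer
  %ins = insertelement <vscale x 2 x i64> poison, i64 64, i32 0   ; the splatted scalar is i64, hence the i64 ValueType in the _ZERO_D pattern
  %amount = shufflevector <vscale x 2 x i64> %ins, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %zeroed, <vscale x 2 x i64> %amount)
  ret <vscale x 2 x i64> %res
}

declare <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)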
New test file
Lines changed: 176 additions & 0 deletions

@@ -0,0 +1,176 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+use-experimental-zeroing-pseudos < %s | FileCheck %s
+
+;; ASR
+define <vscale x 16 x i8> @asr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg) {
+; CHECK-LABEL: asr_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.b, p0/z, z0.b
+; CHECK-NEXT:    asr z0.b, p0/m, z0.b, #8
+; CHECK-NEXT:    ret
+  %vsel = select <vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer
+  %ele = insertelement <vscale x 16 x i8> poison, i8 8, i32 0
+  %shuffle = shufflevector <vscale x 16 x i8> %ele, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %vsel, <vscale x 16 x i8> %shuffle)
+  ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @asr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg) {
+; CHECK-LABEL: asr_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT:    asr z0.h, p0/m, z0.h, #16
+; CHECK-NEXT:    ret
+  %vsel = select <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> zeroinitializer
+  %ele = insertelement <vscale x 8 x i16> poison, i16 16, i32 0
+  %shuffle = shufflevector <vscale x 8 x i16> %ele, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %vsel, <vscale x 8 x i16> %shuffle)
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @asr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg) local_unnamed_addr #0 {
+; CHECK-LABEL: asr_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT:    asr z0.s, p0/m, z0.s, #32
+; CHECK-NEXT:    ret
+  %vsel = select <vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> zeroinitializer
+  %ele = insertelement <vscale x 4 x i32> poison, i32 32, i32 0
+  %shuffle = shufflevector <vscale x 4 x i32> %ele, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %vsel, <vscale x 4 x i32> %shuffle)
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 2 x i64> @asr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg) {
+; CHECK-LABEL: asr_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT:    asr z0.d, p0/m, z0.d, #64
+; CHECK-NEXT:    ret
+  %vsel = select <vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> zeroinitializer
+  %ele = insertelement <vscale x 2 x i64> poison, i64 64, i32 0
+  %shuffle = shufflevector <vscale x 2 x i64> %ele, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %vsel, <vscale x 2 x i64> %shuffle)
+  ret <vscale x 2 x i64> %res
+}
+
+;; LSL
+define <vscale x 16 x i8> @lsl_i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg) {
+; CHECK-LABEL: lsl_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.b, p0/z, z0.b
+; CHECK-NEXT:    lsl z0.b, p0/m, z0.b, #7
+; CHECK-NEXT:    ret
+  %vsel = select <vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer
+  %ele = insertelement <vscale x 16 x i8> poison, i8 7, i32 0
+  %shuffle = shufflevector <vscale x 16 x i8> %ele, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %vsel, <vscale x 16 x i8> %shuffle)
+  ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @lsl_i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg) {
+; CHECK-LABEL: lsl_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT:    lsl z0.h, p0/m, z0.h, #15
+; CHECK-NEXT:    ret
+  %vsel = select <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> zeroinitializer
+  %ele = insertelement <vscale x 8 x i16> poison, i16 15, i32 0
+  %shuffle = shufflevector <vscale x 8 x i16> %ele, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %vsel, <vscale x 8 x i16> %shuffle)
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @lsl_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg) local_unnamed_addr #0 {
+; CHECK-LABEL: lsl_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT:    lsl z0.s, p0/m, z0.s, #31
+; CHECK-NEXT:    ret
+  %vsel = select <vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> zeroinitializer
+  %ele = insertelement <vscale x 4 x i32> poison, i32 31, i32 0
+  %shuffle = shufflevector <vscale x 4 x i32> %ele, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %vsel, <vscale x 4 x i32> %shuffle)
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 2 x i64> @lsl_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg) {
+; CHECK-LABEL: lsl_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT:    lsl z0.d, p0/m, z0.d, #63
+; CHECK-NEXT:    ret
+  %vsel = select <vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> zeroinitializer
+  %ele = insertelement <vscale x 2 x i64> poison, i64 63, i32 0
+  %shuffle = shufflevector <vscale x 2 x i64> %ele, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %vsel, <vscale x 2 x i64> %shuffle)
+  ret <vscale x 2 x i64> %res
+}
+
+;; LSR
+define <vscale x 16 x i8> @lsr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg) {
+; CHECK-LABEL: lsr_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.b, p0/z, z0.b
+; CHECK-NEXT:    lsr z0.b, p0/m, z0.b, #8
+; CHECK-NEXT:    ret
+  %vsel = select <vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer
+  %ele = insertelement <vscale x 16 x i8> poison, i8 8, i32 0
+  %shuffle = shufflevector <vscale x 16 x i8> %ele, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %vsel, <vscale x 16 x i8> %shuffle)
+  ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @lsr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg) {
+; CHECK-LABEL: lsr_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT:    lsr z0.h, p0/m, z0.h, #16
+; CHECK-NEXT:    ret
+  %vsel = select <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> zeroinitializer
+  %ele = insertelement <vscale x 8 x i16> poison, i16 16, i32 0
+  %shuffle = shufflevector <vscale x 8 x i16> %ele, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %vsel, <vscale x 8 x i16> %shuffle)
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @lsr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg) local_unnamed_addr #0 {
+; CHECK-LABEL: lsr_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT:    lsr z0.s, p0/m, z0.s, #32
+; CHECK-NEXT:    ret
+  %vsel = select <vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> zeroinitializer
+  %ele = insertelement <vscale x 4 x i32> poison, i32 32, i32 0
+  %shuffle = shufflevector <vscale x 4 x i32> %ele, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %vsel, <vscale x 4 x i32> %shuffle)
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 2 x i64> @lsr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg) {
+; CHECK-LABEL: lsr_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT:    lsr z0.d, p0/m, z0.d, #64
+; CHECK-NEXT:    ret
+  %vsel = select <vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> zeroinitializer
+  %ele = insertelement <vscale x 2 x i64> poison, i64 64, i32 0
+  %shuffle = shufflevector <vscale x 2 x i64> %ele, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %vsel, <vscale x 2 x i64> %shuffle)
+  ret <vscale x 2 x i64> %res
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
