Skip to content

Commit dbc7665

Browse files
authored
PowerPC: Use REG_SEQUENCE instead of INSERT_SUBREG (#129941)
Update to use REG_SEQUENCE when possible. This patch only updates the td patterns to use REG_SEQUENCE in place of INSERT_SUBREG in cases where doing so does not produce nested REG_SEQUENCEs. This seems to show some improvement in code generation for `llvm/test/CodeGen/PowerPC/mmaplus-intrinsics.ll`. Fixes part of llvm/llvm-project#125502
1 parent 9feac2c commit dbc7665

14 files changed

+406
-423
lines changed

llvm/lib/Target/PowerPC/PPCInstrMMA.td

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1047,19 +1047,18 @@ let Predicates = [MMA, PrefixInstrs, IsISAFuture] in {
10471047
}
10481048

10491049
def ConcatsMMA {
1050-
dag VecsToVecPair0 =
1051-
(v256i1 (INSERT_SUBREG
1052-
(INSERT_SUBREG (IMPLICIT_DEF), $vs0, sub_vsx1),
1053-
$vs1, sub_vsx0));
1054-
dag VecsToVecPair1 =
1055-
(v256i1 (INSERT_SUBREG
1056-
(INSERT_SUBREG (IMPLICIT_DEF), $vs2, sub_vsx1),
1057-
$vs3, sub_vsx0));
1058-
dag VecsToVecQuad =
1059-
(BUILD_UACC (INSERT_SUBREG
1060-
(INSERT_SUBREG (v512i1 (IMPLICIT_DEF)),
1061-
(KILL_PAIR VecsToVecPair0), sub_pair0),
1062-
(KILL_PAIR VecsToVecPair1), sub_pair1));
1050+
dag VecsToVecPair0 =
1051+
(v256i1 (INSERT_SUBREG
1052+
(INSERT_SUBREG (IMPLICIT_DEF), $vs0, sub_vsx1),
1053+
$vs1, sub_vsx0));
1054+
dag VecsToVecPair1 =
1055+
(v256i1 (INSERT_SUBREG
1056+
(INSERT_SUBREG (IMPLICIT_DEF), $vs2, sub_vsx1),
1057+
$vs3, sub_vsx0));
1058+
dag VecsToVecQuad = (BUILD_UACC
1059+
(v512i1 (REG_SEQUENCE UACCRC,
1060+
(KILL_PAIR VecsToVecPair0), sub_pair0,
1061+
(KILL_PAIR VecsToVecPair1), sub_pair1)));
10631062
}
10641063

10651064
def Extracts {

llvm/lib/Target/PowerPC/PPCInstrP10.td

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1139,17 +1139,11 @@ class MMIRR_XX3Form_XYP4_XAB6<bits<6> opcode, bits<8> xo, dag OOL, dag IOL,
11391139
let Inst{63} = 0;
11401140
}
11411141

1142-
1143-
11441142
def Concats {
11451143
dag VecsToVecPair0 =
1146-
(v256i1 (INSERT_SUBREG
1147-
(INSERT_SUBREG (IMPLICIT_DEF), $vs0, sub_vsx1),
1148-
$vs1, sub_vsx0));
1144+
(v256i1 (REG_SEQUENCE VSRpRC, $vs0, sub_vsx1, $vs1, sub_vsx0));
11491145
dag VecsToVecPair1 =
1150-
(v256i1 (INSERT_SUBREG
1151-
(INSERT_SUBREG (IMPLICIT_DEF), $vs2, sub_vsx1),
1152-
$vs3, sub_vsx0));
1146+
(v256i1 (REG_SEQUENCE VSRpRC, $vs2, sub_vsx1, $vs3, sub_vsx0));
11531147
}
11541148

11551149
let Predicates = [PairedVectorMemops] in {

llvm/test/CodeGen/PowerPC/bfloat16-outer-product.ll

Lines changed: 32 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -70,10 +70,10 @@ declare <512 x i1> @llvm.ppc.mma.pmxvbf16ger2(<16 x i8>, <16 x i8>, i32, i32, i3
7070
define dso_local void @test52(ptr nocapture readonly %vqp, ptr nocapture readnone %vpp, <16 x i8> %vc, ptr nocapture %resp) {
7171
; CHECK-LABEL: test52:
7272
; CHECK: # %bb.0: # %entry
73-
; CHECK-NEXT: lxv vs1, 32(r3)
74-
; CHECK-NEXT: lxv vs0, 48(r3)
7573
; CHECK-NEXT: lxv vs3, 0(r3)
7674
; CHECK-NEXT: lxv vs2, 16(r3)
75+
; CHECK-NEXT: lxv vs1, 32(r3)
76+
; CHECK-NEXT: lxv vs0, 48(r3)
7777
; CHECK-NEXT: xxmtacc acc0
7878
; CHECK-NEXT: xvbf16ger2pp acc0, v2, v2
7979
; CHECK-NEXT: xxmfacc acc0
@@ -85,10 +85,10 @@ define dso_local void @test52(ptr nocapture readonly %vqp, ptr nocapture readnon
8585
;
8686
; CHECK-BE-LABEL: test52:
8787
; CHECK-BE: # %bb.0: # %entry
88-
; CHECK-BE-NEXT: lxv vs1, 16(r3)
89-
; CHECK-BE-NEXT: lxv vs0, 0(r3)
9088
; CHECK-BE-NEXT: lxv vs3, 48(r3)
9189
; CHECK-BE-NEXT: lxv vs2, 32(r3)
90+
; CHECK-BE-NEXT: lxv vs1, 16(r3)
91+
; CHECK-BE-NEXT: lxv vs0, 0(r3)
9292
; CHECK-BE-NEXT: xxmtacc acc0
9393
; CHECK-BE-NEXT: xvbf16ger2pp acc0, v2, v2
9494
; CHECK-BE-NEXT: xxmfacc acc0
@@ -111,10 +111,10 @@ declare <512 x i1> @llvm.ppc.mma.xvbf16ger2pp(<512 x i1>, <16 x i8>, <16 x i8>)
111111
define dso_local void @test53(ptr nocapture readonly %vqp, ptr nocapture readnone %vpp, <16 x i8> %vc, ptr nocapture %resp) {
112112
; CHECK-LABEL: test53:
113113
; CHECK: # %bb.0: # %entry
114-
; CHECK-NEXT: lxv vs1, 32(r3)
115-
; CHECK-NEXT: lxv vs0, 48(r3)
116114
; CHECK-NEXT: lxv vs3, 0(r3)
117115
; CHECK-NEXT: lxv vs2, 16(r3)
116+
; CHECK-NEXT: lxv vs1, 32(r3)
117+
; CHECK-NEXT: lxv vs0, 48(r3)
118118
; CHECK-NEXT: xxmtacc acc0
119119
; CHECK-NEXT: xvbf16ger2pn acc0, v2, v2
120120
; CHECK-NEXT: xxmfacc acc0
@@ -126,10 +126,10 @@ define dso_local void @test53(ptr nocapture readonly %vqp, ptr nocapture readnon
126126
;
127127
; CHECK-BE-LABEL: test53:
128128
; CHECK-BE: # %bb.0: # %entry
129-
; CHECK-BE-NEXT: lxv vs1, 16(r3)
130-
; CHECK-BE-NEXT: lxv vs0, 0(r3)
131129
; CHECK-BE-NEXT: lxv vs3, 48(r3)
132130
; CHECK-BE-NEXT: lxv vs2, 32(r3)
131+
; CHECK-BE-NEXT: lxv vs1, 16(r3)
132+
; CHECK-BE-NEXT: lxv vs0, 0(r3)
133133
; CHECK-BE-NEXT: xxmtacc acc0
134134
; CHECK-BE-NEXT: xvbf16ger2pn acc0, v2, v2
135135
; CHECK-BE-NEXT: xxmfacc acc0
@@ -152,10 +152,10 @@ declare <512 x i1> @llvm.ppc.mma.xvbf16ger2pn(<512 x i1>, <16 x i8>, <16 x i8>)
152152
define dso_local void @test54(ptr nocapture readonly %vqp, ptr nocapture readnone %vpp, <16 x i8> %vc, ptr nocapture %resp) {
153153
; CHECK-LABEL: test54:
154154
; CHECK: # %bb.0: # %entry
155-
; CHECK-NEXT: lxv vs1, 32(r3)
156-
; CHECK-NEXT: lxv vs0, 48(r3)
157155
; CHECK-NEXT: lxv vs3, 0(r3)
158156
; CHECK-NEXT: lxv vs2, 16(r3)
157+
; CHECK-NEXT: lxv vs1, 32(r3)
158+
; CHECK-NEXT: lxv vs0, 48(r3)
159159
; CHECK-NEXT: xxmtacc acc0
160160
; CHECK-NEXT: xvbf16ger2np acc0, v2, v2
161161
; CHECK-NEXT: xxmfacc acc0
@@ -167,10 +167,10 @@ define dso_local void @test54(ptr nocapture readonly %vqp, ptr nocapture readnon
167167
;
168168
; CHECK-BE-LABEL: test54:
169169
; CHECK-BE: # %bb.0: # %entry
170-
; CHECK-BE-NEXT: lxv vs1, 16(r3)
171-
; CHECK-BE-NEXT: lxv vs0, 0(r3)
172170
; CHECK-BE-NEXT: lxv vs3, 48(r3)
173171
; CHECK-BE-NEXT: lxv vs2, 32(r3)
172+
; CHECK-BE-NEXT: lxv vs1, 16(r3)
173+
; CHECK-BE-NEXT: lxv vs0, 0(r3)
174174
; CHECK-BE-NEXT: xxmtacc acc0
175175
; CHECK-BE-NEXT: xvbf16ger2np acc0, v2, v2
176176
; CHECK-BE-NEXT: xxmfacc acc0
@@ -193,10 +193,10 @@ declare <512 x i1> @llvm.ppc.mma.xvbf16ger2np(<512 x i1>, <16 x i8>, <16 x i8>)
193193
define dso_local void @test55(ptr nocapture readonly %vqp, ptr nocapture readnone %vpp, <16 x i8> %vc, ptr nocapture %resp) {
194194
; CHECK-LABEL: test55:
195195
; CHECK: # %bb.0: # %entry
196-
; CHECK-NEXT: lxv vs1, 32(r3)
197-
; CHECK-NEXT: lxv vs0, 48(r3)
198196
; CHECK-NEXT: lxv vs3, 0(r3)
199197
; CHECK-NEXT: lxv vs2, 16(r3)
198+
; CHECK-NEXT: lxv vs1, 32(r3)
199+
; CHECK-NEXT: lxv vs0, 48(r3)
200200
; CHECK-NEXT: xxmtacc acc0
201201
; CHECK-NEXT: xvbf16ger2nn acc0, v2, v2
202202
; CHECK-NEXT: xxmfacc acc0
@@ -208,10 +208,10 @@ define dso_local void @test55(ptr nocapture readonly %vqp, ptr nocapture readnon
208208
;
209209
; CHECK-BE-LABEL: test55:
210210
; CHECK-BE: # %bb.0: # %entry
211-
; CHECK-BE-NEXT: lxv vs1, 16(r3)
212-
; CHECK-BE-NEXT: lxv vs0, 0(r3)
213211
; CHECK-BE-NEXT: lxv vs3, 48(r3)
214212
; CHECK-BE-NEXT: lxv vs2, 32(r3)
213+
; CHECK-BE-NEXT: lxv vs1, 16(r3)
214+
; CHECK-BE-NEXT: lxv vs0, 0(r3)
215215
; CHECK-BE-NEXT: xxmtacc acc0
216216
; CHECK-BE-NEXT: xvbf16ger2nn acc0, v2, v2
217217
; CHECK-BE-NEXT: xxmfacc acc0
@@ -234,10 +234,10 @@ declare <512 x i1> @llvm.ppc.mma.xvbf16ger2nn(<512 x i1>, <16 x i8>, <16 x i8>)
234234
define dso_local void @test56(ptr nocapture readonly %vqp, ptr nocapture readnone %vpp, <16 x i8> %vc, ptr nocapture %resp) {
235235
; CHECK-LABEL: test56:
236236
; CHECK: # %bb.0: # %entry
237-
; CHECK-NEXT: lxv vs1, 32(r3)
238-
; CHECK-NEXT: lxv vs0, 48(r3)
239237
; CHECK-NEXT: lxv vs3, 0(r3)
240238
; CHECK-NEXT: lxv vs2, 16(r3)
239+
; CHECK-NEXT: lxv vs1, 32(r3)
240+
; CHECK-NEXT: lxv vs0, 48(r3)
241241
; CHECK-NEXT: xxmtacc acc0
242242
; CHECK-NEXT: pmxvbf16ger2pp acc0, v2, v2, 0, 0, 0
243243
; CHECK-NEXT: xxmfacc acc0
@@ -249,10 +249,10 @@ define dso_local void @test56(ptr nocapture readonly %vqp, ptr nocapture readnon
249249
;
250250
; CHECK-BE-LABEL: test56:
251251
; CHECK-BE: # %bb.0: # %entry
252-
; CHECK-BE-NEXT: lxv vs1, 16(r3)
253-
; CHECK-BE-NEXT: lxv vs0, 0(r3)
254252
; CHECK-BE-NEXT: lxv vs3, 48(r3)
255253
; CHECK-BE-NEXT: lxv vs2, 32(r3)
254+
; CHECK-BE-NEXT: lxv vs1, 16(r3)
255+
; CHECK-BE-NEXT: lxv vs0, 0(r3)
256256
; CHECK-BE-NEXT: xxmtacc acc0
257257
; CHECK-BE-NEXT: pmxvbf16ger2pp acc0, v2, v2, 0, 0, 0
258258
; CHECK-BE-NEXT: xxmfacc acc0
@@ -275,10 +275,10 @@ declare <512 x i1> @llvm.ppc.mma.pmxvbf16ger2pp(<512 x i1>, <16 x i8>, <16 x i8>
275275
define dso_local void @test57(ptr nocapture readonly %vqp, ptr nocapture readnone %vpp, <16 x i8> %vc, ptr nocapture %resp) {
276276
; CHECK-LABEL: test57:
277277
; CHECK: # %bb.0: # %entry
278-
; CHECK-NEXT: lxv vs1, 32(r3)
279-
; CHECK-NEXT: lxv vs0, 48(r3)
280278
; CHECK-NEXT: lxv vs3, 0(r3)
281279
; CHECK-NEXT: lxv vs2, 16(r3)
280+
; CHECK-NEXT: lxv vs1, 32(r3)
281+
; CHECK-NEXT: lxv vs0, 48(r3)
282282
; CHECK-NEXT: xxmtacc acc0
283283
; CHECK-NEXT: pmxvbf16ger2pn acc0, v2, v2, 0, 0, 0
284284
; CHECK-NEXT: xxmfacc acc0
@@ -290,10 +290,10 @@ define dso_local void @test57(ptr nocapture readonly %vqp, ptr nocapture readnon
290290
;
291291
; CHECK-BE-LABEL: test57:
292292
; CHECK-BE: # %bb.0: # %entry
293-
; CHECK-BE-NEXT: lxv vs1, 16(r3)
294-
; CHECK-BE-NEXT: lxv vs0, 0(r3)
295293
; CHECK-BE-NEXT: lxv vs3, 48(r3)
296294
; CHECK-BE-NEXT: lxv vs2, 32(r3)
295+
; CHECK-BE-NEXT: lxv vs1, 16(r3)
296+
; CHECK-BE-NEXT: lxv vs0, 0(r3)
297297
; CHECK-BE-NEXT: xxmtacc acc0
298298
; CHECK-BE-NEXT: pmxvbf16ger2pn acc0, v2, v2, 0, 0, 0
299299
; CHECK-BE-NEXT: xxmfacc acc0
@@ -316,10 +316,10 @@ declare <512 x i1> @llvm.ppc.mma.pmxvbf16ger2pn(<512 x i1>, <16 x i8>, <16 x i8>
316316
define dso_local void @test58(ptr nocapture readonly %vqp, ptr nocapture readnone %vpp, <16 x i8> %vc, ptr nocapture %resp) {
317317
; CHECK-LABEL: test58:
318318
; CHECK: # %bb.0: # %entry
319-
; CHECK-NEXT: lxv vs1, 32(r3)
320-
; CHECK-NEXT: lxv vs0, 48(r3)
321319
; CHECK-NEXT: lxv vs3, 0(r3)
322320
; CHECK-NEXT: lxv vs2, 16(r3)
321+
; CHECK-NEXT: lxv vs1, 32(r3)
322+
; CHECK-NEXT: lxv vs0, 48(r3)
323323
; CHECK-NEXT: xxmtacc acc0
324324
; CHECK-NEXT: pmxvbf16ger2np acc0, v2, v2, 0, 0, 0
325325
; CHECK-NEXT: xxmfacc acc0
@@ -331,10 +331,10 @@ define dso_local void @test58(ptr nocapture readonly %vqp, ptr nocapture readnon
331331
;
332332
; CHECK-BE-LABEL: test58:
333333
; CHECK-BE: # %bb.0: # %entry
334-
; CHECK-BE-NEXT: lxv vs1, 16(r3)
335-
; CHECK-BE-NEXT: lxv vs0, 0(r3)
336334
; CHECK-BE-NEXT: lxv vs3, 48(r3)
337335
; CHECK-BE-NEXT: lxv vs2, 32(r3)
336+
; CHECK-BE-NEXT: lxv vs1, 16(r3)
337+
; CHECK-BE-NEXT: lxv vs0, 0(r3)
338338
; CHECK-BE-NEXT: xxmtacc acc0
339339
; CHECK-BE-NEXT: pmxvbf16ger2np acc0, v2, v2, 0, 0, 0
340340
; CHECK-BE-NEXT: xxmfacc acc0
@@ -357,10 +357,10 @@ declare <512 x i1> @llvm.ppc.mma.pmxvbf16ger2np(<512 x i1>, <16 x i8>, <16 x i8>
357357
define dso_local void @test59(ptr nocapture readonly %vqp, ptr nocapture readnone %vpp, <16 x i8> %vc, ptr nocapture %resp) {
358358
; CHECK-LABEL: test59:
359359
; CHECK: # %bb.0: # %entry
360-
; CHECK-NEXT: lxv vs1, 32(r3)
361-
; CHECK-NEXT: lxv vs0, 48(r3)
362360
; CHECK-NEXT: lxv vs3, 0(r3)
363361
; CHECK-NEXT: lxv vs2, 16(r3)
362+
; CHECK-NEXT: lxv vs1, 32(r3)
363+
; CHECK-NEXT: lxv vs0, 48(r3)
364364
; CHECK-NEXT: xxmtacc acc0
365365
; CHECK-NEXT: pmxvbf16ger2nn acc0, v2, v2, 0, 0, 0
366366
; CHECK-NEXT: xxmfacc acc0
@@ -372,10 +372,10 @@ define dso_local void @test59(ptr nocapture readonly %vqp, ptr nocapture readnon
372372
;
373373
; CHECK-BE-LABEL: test59:
374374
; CHECK-BE: # %bb.0: # %entry
375-
; CHECK-BE-NEXT: lxv vs1, 16(r3)
376-
; CHECK-BE-NEXT: lxv vs0, 0(r3)
377375
; CHECK-BE-NEXT: lxv vs3, 48(r3)
378376
; CHECK-BE-NEXT: lxv vs2, 32(r3)
377+
; CHECK-BE-NEXT: lxv vs1, 16(r3)
378+
; CHECK-BE-NEXT: lxv vs0, 0(r3)
379379
; CHECK-BE-NEXT: xxmtacc acc0
380380
; CHECK-BE-NEXT: pmxvbf16ger2nn acc0, v2, v2, 0, 0, 0
381381
; CHECK-BE-NEXT: xxmfacc acc0

0 commit comments

Comments
 (0)