Skip to content

Commit 6bec45e

Browse files
author
Cameron McInally
committed
[SVE] Add legalisation tests to sve-fixed-length-int-reduce.ll
1 parent 1b31b50 commit 6bec45e

File tree

1 file changed

+200
-0
lines changed

1 file changed

+200
-0
lines changed

llvm/test/CodeGen/AArch64/sve-fixed-length-int-reduce.ll

Lines changed: 200 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,16 @@ define i8 @uaddv_v64i8(<64 x i8>* %a) #0 {
6161
; VBITS_GE_512-NEXT: uaddv [[REDUCE:d[0-9]+]], [[PG]], [[OP]].b
6262
; VBITS_GE_512-NEXT: fmov x0, [[REDUCE]]
6363
; VBITS_GE_512-NEXT: ret
64+
65+
; Ensure sensible type legalisation.
66+
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32
67+
; VBITS_EQ_256-DAG: mov w[[A_HI:[0-9]+]], #32
68+
; VBITS_EQ_256-DAG: ld1b { [[LO:z[0-9]+]].b }, [[PG]]/z, [x0]
69+
; VBITS_EQ_256-DAG: ld1b { [[HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[A_HI]]]
70+
; VBITS_EQ_256-DAG: add [[ADD:z[0-9]+]].b, [[PG]]/m, [[HI]].b, [[LO]].b
71+
; VBITS_EQ_256-DAG: uaddv [[REDUCE:d[0-9]+]], [[PG]], [[ADD]].b
72+
; VBITS_EQ_256-NEXT: fmov x0, [[REDUCE]]
73+
; VBITS_EQ_256-NEXT: ret
6474
%op = load <64 x i8>, <64 x i8>* %a
6575
%res = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> %op)
6676
ret i8 %res
@@ -127,6 +137,16 @@ define i16 @uaddv_v32i16(<32 x i16>* %a) #0 {
127137
; VBITS_GE_512-NEXT: uaddv [[REDUCE:d[0-9]+]], [[PG]], [[OP]].h
128138
; VBITS_GE_512-NEXT: fmov x0, [[REDUCE]]
129139
; VBITS_GE_512-NEXT: ret
140+
141+
; Ensure sensible type legalisation.
142+
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
143+
; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
144+
; VBITS_EQ_256-DAG: ld1h { [[LO:z[0-9]+]].h }, [[PG]]/z, [x0]
145+
; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]]
146+
; VBITS_EQ_256-DAG: add [[ADD:z[0-9]+]].h, [[PG]]/m, [[LO]].h, [[HI]].h
147+
; VBITS_EQ_256-DAG: uaddv [[REDUCE:d[0-9]+]], [[PG]], [[ADD]].h
148+
; VBITS_EQ_256-NEXT: fmov x0, [[REDUCE]]
149+
; VBITS_EQ_256-NEXT: ret
130150
%op = load <32 x i16>, <32 x i16>* %a
131151
%res = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> %op)
132152
ret i16 %res
@@ -193,6 +213,16 @@ define i32 @uaddv_v16i32(<16 x i32>* %a) #0 {
193213
; VBITS_GE_512-NEXT: uaddv [[REDUCE:d[0-9]+]], [[PG]], [[OP]].s
194214
; VBITS_GE_512-NEXT: fmov x0, [[REDUCE]]
195215
; VBITS_GE_512-NEXT: ret
216+
217+
; Ensure sensible type legalisation.
218+
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
219+
; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
220+
; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG]]/z, [x0]
221+
; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]]
222+
; VBITS_EQ_256-DAG: add [[ADD:z[0-9]+]].s, [[PG]]/m, [[LO]].s, [[HI]].s
223+
; VBITS_EQ_256-DAG: uaddv [[REDUCE:d[0-9]+]], [[PG]], [[ADD]].s
224+
; VBITS_EQ_256-NEXT: fmov x0, [[REDUCE]]
225+
; VBITS_EQ_256-NEXT: ret
196226
%op = load <16 x i32>, <16 x i32>* %a
197227
%res = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %op)
198228
ret i32 %res
@@ -259,6 +289,16 @@ define i64 @uaddv_v8i64(<8 x i64>* %a) #0 {
259289
; VBITS_GE_512-NEXT: uaddv [[REDUCE:d[0-9]+]], [[PG]], [[OP]].d
260290
; VBITS_GE_512-NEXT: fmov x0, [[REDUCE]]
261291
; VBITS_GE_512-NEXT: ret
292+
293+
; Ensure sensible type legalisation.
294+
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
295+
; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
296+
; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0]
297+
; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]]
298+
; VBITS_EQ_256-DAG: add [[ADD:z[0-9]+]].d, [[PG]]/m, [[LO]].d, [[HI]].d
299+
; VBITS_EQ_256-DAG: uaddv [[REDUCE:d[0-9]+]], [[PG]], [[ADD]].d
300+
; VBITS_EQ_256-NEXT: fmov x0, [[REDUCE]]
301+
; VBITS_EQ_256-NEXT: ret
262302
%op = load <8 x i64>, <8 x i64>* %a
263303
%res = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %op)
264304
ret i64 %res
@@ -329,6 +369,16 @@ define i8 @smaxv_v64i8(<64 x i8>* %a) #0 {
329369
; VBITS_GE_512-NEXT: smaxv b[[REDUCE:[0-9]+]], [[PG]], [[OP]].b
330370
; VBITS_GE_512-NEXT: fmov w0, s[[REDUCE]]
331371
; VBITS_GE_512-NEXT: ret
372+
373+
; Ensure sensible type legalisation.
374+
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32
375+
; VBITS_EQ_256-DAG: mov w[[A_HI:[0-9]+]], #32
376+
; VBITS_EQ_256-DAG: ld1b { [[LO:z[0-9]+]].b }, [[PG]]/z, [x0]
377+
; VBITS_EQ_256-DAG: ld1b { [[HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[A_HI]]]
378+
; VBITS_EQ_256-DAG: smax [[MAX:z[0-9]+]].b, [[PG]]/m, [[HI]].b, [[LO]].b
379+
; VBITS_EQ_256-DAG: smaxv b[[REDUCE:[0-9]+]], [[PG]], [[MAX]].b
380+
; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]]
381+
; VBITS_EQ_256-NEXT: ret
332382
%op = load <64 x i8>, <64 x i8>* %a
333383
%res = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> %op)
334384
ret i8 %res
@@ -395,6 +445,16 @@ define i16 @smaxv_v32i16(<32 x i16>* %a) #0 {
395445
; VBITS_GE_512-NEXT: smaxv h[[REDUCE:[0-9]+]], [[PG]], [[OP]].h
396446
; VBITS_GE_512-NEXT: fmov w0, s[[REDUCE]]
397447
; VBITS_GE_512-NEXT: ret
448+
449+
; Ensure sensible type legalisation.
450+
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
451+
; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
452+
; VBITS_EQ_256-DAG: ld1h { [[LO:z[0-9]+]].h }, [[PG]]/z, [x0]
453+
; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]]
454+
; VBITS_EQ_256-DAG: smax [[MAX:z[0-9]+]].h, [[PG]]/m, [[LO]].h, [[HI]].h
455+
; VBITS_EQ_256-DAG: smaxv h[[REDUCE:[0-9]+]], [[PG]], [[MAX]].h
456+
; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]]
457+
; VBITS_EQ_256-NEXT: ret
398458
%op = load <32 x i16>, <32 x i16>* %a
399459
%res = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> %op)
400460
ret i16 %res
@@ -461,6 +521,16 @@ define i32 @smaxv_v16i32(<16 x i32>* %a) #0 {
461521
; VBITS_GE_512-NEXT: smaxv [[REDUCE:s[0-9]+]], [[PG]], [[OP]].s
462522
; VBITS_GE_512-NEXT: fmov w0, [[REDUCE]]
463523
; VBITS_GE_512-NEXT: ret
524+
525+
; Ensure sensible type legalisation.
526+
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
527+
; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
528+
; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG]]/z, [x0]
529+
; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]]
530+
; VBITS_EQ_256-DAG: smax [[MAX:z[0-9]+]].s, [[PG]]/m, [[LO]].s, [[HI]].s
531+
; VBITS_EQ_256-DAG: smaxv [[REDUCE:s[0-9]+]], [[PG]], [[MAX]].s
532+
; VBITS_EQ_256-NEXT: fmov w0, [[REDUCE]]
533+
; VBITS_EQ_256-NEXT: ret
464534
%op = load <16 x i32>, <16 x i32>* %a
465535
%res = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> %op)
466536
ret i32 %res
@@ -529,6 +599,16 @@ define i64 @smaxv_v8i64(<8 x i64>* %a) #0 {
529599
; VBITS_GE_512-NEXT: smaxv [[REDUCE:d[0-9]+]], [[PG]], [[OP]].d
530600
; VBITS_GE_512-NEXT: fmov x0, [[REDUCE]]
531601
; VBITS_GE_512-NEXT: ret
602+
603+
; Ensure sensible type legalisation.
604+
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
605+
; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
606+
; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0]
607+
; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]]
608+
; VBITS_EQ_256-DAG: smax [[MAX:z[0-9]+]].d, [[PG]]/m, [[LO]].d, [[HI]].d
609+
; VBITS_EQ_256-DAG: smaxv [[REDUCE:d[0-9]+]], [[PG]], [[MAX]].d
610+
; VBITS_EQ_256-NEXT: fmov x0, [[REDUCE]]
611+
; VBITS_EQ_256-NEXT: ret
532612
%op = load <8 x i64>, <8 x i64>* %a
533613
%res = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> %op)
534614
ret i64 %res
@@ -599,6 +679,16 @@ define i8 @sminv_v64i8(<64 x i8>* %a) #0 {
599679
; VBITS_GE_512-NEXT: sminv b[[REDUCE:[0-9]+]], [[PG]], [[OP]].b
600680
; VBITS_GE_512-NEXT: fmov w0, s[[REDUCE]]
601681
; VBITS_GE_512-NEXT: ret
682+
683+
; Ensure sensible type legalisation.
684+
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32
685+
; VBITS_EQ_256-DAG: mov w[[A_HI:[0-9]+]], #32
686+
; VBITS_EQ_256-DAG: ld1b { [[LO:z[0-9]+]].b }, [[PG]]/z, [x0]
687+
; VBITS_EQ_256-DAG: ld1b { [[HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[A_HI]]]
688+
; VBITS_EQ_256-DAG: smin [[MIN:z[0-9]+]].b, [[PG]]/m, [[HI]].b, [[LO]].b
689+
; VBITS_EQ_256-DAG: sminv b[[REDUCE:[0-9]+]], [[PG]], [[MIN]].b
690+
; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]]
691+
; VBITS_EQ_256-NEXT: ret
602692
%op = load <64 x i8>, <64 x i8>* %a
603693
%res = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> %op)
604694
ret i8 %res
@@ -665,6 +755,16 @@ define i16 @sminv_v32i16(<32 x i16>* %a) #0 {
665755
; VBITS_GE_512-NEXT: sminv h[[REDUCE:[0-9]+]], [[PG]], [[OP]].h
666756
; VBITS_GE_512-NEXT: fmov w0, s[[REDUCE]]
667757
; VBITS_GE_512-NEXT: ret
758+
759+
; Ensure sensible type legalisation.
760+
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
761+
; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
762+
; VBITS_EQ_256-DAG: ld1h { [[LO:z[0-9]+]].h }, [[PG]]/z, [x0]
763+
; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]]
764+
; VBITS_EQ_256-DAG: smin [[MIN:z[0-9]+]].h, [[PG]]/m, [[LO]].h, [[HI]].h
765+
; VBITS_EQ_256-DAG: sminv h[[REDUCE:[0-9]+]], [[PG]], [[MIN]].h
766+
; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]]
767+
; VBITS_EQ_256-NEXT: ret
668768
%op = load <32 x i16>, <32 x i16>* %a
669769
%res = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> %op)
670770
ret i16 %res
@@ -731,6 +831,16 @@ define i32 @sminv_v16i32(<16 x i32>* %a) #0 {
731831
; VBITS_GE_512-NEXT: sminv [[REDUCE:s[0-9]+]], [[PG]], [[OP]].s
732832
; VBITS_GE_512-NEXT: fmov w0, [[REDUCE]]
733833
; VBITS_GE_512-NEXT: ret
834+
835+
; Ensure sensible type legalisation.
836+
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
837+
; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
838+
; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG]]/z, [x0]
839+
; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]]
840+
; VBITS_EQ_256-DAG: smin [[MIN:z[0-9]+]].s, [[PG]]/m, [[LO]].s, [[HI]].s
841+
; VBITS_EQ_256-DAG: sminv [[REDUCE:s[0-9]+]], [[PG]], [[MIN]].s
842+
; VBITS_EQ_256-NEXT: fmov w0, [[REDUCE]]
843+
; VBITS_EQ_256-NEXT: ret
734844
%op = load <16 x i32>, <16 x i32>* %a
735845
%res = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> %op)
736846
ret i32 %res
@@ -799,6 +909,16 @@ define i64 @sminv_v8i64(<8 x i64>* %a) #0 {
799909
; VBITS_GE_512-NEXT: sminv [[REDUCE:d[0-9]+]], [[PG]], [[OP]].d
800910
; VBITS_GE_512-NEXT: fmov x0, [[REDUCE]]
801911
; VBITS_GE_512-NEXT: ret
912+
913+
; Ensure sensible type legalisation.
914+
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
915+
; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
916+
; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0]
917+
; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]]
918+
; VBITS_EQ_256-DAG: smin [[MIN:z[0-9]+]].d, [[PG]]/m, [[LO]].d, [[HI]].d
919+
; VBITS_EQ_256-DAG: sminv [[REDUCE:d[0-9]+]], [[PG]], [[MIN]].d
920+
; VBITS_EQ_256-NEXT: fmov x0, [[REDUCE]]
921+
; VBITS_EQ_256-NEXT: ret
802922
%op = load <8 x i64>, <8 x i64>* %a
803923
%res = call i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> %op)
804924
ret i64 %res
@@ -869,6 +989,16 @@ define i8 @umaxv_v64i8(<64 x i8>* %a) #0 {
869989
; VBITS_GE_512-NEXT: umaxv b[[REDUCE:[0-9]+]], [[PG]], [[OP]].b
870990
; VBITS_GE_512-NEXT: fmov w0, s[[REDUCE]]
871991
; VBITS_GE_512-NEXT: ret
992+
993+
; Ensure sensible type legalisation.
994+
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32
995+
; VBITS_EQ_256-DAG: mov w[[A_HI:[0-9]+]], #32
996+
; VBITS_EQ_256-DAG: ld1b { [[LO:z[0-9]+]].b }, [[PG]]/z, [x0]
997+
; VBITS_EQ_256-DAG: ld1b { [[HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[A_HI]]]
998+
; VBITS_EQ_256-DAG: umax [[MAX:z[0-9]+]].b, [[PG]]/m, [[HI]].b, [[LO]].b
999+
; VBITS_EQ_256-DAG: umaxv b[[REDUCE:[0-9]+]], [[PG]], [[MAX]].b
1000+
; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]]
1001+
; VBITS_EQ_256-NEXT: ret
8721002
%op = load <64 x i8>, <64 x i8>* %a
8731003
%res = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> %op)
8741004
ret i8 %res
@@ -935,6 +1065,16 @@ define i16 @umaxv_v32i16(<32 x i16>* %a) #0 {
9351065
; VBITS_GE_512-NEXT: umaxv h[[REDUCE:[0-9]+]], [[PG]], [[OP]].h
9361066
; VBITS_GE_512-NEXT: fmov w0, s[[REDUCE]]
9371067
; VBITS_GE_512-NEXT: ret
1068+
1069+
; Ensure sensible type legalisation.
1070+
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
1071+
; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
1072+
; VBITS_EQ_256-DAG: ld1h { [[LO:z[0-9]+]].h }, [[PG]]/z, [x0]
1073+
; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]]
1074+
; VBITS_EQ_256-DAG: umax [[MAX:z[0-9]+]].h, [[PG]]/m, [[LO]].h, [[HI]].h
1075+
; VBITS_EQ_256-DAG: umaxv h[[REDUCE:[0-9]+]], [[PG]], [[MAX]].h
1076+
; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]]
1077+
; VBITS_EQ_256-NEXT: ret
9381078
%op = load <32 x i16>, <32 x i16>* %a
9391079
%res = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> %op)
9401080
ret i16 %res
@@ -1001,6 +1141,16 @@ define i32 @umaxv_v16i32(<16 x i32>* %a) #0 {
10011141
; VBITS_GE_512-NEXT: umaxv [[REDUCE:s[0-9]+]], [[PG]], [[OP]].s
10021142
; VBITS_GE_512-NEXT: fmov w0, [[REDUCE]]
10031143
; VBITS_GE_512-NEXT: ret
1144+
1145+
; Ensure sensible type legalisation.
1146+
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
1147+
; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
1148+
; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG]]/z, [x0]
1149+
; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]]
1150+
; VBITS_EQ_256-DAG: umax [[MAX:z[0-9]+]].s, [[PG]]/m, [[LO]].s, [[HI]].s
1151+
; VBITS_EQ_256-DAG: umaxv [[REDUCE:s[0-9]+]], [[PG]], [[MAX]].s
1152+
; VBITS_EQ_256-NEXT: fmov w0, [[REDUCE]]
1153+
; VBITS_EQ_256-NEXT: ret
10041154
%op = load <16 x i32>, <16 x i32>* %a
10051155
%res = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> %op)
10061156
ret i32 %res
@@ -1069,6 +1219,16 @@ define i64 @umaxv_v8i64(<8 x i64>* %a) #0 {
10691219
; VBITS_GE_512-NEXT: umaxv [[REDUCE:d[0-9]+]], [[PG]], [[OP]].d
10701220
; VBITS_GE_512-NEXT: fmov x0, [[REDUCE]]
10711221
; VBITS_GE_512-NEXT: ret
1222+
1223+
; Ensure sensible type legalisation.
1224+
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
1225+
; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
1226+
; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0]
1227+
; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]]
1228+
; VBITS_EQ_256-DAG: umax [[MAX:z[0-9]+]].d, [[PG]]/m, [[LO]].d, [[HI]].d
1229+
; VBITS_EQ_256-DAG: umaxv [[REDUCE:d[0-9]+]], [[PG]], [[MAX]].d
1230+
; VBITS_EQ_256-NEXT: fmov x0, [[REDUCE]]
1231+
; VBITS_EQ_256-NEXT: ret
10721232
%op = load <8 x i64>, <8 x i64>* %a
10731233
%res = call i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64> %op)
10741234
ret i64 %res
@@ -1139,6 +1299,16 @@ define i8 @uminv_v64i8(<64 x i8>* %a) #0 {
11391299
; VBITS_GE_512-NEXT: uminv b[[REDUCE:[0-9]+]], [[PG]], [[OP]].b
11401300
; VBITS_GE_512-NEXT: fmov w0, s[[REDUCE]]
11411301
; VBITS_GE_512-NEXT: ret
1302+
1303+
; Ensure sensible type legalisation.
1304+
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32
1305+
; VBITS_EQ_256-DAG: mov w[[A_HI:[0-9]+]], #32
1306+
; VBITS_EQ_256-DAG: ld1b { [[LO:z[0-9]+]].b }, [[PG]]/z, [x0]
1307+
; VBITS_EQ_256-DAG: ld1b { [[HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[A_HI]]]
1308+
; VBITS_EQ_256-DAG: umin [[MIN:z[0-9]+]].b, [[PG]]/m, [[HI]].b, [[LO]].b
1309+
; VBITS_EQ_256-DAG: uminv b[[REDUCE:[0-9]+]], [[PG]], [[MIN]].b
1310+
; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]]
1311+
; VBITS_EQ_256-NEXT: ret
11421312
%op = load <64 x i8>, <64 x i8>* %a
11431313
%res = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> %op)
11441314
ret i8 %res
@@ -1205,6 +1375,16 @@ define i16 @uminv_v32i16(<32 x i16>* %a) #0 {
12051375
; VBITS_GE_512-NEXT: uminv h[[REDUCE:[0-9]+]], [[PG]], [[OP]].h
12061376
; VBITS_GE_512-NEXT: fmov w0, s[[REDUCE]]
12071377
; VBITS_GE_512-NEXT: ret
1378+
1379+
; Ensure sensible type legalisation.
1380+
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
1381+
; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
1382+
; VBITS_EQ_256-DAG: ld1h { [[LO:z[0-9]+]].h }, [[PG]]/z, [x0]
1383+
; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]]
1384+
; VBITS_EQ_256-DAG: umin [[MIN:z[0-9]+]].h, [[PG]]/m, [[LO]].h, [[HI]].h
1385+
; VBITS_EQ_256-DAG: uminv h[[REDUCE:[0-9]+]], [[PG]], [[MIN]].h
1386+
; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]]
1387+
; VBITS_EQ_256-NEXT: ret
12081388
%op = load <32 x i16>, <32 x i16>* %a
12091389
%res = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> %op)
12101390
ret i16 %res
@@ -1271,6 +1451,16 @@ define i32 @uminv_v16i32(<16 x i32>* %a) #0 {
12711451
; VBITS_GE_512-NEXT: uminv [[REDUCE:s[0-9]+]], [[PG]], [[OP]].s
12721452
; VBITS_GE_512-NEXT: fmov w0, [[REDUCE]]
12731453
; VBITS_GE_512-NEXT: ret
1454+
1455+
; Ensure sensible type legalisation.
1456+
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
1457+
; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
1458+
; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG]]/z, [x0]
1459+
; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]]
1460+
; VBITS_EQ_256-DAG: umin [[MIN:z[0-9]+]].s, [[PG]]/m, [[LO]].s, [[HI]].s
1461+
; VBITS_EQ_256-DAG: uminv [[REDUCE:s[0-9]+]], [[PG]], [[MIN]].s
1462+
; VBITS_EQ_256-NEXT: fmov w0, [[REDUCE]]
1463+
; VBITS_EQ_256-NEXT: ret
12741464
%op = load <16 x i32>, <16 x i32>* %a
12751465
%res = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> %op)
12761466
ret i32 %res
@@ -1339,6 +1529,16 @@ define i64 @uminv_v8i64(<8 x i64>* %a) #0 {
13391529
; VBITS_GE_512-NEXT: uminv [[REDUCE:d[0-9]+]], [[PG]], [[OP]].d
13401530
; VBITS_GE_512-NEXT: fmov x0, [[REDUCE]]
13411531
; VBITS_GE_512-NEXT: ret
1532+
1533+
; Ensure sensible type legalisation.
1534+
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
1535+
; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
1536+
; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0]
1537+
; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]]
1538+
; VBITS_EQ_256-DAG: umin [[MIN:z[0-9]+]].d, [[PG]]/m, [[LO]].d, [[HI]].d
1539+
; VBITS_EQ_256-DAG: uminv [[REDUCE:d[0-9]+]], [[PG]], [[MIN]].d
1540+
; VBITS_EQ_256-NEXT: fmov x0, [[REDUCE]]
1541+
; VBITS_EQ_256-NEXT: ret
13421542
%op = load <8 x i64>, <8 x i64>* %a
13431543
%res = call i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64> %op)
13441544
ret i64 %res

0 commit comments

Comments
 (0)