@@ -51,9 +51,10 @@ entry:
51
51
define <vscale x 2 x i32 > @haddu_v2i32 (<vscale x 2 x i32 > %s0 , <vscale x 2 x i32 > %s1 ) {
52
52
; CHECK-LABEL: haddu_v2i32:
53
53
; CHECK: // %bb.0: // %entry
54
+ ; CHECK-NEXT: ptrue p0.d
54
55
; CHECK-NEXT: and z0.d, z0.d, #0xffffffff
55
- ; CHECK-NEXT: adr z0.d, [z0.d, z1.d, uxtw]
56
- ; CHECK-NEXT: lsr z0.d, z0.d, #1
56
+ ; CHECK-NEXT: and z1.d, z1.d, #0xffffffff
57
+ ; CHECK-NEXT: uhadd z0.d, p0/m, z0.d, z1.d
57
58
; CHECK-NEXT: ret
58
59
entry:
59
60
%s0s = zext <vscale x 2 x i32 > %s0 to <vscale x 2 x i64 >
@@ -116,10 +117,10 @@ entry:
116
117
define <vscale x 2 x i16 > @haddu_v2i16 (<vscale x 2 x i16 > %s0 , <vscale x 2 x i16 > %s1 ) {
117
118
; CHECK-LABEL: haddu_v2i16:
118
119
; CHECK: // %bb.0: // %entry
120
+ ; CHECK-NEXT: ptrue p0.d
119
121
; CHECK-NEXT: and z0.d, z0.d, #0xffff
120
122
; CHECK-NEXT: and z1.d, z1.d, #0xffff
121
- ; CHECK-NEXT: add z0.d, z0.d, z1.d
122
- ; CHECK-NEXT: lsr z0.d, z0.d, #1
123
+ ; CHECK-NEXT: uhadd z0.d, p0/m, z0.d, z1.d
123
124
; CHECK-NEXT: ret
124
125
entry:
125
126
%s0s = zext <vscale x 2 x i16 > %s0 to <vscale x 2 x i32 >
@@ -151,10 +152,10 @@ entry:
151
152
define <vscale x 4 x i16 > @haddu_v4i16 (<vscale x 4 x i16 > %s0 , <vscale x 4 x i16 > %s1 ) {
152
153
; CHECK-LABEL: haddu_v4i16:
153
154
; CHECK: // %bb.0: // %entry
155
+ ; CHECK-NEXT: ptrue p0.s
154
156
; CHECK-NEXT: and z0.s, z0.s, #0xffff
155
157
; CHECK-NEXT: and z1.s, z1.s, #0xffff
156
- ; CHECK-NEXT: add z0.s, z0.s, z1.s
157
- ; CHECK-NEXT: lsr z0.s, z0.s, #1
158
+ ; CHECK-NEXT: uhadd z0.s, p0/m, z0.s, z1.s
158
159
; CHECK-NEXT: ret
159
160
entry:
160
161
%s0s = zext <vscale x 4 x i16 > %s0 to <vscale x 4 x i32 >
@@ -217,10 +218,10 @@ entry:
217
218
define <vscale x 4 x i8 > @haddu_v4i8 (<vscale x 4 x i8 > %s0 , <vscale x 4 x i8 > %s1 ) {
218
219
; CHECK-LABEL: haddu_v4i8:
219
220
; CHECK: // %bb.0: // %entry
221
+ ; CHECK-NEXT: ptrue p0.s
220
222
; CHECK-NEXT: and z0.s, z0.s, #0xff
221
223
; CHECK-NEXT: and z1.s, z1.s, #0xff
222
- ; CHECK-NEXT: add z0.s, z0.s, z1.s
223
- ; CHECK-NEXT: lsr z0.s, z0.s, #1
224
+ ; CHECK-NEXT: uhadd z0.s, p0/m, z0.s, z1.s
224
225
; CHECK-NEXT: ret
225
226
entry:
226
227
%s0s = zext <vscale x 4 x i8 > %s0 to <vscale x 4 x i16 >
@@ -252,10 +253,10 @@ entry:
252
253
define <vscale x 8 x i8 > @haddu_v8i8 (<vscale x 8 x i8 > %s0 , <vscale x 8 x i8 > %s1 ) {
253
254
; CHECK-LABEL: haddu_v8i8:
254
255
; CHECK: // %bb.0: // %entry
256
+ ; CHECK-NEXT: ptrue p0.h
255
257
; CHECK-NEXT: and z0.h, z0.h, #0xff
256
258
; CHECK-NEXT: and z1.h, z1.h, #0xff
257
- ; CHECK-NEXT: add z0.h, z0.h, z1.h
258
- ; CHECK-NEXT: lsr z0.h, z0.h, #1
259
+ ; CHECK-NEXT: uhadd z0.h, p0/m, z0.h, z1.h
259
260
; CHECK-NEXT: ret
260
261
entry:
261
262
%s0s = zext <vscale x 8 x i8 > %s0 to <vscale x 8 x i16 >
@@ -352,12 +353,10 @@ entry:
352
353
define <vscale x 2 x i32 > @rhaddu_v2i32 (<vscale x 2 x i32 > %s0 , <vscale x 2 x i32 > %s1 ) {
353
354
; CHECK-LABEL: rhaddu_v2i32:
354
355
; CHECK: // %bb.0: // %entry
355
- ; CHECK-NEXT: mov z2.d, #-1 // =0xffffffffffffffff
356
+ ; CHECK-NEXT: ptrue p0.d
356
357
; CHECK-NEXT: and z0.d, z0.d, #0xffffffff
357
358
; CHECK-NEXT: and z1.d, z1.d, #0xffffffff
358
- ; CHECK-NEXT: eor z0.d, z0.d, z2.d
359
- ; CHECK-NEXT: sub z0.d, z1.d, z0.d
360
- ; CHECK-NEXT: lsr z0.d, z0.d, #1
359
+ ; CHECK-NEXT: urhadd z0.d, p0/m, z0.d, z1.d
361
360
; CHECK-NEXT: ret
362
361
entry:
363
362
%s0s = zext <vscale x 2 x i32 > %s0 to <vscale x 2 x i64 >
@@ -467,12 +466,10 @@ entry:
467
466
define <vscale x 4 x i16 > @rhaddu_v4i16 (<vscale x 4 x i16 > %s0 , <vscale x 4 x i16 > %s1 ) {
468
467
; CHECK-LABEL: rhaddu_v4i16:
469
468
; CHECK: // %bb.0: // %entry
470
- ; CHECK-NEXT: mov z2.s, #-1 // =0xffffffffffffffff
469
+ ; CHECK-NEXT: ptrue p0.s
471
470
; CHECK-NEXT: and z0.s, z0.s, #0xffff
472
471
; CHECK-NEXT: and z1.s, z1.s, #0xffff
473
- ; CHECK-NEXT: eor z0.d, z0.d, z2.d
474
- ; CHECK-NEXT: sub z0.s, z1.s, z0.s
475
- ; CHECK-NEXT: lsr z0.s, z0.s, #1
472
+ ; CHECK-NEXT: urhadd z0.s, p0/m, z0.s, z1.s
476
473
; CHECK-NEXT: ret
477
474
entry:
478
475
%s0s = zext <vscale x 4 x i16 > %s0 to <vscale x 4 x i32 >
@@ -582,12 +579,10 @@ entry:
582
579
define <vscale x 8 x i8 > @rhaddu_v8i8 (<vscale x 8 x i8 > %s0 , <vscale x 8 x i8 > %s1 ) {
583
580
; CHECK-LABEL: rhaddu_v8i8:
584
581
; CHECK: // %bb.0: // %entry
585
- ; CHECK-NEXT: mov z2.h, #-1 // =0xffffffffffffffff
582
+ ; CHECK-NEXT: ptrue p0.h
586
583
; CHECK-NEXT: and z0.h, z0.h, #0xff
587
584
; CHECK-NEXT: and z1.h, z1.h, #0xff
588
- ; CHECK-NEXT: eor z0.d, z0.d, z2.d
589
- ; CHECK-NEXT: sub z0.h, z1.h, z0.h
590
- ; CHECK-NEXT: lsr z0.h, z0.h, #1
585
+ ; CHECK-NEXT: urhadd z0.h, p0/m, z0.h, z1.h
591
586
; CHECK-NEXT: ret
592
587
entry:
593
588
%s0s = zext <vscale x 8 x i8 > %s0 to <vscale x 8 x i16 >
0 commit comments