@@ -357,3 +357,54 @@ define <2 x half> @vfmax_v2f16_vv_nnanb(<2 x half> %a, <2 x half> %b) {
357
357
%v = call <2 x half > @llvm.maximum.v2f16 (<2 x half > %a , <2 x half > %c )
358
358
ret <2 x half > %v
359
359
}
360
+
361
; Overloaded intrinsic: the name suffix spells the overloaded types in order,
; result vector (<4 x half> -> v4f16) first, inserted subvector
; (<2 x half> -> v2f16) second, so it matches the signature below.
declare <4 x half> @llvm.vector.insert.v4f16.v2f16(<4 x half>, <2 x half>, i64)

; Builds a <4 x half> value from two `fadd nnan` results placed at element
; offsets 0 and 2 via llvm.vector.insert, then feeds it together with %c to
; llvm.maximum.v4f16.
;
; In both expected-output blocks below, each operand of the maximum is still
; guarded by a vmfeq.vv self-compare + vmerge.vvm pair, including the
; concatenated vector produced by vslideup.vi.
; NOTE(review): presumably this records that the nnan flags on %d/%e are not
; yet seen through the subvector inserts -- confirm against the accompanying
; commit message.
define <4 x half> @vfmax_v2f16_vv_nnan_insert_subvector(<2 x half> %a, <2 x half> %b, <4 x half> %c) {
; ZVFH-LABEL: vfmax_v2f16_vv_nnan_insert_subvector:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; ZVFH-NEXT:    vfadd.vv v8, v8, v8
; ZVFH-NEXT:    vfadd.vv v9, v9, v9
; ZVFH-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; ZVFH-NEXT:    vslideup.vi v8, v9, 2
; ZVFH-NEXT:    vmfeq.vv v0, v8, v8
; ZVFH-NEXT:    vmerge.vvm v9, v8, v10, v0
; ZVFH-NEXT:    vmfeq.vv v0, v10, v10
; ZVFH-NEXT:    vmerge.vvm v8, v10, v8, v0
; ZVFH-NEXT:    vfmax.vv v8, v8, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmax_v2f16_vv_nnan_insert_subvector:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfadd.vv v9, v11, v11
; ZVFHMIN-NEXT:    vfadd.vv v8, v8, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v11, v9
; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v8
; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vslideup.vi v11, v9, 2
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vmerge.vvm v10, v8, v9, v0
; ZVFHMIN-NEXT:    vmfeq.vv v0, v9, v9
; ZVFHMIN-NEXT:    vmerge.vvm v8, v9, v8, v0
; ZVFHMIN-NEXT:    vfmax.vv v9, v8, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    ret
  ; Each half is produced by an fadd carrying the nnan fast-math flag.
  %d = fadd nnan <2 x half> %a, %a
  %e = fadd nnan <2 x half> %b, %b
  ; Place %d at elements [0,1] and %e at elements [2,3] of one <4 x half>.
  %f = call <4 x half> @llvm.vector.insert.v4f16.v2f16(<4 x half> undef, <2 x half> %d, i64 0)
  %g = call <4 x half> @llvm.vector.insert.v4f16.v2f16(<4 x half> %f, <2 x half> %e, i64 2)
  ; %c is a plain argument with no flags attached.
  %v = call <4 x half> @llvm.maximum.v4f16(<4 x half> %g, <4 x half> %c)
  ret <4 x half> %v
}
0 commit comments