Skip to content

Commit cd1503e

Browse files
committed
[RISCV] Allow f16/bf16 with zvfhmin/zvfbfmin as legal strided access
This is also split off from the zvfhmin/zvfbfmin isLegalElementTypeForRVV work. Enabling this causes SLP and RISCVGatherScatterLowering to emit @llvm.experimental.vp.strided.{load,store} intrinsics for f16/bf16 vectors; support for these was added in llvm#109387 and llvm#114750.
1 parent b887c43 commit cd1503e

File tree

2 files changed

+8
-93
lines changed

2 files changed

+8
-93
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21552,7 +21552,10 @@ bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType,
2155221552
return false;
2155321553

2155421554
EVT ScalarType = DataType.getScalarType();
21555-
if (!isLegalElementTypeForRVV(ScalarType))
21555+
// TODO: Move bf16/f16 support into isLegalElementTypeForRVV
21556+
if (!(isLegalElementTypeForRVV(ScalarType) ||
21557+
(ScalarType == MVT::bf16 && Subtarget.hasVInstructionsBF16Minimal()) ||
21558+
(ScalarType == MVT::f16 && Subtarget.hasVInstructionsF16Minimal())))
2155621559
return false;
2155721560

2155821561
if (!Subtarget.enableUnalignedVectorMem() &&

llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-vectorized.ll

Lines changed: 4 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -375,54 +375,10 @@ define void @test_bf16(ptr %p, ptr noalias %s) {
375375
; ZVFHMIN-ZVFBFMIN-LABEL: @test_bf16(
376376
; ZVFHMIN-ZVFBFMIN-NEXT: entry:
377377
; ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P:%.*]], i64 0, i64 0
378-
; ZVFHMIN-ZVFBFMIN-NEXT: [[I:%.*]] = load bfloat, ptr [[ARRAYIDX]], align 4
379378
; ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 30
380-
; ZVFHMIN-ZVFBFMIN-NEXT: [[I1:%.*]] = load bfloat, ptr [[ARRAYIDX1]], align 4
381379
; ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds bfloat, ptr [[S:%.*]], i64 0
382-
; ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 4
383-
; ZVFHMIN-ZVFBFMIN-NEXT: [[I2:%.*]] = load bfloat, ptr [[ARRAYIDX4]], align 4
384-
; ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 26
385-
; ZVFHMIN-ZVFBFMIN-NEXT: [[I3:%.*]] = load bfloat, ptr [[ARRAYIDX6]], align 4
386-
; ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 8
387-
; ZVFHMIN-ZVFBFMIN-NEXT: [[I4:%.*]] = load bfloat, ptr [[ARRAYIDX11]], align 4
388-
; ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 22
389-
; ZVFHMIN-ZVFBFMIN-NEXT: [[I5:%.*]] = load bfloat, ptr [[ARRAYIDX13]], align 4
390-
; ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 12
391-
; ZVFHMIN-ZVFBFMIN-NEXT: [[I6:%.*]] = load bfloat, ptr [[ARRAYIDX18]], align 4
392-
; ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 18
393-
; ZVFHMIN-ZVFBFMIN-NEXT: [[I7:%.*]] = load bfloat, ptr [[ARRAYIDX20]], align 4
394-
; ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 16
395-
; ZVFHMIN-ZVFBFMIN-NEXT: [[I8:%.*]] = load bfloat, ptr [[ARRAYIDX25]], align 4
396-
; ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX27:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 14
397-
; ZVFHMIN-ZVFBFMIN-NEXT: [[I9:%.*]] = load bfloat, ptr [[ARRAYIDX27]], align 4
398-
; ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 20
399-
; ZVFHMIN-ZVFBFMIN-NEXT: [[I10:%.*]] = load bfloat, ptr [[ARRAYIDX32]], align 4
400-
; ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 10
401-
; ZVFHMIN-ZVFBFMIN-NEXT: [[I11:%.*]] = load bfloat, ptr [[ARRAYIDX34]], align 4
402-
; ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 24
403-
; ZVFHMIN-ZVFBFMIN-NEXT: [[I12:%.*]] = load bfloat, ptr [[ARRAYIDX39]], align 4
404-
; ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX41:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 6
405-
; ZVFHMIN-ZVFBFMIN-NEXT: [[I13:%.*]] = load bfloat, ptr [[ARRAYIDX41]], align 4
406-
; ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX46:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 28
407-
; ZVFHMIN-ZVFBFMIN-NEXT: [[I14:%.*]] = load bfloat, ptr [[ARRAYIDX46]], align 4
408-
; ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 2
409-
; ZVFHMIN-ZVFBFMIN-NEXT: [[I15:%.*]] = load bfloat, ptr [[ARRAYIDX48]], align 4
410-
; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP0:%.*]] = insertelement <8 x bfloat> poison, bfloat [[I1]], i32 0
411-
; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP1:%.*]] = insertelement <8 x bfloat> [[TMP0]], bfloat [[I3]], i32 1
412-
; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP2:%.*]] = insertelement <8 x bfloat> [[TMP1]], bfloat [[I5]], i32 2
413-
; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP3:%.*]] = insertelement <8 x bfloat> [[TMP2]], bfloat [[I7]], i32 3
414-
; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP4:%.*]] = insertelement <8 x bfloat> [[TMP3]], bfloat [[I9]], i32 4
415-
; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP5:%.*]] = insertelement <8 x bfloat> [[TMP4]], bfloat [[I11]], i32 5
416-
; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP6:%.*]] = insertelement <8 x bfloat> [[TMP5]], bfloat [[I13]], i32 6
417-
; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP7:%.*]] = insertelement <8 x bfloat> [[TMP6]], bfloat [[I15]], i32 7
418-
; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP8:%.*]] = insertelement <8 x bfloat> poison, bfloat [[I]], i32 0
419-
; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP9:%.*]] = insertelement <8 x bfloat> [[TMP8]], bfloat [[I2]], i32 1
420-
; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP10:%.*]] = insertelement <8 x bfloat> [[TMP9]], bfloat [[I4]], i32 2
421-
; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP11:%.*]] = insertelement <8 x bfloat> [[TMP10]], bfloat [[I6]], i32 3
422-
; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP12:%.*]] = insertelement <8 x bfloat> [[TMP11]], bfloat [[I8]], i32 4
423-
; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP13:%.*]] = insertelement <8 x bfloat> [[TMP12]], bfloat [[I10]], i32 5
424-
; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP14:%.*]] = insertelement <8 x bfloat> [[TMP13]], bfloat [[I12]], i32 6
425-
; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP15:%.*]] = insertelement <8 x bfloat> [[TMP14]], bfloat [[I14]], i32 7
380+
; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP15:%.*]] = call <8 x bfloat> @llvm.experimental.vp.strided.load.v8bf16.p0.i64(ptr align 4 [[ARRAYIDX]], i64 8, <8 x i1> splat (i1 true), i32 8)
381+
; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP7:%.*]] = call <8 x bfloat> @llvm.experimental.vp.strided.load.v8bf16.p0.i64(ptr align 4 [[ARRAYIDX1]], i64 -8, <8 x i1> splat (i1 true), i32 8)
426382
; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP16:%.*]] = fsub fast <8 x bfloat> [[TMP7]], [[TMP15]]
427383
; ZVFHMIN-ZVFBFMIN-NEXT: store <8 x bfloat> [[TMP16]], ptr [[ARRAYIDX2]], align 4
428384
; ZVFHMIN-ZVFBFMIN-NEXT: ret void
@@ -551,54 +507,10 @@ define void @test_f16(ptr %p, ptr noalias %s) {
551507
; ZVFHMIN-ZVFBFMIN-LABEL: @test_f16(
552508
; ZVFHMIN-ZVFBFMIN-NEXT: entry:
553509
; ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x half], ptr [[P:%.*]], i64 0, i64 0
554-
; ZVFHMIN-ZVFBFMIN-NEXT: [[I:%.*]] = load half, ptr [[ARRAYIDX]], align 4
555510
; ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 30
556-
; ZVFHMIN-ZVFBFMIN-NEXT: [[I1:%.*]] = load half, ptr [[ARRAYIDX1]], align 4
557511
; ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds half, ptr [[S:%.*]], i64 0
558-
; ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 4
559-
; ZVFHMIN-ZVFBFMIN-NEXT: [[I2:%.*]] = load half, ptr [[ARRAYIDX4]], align 4
560-
; ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 26
561-
; ZVFHMIN-ZVFBFMIN-NEXT: [[I3:%.*]] = load half, ptr [[ARRAYIDX6]], align 4
562-
; ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 8
563-
; ZVFHMIN-ZVFBFMIN-NEXT: [[I4:%.*]] = load half, ptr [[ARRAYIDX11]], align 4
564-
; ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 22
565-
; ZVFHMIN-ZVFBFMIN-NEXT: [[I5:%.*]] = load half, ptr [[ARRAYIDX13]], align 4
566-
; ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 12
567-
; ZVFHMIN-ZVFBFMIN-NEXT: [[I6:%.*]] = load half, ptr [[ARRAYIDX18]], align 4
568-
; ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 18
569-
; ZVFHMIN-ZVFBFMIN-NEXT: [[I7:%.*]] = load half, ptr [[ARRAYIDX20]], align 4
570-
; ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 16
571-
; ZVFHMIN-ZVFBFMIN-NEXT: [[I8:%.*]] = load half, ptr [[ARRAYIDX25]], align 4
572-
; ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX27:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 14
573-
; ZVFHMIN-ZVFBFMIN-NEXT: [[I9:%.*]] = load half, ptr [[ARRAYIDX27]], align 4
574-
; ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 20
575-
; ZVFHMIN-ZVFBFMIN-NEXT: [[I10:%.*]] = load half, ptr [[ARRAYIDX32]], align 4
576-
; ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 10
577-
; ZVFHMIN-ZVFBFMIN-NEXT: [[I11:%.*]] = load half, ptr [[ARRAYIDX34]], align 4
578-
; ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 24
579-
; ZVFHMIN-ZVFBFMIN-NEXT: [[I12:%.*]] = load half, ptr [[ARRAYIDX39]], align 4
580-
; ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX41:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 6
581-
; ZVFHMIN-ZVFBFMIN-NEXT: [[I13:%.*]] = load half, ptr [[ARRAYIDX41]], align 4
582-
; ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX46:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 28
583-
; ZVFHMIN-ZVFBFMIN-NEXT: [[I14:%.*]] = load half, ptr [[ARRAYIDX46]], align 4
584-
; ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 2
585-
; ZVFHMIN-ZVFBFMIN-NEXT: [[I15:%.*]] = load half, ptr [[ARRAYIDX48]], align 4
586-
; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP0:%.*]] = insertelement <8 x half> poison, half [[I1]], i32 0
587-
; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP1:%.*]] = insertelement <8 x half> [[TMP0]], half [[I3]], i32 1
588-
; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP2:%.*]] = insertelement <8 x half> [[TMP1]], half [[I5]], i32 2
589-
; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP3:%.*]] = insertelement <8 x half> [[TMP2]], half [[I7]], i32 3
590-
; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP4:%.*]] = insertelement <8 x half> [[TMP3]], half [[I9]], i32 4
591-
; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP5:%.*]] = insertelement <8 x half> [[TMP4]], half [[I11]], i32 5
592-
; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP6:%.*]] = insertelement <8 x half> [[TMP5]], half [[I13]], i32 6
593-
; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP7:%.*]] = insertelement <8 x half> [[TMP6]], half [[I15]], i32 7
594-
; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP8:%.*]] = insertelement <8 x half> poison, half [[I]], i32 0
595-
; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP9:%.*]] = insertelement <8 x half> [[TMP8]], half [[I2]], i32 1
596-
; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP10:%.*]] = insertelement <8 x half> [[TMP9]], half [[I4]], i32 2
597-
; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP11:%.*]] = insertelement <8 x half> [[TMP10]], half [[I6]], i32 3
598-
; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP12:%.*]] = insertelement <8 x half> [[TMP11]], half [[I8]], i32 4
599-
; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP13:%.*]] = insertelement <8 x half> [[TMP12]], half [[I10]], i32 5
600-
; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP14:%.*]] = insertelement <8 x half> [[TMP13]], half [[I12]], i32 6
601-
; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP15:%.*]] = insertelement <8 x half> [[TMP14]], half [[I14]], i32 7
512+
; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP15:%.*]] = call <8 x half> @llvm.experimental.vp.strided.load.v8f16.p0.i64(ptr align 4 [[ARRAYIDX]], i64 8, <8 x i1> splat (i1 true), i32 8)
513+
; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP7:%.*]] = call <8 x half> @llvm.experimental.vp.strided.load.v8f16.p0.i64(ptr align 4 [[ARRAYIDX1]], i64 -8, <8 x i1> splat (i1 true), i32 8)
602514
; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP16:%.*]] = fsub fast <8 x half> [[TMP7]], [[TMP15]]
603515
; ZVFHMIN-ZVFBFMIN-NEXT: store <8 x half> [[TMP16]], ptr [[ARRAYIDX2]], align 4
604516
; ZVFHMIN-ZVFBFMIN-NEXT: ret void

0 commit comments

Comments
 (0)