Skip to content

[RISCV] Allow f16/bf16 with zvfhmin/zvfbfmin as legal strided access #115264

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21552,7 +21552,10 @@ bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType,
return false;

EVT ScalarType = DataType.getScalarType();
if (!isLegalElementTypeForRVV(ScalarType))
// TODO: Move bf16/f16 support into isLegalElementTypeForRVV
if (!(isLegalElementTypeForRVV(ScalarType) ||
(ScalarType == MVT::bf16 && Subtarget.hasVInstructionsBF16Minimal()) ||
(ScalarType == MVT::f16 && Subtarget.hasVInstructionsF16Minimal())))
return false;

if (!Subtarget.enableUnalignedVectorMem() &&
Expand Down
268 changes: 267 additions & 1 deletion llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-vectorized.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=slp-vectorizer -S -mtriple=riscv64-unknown-linux-gnu -mattr=+v < %s | FileCheck %s
; RUN: opt -passes=slp-vectorizer -S -mtriple=riscv64-unknown-linux-gnu -mattr=+v < %s | FileCheck %s --check-prefixes=CHECK,NO-ZVFHMIN-ZVFBFMIN
; RUN: opt -passes=slp-vectorizer -S -mtriple=riscv64-unknown-linux-gnu -mattr=+v,+zvfhmin,+zvfbfmin < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN-ZVFBFMIN


define void @test(ptr %p, ptr noalias %s) {
; CHECK-LABEL: @test(
Expand Down Expand Up @@ -308,3 +310,267 @@ entry:
ret void
}


; bf16 strided-access test: with +zvfbfmin the SLP vectorizer should turn the
; scalar loads below into llvm.experimental.vp.strided.load calls (checked by
; the ZVFHMIN-ZVFBFMIN prefix); without it the scalar code must remain
; (NO-ZVFHMIN-ZVFBFMIN prefix). CHECK lines autogenerated by
; utils/update_test_checks.py.
define void @test_bf16(ptr %p, ptr noalias %s) {
; NO-ZVFHMIN-ZVFBFMIN-LABEL: @test_bf16(
; NO-ZVFHMIN-ZVFBFMIN-NEXT: entry:
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P:%.*]], i64 0, i64 0
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I:%.*]] = load bfloat, ptr [[ARRAYIDX]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 30
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I1:%.*]] = load bfloat, ptr [[ARRAYIDX1]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD:%.*]] = fsub fast bfloat [[I1]], [[I]]
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds bfloat, ptr [[S:%.*]], i64 0
; NO-ZVFHMIN-ZVFBFMIN-NEXT: store bfloat [[ADD]], ptr [[ARRAYIDX2]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I2:%.*]] = load bfloat, ptr [[ARRAYIDX4]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 26
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I3:%.*]] = load bfloat, ptr [[ARRAYIDX6]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD7:%.*]] = fsub fast bfloat [[I3]], [[I2]]
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds bfloat, ptr [[S]], i64 1
; NO-ZVFHMIN-ZVFBFMIN-NEXT: store bfloat [[ADD7]], ptr [[ARRAYIDX9]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 8
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I4:%.*]] = load bfloat, ptr [[ARRAYIDX11]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 22
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I5:%.*]] = load bfloat, ptr [[ARRAYIDX13]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD14:%.*]] = fsub fast bfloat [[I5]], [[I4]]
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds bfloat, ptr [[S]], i64 2
; NO-ZVFHMIN-ZVFBFMIN-NEXT: store bfloat [[ADD14]], ptr [[ARRAYIDX16]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 12
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I6:%.*]] = load bfloat, ptr [[ARRAYIDX18]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 18
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I7:%.*]] = load bfloat, ptr [[ARRAYIDX20]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD21:%.*]] = fsub fast bfloat [[I7]], [[I6]]
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds bfloat, ptr [[S]], i64 3
; NO-ZVFHMIN-ZVFBFMIN-NEXT: store bfloat [[ADD21]], ptr [[ARRAYIDX23]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 16
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I8:%.*]] = load bfloat, ptr [[ARRAYIDX25]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX27:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 14
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I9:%.*]] = load bfloat, ptr [[ARRAYIDX27]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD28:%.*]] = fsub fast bfloat [[I9]], [[I8]]
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX30:%.*]] = getelementptr inbounds bfloat, ptr [[S]], i64 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT: store bfloat [[ADD28]], ptr [[ARRAYIDX30]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 20
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I10:%.*]] = load bfloat, ptr [[ARRAYIDX32]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 10
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I11:%.*]] = load bfloat, ptr [[ARRAYIDX34]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD35:%.*]] = fsub fast bfloat [[I11]], [[I10]]
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds bfloat, ptr [[S]], i64 5
; NO-ZVFHMIN-ZVFBFMIN-NEXT: store bfloat [[ADD35]], ptr [[ARRAYIDX37]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 24
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I12:%.*]] = load bfloat, ptr [[ARRAYIDX39]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX41:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 6
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I13:%.*]] = load bfloat, ptr [[ARRAYIDX41]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD42:%.*]] = fsub fast bfloat [[I13]], [[I12]]
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX44:%.*]] = getelementptr inbounds bfloat, ptr [[S]], i64 6
; NO-ZVFHMIN-ZVFBFMIN-NEXT: store bfloat [[ADD42]], ptr [[ARRAYIDX44]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX46:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 28
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I14:%.*]] = load bfloat, ptr [[ARRAYIDX46]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 2
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I15:%.*]] = load bfloat, ptr [[ARRAYIDX48]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD49:%.*]] = fsub fast bfloat [[I15]], [[I14]]
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX51:%.*]] = getelementptr inbounds bfloat, ptr [[S]], i64 7
; NO-ZVFHMIN-ZVFBFMIN-NEXT: store bfloat [[ADD49]], ptr [[ARRAYIDX51]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT: ret void
;
; ZVFHMIN-ZVFBFMIN-LABEL: @test_bf16(
; ZVFHMIN-ZVFBFMIN-NEXT: entry:
; ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P:%.*]], i64 0, i64 0
; ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 30
; ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds bfloat, ptr [[S:%.*]], i64 0
; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP15:%.*]] = call <8 x bfloat> @llvm.experimental.vp.strided.load.v8bf16.p0.i64(ptr align 4 [[ARRAYIDX]], i64 8, <8 x i1> splat (i1 true), i32 8)
; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP7:%.*]] = call <8 x bfloat> @llvm.experimental.vp.strided.load.v8bf16.p0.i64(ptr align 4 [[ARRAYIDX1]], i64 -8, <8 x i1> splat (i1 true), i32 8)
; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP16:%.*]] = fsub fast <8 x bfloat> [[TMP7]], [[TMP15]]
; ZVFHMIN-ZVFBFMIN-NEXT: store <8 x bfloat> [[TMP16]], ptr [[ARRAYIDX2]], align 4
; ZVFHMIN-ZVFBFMIN-NEXT: ret void
;
; One operand walks forward from element 0 in steps of 4 elements, the other
; walks backward from element 30 in steps of -4; differences are stored
; contiguously into %s.
entry:
%arrayidx = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 0
%i = load bfloat, ptr %arrayidx, align 4
%arrayidx1 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 30
%i1 = load bfloat, ptr %arrayidx1, align 4
%add = fsub fast bfloat %i1, %i
%arrayidx2 = getelementptr inbounds bfloat, ptr %s, i64 0
store bfloat %add, ptr %arrayidx2, align 4
%arrayidx4 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 4
%i2 = load bfloat, ptr %arrayidx4, align 4
%arrayidx6 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 26
%i3 = load bfloat, ptr %arrayidx6, align 4
%add7 = fsub fast bfloat %i3, %i2
%arrayidx9 = getelementptr inbounds bfloat, ptr %s, i64 1
store bfloat %add7, ptr %arrayidx9, align 4
%arrayidx11 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 8
%i4 = load bfloat, ptr %arrayidx11, align 4
%arrayidx13 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 22
%i5 = load bfloat, ptr %arrayidx13, align 4
%add14 = fsub fast bfloat %i5, %i4
%arrayidx16 = getelementptr inbounds bfloat, ptr %s, i64 2
store bfloat %add14, ptr %arrayidx16, align 4
%arrayidx18 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 12
%i6 = load bfloat, ptr %arrayidx18, align 4
%arrayidx20 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 18
%i7 = load bfloat, ptr %arrayidx20, align 4
%add21 = fsub fast bfloat %i7, %i6
%arrayidx23 = getelementptr inbounds bfloat, ptr %s, i64 3
store bfloat %add21, ptr %arrayidx23, align 4
%arrayidx25 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 16
%i8 = load bfloat, ptr %arrayidx25, align 4
%arrayidx27 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 14
%i9 = load bfloat, ptr %arrayidx27, align 4
%add28 = fsub fast bfloat %i9, %i8
%arrayidx30 = getelementptr inbounds bfloat, ptr %s, i64 4
store bfloat %add28, ptr %arrayidx30, align 4
%arrayidx32 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 20
%i10 = load bfloat, ptr %arrayidx32, align 4
%arrayidx34 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 10
%i11 = load bfloat, ptr %arrayidx34, align 4
%add35 = fsub fast bfloat %i11, %i10
%arrayidx37 = getelementptr inbounds bfloat, ptr %s, i64 5
store bfloat %add35, ptr %arrayidx37, align 4
%arrayidx39 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 24
%i12 = load bfloat, ptr %arrayidx39, align 4
%arrayidx41 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 6
%i13 = load bfloat, ptr %arrayidx41, align 4
%add42 = fsub fast bfloat %i13, %i12
%arrayidx44 = getelementptr inbounds bfloat, ptr %s, i64 6
store bfloat %add42, ptr %arrayidx44, align 4
%arrayidx46 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 28
%i14 = load bfloat, ptr %arrayidx46, align 4
%arrayidx48 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 2
%i15 = load bfloat, ptr %arrayidx48, align 4
%add49 = fsub fast bfloat %i15, %i14
%arrayidx51 = getelementptr inbounds bfloat, ptr %s, i64 7
store bfloat %add49, ptr %arrayidx51, align 4
ret void
}

; f16 strided-access test: with +zvfhmin the SLP vectorizer should turn the
; scalar loads below into llvm.experimental.vp.strided.load calls (checked by
; the ZVFHMIN-ZVFBFMIN prefix); without it the scalar code must remain
; (NO-ZVFHMIN-ZVFBFMIN prefix). CHECK lines autogenerated by
; utils/update_test_checks.py.
define void @test_f16(ptr %p, ptr noalias %s) {
; NO-ZVFHMIN-ZVFBFMIN-LABEL: @test_f16(
; NO-ZVFHMIN-ZVFBFMIN-NEXT: entry:
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x half], ptr [[P:%.*]], i64 0, i64 0
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I:%.*]] = load half, ptr [[ARRAYIDX]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 30
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I1:%.*]] = load half, ptr [[ARRAYIDX1]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD:%.*]] = fsub fast half [[I1]], [[I]]
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds half, ptr [[S:%.*]], i64 0
; NO-ZVFHMIN-ZVFBFMIN-NEXT: store half [[ADD]], ptr [[ARRAYIDX2]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I2:%.*]] = load half, ptr [[ARRAYIDX4]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 26
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I3:%.*]] = load half, ptr [[ARRAYIDX6]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD7:%.*]] = fsub fast half [[I3]], [[I2]]
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds half, ptr [[S]], i64 1
; NO-ZVFHMIN-ZVFBFMIN-NEXT: store half [[ADD7]], ptr [[ARRAYIDX9]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 8
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I4:%.*]] = load half, ptr [[ARRAYIDX11]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 22
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I5:%.*]] = load half, ptr [[ARRAYIDX13]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD14:%.*]] = fsub fast half [[I5]], [[I4]]
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds half, ptr [[S]], i64 2
; NO-ZVFHMIN-ZVFBFMIN-NEXT: store half [[ADD14]], ptr [[ARRAYIDX16]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 12
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I6:%.*]] = load half, ptr [[ARRAYIDX18]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 18
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I7:%.*]] = load half, ptr [[ARRAYIDX20]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD21:%.*]] = fsub fast half [[I7]], [[I6]]
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds half, ptr [[S]], i64 3
; NO-ZVFHMIN-ZVFBFMIN-NEXT: store half [[ADD21]], ptr [[ARRAYIDX23]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 16
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I8:%.*]] = load half, ptr [[ARRAYIDX25]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX27:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 14
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I9:%.*]] = load half, ptr [[ARRAYIDX27]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD28:%.*]] = fsub fast half [[I9]], [[I8]]
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX30:%.*]] = getelementptr inbounds half, ptr [[S]], i64 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT: store half [[ADD28]], ptr [[ARRAYIDX30]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 20
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I10:%.*]] = load half, ptr [[ARRAYIDX32]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 10
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I11:%.*]] = load half, ptr [[ARRAYIDX34]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD35:%.*]] = fsub fast half [[I11]], [[I10]]
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds half, ptr [[S]], i64 5
; NO-ZVFHMIN-ZVFBFMIN-NEXT: store half [[ADD35]], ptr [[ARRAYIDX37]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 24
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I12:%.*]] = load half, ptr [[ARRAYIDX39]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX41:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 6
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I13:%.*]] = load half, ptr [[ARRAYIDX41]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD42:%.*]] = fsub fast half [[I13]], [[I12]]
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX44:%.*]] = getelementptr inbounds half, ptr [[S]], i64 6
; NO-ZVFHMIN-ZVFBFMIN-NEXT: store half [[ADD42]], ptr [[ARRAYIDX44]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX46:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 28
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I14:%.*]] = load half, ptr [[ARRAYIDX46]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 2
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I15:%.*]] = load half, ptr [[ARRAYIDX48]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD49:%.*]] = fsub fast half [[I15]], [[I14]]
; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX51:%.*]] = getelementptr inbounds half, ptr [[S]], i64 7
; NO-ZVFHMIN-ZVFBFMIN-NEXT: store half [[ADD49]], ptr [[ARRAYIDX51]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT: ret void
;
; ZVFHMIN-ZVFBFMIN-LABEL: @test_f16(
; ZVFHMIN-ZVFBFMIN-NEXT: entry:
; ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x half], ptr [[P:%.*]], i64 0, i64 0
; ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 30
; ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds half, ptr [[S:%.*]], i64 0
; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP15:%.*]] = call <8 x half> @llvm.experimental.vp.strided.load.v8f16.p0.i64(ptr align 4 [[ARRAYIDX]], i64 8, <8 x i1> splat (i1 true), i32 8)
; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP7:%.*]] = call <8 x half> @llvm.experimental.vp.strided.load.v8f16.p0.i64(ptr align 4 [[ARRAYIDX1]], i64 -8, <8 x i1> splat (i1 true), i32 8)
; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP16:%.*]] = fsub fast <8 x half> [[TMP7]], [[TMP15]]
; ZVFHMIN-ZVFBFMIN-NEXT: store <8 x half> [[TMP16]], ptr [[ARRAYIDX2]], align 4
; ZVFHMIN-ZVFBFMIN-NEXT: ret void
;
; Same access pattern as @test_bf16: forward from element 0 in steps of 4
; elements, backward from element 30 in steps of -4; differences stored
; contiguously into %s.
entry:
%arrayidx = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 0
%i = load half, ptr %arrayidx, align 4
%arrayidx1 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 30
%i1 = load half, ptr %arrayidx1, align 4
%add = fsub fast half %i1, %i
%arrayidx2 = getelementptr inbounds half, ptr %s, i64 0
store half %add, ptr %arrayidx2, align 4
%arrayidx4 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 4
%i2 = load half, ptr %arrayidx4, align 4
%arrayidx6 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 26
%i3 = load half, ptr %arrayidx6, align 4
%add7 = fsub fast half %i3, %i2
%arrayidx9 = getelementptr inbounds half, ptr %s, i64 1
store half %add7, ptr %arrayidx9, align 4
%arrayidx11 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 8
%i4 = load half, ptr %arrayidx11, align 4
%arrayidx13 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 22
%i5 = load half, ptr %arrayidx13, align 4
%add14 = fsub fast half %i5, %i4
%arrayidx16 = getelementptr inbounds half, ptr %s, i64 2
store half %add14, ptr %arrayidx16, align 4
%arrayidx18 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 12
%i6 = load half, ptr %arrayidx18, align 4
%arrayidx20 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 18
%i7 = load half, ptr %arrayidx20, align 4
%add21 = fsub fast half %i7, %i6
%arrayidx23 = getelementptr inbounds half, ptr %s, i64 3
store half %add21, ptr %arrayidx23, align 4
%arrayidx25 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 16
%i8 = load half, ptr %arrayidx25, align 4
%arrayidx27 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 14
%i9 = load half, ptr %arrayidx27, align 4
%add28 = fsub fast half %i9, %i8
%arrayidx30 = getelementptr inbounds half, ptr %s, i64 4
store half %add28, ptr %arrayidx30, align 4
%arrayidx32 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 20
%i10 = load half, ptr %arrayidx32, align 4
%arrayidx34 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 10
%i11 = load half, ptr %arrayidx34, align 4
%add35 = fsub fast half %i11, %i10
%arrayidx37 = getelementptr inbounds half, ptr %s, i64 5
store half %add35, ptr %arrayidx37, align 4
%arrayidx39 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 24
%i12 = load half, ptr %arrayidx39, align 4
%arrayidx41 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 6
%i13 = load half, ptr %arrayidx41, align 4
%add42 = fsub fast half %i13, %i12
%arrayidx44 = getelementptr inbounds half, ptr %s, i64 6
store half %add42, ptr %arrayidx44, align 4
%arrayidx46 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 28
%i14 = load half, ptr %arrayidx46, align 4
%arrayidx48 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 2
%i15 = load half, ptr %arrayidx48, align 4
%add49 = fsub fast half %i15, %i14
%arrayidx51 = getelementptr inbounds half, ptr %s, i64 7
store half %add49, ptr %arrayidx51, align 4
ret void
}
Loading