
Commit 97eff4d

Add negative test case for cdot
1 parent fc68abe commit 97eff4d

File tree

1 file changed: +113 -10 lines changed


llvm/test/CodeGen/AArch64/complex-deinterleaving-cdot.ll

Lines changed: 113 additions & 10 deletions
@@ -803,15 +803,118 @@ middle.block: ; preds = %vector.body
 }


-; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
-declare <vscale x 4 x i32> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32(<vscale x 4 x i32>, <vscale x 16 x i32>) #0
-; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
-declare <vscale x 2 x i64> @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv8i32(<vscale x 2 x i64>, <vscale x 16 x i32>) #0
+define i32 @not_cdotp(<vscale x 32 x i8> %a, <vscale x 32 x i8> %b) {
+; CHECK-SVE2-LABEL: define i32 @not_cdotp(
+; CHECK-SVE2-SAME: <vscale x 32 x i8> [[A:%.*]], <vscale x 32 x i8> [[B:%.*]]) #[[ATTR0]] {
+; CHECK-SVE2-NEXT: [[ENTRY:.*]]:
+; CHECK-SVE2-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK-SVE2: [[VECTOR_BODY]]:
+; CHECK-SVE2-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, %[[ENTRY]] ], [ [[PARTIAL_REDUCE_SUB:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-SVE2-NEXT: [[A_DEINTERLEAVED:%.*]] = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.vector.deinterleave2.nxv32i8(<vscale x 32 x i8> [[A]])
+; CHECK-SVE2-NEXT: [[B_DEINTERLEAVED:%.*]] = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.vector.deinterleave2.nxv32i8(<vscale x 32 x i8> [[B]])
+; CHECK-SVE2-NEXT: [[A_REAL:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[A_DEINTERLEAVED]], 0
+; CHECK-SVE2-NEXT: [[A_IMAG:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[A_DEINTERLEAVED]], 1
+; CHECK-SVE2-NEXT: [[B_REAL:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[B_DEINTERLEAVED]], 0
+; CHECK-SVE2-NEXT: [[B_IMAG:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[B_DEINTERLEAVED]], 1
+; CHECK-SVE2-NEXT: [[A_REAL_EXT:%.*]] = sext <vscale x 16 x i8> [[A_REAL]] to <vscale x 16 x i32>
+; CHECK-SVE2-NEXT: [[A_IMAG_EXT:%.*]] = sext <vscale x 16 x i8> [[A_IMAG]] to <vscale x 16 x i32>
+; CHECK-SVE2-NEXT: [[B_REAL_EXT:%.*]] = sext <vscale x 16 x i8> [[B_REAL]] to <vscale x 16 x i32>
+; CHECK-SVE2-NEXT: [[B_IMAG_EXT:%.*]] = sext <vscale x 16 x i8> [[B_IMAG]] to <vscale x 16 x i32>
+; CHECK-SVE2-NEXT: [[REAL_MUL:%.*]] = mul <vscale x 16 x i32> [[B_REAL_EXT]], [[A_REAL_EXT]]
+; CHECK-SVE2-NEXT: [[REAL_MUL_NEG:%.*]] = sub <vscale x 16 x i32> zeroinitializer, [[REAL_MUL]]
+; CHECK-SVE2-NEXT: [[REAL_MUL_REDUCED:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32(<vscale x 4 x i32> [[VEC_PHI]], <vscale x 16 x i32> [[REAL_MUL_NEG]])
+; CHECK-SVE2-NEXT: [[IMAG_MUL:%.*]] = mul <vscale x 16 x i32> [[B_IMAG_EXT]], [[A_IMAG_EXT]]
+; CHECK-SVE2-NEXT: [[IMAG_MUL_NEG:%.*]] = sub <vscale x 16 x i32> zeroinitializer, [[IMAG_MUL]]
+; CHECK-SVE2-NEXT: [[PARTIAL_REDUCE_SUB]] = call <vscale x 4 x i32> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32(<vscale x 4 x i32> [[REAL_MUL_REDUCED]], <vscale x 16 x i32> [[IMAG_MUL_NEG]])
+; CHECK-SVE2-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]]
+; CHECK-SVE2: [[MIDDLE_BLOCK]]:
+; CHECK-SVE2-NEXT: [[TMP0:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[PARTIAL_REDUCE_SUB]])
+; CHECK-SVE2-NEXT: ret i32 [[TMP0]]
+;
+; CHECK-SVE-LABEL: define i32 @not_cdotp(
+; CHECK-SVE-SAME: <vscale x 32 x i8> [[A:%.*]], <vscale x 32 x i8> [[B:%.*]]) #[[ATTR0]] {
+; CHECK-SVE-NEXT: [[ENTRY:.*]]:
+; CHECK-SVE-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK-SVE: [[VECTOR_BODY]]:
+; CHECK-SVE-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, %[[ENTRY]] ], [ [[PARTIAL_REDUCE_SUB:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-SVE-NEXT: [[A_DEINTERLEAVED:%.*]] = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.vector.deinterleave2.nxv32i8(<vscale x 32 x i8> [[A]])
+; CHECK-SVE-NEXT: [[B_DEINTERLEAVED:%.*]] = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.vector.deinterleave2.nxv32i8(<vscale x 32 x i8> [[B]])
+; CHECK-SVE-NEXT: [[A_REAL:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[A_DEINTERLEAVED]], 0
+; CHECK-SVE-NEXT: [[A_IMAG:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[A_DEINTERLEAVED]], 1
+; CHECK-SVE-NEXT: [[B_REAL:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[B_DEINTERLEAVED]], 0
+; CHECK-SVE-NEXT: [[B_IMAG:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[B_DEINTERLEAVED]], 1
+; CHECK-SVE-NEXT: [[A_REAL_EXT:%.*]] = sext <vscale x 16 x i8> [[A_REAL]] to <vscale x 16 x i32>
+; CHECK-SVE-NEXT: [[A_IMAG_EXT:%.*]] = sext <vscale x 16 x i8> [[A_IMAG]] to <vscale x 16 x i32>
+; CHECK-SVE-NEXT: [[B_REAL_EXT:%.*]] = sext <vscale x 16 x i8> [[B_REAL]] to <vscale x 16 x i32>
+; CHECK-SVE-NEXT: [[B_IMAG_EXT:%.*]] = sext <vscale x 16 x i8> [[B_IMAG]] to <vscale x 16 x i32>
+; CHECK-SVE-NEXT: [[REAL_MUL:%.*]] = mul <vscale x 16 x i32> [[B_REAL_EXT]], [[A_REAL_EXT]]
+; CHECK-SVE-NEXT: [[REAL_MUL_NEG:%.*]] = sub <vscale x 16 x i32> zeroinitializer, [[REAL_MUL]]
+; CHECK-SVE-NEXT: [[REAL_MUL_REDUCED:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32(<vscale x 4 x i32> [[VEC_PHI]], <vscale x 16 x i32> [[REAL_MUL_NEG]])
+; CHECK-SVE-NEXT: [[IMAG_MUL:%.*]] = mul <vscale x 16 x i32> [[B_IMAG_EXT]], [[A_IMAG_EXT]]
+; CHECK-SVE-NEXT: [[IMAG_MUL_NEG:%.*]] = sub <vscale x 16 x i32> zeroinitializer, [[IMAG_MUL]]
+; CHECK-SVE-NEXT: [[PARTIAL_REDUCE_SUB]] = call <vscale x 4 x i32> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32(<vscale x 4 x i32> [[REAL_MUL_REDUCED]], <vscale x 16 x i32> [[IMAG_MUL_NEG]])
+; CHECK-SVE-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]]
+; CHECK-SVE: [[MIDDLE_BLOCK]]:
+; CHECK-SVE-NEXT: [[TMP0:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[PARTIAL_REDUCE_SUB]])
+; CHECK-SVE-NEXT: ret i32 [[TMP0]]
+;
+; CHECK-NOSVE-LABEL: define i32 @not_cdotp(
+; CHECK-NOSVE-SAME: <vscale x 32 x i8> [[A:%.*]], <vscale x 32 x i8> [[B:%.*]]) {
+; CHECK-NOSVE-NEXT: [[ENTRY:.*]]:
+; CHECK-NOSVE-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK-NOSVE: [[VECTOR_BODY]]:
+; CHECK-NOSVE-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, %[[ENTRY]] ], [ [[PARTIAL_REDUCE_SUB:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NOSVE-NEXT: [[A_DEINTERLEAVED:%.*]] = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.vector.deinterleave2.nxv32i8(<vscale x 32 x i8> [[A]])
+; CHECK-NOSVE-NEXT: [[B_DEINTERLEAVED:%.*]] = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.vector.deinterleave2.nxv32i8(<vscale x 32 x i8> [[B]])
+; CHECK-NOSVE-NEXT: [[A_REAL:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[A_DEINTERLEAVED]], 0
+; CHECK-NOSVE-NEXT: [[A_IMAG:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[A_DEINTERLEAVED]], 1
+; CHECK-NOSVE-NEXT: [[B_REAL:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[B_DEINTERLEAVED]], 0
+; CHECK-NOSVE-NEXT: [[B_IMAG:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[B_DEINTERLEAVED]], 1
+; CHECK-NOSVE-NEXT: [[A_REAL_EXT:%.*]] = sext <vscale x 16 x i8> [[A_REAL]] to <vscale x 16 x i32>
+; CHECK-NOSVE-NEXT: [[A_IMAG_EXT:%.*]] = sext <vscale x 16 x i8> [[A_IMAG]] to <vscale x 16 x i32>
+; CHECK-NOSVE-NEXT: [[B_REAL_EXT:%.*]] = sext <vscale x 16 x i8> [[B_REAL]] to <vscale x 16 x i32>
+; CHECK-NOSVE-NEXT: [[B_IMAG_EXT:%.*]] = sext <vscale x 16 x i8> [[B_IMAG]] to <vscale x 16 x i32>
+; CHECK-NOSVE-NEXT: [[REAL_MUL:%.*]] = mul <vscale x 16 x i32> [[B_REAL_EXT]], [[A_REAL_EXT]]
+; CHECK-NOSVE-NEXT: [[REAL_MUL_NEG:%.*]] = sub <vscale x 16 x i32> zeroinitializer, [[REAL_MUL]]
+; CHECK-NOSVE-NEXT: [[REAL_MUL_REDUCED:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32(<vscale x 4 x i32> [[VEC_PHI]], <vscale x 16 x i32> [[REAL_MUL_NEG]])
+; CHECK-NOSVE-NEXT: [[IMAG_MUL:%.*]] = mul <vscale x 16 x i32> [[B_IMAG_EXT]], [[A_IMAG_EXT]]
+; CHECK-NOSVE-NEXT: [[IMAG_MUL_NEG:%.*]] = sub <vscale x 16 x i32> zeroinitializer, [[IMAG_MUL]]
+; CHECK-NOSVE-NEXT: [[PARTIAL_REDUCE_SUB]] = call <vscale x 4 x i32> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32(<vscale x 4 x i32> [[REAL_MUL_REDUCED]], <vscale x 16 x i32> [[IMAG_MUL_NEG]])
+; CHECK-NOSVE-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]]
+; CHECK-NOSVE: [[MIDDLE_BLOCK]]:
+; CHECK-NOSVE-NEXT: [[TMP0:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[PARTIAL_REDUCE_SUB]])
+; CHECK-NOSVE-NEXT: ret i32 [[TMP0]]
+;
+entry:
+  br label %vector.body
+
+vector.body: ; preds = %vector.body, %entry
+  %vec.phi = phi <vscale x 4 x i32> [ zeroinitializer, %entry ], [ %partial.reduce.sub, %vector.body ]
+  %a.deinterleaved = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.vector.deinterleave2.v32i8(<vscale x 32 x i8> %a)
+  %b.deinterleaved = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.vector.deinterleave2.v32i8(<vscale x 32 x i8> %b)
+  %a.real = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } %a.deinterleaved, 0
+  %a.imag = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } %a.deinterleaved, 1
+  %b.real = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } %b.deinterleaved, 0
+  %b.imag = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } %b.deinterleaved, 1
+  %a.real.ext = sext <vscale x 16 x i8> %a.real to <vscale x 16 x i32>
+  %a.imag.ext = sext <vscale x 16 x i8> %a.imag to <vscale x 16 x i32>
+  %b.real.ext = sext <vscale x 16 x i8> %b.real to <vscale x 16 x i32>
+  %b.imag.ext = sext <vscale x 16 x i8> %b.imag to <vscale x 16 x i32>
+  %real.mul = mul <vscale x 16 x i32> %b.real.ext, %a.real.ext
+  %real.mul.neg = sub <vscale x 16 x i32> zeroinitializer, %real.mul
+  %real.mul.reduced = call <vscale x 4 x i32> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32(<vscale x 4 x i32> %vec.phi, <vscale x 16 x i32> %real.mul.neg)
+  %imag.mul = mul <vscale x 16 x i32> %b.imag.ext, %a.imag.ext
+  %imag.mul.neg = sub <vscale x 16 x i32> zeroinitializer, %imag.mul
+  %partial.reduce.sub = call <vscale x 4 x i32> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32(<vscale x 4 x i32> %real.mul.reduced, <vscale x 16 x i32> %imag.mul.neg)
+  br i1 true, label %middle.block, label %vector.body
+
+middle.block: ; preds = %vector.body
+  %0 = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %partial.reduce.sub)
+  ret i32 %0
+}

-; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
-declare i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32>) #1
-; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
-declare i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64>) #1
+declare <vscale x 4 x i32> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32(<vscale x 4 x i32>, <vscale x 16 x i32>)
+declare <vscale x 2 x i64> @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv8i32(<vscale x 2 x i64>, <vscale x 16 x i32>)

-attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) }
-attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+declare i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32>)
+declare i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64>)
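The added @not_cdotp function is a negative test: all three check prefixes (CHECK-SVE2, CHECK-SVE, CHECK-NOSVE) expect the deinterleave/sext/mul/partial-reduce sequence to survive unchanged, so the ComplexDeinterleavingPass must not rewrite it into a cdot-based reduction even when SVE2 is available. The apparent reason is that both the real and the imaginary products are negated before being accumulated, which falls outside the accumulation shapes the pass recognises. For contrast, the sketch below shows the kind of accumulation the positive cdot tests in this file rely on; it is illustrative only and not copied from this commit, and it assumes the rot0 form in which only the imaginary product is negated.

  ; Assumed rot0-style accumulation (sketch, not part of this diff):
  ; the real product feeds the partial reduction directly, only the
  ; imaginary product is negated before the second partial reduction.
  %real.mul.reduced = call <vscale x 4 x i32> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32(<vscale x 4 x i32> %vec.phi, <vscale x 16 x i32> %real.mul)
  %imag.mul.neg = sub <vscale x 16 x i32> zeroinitializer, %imag.mul
  %partial.reduce.sub = call <vscale x 4 x i32> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32(<vscale x 4 x i32> %real.mul.reduced, <vscale x 16 x i32> %imag.mul.neg)

In @not_cdotp the extra %real.mul.neg breaks that shape, so the expected output keeps the plain partial reductions and ends in an ordinary @llvm.vector.reduce.add rather than a cdot intrinsic.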
