@@ -916,50 +916,57 @@ entry:
916
916
}
917
917
918
918
define <4 x i32 > @usdot_multiple_zext_users (ptr %p1 , ptr %p2 , ptr %p3 ) {
919
- ; CHECK-LABEL: usdot_multiple_zext_users:
920
- ; CHECK: // %bb.0: // %entry
921
- ; CHECK-NEXT: adrp x8, .LCPI28_0
922
- ; CHECK-NEXT: movi v0.2d, #0000000000000000
923
- ; CHECK-NEXT: movi v2.2d, #0000000000000000
924
- ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI28_0]
925
- ; CHECK-NEXT: adrp x8, .LCPI28_1
926
- ; CHECK-NEXT: adrp x9, .LCPI28_2
927
- ; CHECK-NEXT: adrp x10, .LCPI28_3
928
- ; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI28_1]
929
- ; CHECK-NEXT: ldr q4, [x9, :lo12:.LCPI28_2]
930
- ; CHECK-NEXT: ldr q5, [x10, :lo12:.LCPI28_3]
931
- ; CHECK-NEXT: mov x8, xzr
932
- ; CHECK-NEXT: .LBB28_1: // %vector.body
933
- ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
934
- ; CHECK-NEXT: ldr q6, [x2, x8]
935
- ; CHECK-NEXT: ldr q18, [x0, x8]
936
- ; CHECK-NEXT: ldr q19, [x1, x8]
937
- ; CHECK-NEXT: add x8, x8, #16
938
- ; CHECK-NEXT: tbl v7.16b, { v6.16b }, v1.16b
939
- ; CHECK-NEXT: tbl v16.16b, { v6.16b }, v3.16b
940
- ; CHECK-NEXT: tbl v17.16b, { v6.16b }, v4.16b
941
- ; CHECK-NEXT: tbl v6.16b, { v6.16b }, v5.16b
942
- ; CHECK-NEXT: cmp x8, #1024
943
- ; CHECK-NEXT: uzp1 v7.8h, v16.8h, v7.8h
944
- ; CHECK-NEXT: sshll v16.8h, v18.8b, #0
945
- ; CHECK-NEXT: uzp1 v6.8h, v6.8h, v17.8h
946
- ; CHECK-NEXT: sshll2 v17.8h, v18.16b, #0
947
- ; CHECK-NEXT: sshll v18.8h, v19.8b, #0
948
- ; CHECK-NEXT: sshll2 v19.8h, v19.16b, #0
949
- ; CHECK-NEXT: smlal v0.4s, v16.4h, v7.4h
950
- ; CHECK-NEXT: smlal v2.4s, v18.4h, v7.4h
951
- ; CHECK-NEXT: smull v20.4s, v17.4h, v6.4h
952
- ; CHECK-NEXT: smull v21.4s, v19.4h, v6.4h
953
- ; CHECK-NEXT: smlal2 v0.4s, v17.8h, v6.8h
954
- ; CHECK-NEXT: smlal2 v2.4s, v19.8h, v6.8h
955
- ; CHECK-NEXT: smlal2 v20.4s, v16.8h, v7.8h
956
- ; CHECK-NEXT: smlal2 v21.4s, v18.8h, v7.8h
957
- ; CHECK-NEXT: add v0.4s, v20.4s, v0.4s
958
- ; CHECK-NEXT: add v2.4s, v21.4s, v2.4s
959
- ; CHECK-NEXT: b.ne .LBB28_1
960
- ; CHECK-NEXT: // %bb.2: // %end
961
- ; CHECK-NEXT: add v0.4s, v2.4s, v0.4s
962
- ; CHECK-NEXT: ret
919
+ ; CHECK-NOI8MM-LABEL: usdot_multiple_zext_users:
920
+ ; CHECK-NOI8MM: // %bb.0: // %entry
921
+ ; CHECK-NOI8MM-NEXT: movi v0.2d, #0000000000000000
922
+ ; CHECK-NOI8MM-NEXT: movi v1.2d, #0000000000000000
923
+ ; CHECK-NOI8MM-NEXT: mov x8, xzr
924
+ ; CHECK-NOI8MM-NEXT: .LBB28_1: // %vector.body
925
+ ; CHECK-NOI8MM-NEXT: // =>This Inner Loop Header: Depth=1
926
+ ; CHECK-NOI8MM-NEXT: ldr q2, [x0, x8]
927
+ ; CHECK-NOI8MM-NEXT: ldr q3, [x2, x8]
928
+ ; CHECK-NOI8MM-NEXT: ldr q4, [x1, x8]
929
+ ; CHECK-NOI8MM-NEXT: add x8, x8, #16
930
+ ; CHECK-NOI8MM-NEXT: sshll v5.8h, v2.8b, #0
931
+ ; CHECK-NOI8MM-NEXT: sshll2 v2.8h, v2.16b, #0
932
+ ; CHECK-NOI8MM-NEXT: ushll2 v6.8h, v3.16b, #0
933
+ ; CHECK-NOI8MM-NEXT: ushll v3.8h, v3.8b, #0
934
+ ; CHECK-NOI8MM-NEXT: sshll v7.8h, v4.8b, #0
935
+ ; CHECK-NOI8MM-NEXT: sshll2 v4.8h, v4.16b, #0
936
+ ; CHECK-NOI8MM-NEXT: cmp x8, #1024
937
+ ; CHECK-NOI8MM-NEXT: smull v16.4s, v2.4h, v6.4h
938
+ ; CHECK-NOI8MM-NEXT: smlal v0.4s, v5.4h, v3.4h
939
+ ; CHECK-NOI8MM-NEXT: smull v17.4s, v4.4h, v6.4h
940
+ ; CHECK-NOI8MM-NEXT: smlal v1.4s, v7.4h, v3.4h
941
+ ; CHECK-NOI8MM-NEXT: smlal2 v16.4s, v5.8h, v3.8h
942
+ ; CHECK-NOI8MM-NEXT: smlal2 v0.4s, v2.8h, v6.8h
943
+ ; CHECK-NOI8MM-NEXT: smlal2 v17.4s, v7.8h, v3.8h
944
+ ; CHECK-NOI8MM-NEXT: smlal2 v1.4s, v4.8h, v6.8h
945
+ ; CHECK-NOI8MM-NEXT: add v0.4s, v16.4s, v0.4s
946
+ ; CHECK-NOI8MM-NEXT: add v1.4s, v17.4s, v1.4s
947
+ ; CHECK-NOI8MM-NEXT: b.ne .LBB28_1
948
+ ; CHECK-NOI8MM-NEXT: // %bb.2: // %end
949
+ ; CHECK-NOI8MM-NEXT: add v0.4s, v1.4s, v0.4s
950
+ ; CHECK-NOI8MM-NEXT: ret
951
+ ;
952
+ ; CHECK-I8MM-LABEL: usdot_multiple_zext_users:
953
+ ; CHECK-I8MM: // %bb.0: // %entry
954
+ ; CHECK-I8MM-NEXT: movi v0.2d, #0000000000000000
955
+ ; CHECK-I8MM-NEXT: movi v1.2d, #0000000000000000
956
+ ; CHECK-I8MM-NEXT: mov x8, xzr
957
+ ; CHECK-I8MM-NEXT: .LBB28_1: // %vector.body
958
+ ; CHECK-I8MM-NEXT: // =>This Inner Loop Header: Depth=1
959
+ ; CHECK-I8MM-NEXT: ldr q2, [x0, x8]
960
+ ; CHECK-I8MM-NEXT: ldr q3, [x1, x8]
961
+ ; CHECK-I8MM-NEXT: ldr q4, [x2, x8]
962
+ ; CHECK-I8MM-NEXT: add x8, x8, #16
963
+ ; CHECK-I8MM-NEXT: usdot v0.4s, v4.16b, v2.16b
964
+ ; CHECK-I8MM-NEXT: usdot v1.4s, v4.16b, v3.16b
965
+ ; CHECK-I8MM-NEXT: cmp x8, #1024
966
+ ; CHECK-I8MM-NEXT: b.ne .LBB28_1
967
+ ; CHECK-I8MM-NEXT: // %bb.2: // %end
968
+ ; CHECK-I8MM-NEXT: add v0.4s, v1.4s, v0.4s
969
+ ; CHECK-I8MM-NEXT: ret
963
970
entry:
964
971
br label %vector.body
965
972
0 commit comments