@@ -574,35 +574,42 @@ define <4 x i32> @udot_no_bin_op(<4 x i32> %acc, <16 x i8> %a){
574
574
}
575
575
576
576
define <4 x i32 > @udot_no_bin_op_in_loop (ptr %p ){
577
- ; CHECK-LABEL: udot_no_bin_op_in_loop:
578
- ; CHECK: // %bb.0: // %entry
579
- ; CHECK-NEXT: adrp x8, .LCPI20_0
580
- ; CHECK-NEXT: movi v4.2d, #0000000000000000
581
- ; CHECK-NEXT: adrp x9, .LCPI20_2
582
- ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI20_0]
583
- ; CHECK-NEXT: adrp x8, .LCPI20_1
584
- ; CHECK-NEXT: adrp x10, .LCPI20_3
585
- ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI20_1]
586
- ; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI20_2]
587
- ; CHECK-NEXT: ldr q5, [x10, :lo12:.LCPI20_3]
588
- ; CHECK-NEXT: mov x8, xzr
589
- ; CHECK-NEXT: .LBB20_1: // %vector.body
590
- ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
591
- ; CHECK-NEXT: ldr q6, [x0, x8]
592
- ; CHECK-NEXT: mov v0.16b, v4.16b
593
- ; CHECK-NEXT: add x8, x8, #16
594
- ; CHECK-NEXT: cmp x8, #16
595
- ; CHECK-NEXT: tbl v7.16b, { v6.16b }, v2.16b
596
- ; CHECK-NEXT: tbl v4.16b, { v6.16b }, v1.16b
597
- ; CHECK-NEXT: tbl v16.16b, { v6.16b }, v3.16b
598
- ; CHECK-NEXT: tbl v6.16b, { v6.16b }, v5.16b
599
- ; CHECK-NEXT: add v7.4s, v0.4s, v7.4s
600
- ; CHECK-NEXT: add v6.4s, v6.4s, v16.4s
601
- ; CHECK-NEXT: add v4.4s, v4.4s, v7.4s
602
- ; CHECK-NEXT: add v4.4s, v6.4s, v4.4s
603
- ; CHECK-NEXT: b.ne .LBB20_1
604
- ; CHECK-NEXT: // %bb.2: // %end
605
- ; CHECK-NEXT: ret
577
+ ; CHECK-DOT-LABEL: udot_no_bin_op_in_loop:
578
+ ; CHECK-DOT: // %bb.0: // %entry
579
+ ; CHECK-DOT-NEXT: movi v1.2d, #0000000000000000
580
+ ; CHECK-DOT-NEXT: movi v2.16b, #1
581
+ ; CHECK-DOT-NEXT: mov x8, xzr
582
+ ; CHECK-DOT-NEXT: .LBB20_1: // %vector.body
583
+ ; CHECK-DOT-NEXT: // =>This Inner Loop Header: Depth=1
584
+ ; CHECK-DOT-NEXT: ldr q3, [x0, x8]
585
+ ; CHECK-DOT-NEXT: mov v0.16b, v1.16b
586
+ ; CHECK-DOT-NEXT: add x8, x8, #16
587
+ ; CHECK-DOT-NEXT: cmp x8, #16
588
+ ; CHECK-DOT-NEXT: udot v1.4s, v3.16b, v2.16b
589
+ ; CHECK-DOT-NEXT: b.ne .LBB20_1
590
+ ; CHECK-DOT-NEXT: // %bb.2: // %end
591
+ ; CHECK-DOT-NEXT: ret
592
+ ;
593
+ ; CHECK-NODOT-LABEL: udot_no_bin_op_in_loop:
594
+ ; CHECK-NODOT: // %bb.0: // %entry
595
+ ; CHECK-NODOT-NEXT: movi v1.2d, #0000000000000000
596
+ ; CHECK-NODOT-NEXT: mov x8, xzr
597
+ ; CHECK-NODOT-NEXT: .LBB20_1: // %vector.body
598
+ ; CHECK-NODOT-NEXT: // =>This Inner Loop Header: Depth=1
599
+ ; CHECK-NODOT-NEXT: ldr q0, [x0, x8]
600
+ ; CHECK-NODOT-NEXT: add x8, x8, #16
601
+ ; CHECK-NODOT-NEXT: cmp x8, #16
602
+ ; CHECK-NODOT-NEXT: ushll v2.8h, v0.8b, #0
603
+ ; CHECK-NODOT-NEXT: ushll2 v3.8h, v0.16b, #0
604
+ ; CHECK-NODOT-NEXT: mov v0.16b, v1.16b
605
+ ; CHECK-NODOT-NEXT: ushll v1.4s, v3.4h, #0
606
+ ; CHECK-NODOT-NEXT: uaddw v4.4s, v0.4s, v2.4h
607
+ ; CHECK-NODOT-NEXT: uaddw2 v1.4s, v1.4s, v2.8h
608
+ ; CHECK-NODOT-NEXT: uaddw2 v2.4s, v4.4s, v3.8h
609
+ ; CHECK-NODOT-NEXT: add v1.4s, v1.4s, v2.4s
610
+ ; CHECK-NODOT-NEXT: b.ne .LBB20_1
611
+ ; CHECK-NODOT-NEXT: // %bb.2: // %end
612
+ ; CHECK-NODOT-NEXT: ret
606
613
entry:
607
614
br label %vector.body
608
615
0 commit comments