@@ -2722,3 +2722,135 @@ loop:
2722
2722
exit:
2723
2723
ret void
2724
2724
}
2725
+
2726
+ ; FIXME: Widening instructions should be used here instead of the tbl shuffles that currently implement the zext.
2727
+ define i32 @test_pr62620_widening_instr (ptr %p1 , ptr %p2 , i64 %lx , i32 %h ) { ; each loop iteration adds the 16-lane sum of abs(zext(p1 vector) - zext(p2 vector)) to an i32 accumulator, which is returned on exit
2728
+ ; CHECK-LABEL: test_pr62620_widening_instr:
2729
+ ; CHECK: ; %bb.0: ; %entry
2730
+ ; CHECK-NEXT: Lloh38:
2731
+ ; CHECK-NEXT: adrp x9, lCPI23_0@PAGE
2732
+ ; CHECK-NEXT: Lloh39:
2733
+ ; CHECK-NEXT: adrp x10, lCPI23_1@PAGE
2734
+ ; CHECK-NEXT: Lloh40:
2735
+ ; CHECK-NEXT: adrp x11, lCPI23_2@PAGE
2736
+ ; CHECK-NEXT: Lloh41:
2737
+ ; CHECK-NEXT: adrp x12, lCPI23_3@PAGE
2738
+ ; CHECK-NEXT: mov x8, x0
2739
+ ; CHECK-NEXT: mov w0, wzr
2740
+ ; CHECK-NEXT: Lloh42:
2741
+ ; CHECK-NEXT: ldr q0, [x9, lCPI23_0@PAGEOFF]
2742
+ ; CHECK-NEXT: lsl x9, x2, #4
2743
+ ; CHECK-NEXT: Lloh43:
2744
+ ; CHECK-NEXT: ldr q1, [x10, lCPI23_1@PAGEOFF]
2745
+ ; CHECK-NEXT: Lloh44:
2746
+ ; CHECK-NEXT: ldr q2, [x11, lCPI23_2@PAGEOFF]
2747
+ ; CHECK-NEXT: Lloh45:
2748
+ ; CHECK-NEXT: ldr q3, [x12, lCPI23_3@PAGEOFF]
2749
+ ; CHECK-NEXT: LBB23_1: ; %loop
2750
+ ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
2751
+ ; CHECK-NEXT: ldr q4, [x8, x9]
2752
+ ; CHECK-NEXT: subs w3, w3, #1
2753
+ ; CHECK-NEXT: ldr q5, [x1, x9]
2754
+ ; CHECK-NEXT: tbl.16b v6, { v4 }, v0
2755
+ ; CHECK-NEXT: tbl.16b v7, { v4 }, v1
2756
+ ; CHECK-NEXT: tbl.16b v16, { v4 }, v2
2757
+ ; CHECK-NEXT: tbl.16b v4, { v4 }, v3
2758
+ ; CHECK-NEXT: tbl.16b v17, { v5 }, v2
2759
+ ; CHECK-NEXT: tbl.16b v18, { v5 }, v3
2760
+ ; CHECK-NEXT: tbl.16b v19, { v5 }, v0
2761
+ ; CHECK-NEXT: tbl.16b v5, { v5 }, v1
2762
+ ; CHECK-NEXT: sabd.4s v16, v16, v17
2763
+ ; CHECK-NEXT: sabd.4s v4, v4, v18
2764
+ ; CHECK-NEXT: saba.4s v16, v7, v5
2765
+ ; CHECK-NEXT: saba.4s v4, v6, v19
2766
+ ; CHECK-NEXT: add.4s v4, v4, v16
2767
+ ; CHECK-NEXT: addv.4s s4, v4
2768
+ ; CHECK-NEXT: fmov w10, s4
2769
+ ; CHECK-NEXT: add w0, w10, w0
2770
+ ; CHECK-NEXT: b.ne LBB23_1
2771
+ ; CHECK-NEXT: ; %bb.2: ; %exit
2772
+ ; CHECK-NEXT: ret
2773
+ ; CHECK-NEXT: .loh AdrpLdr Lloh41, Lloh45
2774
+ ; CHECK-NEXT: .loh AdrpLdr Lloh40, Lloh44
2775
+ ; CHECK-NEXT: .loh AdrpLdr Lloh39, Lloh43
2776
+ ; CHECK-NEXT: .loh AdrpLdr Lloh38, Lloh42
2777
+ ;
2778
+ ; CHECK-BE-LABEL: test_pr62620_widening_instr:
2779
+ ; CHECK-BE: // %bb.0: // %entry
2780
+ ; CHECK-BE-NEXT: adrp x10, .LCPI23_0
2781
+ ; CHECK-BE-NEXT: add x10, x10, :lo12:.LCPI23_0
2782
+ ; CHECK-BE-NEXT: mov x8, x0
2783
+ ; CHECK-BE-NEXT: lsl x9, x2, #4
2784
+ ; CHECK-BE-NEXT: mov w0, wzr
2785
+ ; CHECK-BE-NEXT: add x8, x8, x9
2786
+ ; CHECK-BE-NEXT: ld1 { v0.16b }, [x10]
2787
+ ; CHECK-BE-NEXT: adrp x10, .LCPI23_1
2788
+ ; CHECK-BE-NEXT: add x10, x10, :lo12:.LCPI23_1
2789
+ ; CHECK-BE-NEXT: add x9, x1, x9
2790
+ ; CHECK-BE-NEXT: ld1 { v1.16b }, [x10]
2791
+ ; CHECK-BE-NEXT: adrp x10, .LCPI23_2
2792
+ ; CHECK-BE-NEXT: add x10, x10, :lo12:.LCPI23_2
2793
+ ; CHECK-BE-NEXT: ld1 { v2.16b }, [x10]
2794
+ ; CHECK-BE-NEXT: adrp x10, .LCPI23_3
2795
+ ; CHECK-BE-NEXT: add x10, x10, :lo12:.LCPI23_3
2796
+ ; CHECK-BE-NEXT: ld1 { v3.16b }, [x10]
2797
+ ; CHECK-BE-NEXT: .LBB23_1: // %loop
2798
+ ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
2799
+ ; CHECK-BE-NEXT: ld1 { v4.16b }, [x8]
2800
+ ; CHECK-BE-NEXT: subs w3, w3, #1
2801
+ ; CHECK-BE-NEXT: ld1 { v5.16b }, [x9]
2802
+ ; CHECK-BE-NEXT: tbl v6.16b, { v4.16b }, v0.16b
2803
+ ; CHECK-BE-NEXT: tbl v7.16b, { v4.16b }, v1.16b
2804
+ ; CHECK-BE-NEXT: tbl v17.16b, { v5.16b }, v0.16b
2805
+ ; CHECK-BE-NEXT: tbl v18.16b, { v5.16b }, v1.16b
2806
+ ; CHECK-BE-NEXT: tbl v16.16b, { v4.16b }, v3.16b
2807
+ ; CHECK-BE-NEXT: tbl v4.16b, { v4.16b }, v2.16b
2808
+ ; CHECK-BE-NEXT: tbl v19.16b, { v5.16b }, v3.16b
2809
+ ; CHECK-BE-NEXT: tbl v5.16b, { v5.16b }, v2.16b
2810
+ ; CHECK-BE-NEXT: rev32 v7.16b, v7.16b
2811
+ ; CHECK-BE-NEXT: rev32 v6.16b, v6.16b
2812
+ ; CHECK-BE-NEXT: rev32 v18.16b, v18.16b
2813
+ ; CHECK-BE-NEXT: rev32 v17.16b, v17.16b
2814
+ ; CHECK-BE-NEXT: rev32 v16.16b, v16.16b
2815
+ ; CHECK-BE-NEXT: rev32 v4.16b, v4.16b
2816
+ ; CHECK-BE-NEXT: rev32 v19.16b, v19.16b
2817
+ ; CHECK-BE-NEXT: rev32 v5.16b, v5.16b
2818
+ ; CHECK-BE-NEXT: sabd v7.4s, v7.4s, v18.4s
2819
+ ; CHECK-BE-NEXT: sabd v6.4s, v6.4s, v17.4s
2820
+ ; CHECK-BE-NEXT: saba v7.4s, v4.4s, v5.4s
2821
+ ; CHECK-BE-NEXT: saba v6.4s, v16.4s, v19.4s
2822
+ ; CHECK-BE-NEXT: add v4.4s, v6.4s, v7.4s
2823
+ ; CHECK-BE-NEXT: addv s4, v4.4s
2824
+ ; CHECK-BE-NEXT: fmov w10, s4
2825
+ ; CHECK-BE-NEXT: add w0, w10, w0
2826
+ ; CHECK-BE-NEXT: b.ne .LBB23_1
2827
+ ; CHECK-BE-NEXT: // %bb.2: // %exit
2828
+ ; CHECK-BE-NEXT: ret
2829
+ entry:
2830
+ br label %loop
2831
+
2832
+ loop:
2833
+ %s0 = phi i32 [ 0 , %entry ], [ %op.rdx , %loop ] ; running sum: 0 on entry, %op.rdx on the back edge
2834
+ %j.0261 = phi i32 [ 0 , %entry ], [ %inc , %loop ] ; iteration counter: 0 on entry, %inc on the back edge
2835
+ %gep.1 = getelementptr inbounds <16 x i8 >, ptr %p1 , i64 %lx ; %p1 advanced by %lx elements of <16 x i8> (16 bytes each); does not depend on the loop counter
2836
+ %gep.2 = getelementptr inbounds <16 x i8 >, ptr %p2 , i64 %lx ; same offset applied to %p2
2837
+ %l1 = load <16 x i8 >, ptr %gep.1
2838
+ %z2 = zext <16 x i8 > %l1 to <16 x i32 > ; zero-extend every i8 lane to i32
2839
+ %l4 = load <16 x i8 >, ptr %gep.2
2840
+ %z5 = zext <16 x i8 > %l4 to <16 x i32 > ; zero-extend every i8 lane to i32
2841
+ %sub = sub nsw <16 x i32 > %z2 , %z5 ; lane-wise difference of the two widened vectors
2842
+ %abs = tail call <16 x i32 > @llvm.abs.v16i32 (<16 x i32 > %sub , i1 true ) ; lane-wise absolute difference; the `i1 true` flag makes an INT_MIN input yield poison (see LangRef)
2843
+ %red = tail call i32 @llvm.vector.reduce.add.v16i32 (<16 x i32 > %abs ) ; add the 16 lanes together into one i32
2844
+ %op.rdx = add i32 %red , %s0 ; fold this iteration's sum into the running sum
2845
+ %inc = add nuw nsw i32 %j.0261 , 1
2846
+ %exitcond.not = icmp eq i32 %inc , %h ; true once the incremented counter equals %h
2847
+ br i1 %exitcond.not , label %exit , label %loop ; leave the loop when the counter reaches %h, otherwise iterate again
2848
+
2849
+ exit:
2850
+ %s1 = phi i32 [ %op.rdx , %loop ] ; single-incoming phi carrying the final sum out of the loop
2851
+ ret i32 %s1
2852
+ }
2853
+
2854
+ declare <16 x i32 > @llvm.abs.v16i32 (<16 x i32 >, i1 immarg) ; intrinsic called for %abs in @test_pr62620_widening_instr; the i1 operand must be an immediate
2855
+
2856
+ declare i32 @llvm.vector.reduce.add.v16i32 (<16 x i32 >) ; intrinsic called for %red in @test_pr62620_widening_instr; returns a single i32 from a <16 x i32> input
0 commit comments