Skip to content

Commit de16a05

Browse files
author
sgundapa
authored
[Hexagon] Fix zero extension of bit predicates with vtrunehb (#81772)
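Vector zero-extension from v4i1 to v4i8 generates an incorrect word because only the low word of the mask result is used. This patch replaces that low-word extraction with a vtrunehb truncation to fix the bug; the v2i1-to-v2i16 pattern is changed in the same way.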
1 parent ea06384 commit de16a05

File tree

2 files changed

+114
-2
lines changed

2 files changed

+114
-2
lines changed

llvm/lib/Target/Hexagon/HexagonPatterns.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -525,11 +525,11 @@ def Vsplatpi: OutPatFrag<(ops node:$V),
525525
(Combinew (A2_tfrsi $V), (A2_tfrsi $V))>;
526526

527527
// Selection pattern for (azext V2I1) producing v2i16. The diff below swaps
// LoReg for S2_vtrunehb as the operation applied to the C2_mask result; that
// value is then combined with the constant 0x00010001 by A2_andir.
// NOTE(review): presumably A2_andir is a bitwise AND with an immediate, which
// would leave only bit 0 of each 16-bit lane - confirm against the ISA manual.
def: Pat<(v2i16 (azext V2I1:$Pu)),
528-
(A2_andir (LoReg (C2_mask V2I1:$Pu)), (i32 0x00010001))>;
528+
(A2_andir (S2_vtrunehb (C2_mask V2I1:$Pu)), (i32 0x00010001))>;
529529
// Selection pattern for (azext V2I1) producing v2i32. The C2_mask result is
// used as-is (no truncation step) and combined by A2_andp with the value
// built by A2_combineii from two immediates of 1.
def: Pat<(v2i32 (azext V2I1:$Pu)),
530530
(A2_andp (C2_mask V2I1:$Pu), (A2_combineii (i32 1), (i32 1)))>;
531531
// Selection pattern for (azext V4I1) producing v4i8. The diff below swaps
// LoReg for S2_vtrunehb as the operation applied to the C2_mask result; that
// value is then combined with the constant 0x01010101 by A2_andir.
// NOTE(review): presumably A2_andir is a bitwise AND with an immediate, which
// would leave only bit 0 of each byte lane - confirm against the ISA manual.
def: Pat<(v4i8 (azext V4I1:$Pu)),
532-
(A2_andir (LoReg (C2_mask V4I1:$Pu)), (i32 0x01010101))>;
532+
(A2_andir (S2_vtrunehb (C2_mask V4I1:$Pu)), (i32 0x01010101))>;
533533
// Selection pattern for (azext V4I1) producing v4i16. The C2_mask result is
// used as-is (no truncation step) and combined by A2_andp with
// Vsplatpi(0x00010001), i.e. the same 32-bit immediate placed in both halves
// of a Combinew.
def: Pat<(v4i16 (azext V4I1:$Pu)),
534534
(A2_andp (C2_mask V4I1:$Pu), (Vsplatpi (i32 0x00010001)))>;
535535
def: Pat<(v8i8 (azext V8I1:$Pu)),
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
; RUN: llc -march=hexagon < %s | FileCheck %s
2+
3+
; Check that when we extract a byte from the result of a predicate mask,
4+
; all the results of the mask fit in the same word.
5+
; CHECK: [[PRED:p[0-9]+]] = vcmpb.gtu(r{{.*}},#0)
6+
; CHECK: [[REG1:r[0-9]*:[0-9]*]] = mask([[PRED]])
7+
; CHECK: [[REG2:r[0-9]*]] = vtrunehb([[REG1]])
8+
; CHECK: {{r[0-9]*}} = extractu([[REG2]],#1,#8)
9+
10+
target triple = "hexagon"
11+
12+
; %struct.pluto is an aggregate of arrays of %struct.pluto.0; @eggs writes
; into its field 1 (a [4 x %struct.pluto.0]) through the %arg pointer.
%struct.pluto = type { [12 x %struct.pluto.0], [4 x %struct.pluto.0], [2 x %struct.pluto.0], [4 x %struct.pluto.0], [6 x %struct.pluto.0], [2 x [7 x %struct.pluto.0]], [4 x %struct.pluto.0], [3 x [4 x %struct.pluto.0]], [3 x %struct.pluto.0], [3 x %struct.pluto.0] }
13+
; A pair of bytes.
%struct.pluto.0 = type { i8, i8 }
14+
15+
; Constant 3 x 4 table of two-byte entries; @eggs indexes it with %arg2 and
; the loop counter and reads both bytes of each entry.
@global = internal unnamed_addr constant [3 x [4 x [2 x i8]]] [[4 x [2 x i8]] [[2 x i8] c"\FAV", [2 x i8] c"\EF_", [2 x i8] c"\FA=", [2 x i8] c"\09-"], [4 x [2 x i8]] [[2 x i8] c"\06E", [2 x i8] c"\F3Z", [2 x i8] c"\004", [2 x i8] c"\08+"], [4 x [2 x i8]] [[2 x i8] c"\FA]", [2 x i8] c"\F2X", [2 x i8] c"\FA,", [2 x i8] c"\047"]], align 8
16+
17+
; Function Attrs: nofree noinline norecurse nosync nounwind memory(write)
18+
; @eggs: test input containing a 4-wide vector loop (bb6) and an equivalent
; scalar loop (bb35). Each iteration reads two-byte entries of @global,
; computes ((max(%arg3, 0) * byte0) >> 4) + byte1, clamps it to [1, 126],
; and stores a value byte plus a flag byte (clamped value > 63) through %arg.
; %arg1 and %arg4 are unused.
define dso_local void @eggs(ptr nocapture %arg, ptr nocapture readnone %arg1, i32 %arg2, i32 %arg3, i32 %arg4) local_unnamed_addr #0 {
19+
bb:
20+
%icmp = icmp sgt i32 %arg3, 0
21+
%select = select i1 %icmp, i32 %arg3, i32 0
22+
; The condition is the constant false, so control always continues to %bb5.
br i1 false, label %bb33, label %bb5
23+
24+
bb5: ; preds = %bb
25+
; Splat %select across all four lanes.
%insertelement = insertelement <4 x i32> poison, i32 %select, i32 0
26+
%shufflevector = shufflevector <4 x i32> %insertelement, <4 x i32> poison, <4 x i32> zeroinitializer
27+
br label %bb6
28+
29+
bb6: ; preds = %bb6, %bb5
30+
%phi = phi i32 [ 0, %bb5 ], [ %add29, %bb6 ]
31+
%insertelement7 = insertelement <4 x i32> poison, i32 %phi, i32 0
32+
%shufflevector8 = shufflevector <4 x i32> %insertelement7, <4 x i32> poison, <4 x i32> zeroinitializer
33+
%add = add <4 x i32> %shufflevector8, <i32 0, i32 1, i32 2, i32 3>
34+
%add9 = add i32 %phi, 0
35+
%getelementptr = getelementptr inbounds [3 x [4 x [2 x i8]]], ptr @global, i32 0, i32 %arg2, i32 %add9, i32 0
36+
%getelementptr10 = getelementptr inbounds i8, ptr %getelementptr, i32 0
37+
%bitcast = bitcast ptr %getelementptr10 to ptr
38+
; Load four two-byte entries at once, then split them into the even bytes
; (%shufflevector11) and the odd bytes (%shufflevector12).
%load = load <8 x i8>, ptr %bitcast, align 1
39+
%shufflevector11 = shufflevector <8 x i8> %load, <8 x i8> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
40+
%shufflevector12 = shufflevector <8 x i8> %load, <8 x i8> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
41+
%getelementptr13 = getelementptr [3 x [4 x [2 x i8]]], ptr @global, i32 0, i32 %arg2, i32 %add9, i32 1
42+
%sext = sext <4 x i8> %shufflevector11 to <4 x i32>
43+
%mul = mul nsw <4 x i32> %shufflevector, %sext
44+
%ashr = ashr <4 x i32> %mul, <i32 4, i32 4, i32 4, i32 4>
45+
%sext14 = sext <4 x i8> %shufflevector12 to <4 x i32>
46+
%add15 = add nsw <4 x i32> %ashr, %sext14
47+
; Clamp each lane to the range [1, 126].
%icmp16 = icmp sgt <4 x i32> %add15, <i32 1, i32 1, i32 1, i32 1>
48+
%select17 = select <4 x i1> %icmp16, <4 x i32> %add15, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
49+
%icmp18 = icmp slt <4 x i32> %select17, <i32 126, i32 126, i32 126, i32 126>
50+
%select19 = select <4 x i1> %icmp18, <4 x i32> %select17, <4 x i32> <i32 126, i32 126, i32 126, i32 126>
51+
%icmp20 = icmp sgt <4 x i32> %select19, <i32 63, i32 63, i32 63, i32 63>
52+
%trunc = trunc <4 x i32> %select19 to <4 x i8>
53+
%add21 = add nsw <4 x i8> %trunc, <i8 -64, i8 -64, i8 -64, i8 -64>
54+
%getelementptr22 = getelementptr inbounds %struct.pluto, ptr %arg, i32 0, i32 1, i32 %add9, i32 0
55+
%sub = sub nsw <4 x i8> <i8 63, i8 63, i8 63, i8 63>, %trunc
56+
%select23 = select <4 x i1> %icmp20, <4 x i8> %add21, <4 x i8> %sub
57+
%getelementptr24 = getelementptr inbounds %struct.pluto, ptr %arg, i32 0, i32 1, i32 %add9, i32 1
58+
; Zero-extension of a <4 x i1> compare result to <4 x i8>.
; NOTE(review): presumably this is the operation the CHECK lines
; (mask / vtrunehb / extractu) are written against - confirm.
%zext = zext <4 x i1> %icmp20 to <4 x i8>
59+
%getelementptr25 = getelementptr inbounds i8, ptr %getelementptr24, i32 -1
60+
%bitcast26 = bitcast ptr %getelementptr25 to ptr
61+
; Concatenate the value bytes and flag bytes, then interleave them so each
; value byte is followed by its flag byte before the 8-byte store.
%shufflevector27 = shufflevector <4 x i8> %select23, <4 x i8> %zext, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
62+
%shufflevector28 = shufflevector <8 x i8> %shufflevector27, <8 x i8> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
63+
store <8 x i8> %shufflevector28, ptr %bitcast26, align 1
64+
%add29 = add nuw i32 %phi, 4
65+
%icmp30 = icmp eq i32 %add29, 4
66+
br i1 %icmp30, label %bb31, label %bb6
67+
68+
bb31: ; preds = %bb6
69+
; Compares the constants 4 and 4, so this branch always goes to %bb61.
%icmp32 = icmp eq i32 4, 4
70+
br i1 %icmp32, label %bb61, label %bb33
71+
72+
bb33: ; preds = %bb31, %bb
73+
%phi34 = phi i32 [ 4, %bb31 ], [ 0, %bb ]
74+
br label %bb35
75+
76+
; Scalar form of the bb6 computation, one table entry per iteration.
bb35: ; preds = %bb35, %bb33
77+
%phi36 = phi i32 [ %phi34, %bb33 ], [ %add58, %bb35 ]
78+
%getelementptr37 = getelementptr inbounds [3 x [4 x [2 x i8]]], ptr @global, i32 0, i32 %arg2, i32 %phi36, i32 0
79+
%load38 = load i8, ptr %getelementptr37, align 2
80+
%getelementptr39 = getelementptr [3 x [4 x [2 x i8]]], ptr @global, i32 0, i32 %arg2, i32 %phi36, i32 1
81+
%load40 = load i8, ptr %getelementptr39, align 1
82+
%sext41 = sext i8 %load38 to i32
83+
%mul42 = mul nsw i32 %select, %sext41
84+
%ashr43 = ashr i32 %mul42, 4
85+
%sext44 = sext i8 %load40 to i32
86+
%add45 = add nsw i32 %ashr43, %sext44
87+
%icmp46 = icmp sgt i32 %add45, 1
88+
%select47 = select i1 %icmp46, i32 %add45, i32 1
89+
%icmp48 = icmp slt i32 %select47, 126
90+
%select49 = select i1 %icmp48, i32 %select47, i32 126
91+
%icmp50 = icmp sgt i32 %select49, 63
92+
%trunc51 = trunc i32 %select49 to i8
93+
%add52 = add nsw i8 %trunc51, -64
94+
%getelementptr53 = getelementptr inbounds %struct.pluto, ptr %arg, i32 0, i32 1, i32 %phi36, i32 0
95+
%sub54 = sub nsw i8 63, %trunc51
96+
%select55 = select i1 %icmp50, i8 %add52, i8 %sub54
97+
store i8 %select55, ptr %getelementptr53, align 1
98+
%getelementptr56 = getelementptr inbounds %struct.pluto, ptr %arg, i32 0, i32 1, i32 %phi36, i32 1
99+
%zext57 = zext i1 %icmp50 to i8
100+
store i8 %zext57, ptr %getelementptr56, align 1
101+
%add58 = add nuw nsw i32 %phi36, 1
102+
%icmp59 = icmp eq i32 %add58, 4
103+
br i1 %icmp59, label %bb60, label %bb35
104+
105+
bb60: ; preds = %bb35
106+
br label %bb61
107+
108+
bb61: ; preds = %bb60, %bb31
109+
ret void
110+
}
111+
112+
attributes #0 = { nofree noinline norecurse nosync nounwind memory(write) "target-cpu"="hexagonv73" "target-features"="+hvx-length64b,+hvxv73,+v73" }

0 commit comments

Comments
 (0)