Skip to content

Commit 22719f3

Browse files
committed
[RISCV] Add pattern for PACK/PACKH in common misaligned load case
PACKH is currently only selected for assembling the first two bytes of a misaligned load. A fairly complex RV32-only pattern is added for producing PACKH+PACKH+PACK to assemble the result of a misaligned 32-bit load. Another pattern is added that covers just PACKH for shift amounts 16 and 24, producing a packh and a shift to replace two shifts and an 'or'. This slightly improves RV64IZBKB for a 64-bit load, but fails to match for the misaligned 32-bit load because the load of the upper byte is anyext in the SelectionDAG.
1 parent 39b2e35 commit 22719f3

File tree

2 files changed

+62
-42
lines changed

2 files changed

+62
-42
lines changed

llvm/lib/Target/RISCV/RISCVInstrInfoZb.td

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -599,15 +599,26 @@ def : Pat<(or (shl (zexti8 (XLenVT GPR:$rs2)), (XLenVT 8)),
599599
def : Pat<(and (or (shl GPR:$rs2, (XLenVT 8)),
600600
(zexti8 (XLenVT GPR:$rs1))), 0xFFFF),
601601
(PACKH GPR:$rs1, GPR:$rs2)>;
602+
def : Pat<(or (shl (zexti8 (XLenVT GPR:$rs2)), (XLenVT 24)),
603+
(shl (zexti8 (XLenVT GPR:$rs1)), (XLenVT 16))),
604+
(SLLI (PACKH GPR:$rs1, GPR:$rs2), (XLenVT 16))>;
602605

603606
def : Pat<(binop_allhusers<or> (shl GPR:$rs2, (XLenVT 8)),
604607
(zexti8 (XLenVT GPR:$rs1))),
605608
(PACKH GPR:$rs1, GPR:$rs2)>;
606609
} // Predicates = [HasStdExtZbkb]
607610

608-
let Predicates = [HasStdExtZbkb, IsRV32] in
611+
let Predicates = [HasStdExtZbkb, IsRV32] in {
609612
def : Pat<(i32 (or (zexti16 (i32 GPR:$rs1)), (shl GPR:$rs2, (i32 16)))),
610613
(PACK GPR:$rs1, GPR:$rs2)>;
614+
def : Pat<(or (or
615+
(shl (zexti8 (XLenVT GPR:$op1rs2)), (XLenVT 24)),
616+
(shl (zexti8 (XLenVT GPR:$op1rs1)), (XLenVT 16))),
617+
(or
618+
(shl (zexti8 (XLenVT GPR:$op0rs2)), (XLenVT 8)),
619+
(zexti8 (XLenVT GPR:$op0rs1)))),
620+
(PACK (PACKH GPR:$op0rs1, GPR:$op0rs2), (PACKH GPR:$op1rs1, GPR:$op1rs2))>;
621+
}
611622

612623
let Predicates = [HasStdExtZbkb, IsRV64] in {
613624
def : Pat<(i64 (or (zexti32 (i64 GPR:$rs1)), (shl GPR:$rs2, (i64 32)))),

llvm/test/CodeGen/RISCV/unaligned-load-store.ll

Lines changed: 50 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,8 @@ define i24 @load_i24(ptr %p) {
8282
ret i24 %res
8383
}
8484

85+
; FIXME: In the RV64IZBKB case, the second packh isn't selected because the
86+
; upper byte is an anyext load in the SDag.
8587
define i32 @load_i32(ptr %p) {
8688
; SLOWBASE-LABEL: load_i32:
8789
; SLOWBASE: # %bb.0:
@@ -97,18 +99,29 @@ define i32 @load_i32(ptr %p) {
9799
; SLOWBASE-NEXT: or a0, a0, a1
98100
; SLOWBASE-NEXT: ret
99101
;
100-
; SLOWZBKB-LABEL: load_i32:
101-
; SLOWZBKB: # %bb.0:
102-
; SLOWZBKB-NEXT: lbu a1, 1(a0)
103-
; SLOWZBKB-NEXT: lbu a2, 0(a0)
104-
; SLOWZBKB-NEXT: lbu a3, 2(a0)
105-
; SLOWZBKB-NEXT: lbu a0, 3(a0)
106-
; SLOWZBKB-NEXT: packh a1, a2, a1
107-
; SLOWZBKB-NEXT: slli a3, a3, 16
108-
; SLOWZBKB-NEXT: slli a0, a0, 24
109-
; SLOWZBKB-NEXT: or a0, a0, a3
110-
; SLOWZBKB-NEXT: or a0, a0, a1
111-
; SLOWZBKB-NEXT: ret
102+
; RV32IZBKB-LABEL: load_i32:
103+
; RV32IZBKB: # %bb.0:
104+
; RV32IZBKB-NEXT: lbu a1, 3(a0)
105+
; RV32IZBKB-NEXT: lbu a2, 2(a0)
106+
; RV32IZBKB-NEXT: lbu a3, 1(a0)
107+
; RV32IZBKB-NEXT: lbu a0, 0(a0)
108+
; RV32IZBKB-NEXT: packh a1, a2, a1
109+
; RV32IZBKB-NEXT: packh a0, a0, a3
110+
; RV32IZBKB-NEXT: pack a0, a0, a1
111+
; RV32IZBKB-NEXT: ret
112+
;
113+
; RV64IZBKB-LABEL: load_i32:
114+
; RV64IZBKB: # %bb.0:
115+
; RV64IZBKB-NEXT: lbu a1, 1(a0)
116+
; RV64IZBKB-NEXT: lbu a2, 0(a0)
117+
; RV64IZBKB-NEXT: lbu a3, 2(a0)
118+
; RV64IZBKB-NEXT: lbu a0, 3(a0)
119+
; RV64IZBKB-NEXT: packh a1, a2, a1
120+
; RV64IZBKB-NEXT: slli a3, a3, 16
121+
; RV64IZBKB-NEXT: slli a0, a0, 24
122+
; RV64IZBKB-NEXT: or a0, a0, a3
123+
; RV64IZBKB-NEXT: or a0, a0, a1
124+
; RV64IZBKB-NEXT: ret
112125
;
113126
; FAST-LABEL: load_i32:
114127
; FAST: # %bb.0:
@@ -172,45 +185,39 @@ define i64 @load_i64(ptr %p) {
172185
;
173186
; RV32IZBKB-LABEL: load_i64:
174187
; RV32IZBKB: # %bb.0:
175-
; RV32IZBKB-NEXT: lbu a1, 1(a0)
176-
; RV32IZBKB-NEXT: lbu a2, 0(a0)
177-
; RV32IZBKB-NEXT: lbu a3, 2(a0)
178-
; RV32IZBKB-NEXT: lbu a4, 3(a0)
188+
; RV32IZBKB-NEXT: lbu a1, 3(a0)
189+
; RV32IZBKB-NEXT: lbu a2, 2(a0)
179190
; RV32IZBKB-NEXT: packh a1, a2, a1
180-
; RV32IZBKB-NEXT: slli a3, a3, 16
181-
; RV32IZBKB-NEXT: slli a4, a4, 24
182-
; RV32IZBKB-NEXT: or a3, a4, a3
183-
; RV32IZBKB-NEXT: lbu a2, 5(a0)
184-
; RV32IZBKB-NEXT: lbu a4, 4(a0)
191+
; RV32IZBKB-NEXT: lbu a2, 1(a0)
192+
; RV32IZBKB-NEXT: lbu a3, 0(a0)
193+
; RV32IZBKB-NEXT: lbu a4, 7(a0)
185194
; RV32IZBKB-NEXT: lbu a5, 6(a0)
186-
; RV32IZBKB-NEXT: lbu a6, 7(a0)
187-
; RV32IZBKB-NEXT: or a0, a3, a1
188-
; RV32IZBKB-NEXT: packh a1, a4, a2
189-
; RV32IZBKB-NEXT: slli a5, a5, 16
190-
; RV32IZBKB-NEXT: slli a6, a6, 24
191-
; RV32IZBKB-NEXT: or a2, a6, a5
192-
; RV32IZBKB-NEXT: or a1, a2, a1
195+
; RV32IZBKB-NEXT: lbu a6, 5(a0)
196+
; RV32IZBKB-NEXT: lbu a7, 4(a0)
197+
; RV32IZBKB-NEXT: packh a0, a3, a2
198+
; RV32IZBKB-NEXT: pack a0, a0, a1
199+
; RV32IZBKB-NEXT: packh a1, a5, a4
200+
; RV32IZBKB-NEXT: packh a2, a7, a6
201+
; RV32IZBKB-NEXT: pack a1, a2, a1
193202
; RV32IZBKB-NEXT: ret
194203
;
195204
; RV64IZBKB-LABEL: load_i64:
196205
; RV64IZBKB: # %bb.0:
197206
; RV64IZBKB-NEXT: lbu a1, 5(a0)
198207
; RV64IZBKB-NEXT: lbu a2, 4(a0)
199-
; RV64IZBKB-NEXT: lbu a3, 6(a0)
200-
; RV64IZBKB-NEXT: lbu a4, 7(a0)
208+
; RV64IZBKB-NEXT: lbu a3, 7(a0)
209+
; RV64IZBKB-NEXT: lbu a4, 6(a0)
201210
; RV64IZBKB-NEXT: packh a1, a2, a1
202-
; RV64IZBKB-NEXT: slli a3, a3, 16
203-
; RV64IZBKB-NEXT: slli a4, a4, 24
204-
; RV64IZBKB-NEXT: or a3, a4, a3
205-
; RV64IZBKB-NEXT: lbu a2, 1(a0)
211+
; RV64IZBKB-NEXT: packh a2, a4, a3
212+
; RV64IZBKB-NEXT: lbu a3, 1(a0)
206213
; RV64IZBKB-NEXT: lbu a4, 0(a0)
207-
; RV64IZBKB-NEXT: lbu a5, 2(a0)
208-
; RV64IZBKB-NEXT: lbu a0, 3(a0)
209-
; RV64IZBKB-NEXT: or a1, a3, a1
210-
; RV64IZBKB-NEXT: packh a2, a4, a2
211-
; RV64IZBKB-NEXT: slli a5, a5, 16
212-
; RV64IZBKB-NEXT: slli a0, a0, 24
213-
; RV64IZBKB-NEXT: or a0, a0, a5
214+
; RV64IZBKB-NEXT: lbu a5, 3(a0)
215+
; RV64IZBKB-NEXT: lbu a0, 2(a0)
216+
; RV64IZBKB-NEXT: slli a2, a2, 16
217+
; RV64IZBKB-NEXT: or a1, a2, a1
218+
; RV64IZBKB-NEXT: packh a2, a4, a3
219+
; RV64IZBKB-NEXT: packh a0, a0, a5
220+
; RV64IZBKB-NEXT: slli a0, a0, 16
214221
; RV64IZBKB-NEXT: or a0, a0, a2
215222
; RV64IZBKB-NEXT: pack a0, a0, a1
216223
; RV64IZBKB-NEXT: ret
@@ -574,3 +581,5 @@ define void @store_large_constant(ptr %x) {
574581
store i64 18364758544493064720, ptr %x, align 1
575582
ret void
576583
}
584+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
585+
; SLOWZBKB: {{.*}}

0 commit comments

Comments
 (0)