Skip to content

Commit 8799d71

Browse files
authored
[NVPTX] Fix the error in a pattern match in v4i8 comparisons. (#81308)
The replacement should have had the BFE() results as the arguments for the comparison, not the source registers. While at it, tighten the patterns a bit and expand them to cover variants with immediate arguments. Also change the default lowering of bfe() to use the unsigned variant, so the value of the upper bits is predictable.
1 parent 73159a9 commit 8799d71

File tree

2 files changed

+260
-198
lines changed

2 files changed

+260
-198
lines changed

llvm/lib/Target/NVPTX/NVPTXInstrInfo.td

Lines changed: 69 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1886,10 +1886,14 @@ multiclass PRMT<ValueType T, RegisterClass RC> {
18861886
}
18871887

18881888
let hasSideEffects = false in {
1889-
defm BFE_S32 : BFE<"bfe.s32", i32, Int32Regs>;
1889+
// Order is somewhat important here. Signed/unsigned variants match
1890+
// the same patterns, so the first one wins. Having unsigned byte extraction
1891+
// has the benefit of always having zero in unused bits, which makes some
1892+
// optimizations easier (e.g. no need to mask them).
18901893
defm BFE_U32 : BFE<"bfe.u32", i32, Int32Regs>;
1891-
defm BFE_S64 : BFE<"bfe.s64", i64, Int64Regs>;
1894+
defm BFE_S32 : BFE<"bfe.s32", i32, Int32Regs>;
18921895
defm BFE_U64 : BFE<"bfe.u64", i64, Int64Regs>;
1896+
defm BFE_S64 : BFE<"bfe.s64", i64, Int64Regs>;
18931897

18941898
defm BFI_B32 : BFI<"bfi.b32", i32, Int32Regs, i32imm>;
18951899
defm BFI_B64 : BFI<"bfi.b64", i64, Int64Regs, i64imm>;
@@ -2259,27 +2263,69 @@ def : Pat<(setueq Int1Regs:$a, Int1Regs:$b),
22592263
(NOT1 (XORb1rr Int1Regs:$a, Int1Regs:$b))>;
22602264

22612265
// comparisons of i8 extracted with BFE as i32
2262-
def: Pat<(setgt (sext_inreg (trunc Int32Regs:$a), i8), (sext_inreg (trunc Int32Regs:$b), i8)),
2263-
(SETP_s32rr Int32Regs:$a, Int32Regs:$b, CmpGT)>;
2264-
def: Pat<(setge (sext_inreg (trunc Int32Regs:$a), i8), (sext_inreg (trunc Int32Regs:$b), i8)),
2265-
(SETP_s32rr Int32Regs:$a, Int32Regs:$b, CmpGE)>;
2266-
def: Pat<(setlt (sext_inreg (trunc Int32Regs:$a), i8), (sext_inreg (trunc Int32Regs:$b), i8)),
2267-
(SETP_s32rr Int32Regs:$a, Int32Regs:$b, CmpLT)>;
2268-
def: Pat<(setle (sext_inreg (trunc Int32Regs:$a), i8), (sext_inreg (trunc Int32Regs:$b), i8)),
2269-
(SETP_s32rr Int32Regs:$a, Int32Regs:$b, CmpLE)>;
2270-
2271-
def: Pat<(setugt (i16 (and (trunc Int32Regs:$a), 255)), (i16 (and (trunc Int32Regs:$b), 255))),
2272-
(SETP_u32rr Int32Regs:$a, Int32Regs:$b, CmpHI)>;
2273-
def: Pat<(setuge (i16 (and (trunc Int32Regs:$a), 255)), (i16 (and (trunc Int32Regs:$b), 255))),
2274-
(SETP_u32rr Int32Regs:$a, Int32Regs:$b, CmpHS)>;
2275-
def: Pat<(setult (i16 (and (trunc Int32Regs:$a), 255)), (i16 (and (trunc Int32Regs:$b), 255))),
2276-
(SETP_u32rr Int32Regs:$a, Int32Regs:$b, CmpLO)>;
2277-
def: Pat<(setule (i16 (and (trunc Int32Regs:$a), 255)), (i16 (and (trunc Int32Regs:$b), 255))),
2278-
(SETP_u32rr Int32Regs:$a, Int32Regs:$b, CmpLS)>;
2279-
def: Pat<(seteq (i16 (and (trunc Int32Regs:$a), 255)), (i16 (and (trunc Int32Regs:$b), 255))),
2280-
(SETP_u32rr Int32Regs:$a, Int32Regs:$b, CmpEQ)>;
2281-
def: Pat<(setne (i16 (and (trunc Int32Regs:$a), 255)), (i16 (and (trunc Int32Regs:$b), 255))),
2282-
(SETP_u32rr Int32Regs:$a, Int32Regs:$b, CmpNE)>;
2266+
// It's faster to do comparison directly on i32 extracted by BFE,
2267+
// instead of the long conversion and sign extending.
2268+
def: Pat<(setgt (i16 (sext_inreg (i16 (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8))), i8)),
2269+
(i16 (sext_inreg (i16 (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8))), i8))),
2270+
(SETP_s32rr (BFE_S32rri $a, $oa, 8), (BFE_S32rri $b, $ob, 8), CmpGT)>;
2271+
def: Pat<(setgt (i16 (sext_inreg (trunc (bfe Int32Regs:$a, imm:$oa, 8)), i8)),
2272+
(i16 (sext_inreg (trunc (bfe Int32Regs:$b, imm:$ob, 8)), i8))),
2273+
(SETP_s32rr (BFE_S32rii $a, imm:$oa, 8), (BFE_S32rii $b, imm:$ob, 8), CmpGT)>;
2274+
def: Pat<(setge (i16 (sext_inreg (i16 (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8))), i8)),
2275+
(i16 (sext_inreg (i16 (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8))), i8))),
2276+
(SETP_s32rr (BFE_S32rri $a, $oa, 8), (BFE_S32rri $b, $ob, 8), CmpGE)>;
2277+
def: Pat<(setge (i16 (sext_inreg (trunc (bfe Int32Regs:$a, imm:$oa, 8)), i8)),
2278+
(i16 (sext_inreg (trunc (bfe Int32Regs:$b, imm:$ob, 8)), i8))),
2279+
(SETP_s32rr (BFE_S32rii $a, imm:$oa, 8), (BFE_S32rii $b, imm:$ob, 8), CmpGE)>;
2280+
def: Pat<(setlt (i16 (sext_inreg (i16 (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8))), i8)),
2281+
(i16 (sext_inreg (i16 (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8))), i8))),
2282+
(SETP_s32rr (BFE_S32rri $a, $oa, 8), (BFE_S32rri $b, $ob, 8), CmpLT)>;
2283+
def: Pat<(setlt (i16 (sext_inreg (trunc (bfe Int32Regs:$a, imm:$oa, 8)), i8)),
2284+
(i16 (sext_inreg (trunc (bfe Int32Regs:$b, imm:$ob, 8)), i8))),
2285+
(SETP_s32rr (BFE_S32rii $a, imm:$oa, 8), (BFE_S32rii $b, imm:$ob, 8), CmpLT)>;
2286+
def: Pat<(setle (i16 (sext_inreg (i16 (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8))), i8)),
2287+
(i16 (sext_inreg (i16 (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8))), i8))),
2288+
(SETP_s32rr (BFE_S32rri $a, $oa, 8), (BFE_S32rri $b, $ob, 8), CmpLE)>;
2289+
def: Pat<(setle (i16 (sext_inreg (trunc (bfe Int32Regs:$a, imm:$oa, 8)), i8)),
2290+
(i16 (sext_inreg (trunc (bfe Int32Regs:$b, imm:$ob, 8)), i8))),
2291+
(SETP_s32rr (BFE_S32rii $a, imm:$oa, 8), (BFE_S32rii $b, imm:$ob, 8), CmpLE)>;
2292+
2293+
def: Pat<(setugt (i16 (and (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8)), 255)),
2294+
(i16 (and (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8)), 255))),
2295+
(SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpHI)>;
2296+
def: Pat<(setugt (i16 (and (trunc (bfe Int32Regs:$a, imm:$oa, 8)), 255)),
2297+
(i16 (and (trunc (bfe Int32Regs:$b, imm:$ob, 8)), 255))),
2298+
(SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpHI)>;
2299+
def: Pat<(setuge (i16 (and (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8)), 255)),
2300+
(i16 (and (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8)), 255))),
2301+
(SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpHS)>;
2302+
def: Pat<(setuge (i16 (and (trunc (bfe Int32Regs:$a, imm:$oa, 8)), 255)),
2303+
(i16 (and (trunc (bfe Int32Regs:$b, imm:$ob, 8)), 255))),
2304+
(SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpHS)>;
2305+
def: Pat<(setult (i16 (and (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8)), 255)),
2306+
(i16 (and (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8)), 255))),
2307+
(SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpLO)>;
2308+
def: Pat<(setult (i16 (and (trunc (bfe Int32Regs:$a, imm:$oa, 8)), 255)),
2309+
(i16 (and (trunc (bfe Int32Regs:$b, imm:$ob, 8)), 255))),
2310+
(SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpLO)>;
2311+
def: Pat<(setule (i16 (and (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8)), 255)),
2312+
(i16 (and (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8)), 255))),
2313+
(SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpLS)>;
2314+
def: Pat<(setule (i16 (and (trunc (bfe Int32Regs:$a, imm:$oa, 8)), 255)),
2315+
(i16 (and (trunc (bfe Int32Regs:$b, imm:$ob, 8)), 255))),
2316+
(SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpLS)>;
2317+
def: Pat<(seteq (i16 (and (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8)), 255)),
2318+
(i16 (and (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8)), 255))),
2319+
(SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpEQ)>;
2320+
def: Pat<(seteq (i16 (and (trunc (bfe Int32Regs:$a, imm:$oa, 8)), 255)),
2321+
(i16 (and (trunc (bfe Int32Regs:$b, imm:$ob, 8)), 255))),
2322+
(SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpEQ)>;
2323+
def: Pat<(setne (i16 (and (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8)), 255)),
2324+
(i16 (and (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8)), 255))),
2325+
(SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpNE)>;
2326+
def: Pat<(setne (i16 (and (trunc (bfe Int32Regs:$a, imm:$oa, 8)), 255)),
2327+
(i16 (and (trunc (bfe Int32Regs:$b, imm:$ob, 8)), 255))),
2328+
(SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpNE)>;
22832329

22842330
// i1 compare -> i32
22852331
def : Pat<(i32 (setne Int1Regs:$a, Int1Regs:$b)),

0 commit comments

Comments
 (0)