@@ -1886,10 +1886,14 @@ multiclass PRMT<ValueType T, RegisterClass RC> {
1886
1886
}
1887
1887
1888
1888
let hasSideEffects = false in {
1889
- defm BFE_S32 : BFE<"bfe.s32", i32, Int32Regs>;
1889
+ // Order is somewhat important here. The signed and unsigned variants match
1890
+ // the same patterns, so the first one wins. Using the unsigned byte extraction
1891
+ // has the benefit of always leaving zeros in the unused bits, which makes some
1892
+ // optimizations easier (e.g. there is no need to mask them).
1890
1893
defm BFE_U32 : BFE<"bfe.u32", i32, Int32Regs>;
1891
- defm BFE_S64 : BFE<"bfe.s64 ", i64, Int64Regs >;
1894
+ defm BFE_S32 : BFE<"bfe.s32", i32, Int32Regs>;
1892
1895
defm BFE_U64 : BFE<"bfe.u64", i64, Int64Regs>;
1896
+ defm BFE_S64 : BFE<"bfe.s64", i64, Int64Regs>;
1893
1897
1894
1898
defm BFI_B32 : BFI<"bfi.b32", i32, Int32Regs, i32imm>;
1895
1899
defm BFI_B64 : BFI<"bfi.b64", i64, Int64Regs, i64imm>;
@@ -2259,27 +2263,69 @@ def : Pat<(setueq Int1Regs:$a, Int1Regs:$b),
2259
2263
(NOT1 (XORb1rr Int1Regs:$a, Int1Regs:$b))>;
2260
2264
2261
2265
// Comparisons of i8 values extracted with BFE as i32.
2262
- def: Pat<(setgt (sext_inreg (trunc Int32Regs:$a), i8), (sext_inreg (trunc Int32Regs:$b), i8)),
2263
- (SETP_s32rr Int32Regs:$a, Int32Regs:$b, CmpGT)>;
2264
- def: Pat<(setge (sext_inreg (trunc Int32Regs:$a), i8), (sext_inreg (trunc Int32Regs:$b), i8)),
2265
- (SETP_s32rr Int32Regs:$a, Int32Regs:$b, CmpGE)>;
2266
- def: Pat<(setlt (sext_inreg (trunc Int32Regs:$a), i8), (sext_inreg (trunc Int32Regs:$b), i8)),
2267
- (SETP_s32rr Int32Regs:$a, Int32Regs:$b, CmpLT)>;
2268
- def: Pat<(setle (sext_inreg (trunc Int32Regs:$a), i8), (sext_inreg (trunc Int32Regs:$b), i8)),
2269
- (SETP_s32rr Int32Regs:$a, Int32Regs:$b, CmpLE)>;
2270
-
2271
- def: Pat<(setugt (i16 (and (trunc Int32Regs:$a), 255)), (i16 (and (trunc Int32Regs:$b), 255))),
2272
- (SETP_u32rr Int32Regs:$a, Int32Regs:$b, CmpHI)>;
2273
- def: Pat<(setuge (i16 (and (trunc Int32Regs:$a), 255)), (i16 (and (trunc Int32Regs:$b), 255))),
2274
- (SETP_u32rr Int32Regs:$a, Int32Regs:$b, CmpHS)>;
2275
- def: Pat<(setult (i16 (and (trunc Int32Regs:$a), 255)), (i16 (and (trunc Int32Regs:$b), 255))),
2276
- (SETP_u32rr Int32Regs:$a, Int32Regs:$b, CmpLO)>;
2277
- def: Pat<(setule (i16 (and (trunc Int32Regs:$a), 255)), (i16 (and (trunc Int32Regs:$b), 255))),
2278
- (SETP_u32rr Int32Regs:$a, Int32Regs:$b, CmpLS)>;
2279
- def: Pat<(seteq (i16 (and (trunc Int32Regs:$a), 255)), (i16 (and (trunc Int32Regs:$b), 255))),
2280
- (SETP_u32rr Int32Regs:$a, Int32Regs:$b, CmpEQ)>;
2281
- def: Pat<(setne (i16 (and (trunc Int32Regs:$a), 255)), (i16 (and (trunc Int32Regs:$b), 255))),
2282
- (SETP_u32rr Int32Regs:$a, Int32Regs:$b, CmpNE)>;
2266
+ // It's faster to compare the i32 values extracted by BFE directly,
2267
+ // instead of going through the long conversion and sign extension.
2268
+ def: Pat<(setgt (i16 (sext_inreg (i16 (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8))), i8)),
2269
+ (i16 (sext_inreg (i16 (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8))), i8))),
2270
+ (SETP_s32rr (BFE_S32rri $a, $oa, 8), (BFE_S32rri $b, $ob, 8), CmpGT)>;
2271
+ def: Pat<(setgt (i16 (sext_inreg (trunc (bfe Int32Regs:$a, imm:$oa, 8)), i8)),
2272
+ (i16 (sext_inreg (trunc (bfe Int32Regs:$b, imm:$ob, 8)), i8))),
2273
+ (SETP_s32rr (BFE_S32rii $a, imm:$oa, 8), (BFE_S32rii $b, imm:$ob, 8), CmpGT)>;
2274
+ def: Pat<(setge (i16 (sext_inreg (i16 (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8))), i8)),
2275
+ (i16 (sext_inreg (i16 (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8))), i8))),
2276
+ (SETP_s32rr (BFE_S32rri $a, $oa, 8), (BFE_S32rri $b, $ob, 8), CmpGE)>;
2277
+ def: Pat<(setge (i16 (sext_inreg (trunc (bfe Int32Regs:$a, imm:$oa, 8)), i8)),
2278
+ (i16 (sext_inreg (trunc (bfe Int32Regs:$b, imm:$ob, 8)), i8))),
2279
+ (SETP_s32rr (BFE_S32rii $a, imm:$oa, 8), (BFE_S32rii $b, imm:$ob, 8), CmpGE)>;
2280
+ def: Pat<(setlt (i16 (sext_inreg (i16 (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8))), i8)),
2281
+ (i16 (sext_inreg (i16 (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8))), i8))),
2282
+ (SETP_s32rr (BFE_S32rri $a, $oa, 8), (BFE_S32rri $b, $ob, 8), CmpLT)>;
2283
+ def: Pat<(setlt (i16 (sext_inreg (trunc (bfe Int32Regs:$a, imm:$oa, 8)), i8)),
2284
+ (i16 (sext_inreg (trunc (bfe Int32Regs:$b, imm:$ob, 8)), i8))),
2285
+ (SETP_s32rr (BFE_S32rii $a, imm:$oa, 8), (BFE_S32rii $b, imm:$ob, 8), CmpLT)>;
2286
+ def: Pat<(setle (i16 (sext_inreg (i16 (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8))), i8)),
2287
+ (i16 (sext_inreg (i16 (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8))), i8))),
2288
+ (SETP_s32rr (BFE_S32rri $a, $oa, 8), (BFE_S32rri $b, $ob, 8), CmpLE)>;
2289
+ def: Pat<(setle (i16 (sext_inreg (trunc (bfe Int32Regs:$a, imm:$oa, 8)), i8)),
2290
+ (i16 (sext_inreg (trunc (bfe Int32Regs:$b, imm:$ob, 8)), i8))),
2291
+ (SETP_s32rr (BFE_S32rii $a, imm:$oa, 8), (BFE_S32rii $b, imm:$ob, 8), CmpLE)>;
2292
+
2293
+ def: Pat<(setugt (i16 (and (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8)), 255)),
2294
+ (i16 (and (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8)), 255))),
2295
+ (SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpHI)>;
2296
+ def: Pat<(setugt (i16 (and (trunc (bfe Int32Regs:$a, imm:$oa, 8)), 255)),
2297
+ (i16 (and (trunc (bfe Int32Regs:$b, imm:$ob, 8)), 255))),
2298
+ (SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpHI)>;
2299
+ def: Pat<(setuge (i16 (and (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8)), 255)),
2300
+ (i16 (and (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8)), 255))),
2301
+ (SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpHS)>;
2302
+ def: Pat<(setuge (i16 (and (trunc (bfe Int32Regs:$a, imm:$oa, 8)), 255)),
2303
+ (i16 (and (trunc (bfe Int32Regs:$b, imm:$ob, 8)), 255))),
2304
+ (SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpHS)>;
2305
+ def: Pat<(setult (i16 (and (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8)), 255)),
2306
+ (i16 (and (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8)), 255))),
2307
+ (SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpLO)>;
2308
+ def: Pat<(setult (i16 (and (trunc (bfe Int32Regs:$a, imm:$oa, 8)), 255)),
2309
+ (i16 (and (trunc (bfe Int32Regs:$b, imm:$ob, 8)), 255))),
2310
+ (SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpLO)>;
2311
+ def: Pat<(setule (i16 (and (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8)), 255)),
2312
+ (i16 (and (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8)), 255))),
2313
+ (SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpLS)>;
2314
+ def: Pat<(setule (i16 (and (trunc (bfe Int32Regs:$a, imm:$oa, 8)), 255)),
2315
+ (i16 (and (trunc (bfe Int32Regs:$b, imm:$ob, 8)), 255))),
2316
+ (SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpLS)>;
2317
+ def: Pat<(seteq (i16 (and (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8)), 255)),
2318
+ (i16 (and (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8)), 255))),
2319
+ (SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpEQ)>;
2320
+ def: Pat<(seteq (i16 (and (trunc (bfe Int32Regs:$a, imm:$oa, 8)), 255)),
2321
+ (i16 (and (trunc (bfe Int32Regs:$b, imm:$ob, 8)), 255))),
2322
+ (SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpEQ)>;
2323
+ def: Pat<(setne (i16 (and (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8)), 255)),
2324
+ (i16 (and (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8)), 255))),
2325
+ (SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpNE)>;
2326
+ def: Pat<(setne (i16 (and (trunc (bfe Int32Regs:$a, imm:$oa, 8)), 255)),
2327
+ (i16 (and (trunc (bfe Int32Regs:$b, imm:$ob, 8)), 255))),
2328
+ (SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpNE)>;
2283
2329
2284
2330
// i1 compare -> i32
2285
2331
def : Pat<(i32 (setne Int1Regs:$a, Int1Regs:$b)),
0 commit comments