@@ -11436,6 +11436,113 @@ defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU,
11436
11436
defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
11437
11437
avx512vl_i64_info>, VEX_W; // "vpternlogq" variant, instantiated with the i64 vector info
11438
11438
11439
+ // Patterns to select the Q-form VPTERNLOG instructions for vXi8/vXi16 X86vpternlog nodes.
11440
+ let Predicates = [HasVLX] in { // 128/256-bit types; each type gets the same four forms
11441
+ def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2, VR128X:$src3,
11442
+ (i8 timm:$src4))),
11443
+ (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3,
11444
+ timm:$src4)>; // form 1: all three operands in registers
11445
+ def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2,
11446
+ (loadv16i8 addr:$src3), (i8 timm:$src4))),
11447
+ (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
11448
+ timm:$src4)>; // form 2: load is already operand 3, immediate passed through
11449
+ def : Pat<(v16i8 (X86vpternlog (loadv16i8 addr:$src3), VR128X:$src2,
11450
+ VR128X:$src1, (i8 timm:$src4))),
11451
+ (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
11452
+ (VPTERNLOG321_imm8 timm:$src4))>; // form 3: load was operand 1, exchanged with operand 3; imm presumably permuted to match (transform defined elsewhere)
11453
+ def : Pat<(v16i8 (X86vpternlog VR128X:$src1, (loadv16i8 addr:$src3),
11454
+ VR128X:$src2, (i8 timm:$src4))),
11455
+ (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
11456
+ (VPTERNLOG132_imm8 timm:$src4))>; // form 4: load was operand 2, exchanged with operand 3; imm presumably permuted to match (transform defined elsewhere)
11457
+
11458
+ def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2, VR128X:$src3,
11459
+ (i8 timm:$src4))),
11460
+ (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3,
11461
+ timm:$src4)>;
11462
+ def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2,
11463
+ (loadv8i16 addr:$src3), (i8 timm:$src4))),
11464
+ (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
11465
+ timm:$src4)>;
11466
+ def : Pat<(v8i16 (X86vpternlog (loadv8i16 addr:$src3), VR128X:$src2,
11467
+ VR128X:$src1, (i8 timm:$src4))),
11468
+ (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
11469
+ (VPTERNLOG321_imm8 timm:$src4))>;
11470
+ def : Pat<(v8i16 (X86vpternlog VR128X:$src1, (loadv8i16 addr:$src3),
11471
+ VR128X:$src2, (i8 timm:$src4))),
11472
+ (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
11473
+ (VPTERNLOG132_imm8 timm:$src4))>;
11474
+
11475
+ def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2, VR256X:$src3,
11476
+ (i8 timm:$src4))),
11477
+ (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3,
11478
+ timm:$src4)>;
11479
+ def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2,
11480
+ (loadv32i8 addr:$src3), (i8 timm:$src4))),
11481
+ (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
11482
+ timm:$src4)>;
11483
+ def : Pat<(v32i8 (X86vpternlog (loadv32i8 addr:$src3), VR256X:$src2,
11484
+ VR256X:$src1, (i8 timm:$src4))),
11485
+ (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
11486
+ (VPTERNLOG321_imm8 timm:$src4))>;
11487
+ def : Pat<(v32i8 (X86vpternlog VR256X:$src1, (loadv32i8 addr:$src3),
11488
+ VR256X:$src2, (i8 timm:$src4))),
11489
+ (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
11490
+ (VPTERNLOG132_imm8 timm:$src4))>;
11491
+
11492
+ def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2, VR256X:$src3,
11493
+ (i8 timm:$src4))),
11494
+ (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3,
11495
+ timm:$src4)>;
11496
+ def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2,
11497
+ (loadv16i16 addr:$src3), (i8 timm:$src4))),
11498
+ (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
11499
+ timm:$src4)>;
11500
+ def : Pat<(v16i16 (X86vpternlog (loadv16i16 addr:$src3), VR256X:$src2,
11501
+ VR256X:$src1, (i8 timm:$src4))),
11502
+ (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
11503
+ (VPTERNLOG321_imm8 timm:$src4))>;
11504
+ def : Pat<(v16i16 (X86vpternlog VR256X:$src1, (loadv16i16 addr:$src3),
11505
+ VR256X:$src2, (i8 timm:$src4))),
11506
+ (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
11507
+ (VPTERNLOG132_imm8 timm:$src4))>;
11508
+ }
11509
+
11510
+ let Predicates = [HasAVX512] in { // 512-bit v64i8/v32i16 X86vpternlog nodes, selected to VPTERNLOGQZ; same four forms per type
11511
+ def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2, VR512:$src3,
11512
+ (i8 timm:$src4))),
11513
+ (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3,
11514
+ timm:$src4)>; // form 1: all three operands in registers
11515
+ def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2,
11516
+ (loadv64i8 addr:$src3), (i8 timm:$src4))),
11517
+ (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
11518
+ timm:$src4)>; // form 2: load is already operand 3, immediate passed through
11519
+ def : Pat<(v64i8 (X86vpternlog (loadv64i8 addr:$src3), VR512:$src2,
11520
+ VR512:$src1, (i8 timm:$src4))),
11521
+ (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
11522
+ (VPTERNLOG321_imm8 timm:$src4))>; // form 3: load was operand 1, exchanged with operand 3; imm presumably permuted to match (transform defined elsewhere)
11523
+ def : Pat<(v64i8 (X86vpternlog VR512:$src1, (loadv64i8 addr:$src3),
11524
+ VR512:$src2, (i8 timm:$src4))),
11525
+ (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
11526
+ (VPTERNLOG132_imm8 timm:$src4))>; // form 4: load was operand 2, exchanged with operand 3; imm presumably permuted to match (transform defined elsewhere)
11527
+
11528
+ def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2, VR512:$src3,
11529
+ (i8 timm:$src4))),
11530
+ (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3,
11531
+ timm:$src4)>;
11532
+ def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2,
11533
+ (loadv32i16 addr:$src3), (i8 timm:$src4))),
11534
+ (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
11535
+ timm:$src4)>;
11536
+ def : Pat<(v32i16 (X86vpternlog (loadv32i16 addr:$src3), VR512:$src2,
11537
+ VR512:$src1, (i8 timm:$src4))),
11538
+ (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
11539
+ (VPTERNLOG321_imm8 timm:$src4))>;
11540
+ def : Pat<(v32i16 (X86vpternlog VR512:$src1, (loadv32i16 addr:$src3),
11541
+ VR512:$src2, (i8 timm:$src4))),
11542
+ (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
11543
+ (VPTERNLOG132_imm8 timm:$src4))>;
11544
+ }
11545
+
11439
11546
// Patterns to implement vnot using vpternlog instead of creating all ones
11440
11547
// using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen
11441
11548
// so that the result is only dependent on src0. But we use the same source
@@ -11533,49 +11640,6 @@ let Predicates = [HasVLX] in {
11533
11640
(VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11534
11641
}
11535
11642
11536
- let Predicates = [HasVLX] in { // or(and(src1,src2), andnp(src1,src3)) -> VPTERNLOGQ with imm 202 (0xCA), for the 128/256-bit integer types listed; NOTE(review): 0xCA is presumably the "src1 ? src2 : src3" truth table - confirm
11537
- def : Pat<(v16i8 (or (and VR128X:$src1, VR128X:$src2),
11538
- (X86andnp VR128X:$src1, VR128X:$src3))),
11539
- (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3, (i8 202))>;
11540
- def : Pat<(v8i16 (or (and VR128X:$src1, VR128X:$src2),
11541
- (X86andnp VR128X:$src1, VR128X:$src3))),
11542
- (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3, (i8 202))>;
11543
- def : Pat<(v4i32 (or (and VR128X:$src1, VR128X:$src2),
11544
- (X86andnp VR128X:$src1, VR128X:$src3))),
11545
- (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3, (i8 202))>;
11546
- def : Pat<(v2i64 (or (and VR128X:$src1, VR128X:$src2),
11547
- (X86andnp VR128X:$src1, VR128X:$src3))),
11548
- (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3, (i8 202))>;
11549
-
11550
- def : Pat<(v32i8 (or (and VR256X:$src1, VR256X:$src2),
11551
- (X86andnp VR256X:$src1, VR256X:$src3))),
11552
- (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3, (i8 202))>;
11553
- def : Pat<(v16i16 (or (and VR256X:$src1, VR256X:$src2),
11554
- (X86andnp VR256X:$src1, VR256X:$src3))),
11555
- (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3, (i8 202))>;
11556
- def : Pat<(v8i32 (or (and VR256X:$src1, VR256X:$src2),
11557
- (X86andnp VR256X:$src1, VR256X:$src3))),
11558
- (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3, (i8 202))>;
11559
- def : Pat<(v4i64 (or (and VR256X:$src1, VR256X:$src2),
11560
- (X86andnp VR256X:$src1, VR256X:$src3))),
11561
- (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3, (i8 202))>;
11562
- }
11563
-
11564
- let Predicates = [HasAVX512] in { // or(and(src1,src2), andnp(src1,src3)) -> VPTERNLOGQZrri with imm 202 (0xCA), for the 512-bit integer types listed; NOTE(review): 0xCA is presumably the "src1 ? src2 : src3" truth table - confirm
11565
- def : Pat<(v64i8 (or (and VR512:$src1, VR512:$src2),
11566
- (X86andnp VR512:$src1, VR512:$src3))),
11567
- (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3, (i8 202))>;
11568
- def : Pat<(v32i16 (or (and VR512:$src1, VR512:$src2),
11569
- (X86andnp VR512:$src1, VR512:$src3))),
11570
- (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3, (i8 202))>;
11571
- def : Pat<(v16i32 (or (and VR512:$src1, VR512:$src2),
11572
- (X86andnp VR512:$src1, VR512:$src3))),
11573
- (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3, (i8 202))>;
11574
- def : Pat<(v8i64 (or (and VR512:$src1, VR512:$src2),
11575
- (X86andnp VR512:$src1, VR512:$src3))),
11576
- (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3, (i8 202))>;
11577
- }
11578
-
11579
11643
//===----------------------------------------------------------------------===//
11580
11644
// AVX-512 - FixupImm
11581
11645
//===----------------------------------------------------------------------===//
0 commit comments