Skip to content

Commit 329b8cd

Browse files
nemanjai, lei137
authored and committed
[PowerPC] Improve code gen for vector add
Improve codegen for vector modulo additions.

Reviewed By: nemanjai

Differential Revision: https://reviews.llvm.org/D154447
1 parent 22a32f7 commit 329b8cd

File tree

2 files changed

+24
-7
lines changed

2 files changed

+24
-7
lines changed

llvm/lib/Target/PowerPC/PPCInstrAltivec.td

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1161,6 +1161,13 @@ def : Pat<(v8i16 (srl (sub v8i16:$vA, (v8i16 (bitconvert(vnot v4i32:$vB)))),
11611161
def : Pat<(v16i8 (srl (sub v16i8:$vA, (v16i8 (bitconvert(vnot v4i32:$vB)))),
11621162
(v16i8 (immEQOneV)))), (v16i8 (VAVGUB $vA, $vB))>;
11631163

1164+
def : Pat<(v16i8 (shl v16i8:$vA, (v16i8 (immEQOneV)))),
1165+
(v16i8 (VADDUBM $vA, $vA))>;
1166+
def : Pat<(v8i16 (shl v8i16:$vA, (v8i16 (immEQOneV)))),
1167+
(v8i16 (VADDUHM $vA, $vA))>;
1168+
def : Pat<(v4i32 (shl v4i32:$vA, (v4i32 (immEQOneV)))),
1169+
(v4i32 (VADDUWM $vA, $vA))>;
1170+
11641171
} // end HasAltivec
11651172

11661173
// [PO VRT VRA VRB 1 PS XO], "_o" means CR6 is set.
Lines changed: 17 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,12 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
22
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
3-
; RUN: -mcpu=pwr7 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
3+
; RUN: -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
44
; RUN: FileCheck %s
55

66
define dso_local <16 x i8> @x2(<16 x i8> noundef %x) {
77
; CHECK-LABEL: x2:
88
; CHECK: # %bb.0: # %entry
9-
; CHECK-NEXT: vspltisb v3, 1
10-
; CHECK-NEXT: vslb v2, v2, v3
9+
; CHECK-NEXT: vaddubm v2, v2, v2
1110
; CHECK-NEXT: blr
1211
entry:
1312
%add = shl <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
@@ -17,8 +16,7 @@ entry:
1716
define dso_local <8 x i16> @x2h(<8 x i16> noundef %x) {
1817
; CHECK-LABEL: x2h:
1918
; CHECK: # %bb.0: # %entry
20-
; CHECK-NEXT: vspltish v3, 1
21-
; CHECK-NEXT: vslh v2, v2, v3
19+
; CHECK-NEXT: vadduhm v2, v2, v2
2220
; CHECK-NEXT: blr
2321
entry:
2422
%add = shl <8 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
@@ -28,10 +26,22 @@ entry:
2826
define dso_local <4 x i32> @x2w(<4 x i32> noundef %x) {
2927
; CHECK-LABEL: x2w:
3028
; CHECK: # %bb.0: # %entry
31-
; CHECK-NEXT: vspltisw v3, 1
32-
; CHECK-NEXT: vslw v2, v2, v3
29+
; CHECK-NEXT: vadduwm v2, v2, v2
3330
; CHECK-NEXT: blr
3431
entry:
3532
%add = shl <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
3633
ret <4 x i32> %add
3734
}
35+
36+
define dso_local <2 x i64> @x2d(<2 x i64> noundef %x) {
37+
; CHECK-LABEL: x2d:
38+
; CHECK: # %bb.0: # %entry
39+
; CHECK-NEXT: addis r3, r2, .LCPI3_0@toc@ha
40+
; CHECK-NEXT: addi r3, r3, .LCPI3_0@toc@l
41+
; CHECK-NEXT: lxvd2x v3, 0, r3
42+
; CHECK-NEXT: vsld v2, v2, v3
43+
; CHECK-NEXT: blr
44+
entry:
45+
%add = shl <2 x i64> %x, <i64 1, i64 1>
46+
ret <2 x i64> %add
47+
}

0 commit comments

Comments
 (0)