Skip to content

Commit 5f78ba0

Browse files
committed
[X86][Codegen] Shift amount mod: sh? i64 x, (32-y) --> sh? i64 x, -(y+32)
I've seen this in RawSpeed's BitPumpMSB*::push() hot path, after fixing the buffer abstraction to a saner one, when looking into a +5% runtime regression. I was hoping that this would fix it, but it does not look like it does. This seems to be at least not worse than the original pattern. But I'm actually mainly interested in the case where we already compute `(y+32)` (see the last test). Alive2 proof: https://alive2.llvm.org/ce/z/ZCzJio Reviewed By: spatel. Differential Revision: https://reviews.llvm.org/D101944
1 parent dc00cbb commit 5f78ba0

File tree

2 files changed

+39
-22
lines changed

2 files changed

+39
-22
lines changed

llvm/lib/Target/X86/X86ISelDAGToDAG.cpp

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3854,14 +3854,29 @@ bool X86DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
38543854
// If we are shifting by N-X where N == 0 mod Size, then just shift by -X
38553855
// to generate a NEG instead of a SUB of a constant.
38563856
} else if (ShiftAmt->getOpcode() == ISD::SUB && Add0C &&
3857-
Add0C->getAPIntValue() != 0 &&
3858-
Add0C->getAPIntValue().urem(Size) == 0) {
3857+
Add0C->getZExtValue() != 0) {
3858+
EVT SubVT = ShiftAmt.getValueType();
3859+
SDValue X;
3860+
if (Add0C->getZExtValue() % Size == 0)
3861+
X = Add1;
3862+
else if (ShiftAmt.hasOneUse() && Size == 64 &&
3863+
Add0C->getZExtValue() % 32 == 0) {
3864+
// We have a 64-bit shift by (n*32-x), turn it into -(x+n*32).
3865+
// This is mainly beneficial if we already compute (x+n*32).
3866+
if (Add1.getOpcode() == ISD::TRUNCATE) {
3867+
Add1 = Add1.getOperand(0);
3868+
SubVT = Add1.getValueType();
3869+
}
3870+
X = CurDAG->getNode(ISD::ADD, DL, SubVT, Add1,
3871+
CurDAG->getZExtOrTrunc(Add0, DL, SubVT));
3872+
insertDAGNode(*CurDAG, OrigShiftAmt, X);
3873+
} else
3874+
return false;
38593875
// Insert a negate op.
38603876
// TODO: This isn't guaranteed to replace the sub if there is a logic cone
38613877
// that uses it that's not a shift.
3862-
EVT SubVT = ShiftAmt.getValueType();
38633878
SDValue Zero = CurDAG->getConstant(0, DL, SubVT);
3864-
SDValue Neg = CurDAG->getNode(ISD::SUB, DL, SubVT, Zero, Add1);
3879+
SDValue Neg = CurDAG->getNode(ISD::SUB, DL, SubVT, Zero, X);
38653880
NewShiftAmt = Neg;
38663881

38673882
// Insert these operands into a valid topological order so they can

llvm/test/CodeGen/X86/64-bit-shift-by-32-minus-y.ll

Lines changed: 20 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,17 @@ define i64 @t0(i64 %val, i64 %shamt) nounwind {
88
; X64-NOBMI2-LABEL: t0:
99
; X64-NOBMI2: # %bb.0:
1010
; X64-NOBMI2-NEXT: movq %rdi, %rax
11-
; X64-NOBMI2-NEXT: movb $32, %cl
12-
; X64-NOBMI2-NEXT: subb %sil, %cl
11+
; X64-NOBMI2-NEXT: leaq 32(%rsi), %rcx
12+
; X64-NOBMI2-NEXT: negq %rcx
13+
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx
1314
; X64-NOBMI2-NEXT: shlq %cl, %rax
1415
; X64-NOBMI2-NEXT: retq
1516
;
1617
; X64-BMI2-LABEL: t0:
1718
; X64-BMI2: # %bb.0:
18-
; X64-BMI2-NEXT: movb $32, %al
19-
; X64-BMI2-NEXT: subb %sil, %al
20-
; X64-BMI2-NEXT: shlxq %rax, %rdi, %rax
19+
; X64-BMI2-NEXT: addq $32, %rsi
20+
; X64-BMI2-NEXT: negq %rsi
21+
; X64-BMI2-NEXT: shlxq %rsi, %rdi, %rax
2122
; X64-BMI2-NEXT: retq
2223
;
2324
; X32-NOBMI2-LABEL: t0:
@@ -228,16 +229,17 @@ define i64 @t4(i64 %val, i64 %shamt) nounwind {
228229
; X64-NOBMI2-LABEL: t4:
229230
; X64-NOBMI2: # %bb.0:
230231
; X64-NOBMI2-NEXT: movq %rdi, %rax
231-
; X64-NOBMI2-NEXT: movb $96, %cl
232-
; X64-NOBMI2-NEXT: subb %sil, %cl
232+
; X64-NOBMI2-NEXT: leaq 96(%rsi), %rcx
233+
; X64-NOBMI2-NEXT: negq %rcx
234+
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx
233235
; X64-NOBMI2-NEXT: shlq %cl, %rax
234236
; X64-NOBMI2-NEXT: retq
235237
;
236238
; X64-BMI2-LABEL: t4:
237239
; X64-BMI2: # %bb.0:
238-
; X64-BMI2-NEXT: movb $96, %al
239-
; X64-BMI2-NEXT: subb %sil, %al
240-
; X64-BMI2-NEXT: shlxq %rax, %rdi, %rax
240+
; X64-BMI2-NEXT: addq $96, %rsi
241+
; X64-BMI2-NEXT: negq %rsi
242+
; X64-BMI2-NEXT: shlxq %rsi, %rdi, %rax
241243
; X64-BMI2-NEXT: retq
242244
;
243245
; X32-NOBMI2-LABEL: t4:
@@ -282,21 +284,21 @@ define i64 @t4(i64 %val, i64 %shamt) nounwind {
282284
define i64 @t5_cse(i64 %val, i64 %shamt, i64*%dst) nounwind {
283285
; X64-NOBMI2-LABEL: t5_cse:
284286
; X64-NOBMI2: # %bb.0:
287+
; X64-NOBMI2-NEXT: movq %rsi, %rcx
285288
; X64-NOBMI2-NEXT: movq %rdi, %rax
286-
; X64-NOBMI2-NEXT: leaq 32(%rsi), %rcx
289+
; X64-NOBMI2-NEXT: addq $32, %rcx
287290
; X64-NOBMI2-NEXT: movq %rcx, (%rdx)
288-
; X64-NOBMI2-NEXT: movb $32, %cl
289-
; X64-NOBMI2-NEXT: subb %sil, %cl
291+
; X64-NOBMI2-NEXT: negq %rcx
292+
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx
290293
; X64-NOBMI2-NEXT: shlq %cl, %rax
291294
; X64-NOBMI2-NEXT: retq
292295
;
293296
; X64-BMI2-LABEL: t5_cse:
294297
; X64-BMI2: # %bb.0:
295-
; X64-BMI2-NEXT: leaq 32(%rsi), %rax
296-
; X64-BMI2-NEXT: movq %rax, (%rdx)
297-
; X64-BMI2-NEXT: movb $32, %al
298-
; X64-BMI2-NEXT: subb %sil, %al
299-
; X64-BMI2-NEXT: shlxq %rax, %rdi, %rax
298+
; X64-BMI2-NEXT: addq $32, %rsi
299+
; X64-BMI2-NEXT: movq %rsi, (%rdx)
300+
; X64-BMI2-NEXT: negq %rsi
301+
; X64-BMI2-NEXT: shlxq %rsi, %rdi, %rax
300302
; X64-BMI2-NEXT: retq
301303
;
302304
; X32-NOBMI2-LABEL: t5_cse:

0 commit comments

Comments
 (0)