Skip to content

Commit 68ab662

Browse files
committed
[InstCombine] Convert or concat to fshl if opposite or concat exists
If two 'or' instructions concatenate the same variables in opposite order and the first 'or' dominates the second, the second 'or' can be rewritten as an fshl that rotates the result of the first 'or'. This eliminates a shl and exposes more optimization opportunities for bswap/bitreverse matching.
1 parent 5b3b1bb commit 68ab662

File tree

2 files changed

+86
-2
lines changed

2 files changed

+86
-2
lines changed

llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp

Lines changed: 44 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2727,7 +2727,8 @@ Instruction *InstCombinerImpl::matchBSwapOrBitReverse(Instruction &I,
27272727
}
27282728

27292729
/// Match UB-safe variants of the funnel shift intrinsic.
2730-
static Instruction *matchFunnelShift(Instruction &Or, InstCombinerImpl &IC) {
2730+
static Instruction *matchFunnelShift(Instruction &Or, InstCombinerImpl &IC,
2731+
const DominatorTree &DT) {
27312732
// TODO: Can we reduce the code duplication between this and the related
27322733
// rotate matching code under visitSelect and visitTrunc?
27332734
unsigned Width = Or.getType()->getScalarSizeInBits();
@@ -2832,6 +2833,47 @@ static Instruction *matchFunnelShift(Instruction &Or, InstCombinerImpl &IC) {
28322833
return nullptr;
28332834

28342835
FShiftArgs = {ShVal0, ShVal1, ShAmt};
2836+
2837+
} else if (isa<ZExtInst>(Or0) || isa<ZExtInst>(Or1)) {
2838+
// If there are two 'or' instructions concat variables in opposite order,
2839+
// the latter one can be safely converted to fshl.
2840+
//
2841+
// LowHigh = or (shl (zext Low), Width - ZextHighShlAmt), (zext High)
2842+
// HighLow = or (shl (zext High), ZextHighShlAmt), (zext Low)
2843+
// ->
2844+
// HighLow = fshl LowHigh, LowHigh, ZextHighShlAmt
2845+
if (!isa<ZExtInst>(Or1))
2846+
std::swap(Or0, Or1);
2847+
2848+
Value *High, *ZextHigh, *Low;
2849+
const APInt *ZextHighShlAmt;
2850+
if (!match(Or0,
2851+
m_OneUse(m_Shl(m_Value(ZextHigh), m_APInt(ZextHighShlAmt)))))
2852+
return nullptr;
2853+
2854+
if (!match(Or1, m_ZExt(m_Value(Low))) ||
2855+
!match(ZextHigh, m_ZExt(m_Value(High))))
2856+
return nullptr;
2857+
2858+
unsigned HighSize = High->getType()->getScalarSizeInBits();
2859+
unsigned LowSize = Low->getType()->getScalarSizeInBits();
2860+
if (*ZextHighShlAmt != LowSize || HighSize + LowSize != Width)
2861+
return nullptr;
2862+
2863+
for (User *U : ZextHigh->users()) {
2864+
Value *X, *Y;
2865+
if (!match(U, m_Or(m_Value(X), m_Value(Y))))
2866+
continue;
2867+
2868+
if (!isa<ZExtInst>(Y))
2869+
std::swap(X, Y);
2870+
2871+
if (match(X, m_Shl(m_Specific(Or1), m_SpecificInt(HighSize))) &&
2872+
match(Y, m_Specific(ZextHigh)) && DT.dominates(U, &Or)) {
2873+
FShiftArgs = {U, U, ConstantInt::get(Or0->getType(), *ZextHighShlAmt)};
2874+
break;
2875+
}
2876+
}
28352877
}
28362878

28372879
if (FShiftArgs.empty())
@@ -3333,7 +3375,7 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
33333375
/*MatchBitReversals*/ true))
33343376
return BitOp;
33353377

3336-
if (Instruction *Funnel = matchFunnelShift(I, *this))
3378+
if (Instruction *Funnel = matchFunnelShift(I, *this, DT))
33373379
return Funnel;
33383380

33393381
if (Instruction *Concat = matchOrConcat(I, Builder))

llvm/test/Transforms/InstCombine/funnel.ll

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -354,6 +354,48 @@ define <2 x i64> @fshl_select_vector(<2 x i64> %x, <2 x i64> %y, <2 x i64> %sham
354354
ret <2 x i64> %r
355355
}
356356

357+
; Convert 'or concat' to fshl if opposite 'or concat' exists.
358+
359+
define i32 @fshl_concat(i8 %x, i24 %y, ptr %addr) {
360+
; CHECK-LABEL: @fshl_concat(
361+
; CHECK-NEXT: [[ZEXT_X:%.*]] = zext i8 [[X:%.*]] to i32
362+
; CHECK-NEXT: [[SLX:%.*]] = shl nuw i32 [[ZEXT_X]], 24
363+
; CHECK-NEXT: [[ZEXT_Y:%.*]] = zext i24 [[Y:%.*]] to i32
364+
; CHECK-NEXT: [[XY:%.*]] = or i32 [[SLX]], [[ZEXT_Y]]
365+
; CHECK-NEXT: store i32 [[XY]], ptr [[ADDR:%.*]], align 4
366+
; CHECK-NEXT: [[YX:%.*]] = call i32 @llvm.fshl.i32(i32 [[XY]], i32 [[XY]], i32 8)
367+
; CHECK-NEXT: ret i32 [[YX]]
368+
;
369+
%zext.x = zext i8 %x to i32
370+
%slx = shl nuw i32 %zext.x, 24
371+
%zext.y = zext i24 %y to i32
372+
%xy = or i32 %zext.y, %slx
373+
store i32 %xy, ptr %addr, align 4
374+
%sly = shl nuw i32 %zext.y, 8
375+
%yx = or i32 %zext.x, %sly
376+
ret i32 %yx
377+
}
378+
379+
define <2 x i32> @fshl_concat_vector(<2 x i8> %x, <2 x i24> %y, ptr %addr) {
380+
; CHECK-LABEL: @fshl_concat_vector(
381+
; CHECK-NEXT: [[ZEXT_X:%.*]] = zext <2 x i8> [[X:%.*]] to <2 x i32>
382+
; CHECK-NEXT: [[SLX:%.*]] = shl nuw <2 x i32> [[ZEXT_X]], <i32 24, i32 24>
383+
; CHECK-NEXT: [[ZEXT_Y:%.*]] = zext <2 x i24> [[Y:%.*]] to <2 x i32>
384+
; CHECK-NEXT: [[XY:%.*]] = or <2 x i32> [[SLX]], [[ZEXT_Y]]
385+
; CHECK-NEXT: store <2 x i32> [[XY]], ptr [[ADDR:%.*]], align 4
386+
; CHECK-NEXT: [[YX:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[XY]], <2 x i32> [[XY]], <2 x i32> <i32 8, i32 8>)
387+
; CHECK-NEXT: ret <2 x i32> [[YX]]
388+
;
389+
%zext.x = zext <2 x i8> %x to <2 x i32>
390+
%slx = shl nuw <2 x i32> %zext.x, <i32 24, i32 24>
391+
%zext.y = zext <2 x i24> %y to <2 x i32>
392+
%xy = or <2 x i32> %slx, %zext.y
393+
store <2 x i32> %xy, ptr %addr, align 4
394+
%sly = shl nuw <2 x i32> %zext.y, <i32 8, i32 8>
395+
%yx = or <2 x i32> %sly, %zext.x
396+
ret <2 x i32> %yx
397+
}
398+
357399
; Negative test - an oversized shift in the narrow type would produce the wrong value.
358400

359401
define i8 @unmasked_shlop_unmasked_shift_amount(i32 %x, i32 %y, i32 %shamt) {

0 commit comments

Comments
 (0)