llvm · dtcxzyw · Oct 2, 2024 · Oct 1, 2024 · Oct 1, 2024 · Oct 1, 2024
@@ -592,6 +592,15 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Instruction *I,
     if (DemandedFromOps.isOne() && DemandedFromOps.isSubsetOf(LHSKnown.Zero))
       return I->getOperand(1);
 
+    // Canonicalize sub mask, X -> ~X if the mask covers DemandedFromOps.
+    const APInt *LHSC;
+    if (match(I->getOperand(0), m_LowBitMask(LHSC)) &&
+        DemandedFromOps.isSubsetOf(*LHSC)) {
+      IRBuilderBase::InsertPointGuard Guard(Builder);
+      Builder.SetInsertPoint(I);
+      return Builder.CreateNot(I->getOperand(1));
+    }
+
     // Otherwise just compute the known bits of the result.
     bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
     bool NUW = cast<OverflowingBinaryOperator>(I)->hasNoUnsignedWrap();

diff --git a/llvm/test/Transforms/InstCombine/sub.ll b/llvm/test/Transforms/InstCombine/sub.ll
@@ -2797,3 +2797,71 @@ if.then:
 if.else:
   ret i32 0
 }
+
+; The 'and' mask (63) equals the constant minuend of the 'sub' (63). The CHECK
+; lines expect the sub to disappear in favour of (%x & 63) ^ 63.
+define i32 @fold_sub_and_into_andn(i32 %x) {
+; CHECK-LABEL: @fold_sub_and_into_andn(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = and i32 [[X:%.*]], 63
+; CHECK-NEXT:    [[AND:%.*]] = xor i32 [[TMP0]], 63
+; CHECK-NEXT:    ret i32 [[AND]]
+;
+entry:
+  %sub = sub i32 63, %x
+  %and = and i32 %sub, 63
+  ret i32 %and
+}
+
+; Same sub/and pair, but the result feeds 'icmp eq ..., 0'. The CHECK lines
+; expect the whole sequence to become (%x & 63) == 63.
+define i1 @fold_sub_and_into_andn_icmp(i32 %x) {
+; CHECK-LABEL: @fold_sub_and_into_andn_icmp(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = and i32 [[X:%.*]], 63
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], 63
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+entry:
+  %sub = sub i32 63, %x
+  %and = and i32 %sub, 63
+  %cmp = icmp eq i32 %and, 0
+  ret i1 %cmp
+}
+
+; The 'and' mask (31) is a strict subset of the constant minuend (63). The CHECK
+; lines expect the sub to disappear in favour of (%x & 31) ^ 31.
+define i32 @fold_sub_and_into_andn_subset(i32 %x) {
+; CHECK-LABEL: @fold_sub_and_into_andn_subset(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = and i32 [[X:%.*]], 31
+; CHECK-NEXT:    [[AND:%.*]] = xor i32 [[TMP0]], 31
+; CHECK-NEXT:    ret i32 [[AND]]
+;
+entry:
+  %sub = sub i32 63, %x
+  %and = and i32 %sub, 31
+  ret i32 %and
+}
+
+; Negative tests
+
+; The minuend is the variable %y rather than a constant mask. The CHECK lines
+; expect the sub/and pair to be left unchanged.
+define i32 @fold_sub_and_into_andn_nonmask(i32 %x, i32 %y) {
+; CHECK-LABEL: @fold_sub_and_into_andn_nonmask(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SUB:%.*]] = sub i32 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[SUB]], 63
+; CHECK-NEXT:    ret i32 [[AND]]
+;
+entry:
+  %sub = sub i32 %y, %x
+  %and = and i32 %sub, 63
+  ret i32 %and
+}
+
+; The 'and' mask (127) has bit 6 set, which the constant minuend (63) lacks.
+; The CHECK lines expect the sub/and pair to be left unchanged.
+define i32 @fold_sub_and_into_andn_superset(i32 %x) {
+; CHECK-LABEL: @fold_sub_and_into_andn_superset(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SUB:%.*]] = sub i32 63, [[X:%.*]]
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[SUB]], 127
+; CHECK-NEXT:    ret i32 [[AND]]
+;
+entry:
+  %sub = sub i32 63, %x
+  %and = and i32 %sub, 127
+  ret i32 %and
+}
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vec-shift.ll b/llvm/test/Transforms/PhaseOrdering/X86/vec-shift.ll
@@ -17,13 +17,13 @@ define noundef i64 @foo(i64 noundef %0) {
 ; CHECK-NEXT:    ret i64 [[TMP3]]
 ;
 ; SSE-LABEL: @foo(
-; SSE-NEXT:    [[TMP2:%.*]] = shl i64 [[TMP0:%.*]], 44
-; SSE-NEXT:    [[TMP3:%.*]] = sub nuw nsw i64 -17592186044416, [[TMP2]]
+; SSE-NEXT:    [[TMP2:%.*]] = xor i64 [[TMP0:%.*]], -1
+; SSE-NEXT:    [[TMP3:%.*]] = shl i64 [[TMP2]], 44
 ; SSE-NEXT:    ret i64 [[TMP3]]
 ;
 ; AVX-LABEL: @foo(
-; AVX-NEXT:    [[TMP2:%.*]] = shl i64 [[TMP0:%.*]], 44
-; AVX-NEXT:    [[TMP3:%.*]] = sub nuw nsw i64 -17592186044416, [[TMP2]]
+; AVX-NEXT:    [[TMP2:%.*]] = xor i64 [[TMP0:%.*]], -1
+; AVX-NEXT:    [[TMP3:%.*]] = shl i64 [[TMP2]], 44
 ; AVX-NEXT:    ret i64 [[TMP3]]
 ;
   %2 = sub i64 1048575, %0
@@ -34,35 +34,35 @@ define noundef i64 @foo(i64 noundef %0) {
 define void @bar(ptr noundef %0) {
 ; SSE-LABEL: @bar(
 ; SSE-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr [[TMP0:%.*]], align 8
-; SSE-NEXT:    [[TMP3:%.*]] = shl <2 x i64> [[TMP2]], <i64 44, i64 44>
-; SSE-NEXT:    [[TMP4:%.*]] = sub nuw nsw <2 x i64> <i64 -17592186044416, i64 -17592186044416>, [[TMP3]]
+; SSE-NEXT:    [[TMP3:%.*]] = xor <2 x i64> [[TMP2]], <i64 -1, i64 -1>
+; SSE-NEXT:    [[TMP4:%.*]] = shl <2 x i64> [[TMP3]], <i64 44, i64 44>
 ; SSE-NEXT:    store <2 x i64> [[TMP4]], ptr [[TMP0]], align 8
 ; SSE-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 16
 ; SSE-NEXT:    [[TMP6:%.*]] = load <2 x i64>, ptr [[TMP5]], align 8
-; SSE-NEXT:    [[TMP7:%.*]] = shl <2 x i64> [[TMP6]], <i64 44, i64 44>
-; SSE-NEXT:    [[TMP8:%.*]] = sub nuw nsw <2 x i64> <i64 -17592186044416, i64 -17592186044416>, [[TMP7]]
+; SSE-NEXT:    [[TMP7:%.*]] = xor <2 x i64> [[TMP6]], <i64 -1, i64 -1>
+; SSE-NEXT:    [[TMP8:%.*]] = shl <2 x i64> [[TMP7]], <i64 44, i64 44>
 ; SSE-NEXT:    store <2 x i64> [[TMP8]], ptr [[TMP5]], align 8
 ; SSE-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 32
 ; SSE-NEXT:    [[TMP10:%.*]] = load <2 x i64>, ptr [[TMP9]], align 8
-; SSE-NEXT:    [[TMP11:%.*]] = shl <2 x i64> [[TMP10]], <i64 44, i64 44>
-; SSE-NEXT:    [[TMP12:%.*]] = sub nuw nsw <2 x i64> <i64 -17592186044416, i64 -17592186044416>, [[TMP11]]
+; SSE-NEXT:    [[TMP11:%.*]] = xor <2 x i64> [[TMP10]], <i64 -1, i64 -1>
+; SSE-NEXT:    [[TMP12:%.*]] = shl <2 x i64> [[TMP11]], <i64 44, i64 44>
 ; SSE-NEXT:    store <2 x i64> [[TMP12]], ptr [[TMP9]], align 8
 ; SSE-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 48
 ; SSE-NEXT:    [[TMP14:%.*]] = load <2 x i64>, ptr [[TMP13]], align 8
-; SSE-NEXT:    [[TMP15:%.*]] = shl <2 x i64> [[TMP14]], <i64 44, i64 44>
-; SSE-NEXT:    [[TMP16:%.*]] = sub nuw nsw <2 x i64> <i64 -17592186044416, i64 -17592186044416>, [[TMP15]]
+; SSE-NEXT:    [[TMP15:%.*]] = xor <2 x i64> [[TMP14]], <i64 -1, i64 -1>
+; SSE-NEXT:    [[TMP16:%.*]] = shl <2 x i64> [[TMP15]], <i64 44, i64 44>
 ; SSE-NEXT:    store <2 x i64> [[TMP16]], ptr [[TMP13]], align 8
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @bar(
 ; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 8
-; AVX-NEXT:    [[TMP3:%.*]] = shl <4 x i64> [[TMP2]], <i64 44, i64 44, i64 44, i64 44>
-; AVX-NEXT:    [[TMP4:%.*]] = sub nuw nsw <4 x i64> <i64 -17592186044416, i64 -17592186044416, i64 -17592186044416, i64 -17592186044416>, [[TMP3]]
+; AVX-NEXT:    [[TMP3:%.*]] = xor <4 x i64> [[TMP2]], <i64 -1, i64 -1, i64 -1, i64 -1>
+; AVX-NEXT:    [[TMP4:%.*]] = shl <4 x i64> [[TMP3]], <i64 44, i64 44, i64 44, i64 44>
 ; AVX-NEXT:    store <4 x i64> [[TMP4]], ptr [[TMP0]], align 8
 ; AVX-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 32
 ; AVX-NEXT:    [[TMP6:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8
-; AVX-NEXT:    [[TMP7:%.*]] = shl <4 x i64> [[TMP6]], <i64 44, i64 44, i64 44, i64 44>
-; AVX-NEXT:    [[TMP8:%.*]] = sub nuw nsw <4 x i64> <i64 -17592186044416, i64 -17592186044416, i64 -17592186044416, i64 -17592186044416>, [[TMP7]]
+; AVX-NEXT:    [[TMP7:%.*]] = xor <4 x i64> [[TMP6]], <i64 -1, i64 -1, i64 -1, i64 -1>
+; AVX-NEXT:    [[TMP8:%.*]] = shl <4 x i64> [[TMP7]], <i64 44, i64 44, i64 44, i64 44>
 ; AVX-NEXT:    store <4 x i64> [[TMP8]], ptr [[TMP5]], align 8
 ; AVX-NEXT:    ret void
 ;