-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[DAG] Fold add(mul(add(A, CA), CM), CB) -> add(mul(A, CM), CM*CA+CB) #90860
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-aarch64 @llvm/pr-subscribers-llvm-selectiondag Author: David Green (davemgreen) ChangesThis is useful when the inner add has multiple uses, and so cannot be Full diff: https://github.com/llvm/llvm-project/pull/90860.diff 2 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index c0bbea16a64262..c53ab8deaf19d6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2838,6 +2838,36 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
return DAG.getNode(ISD::ADD, DL, VT, Not, N0.getOperand(0));
}
+ // Fold add(mul(add(A, CA), CM), CB) -> add(mul(A, CM), CM*CA+CB).
+ // This can help if the inner add has multiple uses.
+ APInt CM, CA;
+ if (ConstantSDNode *CB = dyn_cast<ConstantSDNode>(N1)) {
+ if (sd_match(N0, m_OneUse(m_Mul(m_Add(m_Value(A), m_ConstInt(CA)),
+ m_ConstInt(CM)))) &&
+ TLI.isLegalAddImmediate(
+ (CA * CM + CB->getAPIntValue()).getSExtValue())) {
+ SDValue Mul =
+ DAG.getNode(ISD::MUL, SDLoc(N1), VT, A, DAG.getConstant(CM, DL, VT));
+ return DAG.getNode(
+ ISD::ADD, DL, VT, Mul,
+ DAG.getConstant(CA * CM + CB->getAPIntValue(), DL, VT));
+ }
+ // Also look in case there is an intermediate add.
+ if (sd_match(
+ N0, m_OneUse(m_Add(m_OneUse(m_Mul(m_Add(m_Value(A), m_ConstInt(CA)),
+ m_ConstInt(CM))),
+ m_Value(B)))) &&
+ TLI.isLegalAddImmediate(
+ (CA * CM + CB->getAPIntValue()).getSExtValue())) {
+ SDValue Mul =
+ DAG.getNode(ISD::MUL, SDLoc(N1), VT, A, DAG.getConstant(CM, DL, VT));
+ SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N1), VT, Mul, B);
+ return DAG.getNode(
+ ISD::ADD, DL, VT, Add,
+ DAG.getConstant(CA * CM + CB->getAPIntValue(), DL, VT));
+ }
+ }
+
if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
return Combined;
diff --git a/llvm/test/CodeGen/AArch64/addimm-mulimm.ll b/llvm/test/CodeGen/AArch64/addimm-mulimm.ll
index cc6523d1bb1d58..a676b8df08e1b4 100644
--- a/llvm/test/CodeGen/AArch64/addimm-mulimm.ll
+++ b/llvm/test/CodeGen/AArch64/addimm-mulimm.ll
@@ -4,8 +4,8 @@
define i64 @addimm_mulimm_accept_00(i64 %a) {
; CHECK-LABEL: addimm_mulimm_accept_00:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #37
-; CHECK-NEXT: mov x9, #1147
+; CHECK-NEXT: mov w8, #37 // =0x25
+; CHECK-NEXT: mov x9, #1147 // =0x47b
; CHECK-NEXT: madd x0, x0, x8, x9
; CHECK-NEXT: ret
%tmp0 = add i64 %a, 31
@@ -16,8 +16,8 @@ define i64 @addimm_mulimm_accept_00(i64 %a) {
define i64 @addimm_mulimm_accept_01(i64 %a) {
; CHECK-LABEL: addimm_mulimm_accept_01:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #37
-; CHECK-NEXT: mov x9, #-1147
+; CHECK-NEXT: mov w8, #37 // =0x25
+; CHECK-NEXT: mov x9, #-1147 // =0xfffffffffffffb85
; CHECK-NEXT: madd x0, x0, x8, x9
; CHECK-NEXT: ret
%tmp0 = add i64 %a, -31
@@ -28,8 +28,8 @@ define i64 @addimm_mulimm_accept_01(i64 %a) {
define signext i32 @addimm_mulimm_accept_02(i32 signext %a) {
; CHECK-LABEL: addimm_mulimm_accept_02:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #37
-; CHECK-NEXT: mov w9, #1147
+; CHECK-NEXT: mov w8, #37 // =0x25
+; CHECK-NEXT: mov w9, #1147 // =0x47b
; CHECK-NEXT: madd w0, w0, w8, w9
; CHECK-NEXT: ret
%tmp0 = add i32 %a, 31
@@ -40,8 +40,8 @@ define signext i32 @addimm_mulimm_accept_02(i32 signext %a) {
define signext i32 @addimm_mulimm_accept_03(i32 signext %a) {
; CHECK-LABEL: addimm_mulimm_accept_03:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #37
-; CHECK-NEXT: mov w9, #-1147
+; CHECK-NEXT: mov w8, #37 // =0x25
+; CHECK-NEXT: mov w9, #-1147 // =0xfffffb85
; CHECK-NEXT: madd w0, w0, w8, w9
; CHECK-NEXT: ret
%tmp0 = add i32 %a, -31
@@ -52,8 +52,8 @@ define signext i32 @addimm_mulimm_accept_03(i32 signext %a) {
define i64 @addimm_mulimm_accept_10(i64 %a) {
; CHECK-LABEL: addimm_mulimm_accept_10:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #37
-; CHECK-NEXT: mov w9, #32888
+; CHECK-NEXT: mov w8, #37 // =0x25
+; CHECK-NEXT: mov w9, #32888 // =0x8078
; CHECK-NEXT: movk w9, #17, lsl #16
; CHECK-NEXT: madd x0, x0, x8, x9
; CHECK-NEXT: ret
@@ -65,8 +65,8 @@ define i64 @addimm_mulimm_accept_10(i64 %a) {
define i64 @addimm_mulimm_accept_11(i64 %a) {
; CHECK-LABEL: addimm_mulimm_accept_11:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #37
-; CHECK-NEXT: mov x9, #-32888
+; CHECK-NEXT: mov w8, #37 // =0x25
+; CHECK-NEXT: mov x9, #-32888 // =0xffffffffffff7f88
; CHECK-NEXT: movk x9, #65518, lsl #16
; CHECK-NEXT: madd x0, x0, x8, x9
; CHECK-NEXT: ret
@@ -78,8 +78,8 @@ define i64 @addimm_mulimm_accept_11(i64 %a) {
define signext i32 @addimm_mulimm_accept_12(i32 signext %a) {
; CHECK-LABEL: addimm_mulimm_accept_12:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #37
-; CHECK-NEXT: mov w9, #32888
+; CHECK-NEXT: mov w8, #37 // =0x25
+; CHECK-NEXT: mov w9, #32888 // =0x8078
; CHECK-NEXT: movk w9, #17, lsl #16
; CHECK-NEXT: madd w0, w0, w8, w9
; CHECK-NEXT: ret
@@ -91,8 +91,8 @@ define signext i32 @addimm_mulimm_accept_12(i32 signext %a) {
define signext i32 @addimm_mulimm_accept_13(i32 signext %a) {
; CHECK-LABEL: addimm_mulimm_accept_13:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #37
-; CHECK-NEXT: mov w9, #32648
+; CHECK-NEXT: mov w8, #37 // =0x25
+; CHECK-NEXT: mov w9, #32648 // =0x7f88
; CHECK-NEXT: movk w9, #65518, lsl #16
; CHECK-NEXT: madd w0, w0, w8, w9
; CHECK-NEXT: ret
@@ -104,7 +104,7 @@ define signext i32 @addimm_mulimm_accept_13(i32 signext %a) {
define i64 @addimm_mulimm_reject_00(i64 %a) {
; CHECK-LABEL: addimm_mulimm_reject_00:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #3700
+; CHECK-NEXT: mov w8, #3700 // =0xe74
; CHECK-NEXT: add x9, x0, #3100
; CHECK-NEXT: mul x0, x9, x8
; CHECK-NEXT: ret
@@ -116,7 +116,7 @@ define i64 @addimm_mulimm_reject_00(i64 %a) {
define i64 @addimm_mulimm_reject_01(i64 %a) {
; CHECK-LABEL: addimm_mulimm_reject_01:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #3700
+; CHECK-NEXT: mov w8, #3700 // =0xe74
; CHECK-NEXT: sub x9, x0, #3100
; CHECK-NEXT: mul x0, x9, x8
; CHECK-NEXT: ret
@@ -128,7 +128,7 @@ define i64 @addimm_mulimm_reject_01(i64 %a) {
define signext i32 @addimm_mulimm_reject_02(i32 signext %a) {
; CHECK-LABEL: addimm_mulimm_reject_02:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #3700
+; CHECK-NEXT: mov w8, #3700 // =0xe74
; CHECK-NEXT: add w9, w0, #3100
; CHECK-NEXT: mul w0, w9, w8
; CHECK-NEXT: ret
@@ -140,7 +140,7 @@ define signext i32 @addimm_mulimm_reject_02(i32 signext %a) {
define signext i32 @addimm_mulimm_reject_03(i32 signext %a) {
; CHECK-LABEL: addimm_mulimm_reject_03:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #3700
+; CHECK-NEXT: mov w8, #3700 // =0xe74
; CHECK-NEXT: sub w9, w0, #3100
; CHECK-NEXT: mul w0, w9, w8
; CHECK-NEXT: ret
@@ -148,3 +148,200 @@ define signext i32 @addimm_mulimm_reject_03(i32 signext %a) {
%tmp1 = mul i32 %tmp0, 3700
ret i32 %tmp1
}
+
+define signext i32 @addmuladd(i32 signext %a) {
+; CHECK-LABEL: addmuladd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #324 // =0x144
+; CHECK-NEXT: mov w9, #1300 // =0x514
+; CHECK-NEXT: madd w0, w0, w8, w9
+; CHECK-NEXT: ret
+ %tmp0 = add i32 %a, 4
+ %tmp1 = mul i32 %tmp0, 324
+ %tmp2 = add i32 %tmp1, 4
+ ret i32 %tmp2
+}
+
+define signext i32 @addmuladd_multiuse(i32 signext %a) {
+; CHECK-LABEL: addmuladd_multiuse:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #324 // =0x144
+; CHECK-NEXT: mov w9, #1300 // =0x514
+; CHECK-NEXT: madd w8, w0, w8, w9
+; CHECK-NEXT: add w9, w0, #4
+; CHECK-NEXT: eor w0, w9, w8
+; CHECK-NEXT: ret
+ %tmp0 = add i32 %a, 4
+ %tmp1 = mul i32 %tmp0, 324
+ %tmp2 = add i32 %tmp1, 4
+ %tmp3 = xor i32 %tmp0, %tmp2
+ ret i32 %tmp3
+}
+
+define signext i32 @addmuladd_multiusemul(i32 signext %a) {
+; CHECK-LABEL: addmuladd_multiusemul:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #324 // =0x144
+; CHECK-NEXT: mul w8, w0, w8
+; CHECK-NEXT: add w9, w8, #1296
+; CHECK-NEXT: add w8, w8, #1300
+; CHECK-NEXT: eor w0, w9, w8
+; CHECK-NEXT: ret
+ %tmp0 = add i32 %a, 4
+ %tmp1 = mul i32 %tmp0, 324
+ %tmp2 = add i32 %tmp1, 4
+ %tmp3 = xor i32 %tmp1, %tmp2
+ ret i32 %tmp3
+}
+
+define signext i32 @addmuladd_multiuse2(i32 signext %a) {
+; CHECK-LABEL: addmuladd_multiuse2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #324 // =0x144
+; CHECK-NEXT: lsl w9, w0, #2
+; CHECK-NEXT: mov w10, #1300 // =0x514
+; CHECK-NEXT: madd w8, w0, w8, w10
+; CHECK-NEXT: add w9, w9, #20
+; CHECK-NEXT: eor w0, w8, w9
+; CHECK-NEXT: ret
+ %tmp0 = add i32 %a, 4
+ %tmp1 = mul i32 %tmp0, 4
+ %tmp2 = add i32 %tmp1, 4
+ %tmp3 = mul i32 %tmp0, 324
+ %tmp4 = add i32 %tmp3, 4
+ %tmp5 = xor i32 %tmp4, %tmp2
+ ret i32 %tmp5
+}
+
+define signext i32 @addaddmuladd(i32 signext %a, i32 %b) {
+; CHECK-LABEL: addaddmuladd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #324 // =0x144
+; CHECK-NEXT: madd w8, w0, w8, w1
+; CHECK-NEXT: add w0, w8, #1300
+; CHECK-NEXT: ret
+ %tmp0 = add i32 %a, 4
+ %tmp1 = mul i32 %tmp0, 324
+ %tmp2 = add i32 %tmp1, %b
+ %tmp3 = add i32 %tmp2, 4
+ ret i32 %tmp3
+}
+
+define signext i32 @addaddmuladd_multiuse(i32 signext %a, i32 %b) {
+; CHECK-LABEL: addaddmuladd_multiuse:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #324 // =0x144
+; CHECK-NEXT: add w9, w0, #4
+; CHECK-NEXT: madd w8, w0, w8, w1
+; CHECK-NEXT: add w8, w8, #1300
+; CHECK-NEXT: eor w0, w9, w8
+; CHECK-NEXT: ret
+ %tmp0 = add i32 %a, 4
+ %tmp1 = mul i32 %tmp0, 324
+ %tmp2 = add i32 %tmp1, %b
+ %tmp3 = add i32 %tmp2, 4
+ %tmp4 = xor i32 %tmp0, %tmp3
+ ret i32 %tmp4
+}
+
+define signext i32 @addaddmuladd_multiuse2(i32 signext %a, i32 %b) {
+; CHECK-LABEL: addaddmuladd_multiuse2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #324 // =0x144
+; CHECK-NEXT: mov w9, #162 // =0xa2
+; CHECK-NEXT: madd w8, w0, w8, w1
+; CHECK-NEXT: madd w9, w0, w9, w1
+; CHECK-NEXT: add w8, w8, #1300
+; CHECK-NEXT: add w9, w9, #652
+; CHECK-NEXT: eor w0, w9, w8
+; CHECK-NEXT: ret
+ %tmp0 = add i32 %a, 4
+ %tmp1 = mul i32 %tmp0, 324
+ %tmp2 = add i32 %tmp1, %b
+ %tmp3 = add i32 %tmp2, 4
+ %tmp1b = mul i32 %tmp0, 162
+ %tmp2b = add i32 %tmp1b, %b
+ %tmp3b = add i32 %tmp2b, 4
+ %tmp4 = xor i32 %tmp3b, %tmp3
+ ret i32 %tmp4
+}
+
+define <4 x i32> @addmuladd_vec(<4 x i32> %a) {
+; CHECK-LABEL: addmuladd_vec:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #324 // =0x144
+; CHECK-NEXT: mov w9, #1300 // =0x514
+; CHECK-NEXT: dup v2.4s, w8
+; CHECK-NEXT: dup v1.4s, w9
+; CHECK-NEXT: mla v1.4s, v0.4s, v2.4s
+; CHECK-NEXT: mov v0.16b, v1.16b
+; CHECK-NEXT: ret
+ %tmp0 = add <4 x i32> %a, <i32 4, i32 4, i32 4, i32 4>
+ %tmp1 = mul <4 x i32> %tmp0, <i32 324, i32 324, i32 324, i32 324>
+ %tmp2 = add <4 x i32> %tmp1, <i32 4, i32 4, i32 4, i32 4>
+ ret <4 x i32> %tmp2
+}
+
+define <4 x i32> @addmuladd_vec_multiuse(<4 x i32> %a) {
+; CHECK-LABEL: addmuladd_vec_multiuse:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v1.4s, #4
+; CHECK-NEXT: mov w8, #324 // =0x144
+; CHECK-NEXT: dup v2.4s, w8
+; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: mla v1.4s, v0.4s, v2.4s
+; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ret
+ %tmp0 = add <4 x i32> %a, <i32 4, i32 4, i32 4, i32 4>
+ %tmp1 = mul <4 x i32> %tmp0, <i32 324, i32 324, i32 324, i32 324>
+ %tmp2 = add <4 x i32> %tmp1, <i32 4, i32 4, i32 4, i32 4>
+ %tmp3 = xor <4 x i32> %tmp0, %tmp2
+ ret <4 x i32> %tmp3
+}
+
+define void @addmuladd_gep(ptr %p, i64 %a) {
+; CHECK-LABEL: addmuladd_gep:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #40 // =0x28
+; CHECK-NEXT: str wzr, [x0, #10]!
+; CHECK-NEXT: madd x8, x1, x8, x0
+; CHECK-NEXT: str wzr, [x8, #20]
+; CHECK-NEXT: ret
+ %q = getelementptr i8, ptr %p, i64 10
+ %r = getelementptr [10 x [10 x i32]], ptr %q, i64 0, i64 %a, i64 5
+ store i32 0, ptr %q
+ store i32 0, ptr %r
+ ret void
+}
+
+define i32 @addmuladd_gep2(ptr %p, i32 %a) {
+; CHECK-LABEL: addmuladd_gep2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #3240 // =0xca8
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: smaddl x8, w1, w8, x0
+; CHECK-NEXT: ldr w8, [x8, #3260]
+; CHECK-NEXT: tbnz w8, #31, .LBB22_2
+; CHECK-NEXT: // %bb.1:
+; CHECK-NEXT: mov w0, wzr
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB22_2: // %then
+; CHECK-NEXT: sxtw x8, w1
+; CHECK-NEXT: add x8, x8, #1
+; CHECK-NEXT: str x8, [x0]
+; CHECK-NEXT: mov w0, #1 // =0x1
+; CHECK-NEXT: ret
+ %b = sext i32 %a to i64
+ %c = add nsw i64 %b, 1
+ %d = mul nsw i64 %c, 81
+ %g = getelementptr [10 x [10 x i32]], ptr %p, i64 0, i64 %d, i64 5
+ %l = load i32, ptr %g, align 4
+ %cc = icmp slt i32 %l, 0
+ br i1 %cc, label %then, label %else
+then:
+ store i64 %c, ptr %p
+ ret i32 1
+else:
+ ret i32 0
+}
+
|
SDValue Mul = | ||
DAG.getNode(ISD::MUL, SDLoc(N1), VT, A, DAG.getConstant(CM, DL, VT)); | ||
return DAG.getNode( | ||
ISD::ADD, DL, VT, Mul, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can flags be preserved?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We need to drop flags the flags for general reassociations. I think that would apply here unless you know of a reason why it wouldn't?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I mean I always just stick it in alive and see if it works
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is the generic combine: https://alive2.llvm.org/ce/z/k7Yvo3. In general the flags need removing.
In specific cases the flags might be retained, but it feels to me a bit niche and difficult to specify: https://alive2.llvm.org/ce/z/aK5Az3.
Let me know what you think.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think using constants in the proof is making this more permissive looking than it should be
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The first link is the general proof with any values. noundef %CA/%CB/%CM are the constants. In general the flags from the add/mul need to be dropped.
The second link was a specific example where certain constant can keep nuw/nsw, but it is specific to those constants.
It does look like if all the input adds/mul are nsw/nuw then we might be able to keep some flags on the remaining instructions. I'm not sure if it's worth it considering all the other transforms in DAG, but I can try and add that to the patch.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I played around with a bit and I'm not sure what the rule is, probably best to keep that in a separate patch if it's worth it
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh, seems to just be and on all instructions
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh - sorry. This is the version with all nuw: https://alive2.llvm.org/ce/z/nxaMNR
And the version with both: https://alive2.llvm.org/ce/z/65Xyyf
It didn't apply with just nsw though: https://alive2.llvm.org/ce/z/7eZnnp
I can remove that patch if you like, it would be just a case of dropping the last patch from this review.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Keep it - as long as some tests actually hit this
Also, as I think it might come up - there is no change in vector code because isLegalAddImmediate can't handle the immediates (and the examples I tried looked a little worse overall). |
0e0b2d2
to
f8147dd
Compare
else: | ||
ret i32 0 | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Probably missing all the flag tests?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OK - will do.
This is useful when the inner add has multiple uses, and so cannot be canonicalized by pushing the constants down through the mul. I have added patterns for both `add(mul(add(A, CA), CM), CB)` and with an extra add `add(add(mul(add(A, CA), CM), B) CB)` as the second can come up when lowering geps.
This is useful when the inner add has multiple uses, and so cannot be
canonicalized by pushing the constants down through the mul. I have added
patterns for both
add(mul(add(A, CA), CM), CB)
and with an extra addadd(add(mul(add(A, CA), CM), B) CB)
as the second can come up when loweringgeps.