@@ -1023,8 +1023,8 @@ void HexagonDAGToDAGISel::PreprocessISelDAG() {
1023
1023
}
1024
1024
}
1025
1025
1026
- // Transform: (store ch addr (add x (add (shl y c) e)))
1027
- // to: (store ch addr (add x (shl (add y d) c))),
1026
+ // Transform: (store ch val (add x (add (shl y c) e)))
1027
+ // to: (store ch val (add x (shl (add y d) c))),
1028
1028
// where e = (shl d c) for some integer d.
1029
1029
// The purpose of this is to enable generation of loads/stores with
1030
1030
// shifted addressing mode, i.e. mem(x+y<<#c). For that, the shift
@@ -1033,7 +1033,7 @@ void HexagonDAGToDAGISel::PreprocessISelDAG() {
1033
1033
if (I->getOpcode () != ISD::STORE)
1034
1034
continue ;
1035
1035
1036
- // I matched: (store ch addr Off)
1036
+ // I matched: (store ch val Off)
1037
1037
SDValue Off = I->getOperand (2 );
1038
1038
// Off needs to match: (add x (add (shl y c) (shl d c)))
1039
1039
if (Off.getOpcode () != ISD::ADD)
@@ -1076,6 +1076,78 @@ void HexagonDAGToDAGISel::PreprocessISelDAG() {
1076
1076
ReplaceNode (T0.getNode (), NewShl.getNode ());
1077
1077
}
1078
1078
1079
+ // Transform: (load ch (add x (and (srl y c) Mask)))
1080
+ // to: (load ch (add x (shl (srl y d) d-c)))  [same for a store's address]
1081
+ // where
1082
+ // Mask = 00..0 111..1 0.0
1083
+ //          |     |     +-- d-c 0s, and d-c is 0, 1 or 2.
1084
+ //          |     +-------- 1s
1085
+ //          +-------------- at most c 0s
1086
+ // Motivating example:
1087
+ // DAG combiner optimizes (add x (shl (srl y 5) 2))
1088
+ // to (add x (and (srl y 3) 1FFFFFFC))
1089
+ // which results in a constant-extended and(##...,lsr). This transformation
1090
+ // undoes this simplification for cases where the shl can be folded into
1091
+ // an addressing mode.
1092
+ for (SDNode *N : Nodes) {
1093
+ unsigned Opc = N->getOpcode ();
1094
+ if (Opc != ISD::LOAD && Opc != ISD::STORE)
1095
+ continue ;
1096
+ SDValue Addr = Opc == ISD::LOAD ? N->getOperand (1 ) : N->getOperand (2 );
1097
+ // Addr must match: (add x T0)
1098
+ if (Addr.getOpcode () != ISD::ADD)
1099
+ continue ;
1100
+ SDValue T0 = Addr.getOperand (1 );
1101
+ // T0 must match: (and T1 Mask)
1102
+ if (T0.getOpcode () != ISD::AND)
1103
+ continue ;
1104
+
1105
+ // We have an AND.
1106
+ //
1107
+ // Check the first operand. It must be: (srl y c).
1108
+ SDValue S = T0.getOperand (0 );
1109
+ if (S.getOpcode () != ISD::SRL)
1110
+ continue ;
1111
+ ConstantSDNode *SN = dyn_cast<ConstantSDNode>(S.getOperand (1 ).getNode ());
1112
+ if (SN == nullptr )
1113
+ continue ;
1114
+ if (SN->getAPIntValue ().getBitWidth () != 32 )
1115
+ continue ;
1116
+ uint32_t CV = SN->getZExtValue ();
1117
+
1118
+ // Check the second operand: the supposed mask.
1119
+ ConstantSDNode *MN = dyn_cast<ConstantSDNode>(T0.getOperand (1 ).getNode ());
1120
+ if (MN == nullptr )
1121
+ continue ;
1122
+ if (MN->getAPIntValue ().getBitWidth () != 32 )
1123
+ continue ;
1124
+ uint32_t Mask = MN->getZExtValue ();
1125
+ // Examine the mask.
1126
+ uint32_t TZ = countTrailingZeros (Mask);
1127
+ uint32_t M1 = countTrailingOnes (Mask >> TZ);
1128
+ uint32_t LZ = countLeadingZeros (Mask);
1129
+ // Trailing zeros + middle ones + leading zeros must equal the width.
1130
+ if (TZ + M1 + LZ != 32 )
1131
+ continue ;
1132
+ // The number of trailing zeros will be encoded in the addressing mode.
1133
+ if (TZ > 2 )
1134
+ continue ;
1135
+ // The number of leading zeros must be at most c.
1136
+ if (LZ > CV)
1137
+ continue ;
1138
+
1139
+ // All looks good.
1140
+ SDValue Y = S.getOperand (0 );
1141
+ EVT VT = Addr.getValueType ();
1142
+ SDLoc dl (S);
1143
+ // TZ = D-C, so D = TZ+C.
1144
+ SDValue D = DAG.getConstant (TZ+CV, dl, VT);
1145
+ SDValue DC = DAG.getConstant (TZ, dl, VT);
1146
+ SDValue NewSrl = DAG.getNode (ISD::SRL, dl, VT, Y, D);
1147
+ SDValue NewShl = DAG.getNode (ISD::SHL, dl, VT, NewSrl, DC);
1148
+ ReplaceNode (T0.getNode (), NewShl.getNode ());
1149
+ }
1150
+
1079
1151
if (EnableAddressRebalancing) {
1080
1152
rebalanceAddressTrees ();
1081
1153
0 commit comments