@@ -1173,7 +1173,7 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
1173
1173
return true ;
1174
1174
}
1175
1175
1176
- static bool isVectorElementTypeUpsized (EVT EltVT) {
1176
+ static bool isSubVectorPackedInI32 (EVT EltVT) {
1177
1177
// Despite vectors like v8i8, v16i8, v8i16 being within the bit-limit for
1178
1178
// total load/store size, PTX syntax only supports v2/v4. Thus, we can't use
1179
1179
// vectorized loads/stores with the actual element type for i8/i16 as that
@@ -1186,60 +1186,54 @@ static bool isVectorElementTypeUpsized(EVT EltVT) {
1186
1186
1187
1187
bool NVPTXDAGToDAGISel::tryLoadVector (SDNode *N) {
1188
1188
MemSDNode *MemSD = cast<MemSDNode>(N);
1189
- EVT LoadedVT = MemSD->getMemoryVT ();
1190
- if (!LoadedVT .isSimple ())
1189
+ const EVT MemEVT = MemSD->getMemoryVT ();
1190
+ if (!MemEVT .isSimple ())
1191
1191
return false ;
1192
+ const MVT MemVT = MemEVT.getSimpleVT ();
1192
1193
1193
1194
// Address Space Setting
1194
1195
unsigned int CodeAddrSpace = getCodeAddrSpace (MemSD);
1195
1196
if (canLowerToLDG (MemSD, *Subtarget, CodeAddrSpace, MF)) {
1196
1197
return tryLDGLDU (N);
1197
1198
}
1198
1199
1200
+ EVT EltVT = N->getValueType (0 );
1199
1201
SDLoc DL (N);
1200
1202
SDValue Chain = N->getOperand (0 );
1201
1203
auto [Ordering, Scope] = insertMemoryInstructionFence (DL, Chain, MemSD);
1202
1204
1203
- // Vector Setting
1204
- MVT SimpleVT = LoadedVT.getSimpleVT ();
1205
-
1206
1205
// Type Setting: fromType + fromTypeWidth
1207
1206
//
1208
1207
// Sign : ISD::SEXTLOAD
1209
1208
// Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
1210
1209
// type is integer
1211
1210
// Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
1212
- MVT ScalarVT = SimpleVT.getScalarType ();
1213
1211
// Read at least 8 bits (predicates are stored as 8-bit values)
1214
- unsigned FromTypeWidth = std::max (8U , (unsigned )ScalarVT.getSizeInBits ());
1215
- unsigned int FromType;
1216
1212
// The last operand holds the original LoadSDNode::getExtensionType() value
1217
- unsigned ExtensionType = cast<ConstantSDNode>(
1218
- N->getOperand (N->getNumOperands () - 1 ))->getZExtValue ();
1219
- if (ExtensionType == ISD::SEXTLOAD)
1220
- FromType = NVPTX::PTXLdStInstCode::Signed;
1221
- else
1222
- FromType = getLdStRegType (ScalarVT);
1213
+ const unsigned TotalWidth = MemVT.getSizeInBits ();
1214
+ unsigned ExtensionType = N->getConstantOperandVal (N->getNumOperands () - 1 );
1215
+ unsigned FromType = (ExtensionType == ISD::SEXTLOAD)
1216
+ ? NVPTX::PTXLdStInstCode::Signed
1217
+ : getLdStRegType (MemVT.getScalarType ());
1223
1218
1224
1219
unsigned VecType;
1225
-
1220
+ unsigned FromTypeWidth;
1226
1221
switch (N->getOpcode ()) {
1227
1222
case NVPTXISD::LoadV2:
1223
+ FromTypeWidth = TotalWidth / 2 ;
1228
1224
VecType = NVPTX::PTXLdStInstCode::V2;
1229
1225
break ;
1230
1226
case NVPTXISD::LoadV4:
1227
+ FromTypeWidth = TotalWidth / 4 ;
1231
1228
VecType = NVPTX::PTXLdStInstCode::V4;
1232
1229
break ;
1233
1230
default :
1234
1231
return false ;
1235
1232
}
1236
1233
1237
- EVT EltVT = N->getValueType (0 );
1238
-
1239
- if (isVectorElementTypeUpsized (EltVT)) {
1234
+ if (isSubVectorPackedInI32 (EltVT)) {
1240
1235
EltVT = MVT::i32 ;
1241
1236
FromType = NVPTX::PTXLdStInstCode::Untyped;
1242
- FromTypeWidth = 32 ;
1243
1237
}
1244
1238
1245
1239
SDValue Offset, Base;
@@ -1289,9 +1283,14 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
1289
1283
// LDG/LDU SD node (from custom vector handling), then it's the second operand
1290
1284
SDValue Op1 = N->getOperand (N->getOpcode () == ISD::INTRINSIC_W_CHAIN ? 2 : 1 );
1291
1285
1292
- EVT OrigType = N->getValueType (0 );
1286
+ const EVT OrigType = N->getValueType (0 );
1293
1287
EVT EltVT = Mem->getMemoryVT ();
1294
1288
unsigned NumElts = 1 ;
1289
+
1290
+ if (EltVT == MVT::i128 || EltVT == MVT::f128 ) {
1291
+ EltVT = MVT::i64 ;
1292
+ NumElts = 2 ;
1293
+ }
1295
1294
if (EltVT.isVector ()) {
1296
1295
NumElts = EltVT.getVectorNumElements ();
1297
1296
EltVT = EltVT.getVectorElementType ();
@@ -1311,11 +1310,9 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
1311
1310
// Build the "promoted" result VTList for the load. If we are really loading
1312
1311
// i8s, then the return type will be promoted to i16 since we do not expose
1313
1312
// 8-bit registers in NVPTX.
1314
- EVT NodeVT = (EltVT == MVT::i8 ) ? MVT::i16 : EltVT;
1313
+ const EVT NodeVT = (EltVT == MVT::i8 ) ? MVT::i16 : EltVT;
1315
1314
SmallVector<EVT, 5 > InstVTs;
1316
- for (unsigned i = 0 ; i != NumElts; ++i) {
1317
- InstVTs.push_back (NodeVT);
1318
- }
1315
+ InstVTs.append (NumElts, NodeVT);
1319
1316
InstVTs.push_back (MVT::Other);
1320
1317
SDVTList InstVTList = CurDAG->getVTList (InstVTs);
1321
1318
SDValue Chain = N->getOperand (0 );
@@ -1494,6 +1491,7 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
1494
1491
EVT EltVT = Op1.getValueType ();
1495
1492
MemSDNode *MemSD = cast<MemSDNode>(N);
1496
1493
EVT StoreVT = MemSD->getMemoryVT ();
1494
+ assert (StoreVT.isSimple () && " Store value is not simple" );
1497
1495
1498
1496
// Address Space Setting
1499
1497
unsigned CodeAddrSpace = getCodeAddrSpace (MemSD);
@@ -1508,35 +1506,35 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
1508
1506
1509
1507
// Type Setting: toType + toTypeWidth
1510
1508
// - for integer type, always use 'u'
1511
- assert (StoreVT.isSimple () && " Store value is not simple" );
1512
- MVT ScalarVT = StoreVT.getSimpleVT ().getScalarType ();
1513
- unsigned ToTypeWidth = ScalarVT.getSizeInBits ();
1514
- unsigned ToType = getLdStRegType (ScalarVT);
1509
+ const unsigned TotalWidth = StoreVT.getSimpleVT ().getSizeInBits ();
1510
+ unsigned ToType = getLdStRegType (StoreVT.getSimpleVT ().getScalarType ());
1515
1511
1516
1512
SmallVector<SDValue, 12 > Ops;
1517
1513
SDValue N2;
1518
1514
unsigned VecType;
1515
+ unsigned ToTypeWidth;
1519
1516
1520
1517
switch (N->getOpcode ()) {
1521
1518
case NVPTXISD::StoreV2:
1522
1519
VecType = NVPTX::PTXLdStInstCode::V2;
1523
1520
Ops.append ({N->getOperand (1 ), N->getOperand (2 )});
1524
1521
N2 = N->getOperand (3 );
1522
+ ToTypeWidth = TotalWidth / 2 ;
1525
1523
break ;
1526
1524
case NVPTXISD::StoreV4:
1527
1525
VecType = NVPTX::PTXLdStInstCode::V4;
1528
1526
Ops.append ({N->getOperand (1 ), N->getOperand (2 ), N->getOperand (3 ),
1529
1527
N->getOperand (4 )});
1530
1528
N2 = N->getOperand (5 );
1529
+ ToTypeWidth = TotalWidth / 4 ;
1531
1530
break ;
1532
1531
default :
1533
1532
return false ;
1534
1533
}
1535
1534
1536
- if (isVectorElementTypeUpsized (EltVT)) {
1535
+ if (isSubVectorPackedInI32 (EltVT)) {
1537
1536
EltVT = MVT::i32 ;
1538
1537
ToType = NVPTX::PTXLdStInstCode::Untyped;
1539
- ToTypeWidth = 32 ;
1540
1538
}
1541
1539
1542
1540
SDValue Offset, Base;
0 commit comments