@@ -2182,6 +2182,84 @@ bool NVPTXDAGToDAGISel::tryStoreRetval(SDNode *N) {
2182
2182
return true ;
2183
2183
}
2184
2184
2185
// Helpers for constructing a vector StoreParam opcode by token pasting
// (ex: NVPTX::StoreParamV4F32_iiri).
//
//   ty    - type suffix pasted into the opcode name (I8, I16, I32, I64, F32,
//           F64).
//   isimm - indexable sequence of booleans; isimm[k] selects the 'i' suffix
//           for element k when true and the 'r' suffix when false.
//
// The "H" helpers resolve one operand position at a time: each level turns
// one runtime boolean into a literal 'i' or 'r' token and forwards it to the
// next level, until the innermost helper pastes the full opcode name.
//
// Every conditional expansion is fully parenthesized so that a macro
// invocation can be used safely inside a larger expression (for example as an
// operand of ==).
#define getOpcV2H(ty, op0, op1) NVPTX::StoreParamV2##ty##_##op0##op1

// Resolve the second operand of a V2 opcode; op0 is already a literal token.
#define getOpcV2H1(ty, op0, op1)                                               \
  ((op1) ? getOpcV2H(ty, op0, i) : getOpcV2H(ty, op0, r))

// Pick the V2 opcode for type ty from the two flags in isimm.
#define getOpcodeForVectorStParamV2(ty, isimm)                                 \
  (((isimm)[0]) ? getOpcV2H1(ty, i, (isimm)[1])                                \
                : getOpcV2H1(ty, r, (isimm)[1]))

#define getOpcV4H(ty, op0, op1, op2, op3)                                      \
  NVPTX::StoreParamV4##ty##_##op0##op1##op2##op3

// Resolve the fourth operand of a V4 opcode.
#define getOpcV4H3(ty, op0, op1, op2, op3)                                     \
  ((op3) ? getOpcV4H(ty, op0, op1, op2, i) : getOpcV4H(ty, op0, op1, op2, r))

// Resolve the third operand of a V4 opcode.
#define getOpcV4H2(ty, op0, op1, op2, op3)                                     \
  ((op2) ? getOpcV4H3(ty, op0, op1, i, op3)                                    \
         : getOpcV4H3(ty, op0, op1, r, op3))

// Resolve the second operand of a V4 opcode.
#define getOpcV4H1(ty, op0, op1, op2, op3)                                     \
  ((op1) ? getOpcV4H2(ty, op0, i, op2, op3)                                    \
         : getOpcV4H2(ty, op0, r, op2, op3))

// Pick the V4 opcode for type ty from the four flags in isimm.
#define getOpcodeForVectorStParamV4(ty, isimm)                                 \
  (((isimm)[0]) ? getOpcV4H1(ty, i, (isimm)[1], (isimm)[2], (isimm)[3])        \
                : getOpcV4H1(ty, r, (isimm)[1], (isimm)[2], (isimm)[3]))

// Pick the V2 opcode when n == 2 and the V4 opcode for any other n. Both
// variants must exist for ty, because both arms are always expanded.
#define getOpcodeForVectorStParam(n, ty, isimm)                                \
  (((n) == 2) ? getOpcodeForVectorStParamV2(ty, isimm)                         \
              : getOpcodeForVectorStParamV4(ty, isimm))
2213
+
2214
+ static std::optional<unsigned >
2215
+ pickOpcodeForVectorStParam (SmallVector<SDValue, 8 > &Ops, unsigned NumElts,
2216
+ MVT::SimpleValueType MemTy, SelectionDAG *CurDAG,
2217
+ SDLoc DL) {
2218
+ // Determine which inputs are registers and immediates make new operators
2219
+ // with constant values
2220
+ SmallVector<bool , 4 > IsImm (NumElts, false );
2221
+ for (unsigned i = 0 ; i < NumElts; i++) {
2222
+ IsImm[i] = (isa<ConstantSDNode>(Ops[i]) || isa<ConstantFPSDNode>(Ops[i]));
2223
+ if (IsImm[i]) {
2224
+ SDValue Imm = Ops[i];
2225
+ if (MemTy == MVT::f32 || MemTy == MVT::f64 ) {
2226
+ const ConstantFPSDNode *ConstImm = cast<ConstantFPSDNode>(Imm);
2227
+ const ConstantFP *CF = ConstImm->getConstantFPValue ();
2228
+ Imm = CurDAG->getTargetConstantFP (*CF, DL, Imm->getValueType (0 ));
2229
+ } else {
2230
+ const ConstantSDNode *ConstImm = cast<ConstantSDNode>(Imm);
2231
+ const ConstantInt *CI = ConstImm->getConstantIntValue ();
2232
+ Imm = CurDAG->getTargetConstant (*CI, DL, Imm->getValueType (0 ));
2233
+ }
2234
+ Ops[i] = Imm;
2235
+ }
2236
+ }
2237
+
2238
+ // Get opcode for MemTy, size, and register/immediate operand ordering
2239
+ switch (MemTy) {
2240
+ case MVT::i8 :
2241
+ return getOpcodeForVectorStParam (NumElts, I8, IsImm);
2242
+ case MVT::i16 :
2243
+ return getOpcodeForVectorStParam (NumElts, I16, IsImm);
2244
+ case MVT::i32 :
2245
+ return getOpcodeForVectorStParam (NumElts, I32, IsImm);
2246
+ case MVT::i64 :
2247
+ if (NumElts == 4 )
2248
+ return std::nullopt;
2249
+ return getOpcodeForVectorStParamV2 (I64, IsImm);
2250
+ case MVT::f32 :
2251
+ return getOpcodeForVectorStParam (NumElts, F32, IsImm);
2252
+ case MVT::f64 :
2253
+ if (NumElts == 4 )
2254
+ return std::nullopt;
2255
+ return getOpcodeForVectorStParamV2 (F64, IsImm);
2256
+ case MVT::f16 :
2257
+ case MVT::v2f16:
2258
+ default :
2259
+ return std::nullopt;
2260
+ }
2261
+ }
2262
+
2185
2263
bool NVPTXDAGToDAGISel::tryStoreParam (SDNode *N) {
2186
2264
SDLoc DL (N);
2187
2265
SDValue Chain = N->getOperand (0 );
@@ -2228,12 +2306,34 @@ bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) {
2228
2306
switch (NumElts) {
2229
2307
default :
2230
2308
return false ;
2231
- case 1 :
2232
- Opcode = pickOpcodeForVT (Mem->getMemoryVT ().getSimpleVT ().SimpleTy ,
2233
- NVPTX::StoreParamI8, NVPTX::StoreParamI16,
2234
- NVPTX::StoreParamI32, NVPTX::StoreParamI64,
2235
- NVPTX::StoreParamF32, NVPTX::StoreParamF64);
2236
- if (Opcode == NVPTX::StoreParamI8) {
2309
+ case 1 : {
2310
+ MVT::SimpleValueType MemTy = Mem->getMemoryVT ().getSimpleVT ().SimpleTy ;
2311
+ SDValue Imm = Ops[0 ];
2312
+ if (MemTy != MVT::f16 && MemTy != MVT::v2f16 &&
2313
+ (isa<ConstantSDNode>(Imm) || isa<ConstantFPSDNode>(Imm))) {
2314
+ // Convert immediate to target constant
2315
+ if (MemTy == MVT::f32 || MemTy == MVT::f64 ) {
2316
+ const ConstantFPSDNode *ConstImm = cast<ConstantFPSDNode>(Imm);
2317
+ const ConstantFP *CF = ConstImm->getConstantFPValue ();
2318
+ Imm = CurDAG->getTargetConstantFP (*CF, DL, Imm->getValueType (0 ));
2319
+ } else {
2320
+ const ConstantSDNode *ConstImm = cast<ConstantSDNode>(Imm);
2321
+ const ConstantInt *CI = ConstImm->getConstantIntValue ();
2322
+ Imm = CurDAG->getTargetConstant (*CI, DL, Imm->getValueType (0 ));
2323
+ }
2324
+ Ops[0 ] = Imm;
2325
+ // Use immediate version of store param
2326
+ Opcode = pickOpcodeForVT (MemTy, NVPTX::StoreParamI8_i,
2327
+ NVPTX::StoreParamI16_i, NVPTX::StoreParamI32_i,
2328
+ NVPTX::StoreParamI64_i, NVPTX::StoreParamF32_i,
2329
+ NVPTX::StoreParamF64_i);
2330
+ } else
2331
+ Opcode =
2332
+ pickOpcodeForVT (Mem->getMemoryVT ().getSimpleVT ().SimpleTy ,
2333
+ NVPTX::StoreParamI8_r, NVPTX::StoreParamI16_r,
2334
+ NVPTX::StoreParamI32_r, NVPTX::StoreParamI64_r,
2335
+ NVPTX::StoreParamF32_r, NVPTX::StoreParamF64_r);
2336
+ if (Opcode == NVPTX::StoreParamI8_r) {
2237
2337
// Fine tune the opcode depending on the size of the operand.
2238
2338
// This helps to avoid creating redundant COPY instructions in
2239
2339
// InstrEmitter::AddRegisterOperand().
@@ -2249,27 +2349,22 @@ bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) {
2249
2349
}
2250
2350
}
2251
2351
break ;
2352
+ }
2252
2353
case 2 :
2253
- Opcode = pickOpcodeForVT (Mem->getMemoryVT ().getSimpleVT ().SimpleTy ,
2254
- NVPTX::StoreParamV2I8, NVPTX::StoreParamV2I16,
2255
- NVPTX::StoreParamV2I32, NVPTX::StoreParamV2I64,
2256
- NVPTX::StoreParamV2F32, NVPTX::StoreParamV2F64);
2257
- break ;
2258
- case 4 :
2259
- Opcode = pickOpcodeForVT (Mem->getMemoryVT ().getSimpleVT ().SimpleTy ,
2260
- NVPTX::StoreParamV4I8, NVPTX::StoreParamV4I16,
2261
- NVPTX::StoreParamV4I32, std::nullopt,
2262
- NVPTX::StoreParamV4F32, std::nullopt);
2354
+ case 4 : {
2355
+ MVT::SimpleValueType MemTy = Mem->getMemoryVT ().getSimpleVT ().SimpleTy ;
2356
+ Opcode = pickOpcodeForVectorStParam (Ops, NumElts, MemTy, CurDAG, DL);
2263
2357
break ;
2264
2358
}
2359
+ }
2265
2360
if (!Opcode)
2266
2361
return false ;
2267
2362
break ;
2268
2363
// Special case: if we have a sign-extend/zero-extend node, insert the
2269
2364
// conversion instruction first, and use that as the value operand to
2270
2365
// the selected StoreParam node.
2271
2366
case NVPTXISD::StoreParamU32: {
2272
- Opcode = NVPTX::StoreParamI32 ;
2367
+ Opcode = NVPTX::StoreParamI32_r ;
2273
2368
SDValue CvtNone = CurDAG->getTargetConstant (NVPTX::PTXCvtMode::NONE, DL,
2274
2369
MVT::i32 );
2275
2370
SDNode *Cvt = CurDAG->getMachineNode (NVPTX::CVT_u32_u16, DL,
@@ -2278,7 +2373,7 @@ bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) {
2278
2373
break ;
2279
2374
}
2280
2375
case NVPTXISD::StoreParamS32: {
2281
- Opcode = NVPTX::StoreParamI32 ;
2376
+ Opcode = NVPTX::StoreParamI32_r ;
2282
2377
SDValue CvtNone = CurDAG->getTargetConstant (NVPTX::PTXCvtMode::NONE, DL,
2283
2378
MVT::i32 );
2284
2379
SDNode *Cvt = CurDAG->getMachineNode (NVPTX::CVT_s32_s16, DL,
0 commit comments