Skip to content

Commit da7462a

Browse files
authored
[NVPTX] Add tex.grad.cube{array} intrinsics (llvm#77693)
Extend IR support for the PTX `tex` instruction described in [PTX ISA. 9.7.9.3. Texture Instructions: tex](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#texture-instructions-tex). Add support for the unified-mode versions of the `tex.grad.cube{array}` variants added in PTX ISA 4.3.
1 parent 735adbf commit da7462a

File tree

7 files changed

+200
-11
lines changed

7 files changed

+200
-11
lines changed

llvm/include/llvm/IR/IntrinsicsNVVM.td

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2499,6 +2499,47 @@ def int_nvvm_tex_unified_cube_array_level_v4u32_f32
24992499
llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
25002500
"llvm.nvvm.tex.unified.cube.array.level.v4u32.f32">;
25012501

2502+
def int_nvvm_tex_unified_cube_grad_v4f32_f32
2503+
: Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2504+
[llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2505+
llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
2506+
llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2507+
"llvm.nvvm.tex.unified.cube.grad.v4f32.f32">;
2508+
def int_nvvm_tex_unified_cube_grad_v4s32_f32
2509+
: Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2510+
[llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2511+
llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
2512+
llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2513+
"llvm.nvvm.tex.unified.cube.grad.v4s32.f32">;
2514+
def int_nvvm_tex_unified_cube_grad_v4u32_f32
2515+
: Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2516+
[llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2517+
llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
2518+
llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2519+
"llvm.nvvm.tex.unified.cube.grad.v4u32.f32">;
2520+
2521+
def int_nvvm_tex_unified_cube_array_grad_v4f32_f32
2522+
: Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2523+
[llvm_i64_ty, llvm_i32_ty,
2524+
llvm_float_ty, llvm_float_ty,
2525+
llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
2526+
llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2527+
"llvm.nvvm.tex.unified.cube.array.grad.v4f32.f32">;
2528+
def int_nvvm_tex_unified_cube_array_grad_v4s32_f32
2529+
: Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2530+
[llvm_i64_ty, llvm_i32_ty,
2531+
llvm_float_ty, llvm_float_ty,
2532+
llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
2533+
llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2534+
"llvm.nvvm.tex.unified.cube.array.grad.v4s32.f32">;
2535+
def int_nvvm_tex_unified_cube_array_grad_v4u32_f32
2536+
: Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2537+
[llvm_i64_ty, llvm_i32_ty,
2538+
llvm_float_ty, llvm_float_ty,
2539+
llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
2540+
llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2541+
"llvm.nvvm.tex.unified.cube.array.grad.v4u32.f32">;
2542+
25022543
def int_nvvm_tld4_unified_r_2d_v4f32_f32
25032544
: Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
25042545
[llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],

llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -309,6 +309,12 @@ void NVPTXDAGToDAGISel::Select(SDNode *N) {
309309
case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
310310
case NVPTXISD::TexUnifiedCubeArrayU32Float:
311311
case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
312+
case NVPTXISD::TexUnifiedCubeFloatFloatGrad:
313+
case NVPTXISD::TexUnifiedCubeS32FloatGrad:
314+
case NVPTXISD::TexUnifiedCubeU32FloatGrad:
315+
case NVPTXISD::TexUnifiedCubeArrayFloatFloatGrad:
316+
case NVPTXISD::TexUnifiedCubeArrayS32FloatGrad:
317+
case NVPTXISD::TexUnifiedCubeArrayU32FloatGrad:
312318
case NVPTXISD::Tld4UnifiedR2DFloatFloat:
313319
case NVPTXISD::Tld4UnifiedG2DFloatFloat:
314320
case NVPTXISD::Tld4UnifiedB2DFloatFloat:
@@ -2763,6 +2769,24 @@ bool NVPTXDAGToDAGISel::tryTextureIntrinsic(SDNode *N) {
27632769
case NVPTXISD::Tld4UnifiedA2DU64Float:
27642770
Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32_R;
27652771
break;
2772+
case NVPTXISD::TexUnifiedCubeFloatFloatGrad:
2773+
Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_GRAD_R;
2774+
break;
2775+
case NVPTXISD::TexUnifiedCubeS32FloatGrad:
2776+
Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_GRAD_R;
2777+
break;
2778+
case NVPTXISD::TexUnifiedCubeU32FloatGrad:
2779+
Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_GRAD_R;
2780+
break;
2781+
case NVPTXISD::TexUnifiedCubeArrayFloatFloatGrad:
2782+
Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_GRAD_R;
2783+
break;
2784+
case NVPTXISD::TexUnifiedCubeArrayS32FloatGrad:
2785+
Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_GRAD_R;
2786+
break;
2787+
case NVPTXISD::TexUnifiedCubeArrayU32FloatGrad:
2788+
Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_GRAD_R;
2789+
break;
27662790
}
27672791

27682792
// Copy over operands

llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1257,6 +1257,18 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
12571257
return "NVPTXISD::TexUnifiedCubeArrayU32Float";
12581258
case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
12591259
return "NVPTXISD::TexUnifiedCubeArrayU32FloatLevel";
1260+
case NVPTXISD::TexUnifiedCubeFloatFloatGrad:
1261+
return "NVPTXISD::TexUnifiedCubeFloatFloatGrad";
1262+
case NVPTXISD::TexUnifiedCubeS32FloatGrad:
1263+
return "NVPTXISD::TexUnifiedCubeS32FloatGrad";
1264+
case NVPTXISD::TexUnifiedCubeU32FloatGrad:
1265+
return "NVPTXISD::TexUnifiedCubeU32FloatGrad";
1266+
case NVPTXISD::TexUnifiedCubeArrayFloatFloatGrad:
1267+
return "NVPTXISD::TexUnifiedCubeArrayFloatFloatGrad";
1268+
case NVPTXISD::TexUnifiedCubeArrayS32FloatGrad:
1269+
return "NVPTXISD::TexUnifiedCubeArrayS32FloatGrad";
1270+
case NVPTXISD::TexUnifiedCubeArrayU32FloatGrad:
1271+
return "NVPTXISD::TexUnifiedCubeArrayU32FloatGrad";
12601272
case NVPTXISD::Tld4UnifiedR2DFloatFloat:
12611273
return "NVPTXISD::Tld4UnifiedR2DFloatFloat";
12621274
case NVPTXISD::Tld4UnifiedG2DFloatFloat:
@@ -3654,6 +3666,19 @@ static unsigned getOpcForTextureInstr(unsigned Intrinsic) {
36543666
case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32:
36553667
return NVPTXISD::TexUnifiedCubeArrayU32FloatLevel;
36563668

3669+
case Intrinsic::nvvm_tex_unified_cube_grad_v4f32_f32:
3670+
return NVPTXISD::TexUnifiedCubeFloatFloatGrad;
3671+
case Intrinsic::nvvm_tex_unified_cube_grad_v4s32_f32:
3672+
return NVPTXISD::TexUnifiedCubeS32FloatGrad;
3673+
case Intrinsic::nvvm_tex_unified_cube_grad_v4u32_f32:
3674+
return NVPTXISD::TexUnifiedCubeU32FloatGrad;
3675+
case Intrinsic::nvvm_tex_unified_cube_array_grad_v4f32_f32:
3676+
return NVPTXISD::TexUnifiedCubeArrayFloatFloatGrad;
3677+
case Intrinsic::nvvm_tex_unified_cube_array_grad_v4s32_f32:
3678+
return NVPTXISD::TexUnifiedCubeArrayS32FloatGrad;
3679+
case Intrinsic::nvvm_tex_unified_cube_array_grad_v4u32_f32:
3680+
return NVPTXISD::TexUnifiedCubeArrayU32FloatGrad;
3681+
36573682
case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32:
36583683
return NVPTXISD::Tld4UnifiedR2DFloatFloat;
36593684
case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32:
@@ -4538,6 +4563,8 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
45384563
case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32:
45394564
case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32:
45404565
case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32:
4566+
case Intrinsic::nvvm_tex_unified_cube_grad_v4f32_f32:
4567+
case Intrinsic::nvvm_tex_unified_cube_array_grad_v4f32_f32:
45414568
case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32:
45424569
case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32:
45434570
case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32:
@@ -4654,6 +4681,10 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
46544681
case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32:
46554682
case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32:
46564683
case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32:
4684+
case Intrinsic::nvvm_tex_unified_cube_grad_v4s32_f32:
4685+
case Intrinsic::nvvm_tex_unified_cube_grad_v4u32_f32:
4686+
case Intrinsic::nvvm_tex_unified_cube_array_grad_v4s32_f32:
4687+
case Intrinsic::nvvm_tex_unified_cube_array_grad_v4u32_f32:
46574688
case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32:
46584689
case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32:
46594690
case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32:

llvm/lib/Target/NVPTX/NVPTXISelLowering.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,12 @@ enum NodeType : unsigned {
240240
TexUnifiedCubeArrayS32FloatLevel,
241241
TexUnifiedCubeArrayU32Float,
242242
TexUnifiedCubeArrayU32FloatLevel,
243+
TexUnifiedCubeFloatFloatGrad,
244+
TexUnifiedCubeS32FloatGrad,
245+
TexUnifiedCubeU32FloatGrad,
246+
TexUnifiedCubeArrayFloatFloatGrad,
247+
TexUnifiedCubeArrayS32FloatGrad,
248+
TexUnifiedCubeArrayU32FloatGrad,
243249
Tld4UnifiedR2DFloatFloat,
244250
Tld4UnifiedG2DFloatFloat,
245251
Tld4UnifiedB2DFloatFloat,

llvm/lib/Target/NVPTX/NVPTXIntrinsics.td

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3764,6 +3764,62 @@ defm TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL
37643764
: TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.u32.f32",
37653765
Int32Regs, Float32Regs>;
37663766

3767+
class TEX_UNIFIED_CUBE_GRAD_base<string inst, NVPTXRegClass outtype,
3768+
NVPTXRegClass intype, dag tex>
3769+
: NVPTXInst<(outs outtype:$r, outtype:$g,
3770+
outtype:$b, outtype:$a),
3771+
!con(tex, (ins intype:$x, intype:$y, intype:$z,
3772+
intype:$gradx0, intype:$gradx1,
3773+
intype:$gradx2, intype:$grady0,
3774+
intype:$grady1, intype:$grady2)),
3775+
inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}],"
3776+
" \\{$gradx0, $gradx1, $gradx2, $gradx2\\},"
3777+
" \\{$grady0, $grady1, $grady2, $grady2\\};",
3778+
[]>;
3779+
3780+
multiclass TEX_UNIFIED_CUBE_GRAD<string inst, NVPTXRegClass outtype,
3781+
NVPTXRegClass intype> {
3782+
def _R : TEX_UNIFIED_CUBE_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3783+
def _I : TEX_UNIFIED_CUBE_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
3784+
}
3785+
3786+
defm TEX_UNIFIED_CUBE_F32_F32_GRAD
3787+
: TEX_UNIFIED_CUBE_GRAD<"tex.grad.cube.v4.f32.f32", Float32Regs, Float32Regs>;
3788+
defm TEX_UNIFIED_CUBE_S32_F32_GRAD
3789+
: TEX_UNIFIED_CUBE_GRAD<"tex.grad.cube.v4.s32.f32", Int32Regs, Float32Regs>;
3790+
defm TEX_UNIFIED_CUBE_U32_F32_GRAD
3791+
: TEX_UNIFIED_CUBE_GRAD<"tex.grad.cube.v4.u32.f32", Int32Regs, Float32Regs>;
3792+
3793+
class TEX_UNIFIED_CUBE_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
3794+
NVPTXRegClass intype, dag tex>
3795+
: NVPTXInst<(outs outtype:$r, outtype:$g,
3796+
outtype:$b, outtype:$a),
3797+
!con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z,
3798+
intype:$gradx0, intype:$gradx1,
3799+
intype:$gradx2, intype:$grady0,
3800+
intype:$grady1, intype:$grady2)),
3801+
inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $z\\}],"
3802+
" \\{$gradx0, $gradx1, $gradx2, $gradx2\\},"
3803+
" \\{$grady0, $grady1, $grady2, $grady2\\};",
3804+
[]>;
3805+
multiclass TEX_UNIFIED_CUBE_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
3806+
NVPTXRegClass intype> {
3807+
def _R : TEX_UNIFIED_CUBE_ARRAY_GRAD_base<inst, outtype, intype,
3808+
(ins Int64Regs:$t)>;
3809+
def _I : TEX_UNIFIED_CUBE_ARRAY_GRAD_base<inst, outtype, intype,
3810+
(ins i64imm:$t)>;
3811+
}
3812+
3813+
defm TEX_UNIFIED_CUBE_ARRAY_F32_F32_GRAD
3814+
: TEX_UNIFIED_CUBE_ARRAY_GRAD<"tex.grad.acube.v4.f32.f32",
3815+
Float32Regs, Float32Regs>;
3816+
defm TEX_UNIFIED_CUBE_ARRAY_S32_F32_GRAD
3817+
: TEX_UNIFIED_CUBE_ARRAY_GRAD<"tex.grad.acube.v4.s32.f32",
3818+
Int32Regs, Float32Regs>;
3819+
defm TEX_UNIFIED_CUBE_ARRAY_U32_F32_GRAD
3820+
: TEX_UNIFIED_CUBE_ARRAY_GRAD<"tex.grad.acube.v4.u32.f32",
3821+
Int32Regs, Float32Regs>;
3822+
37673823
class TLD4_UNIFIED_2D_base<string inst, NVPTXRegClass outtype,
37683824
NVPTXRegClass intype, dag tex>
37693825
: NVPTXInst<(outs outtype:$v0, outtype:$v1,

llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1319,6 +1319,18 @@ static unsigned texRegisterToIndexOpcode(unsigned RegOC) {
13191319
return NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_I;
13201320
case NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL_R:
13211321
return NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL_I;
1322+
case NVPTX::TEX_UNIFIED_CUBE_F32_F32_GRAD_R:
1323+
return NVPTX::TEX_UNIFIED_CUBE_F32_F32_GRAD_I;
1324+
case NVPTX::TEX_UNIFIED_CUBE_S32_F32_GRAD_R:
1325+
return NVPTX::TEX_UNIFIED_CUBE_S32_F32_GRAD_I;
1326+
case NVPTX::TEX_UNIFIED_CUBE_U32_F32_GRAD_R:
1327+
return NVPTX::TEX_UNIFIED_CUBE_U32_F32_GRAD_I;
1328+
case NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_GRAD_R:
1329+
return NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_GRAD_I;
1330+
case NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_GRAD_R:
1331+
return NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_GRAD_I;
1332+
case NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_GRAD_R:
1333+
return NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_GRAD_I;
13221334
case NVPTX::TLD4_UNIFIED_R_2D_F32_F32_R:
13231335
return NVPTX::TLD4_UNIFIED_R_2D_F32_F32_I;
13241336
case NVPTX::TLD4_UNIFIED_G_2D_F32_F32_R:

llvm/test/CodeGen/NVPTX/surf-tex.py

Lines changed: 30 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
# RUN: %python %s --target=cuda --tests=suld,sust,tex,tld4 --gen-list=%t.list > %t-cuda.ll
2-
# RUN: llc -mcpu=sm_20 %t-cuda.ll -verify-machineinstrs -o - | FileCheck %t-cuda.ll
3-
# RUN: %if ptxas %{ llc -mcpu=sm_20 %t-cuda.ll -verify-machineinstrs -o - | %ptxas-verify %}
2+
# RUN: llc -mcpu=sm_60 -mattr=+ptx43 %t-cuda.ll -verify-machineinstrs -o - | FileCheck %t-cuda.ll --check-prefixes=CHECK,CHECK-CUDA
3+
# RUN: %if ptxas %{ llc -mcpu=sm_60 -mattr=+ptx43 %t-cuda.ll -verify-machineinstrs -o - | %ptxas-verify %}
44

55
# We only need to run this a second time for texture tests, because
66
# there is a difference between unified and non-unified intrinsics.
77
#
88
# RUN: %python %s --target=nvcl --tests=suld,sust,tex,tld4 --gen-list-append --gen-list=%t.list > %t-nvcl.ll
9-
# RUN: llc %t-nvcl.ll -verify-machineinstrs -o - | FileCheck %t-nvcl.ll
9+
# RUN: llc %t-nvcl.ll -verify-machineinstrs -o - | FileCheck %t-nvcl.ll --check-prefixes=CHECK,CHECK-NVCL
1010
# RUN: %if ptxas %{ llc %t-nvcl.ll -verify-machineinstrs -o - | %ptxas-verify %}
1111

1212
# Verify that all instructions and intrinsics defined in TableGen
@@ -115,6 +115,15 @@ def get_llvm_value_type(vec, ty_ptx):
115115
return value[vec].format(ty=ty)
116116

117117

118+
id_counter = 0
119+
120+
121+
def get_table_gen_id():
122+
global id_counter
123+
id_counter += 1
124+
return id_counter
125+
126+
118127
def gen_triple(target):
119128
if target == "cuda":
120129
print('target triple = "nvptx64-unknown-cuda"\n')
@@ -260,8 +269,9 @@ def gen_suld_tests(target, global_surf):
260269
ret void
261270
}
262271
; CHECK-LABEL: .entry ${test_name}_global
263-
; CHECK: ${instruction} ${reg_ret}, [${global_surf}, ${reg_access}]
264-
;
272+
; CHECK-CUDA: mov.u64 [[REG${reg_id}:%.*]], ${global_surf}
273+
; CHECK-CUDA: ${instruction} ${reg_ret}, [[[REG${reg_id}]], ${reg_access}]
274+
; CHECK-NVCL: ${instruction} ${reg_ret}, [${global_surf}, ${reg_access}]
265275
define void @${test_name}_global(${retty}* %ret, ${access}) {
266276
%gs = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_surf})
267277
%val = tail call ${retty} @${intrinsic}(i64 %gs, ${access})
@@ -304,6 +314,7 @@ def gen_suld_tests(target, global_surf):
304314
"reg_ret": get_ptx_vec_reg(vec, dtype),
305315
"reg_surf": get_ptx_surface(target),
306316
"reg_access": get_ptx_surface_access(geom),
317+
"reg_id": get_table_gen_id(),
307318
}
308319
gen_test(template, params)
309320
generated_items.append((params["intrinsic"], params["instruction"]))
@@ -353,8 +364,9 @@ def gen_sust_tests(target, global_surf):
353364
ret void
354365
}
355366
; CHECK-LABEL: .entry ${test_name}_global
356-
; CHECK: ${instruction} [${global_surf}, ${reg_access}], ${reg_value}
357-
;
367+
; CHECK-CUDA: mov.u64 [[REG${reg_id}:%.*]], ${global_surf}
368+
; CHECK-CUDA: ${instruction} [[[REG${reg_id}]], ${reg_access}], ${reg_value}
369+
; CHECK-NVCL: ${instruction} [${global_surf}, ${reg_access}], ${reg_value}
358370
define void @${test_name}_global(${value}, ${access}) {
359371
%gs = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_surf})
360372
tail call void @${intrinsic}(i64 %gs, ${access}, ${value})
@@ -408,6 +420,7 @@ def gen_sust_tests(target, global_surf):
408420
"reg_value": get_ptx_vec_reg(vec, ctype),
409421
"reg_surf": get_ptx_surface(target),
410422
"reg_access": get_ptx_surface_access(geom),
423+
"reg_id": get_table_gen_id(),
411424
}
412425
gen_test(template, params)
413426
generated_items.append((params["intrinsic"], params["instruction"]))
@@ -614,7 +627,9 @@ def gen_tex_tests(target, global_tex, global_sampler):
614627
ret void
615628
}
616629
; CHECK-LABEL: .entry ${test_name}_global
617-
; CHECK: ${instruction} ${ptx_ret}, [${global_tex}, ${ptx_global_sampler} ${ptx_access}]
630+
; CHECK-CUDA: mov.u64 [[REG${reg_id}:%.*]], ${global_tex}
631+
; CHECK-CUDA: ${instruction} ${ptx_ret}, [[[REG${reg_id}]], ${ptx_global_sampler} ${ptx_access}]
632+
; CHECK-NVCL: ${instruction} ${ptx_ret}, [${global_tex}, ${ptx_global_sampler} ${ptx_access}]
618633
define void @${test_name}_global(${retty}* %ret, ${access}) {
619634
%gt = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_tex})
620635
${get_sampler_handle}
@@ -656,8 +671,8 @@ def gen_tex_tests(target, global_tex, global_sampler):
656671

657672
# FIXME: missing intrinsics.
658673
# Support for tex.grad.{cube, acube} introduced in PTX ISA version
659-
# 4.3.
660-
if mipmap == "grad" and geom in ("cube", "acube"):
674+
# 4.3, currently supported only in unified mode.
675+
if not is_unified(target) and mipmap == "grad" and geom in ("cube", "acube"):
661676
continue
662677

663678
# The instruction returns a two-element vector for destination
@@ -698,6 +713,7 @@ def gen_tex_tests(target, global_tex, global_sampler):
698713
"ptx_tex": get_ptx_texture(target),
699714
"ptx_access": get_ptx_texture_access(geom, ctype),
700715
"ptx_global_sampler": get_ptx_global_sampler(target, global_sampler),
716+
"reg_id": get_table_gen_id(),
701717
}
702718
gen_test(template, params)
703719
generated_items.append((params["intrinsic"], params["instruction"]))
@@ -798,7 +814,9 @@ def gen_tld4_tests(target, global_tex, global_sampler):
798814
ret void
799815
}
800816
; CHECK-LABEL: .entry ${test_name}_global
801-
; CHECK: ${instruction} ${ptx_ret}, [${global_tex}, ${ptx_global_sampler} ${ptx_access}]
817+
; CHECK-CUDA: mov.u64 [[REG${reg_id}:%.*]], ${global_tex}
818+
; CHECK-CUDA: ${instruction} ${ptx_ret}, [[[REG${reg_id}]], ${ptx_global_sampler} ${ptx_access}]
819+
; CHECK-NVCL: ${instruction} ${ptx_ret}, [${global_tex}, ${ptx_global_sampler} ${ptx_access}]
802820
define void @${test_name}_global(${retty}* %ret, ${access}) {
803821
%gt = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_tex})
804822
${get_sampler_handle}
@@ -844,6 +862,7 @@ def gen_tld4_tests(target, global_tex, global_sampler):
844862
"ptx_tex": get_ptx_texture(target),
845863
"ptx_access": get_ptx_tld4_access(geom),
846864
"ptx_global_sampler": get_ptx_global_sampler(target, global_sampler),
865+
"reg_id": get_table_gen_id(),
847866
}
848867
gen_test(template, params)
849868
generated_items.append((params["intrinsic"], params["instruction"]))

0 commit comments

Comments
 (0)