@@ -39,7 +39,7 @@ declare a function as a kernel function. This metadata is attached to the
39
39
40
40
.. code-block:: llvm
41
41
42
- !0 = metadata !{<function-ref>, metadata !"kernel", i32 1}
42
+ !0 = !{<function-ref>, !"kernel", i32 1}
43
43
44
44
The first parameter is a reference to the kernel function. The following
45
45
example shows a kernel function calling a device function in LLVM IR. The
@@ -54,14 +54,14 @@ function ``@my_kernel`` is callable from host code, but ``@my_fmad`` is not.
54
54
}
55
55
56
56
define void @my_kernel(float* %ptr) {
57
- %val = load float* %ptr
57
+ %val = load float, float* %ptr
58
58
%ret = call float @my_fmad(float %val, float %val, float %val)
59
59
store float %ret, float* %ptr
60
60
ret void
61
61
}
62
62
63
63
!nvvm.annotations = !{!1}
64
- !1 = metadata !{void (float*)* @my_kernel, metadata !"kernel", i32 1}
64
+ !1 = !{void (float*)* @my_kernel, !"kernel", i32 1}
65
65
66
66
When compiled, the PTX kernel functions are callable by host-side code.
67
67
@@ -446,13 +446,13 @@ The Kernel
446
446
%id = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x() readnone nounwind
447
447
448
448
; Compute pointers into A, B, and C
449
- %ptrA = getelementptr float addrspace(1)* %A, i32 %id
450
- %ptrB = getelementptr float addrspace(1)* %B, i32 %id
451
- %ptrC = getelementptr float addrspace(1)* %C, i32 %id
449
+ %ptrA = getelementptr float, float addrspace(1)* %A, i32 %id
450
+ %ptrB = getelementptr float, float addrspace(1)* %B, i32 %id
451
+ %ptrC = getelementptr float, float addrspace(1)* %C, i32 %id
452
452
453
453
; Read A, B
454
- %valA = load float addrspace(1)* %ptrA, align 4
455
- %valB = load float addrspace(1)* %ptrB, align 4
454
+ %valA = load float, float addrspace(1)* %ptrA, align 4
455
+ %valB = load float, float addrspace(1)* %ptrB, align 4
456
456
457
457
; Compute C = A + B
458
458
%valC = fadd float %valA, %valB
@@ -464,9 +464,9 @@ The Kernel
464
464
}
465
465
466
466
!nvvm.annotations = !{!0}
467
- !0 = metadata !{void (float addrspace(1)*,
468
- float addrspace(1)*,
469
- float addrspace(1)*)* @kernel, metadata !"kernel", i32 1}
467
+ !0 = !{void (float addrspace(1)*,
468
+ float addrspace(1)*,
469
+ float addrspace(1)*)* @kernel, !"kernel", i32 1}
470
470
471
471
472
472
We can use the LLVM ``llc`` tool to directly run the NVPTX code generator:
@@ -608,16 +608,16 @@ as a PTX `kernel` function. These metadata nodes take the form:
608
608
609
609
.. code-block:: text
610
610
611
- metadata !{<function ref>, metadata !"kernel", i32 1}
611
+ !{<function ref>, !"kernel", i32 1}
612
612
613
613
For the previous example, we have:
614
614
615
615
.. code-block:: llvm
616
616
617
617
!nvvm.annotations = !{!0}
618
- !0 = metadata !{void (float addrspace(1)*,
619
- float addrspace(1)*,
620
- float addrspace(1)*)* @kernel, metadata !"kernel", i32 1}
618
+ !0 = !{void (float addrspace(1)*,
619
+ float addrspace(1)*,
620
+ float addrspace(1)*)* @kernel, !"kernel", i32 1}
621
621
622
622
Here, we have a single metadata declaration in ``nvvm.annotations``. This
623
623
metadata annotates our ``@kernel`` function with the ``kernel`` attribute.
@@ -830,13 +830,13 @@ Libdevice provides an ``__nv_powf`` function that we will use.
830
830
%id = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x() readnone nounwind
831
831
832
832
; Compute pointers into A, B, and C
833
- %ptrA = getelementptr float addrspace(1)* %A, i32 %id
834
- %ptrB = getelementptr float addrspace(1)* %B, i32 %id
835
- %ptrC = getelementptr float addrspace(1)* %C, i32 %id
833
+ %ptrA = getelementptr float, float addrspace(1)* %A, i32 %id
834
+ %ptrB = getelementptr float, float addrspace(1)* %B, i32 %id
835
+ %ptrC = getelementptr float, float addrspace(1)* %C, i32 %id
836
836
837
837
; Read A, B
838
- %valA = load float addrspace(1)* %ptrA, align 4
839
- %valB = load float addrspace(1)* %ptrB, align 4
838
+ %valA = load float, float addrspace(1)* %ptrA, align 4
839
+ %valB = load float, float addrspace(1)* %ptrB, align 4
840
840
841
841
; Compute C = pow(A, B)
842
842
%valC = call float @__nv_powf(float %valA, float %valB)
@@ -848,9 +848,9 @@ Libdevice provides an ``__nv_powf`` function that we will use.
848
848
}
849
849
850
850
!nvvm.annotations = !{!0}
851
- !0 = metadata !{void (float addrspace(1)*,
852
- float addrspace(1)*,
853
- float addrspace(1)*)* @kernel, metadata !"kernel", i32 1}
851
+ !0 = !{void (float addrspace(1)*,
852
+ float addrspace(1)*,
853
+ float addrspace(1)*)* @kernel, !"kernel", i32 1}
854
854
855
855
856
856
To compile this kernel, we perform the following steps:
0 commit comments