-
Notifications
You must be signed in to change notification settings - Fork 14.4k
[NVPTX] Add 'activemask' builtin and intrinsic support #79768
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -40,7 +40,7 @@ foreach sm = [20, 21, 30, 32, 35, 37, 50, 52, 53, | |
|
||
def SM90a: FeatureSM<"90a", 901>; | ||
|
||
foreach version = [32, 40, 41, 42, 43, 50, 60, 61, 63, 64, 65, | ||
foreach version = [32, 40, 41, 42, 43, 50, 60, 61, 62, 63, 64, 65, | ||
70, 71, 72, 73, 74, 75, 76, 77, 78, 80, 81, 82, 83] in | ||
def PTX#version: FeaturePTX<version>; | ||
|
||
|
@@ -65,7 +65,7 @@ def : Proc<"sm_61", [SM61, PTX50]>; | |
def : Proc<"sm_62", [SM62, PTX50]>; | ||
def : Proc<"sm_70", [SM70, PTX60]>; | ||
def : Proc<"sm_72", [SM72, PTX61]>; | ||
def : Proc<"sm_75", [SM75, PTX63]>; | ||
def : Proc<"sm_75", [SM75, PTX62, PTX63]>; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why are we adding PTX62 here? According to PTX docs sm_75 has been introduced in PTX ISA 6.3 in CUDA-10.0. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yeah I wasn't sure where it should go. The docs specify it's PTX62, but I couldn't find which one that came from, so I just put it before 63. Maybe on 72? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What are you trying to do with the PTX62 feature to start with? Why do you need to add it here to start with? In general, the features will be supplied externally. This particular place just sets the minimum required to support this particular GPU variant. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Okay, so I can just get rid of it for this definition and it will still work? I could've just said it came with 63 and been lazy I suppose. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm confused a bit here. Constraints on PTX version for GPU and for instructions are independent. You need both satisfied in order to use a given instruction on a given GPU. So, to use activemask on sm_75, you do need PTX63. You do not need to change anything here. You already have correct predicates applied to the instruction itself and to the target builtin. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 
Okay, so I'll remove it from the definition here and just add the PTX62. I don't have the fullest understanding of how this PTX stuff works. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should be fixed now, also I added the one for |
||
def : Proc<"sm_80", [SM80, PTX70]>; | ||
def : Proc<"sm_86", [SM86, PTX71]>; | ||
def : Proc<"sm_87", [SM87, PTX74]>; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
; RUN: llc < %s -march=nvptx64 -O2 -mcpu=sm_52 -mattr=+ptx62 | FileCheck %s | ||
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_52 -mattr=+ptx62 | %ptxas-verify %} | ||
|
||
declare i32 @llvm.nvvm.activemask() | ||
|
||
; CHECK-LABEL: activemask( | ||
; | ||
; CHECK: activemask.b32 %[[REG:.+]]; | ||
; CHECK-NEXT: st.param.b32 [func_retval0+0], %[[REG]]; | ||
; CHECK-NEXT: ret; | ||
define dso_local i32 @activemask() { | ||
entry: | ||
%mask = call i32 @llvm.nvvm.activemask() | ||
ret i32 %mask | ||
} | ||
|
||
; CHECK-LABEL: convergent( | ||
; | ||
; CHECK: activemask.b32 %[[REG:.+]]; | ||
; CHECK: activemask.b32 %[[REG]]; | ||
; CHECK: .param.b32 [func_retval0+0], %[[REG]]; | ||
; CHECK-NEXT: ret; | ||
define dso_local i32 @convergent(i1 %cond) { | ||
entry: | ||
br i1 %cond, label %if.else, label %if.then | ||
|
||
if.then: | ||
%0 = call i32 @llvm.nvvm.activemask() | ||
br label %if.end | ||
|
||
if.else: | ||
%1 = call i32 @llvm.nvvm.activemask() | ||
br label %if.end | ||
|
||
if.end: | ||
%mask = phi i32 [ %0, %if.then ], [ %1, %if.else ] | ||
ret i32 %mask | ||
} |
Uh oh!
There was an error while loading. Please reload this page.