Skip to content

Commit b60b3d9

Browse files
author
z1_cciauto
authored
merge main into amd-staging (llvm#2708)
2 parents d026212 + d675534 commit b60b3d9

File tree

176 files changed

+8105
-3428
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

176 files changed

+8105
-3428
lines changed

clang/include/clang/AST/StmtOpenMP.h

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5787,10 +5787,13 @@ class OMPReverseDirective final : public OMPLoopTransformationDirective {
57875787
TransformedStmtOffset,
57885788
};
57895789

5790-
explicit OMPReverseDirective(SourceLocation StartLoc, SourceLocation EndLoc)
5790+
explicit OMPReverseDirective(SourceLocation StartLoc, SourceLocation EndLoc,
5791+
unsigned NumLoops)
57915792
: OMPLoopTransformationDirective(OMPReverseDirectiveClass,
57925793
llvm::omp::OMPD_reverse, StartLoc,
5793-
EndLoc, 1) {}
5794+
EndLoc, NumLoops) {
5795+
setNumGeneratedLoops(NumLoops);
5796+
}
57945797

57955798
void setPreInits(Stmt *PreInits) {
57965799
Data->getChildren()[PreInitsOffset] = PreInits;
@@ -5806,19 +5809,23 @@ class OMPReverseDirective final : public OMPLoopTransformationDirective {
58065809
/// \param C Context of the AST.
58075810
/// \param StartLoc Location of the introducer (e.g. the 'omp' token).
58085811
/// \param EndLoc Location of the directive's end (e.g. the tok::eod).
5812+
/// \param NumLoops Number of affected loops
58095813
/// \param AssociatedStmt The outermost associated loop.
58105814
/// \param TransformedStmt The loop nest after tiling, or nullptr in
58115815
/// dependent contexts.
58125816
/// \param PreInits Helper preinits statements for the loop nest.
5813-
static OMPReverseDirective *
5814-
Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
5815-
Stmt *AssociatedStmt, Stmt *TransformedStmt, Stmt *PreInits);
5817+
static OMPReverseDirective *Create(const ASTContext &C,
5818+
SourceLocation StartLoc,
5819+
SourceLocation EndLoc,
5820+
Stmt *AssociatedStmt, unsigned NumLoops,
5821+
Stmt *TransformedStmt, Stmt *PreInits);
58165822

58175823
/// Build an empty '#pragma omp reverse' AST node for deserialization.
58185824
///
58195825
/// \param C Context of the AST.
5820-
/// \param NumClauses Number of clauses to allocate.
5821-
static OMPReverseDirective *CreateEmpty(const ASTContext &C);
5826+
/// \param NumLoops Number of associated loops to allocate
5827+
static OMPReverseDirective *CreateEmpty(const ASTContext &C,
5828+
unsigned NumLoops);
58225829

58235830
/// Gets/sets the associated loops after the transformation, i.e. after
58245831
/// de-sugaring.
@@ -5857,7 +5864,7 @@ class OMPInterchangeDirective final : public OMPLoopTransformationDirective {
58575864
: OMPLoopTransformationDirective(OMPInterchangeDirectiveClass,
58585865
llvm::omp::OMPD_interchange, StartLoc,
58595866
EndLoc, NumLoops) {
5860-
setNumGeneratedLoops(3 * NumLoops);
5867+
setNumGeneratedLoops(NumLoops);
58615868
}
58625869

58635870
void setPreInits(Stmt *PreInits) {

clang/lib/AST/StmtOpenMP.cpp

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -471,18 +471,21 @@ OMPUnrollDirective *OMPUnrollDirective::CreateEmpty(const ASTContext &C,
471471
OMPReverseDirective *
472472
OMPReverseDirective::Create(const ASTContext &C, SourceLocation StartLoc,
473473
SourceLocation EndLoc, Stmt *AssociatedStmt,
474-
Stmt *TransformedStmt, Stmt *PreInits) {
474+
unsigned NumLoops, Stmt *TransformedStmt,
475+
Stmt *PreInits) {
475476
OMPReverseDirective *Dir = createDirective<OMPReverseDirective>(
476-
C, {}, AssociatedStmt, TransformedStmtOffset + 1, StartLoc, EndLoc);
477+
C, {}, AssociatedStmt, TransformedStmtOffset + 1, StartLoc, EndLoc,
478+
NumLoops);
477479
Dir->setTransformedStmt(TransformedStmt);
478480
Dir->setPreInits(PreInits);
479481
return Dir;
480482
}
481483

482-
OMPReverseDirective *OMPReverseDirective::CreateEmpty(const ASTContext &C) {
484+
OMPReverseDirective *OMPReverseDirective::CreateEmpty(const ASTContext &C,
485+
unsigned NumLoops) {
483486
return createEmptyDirective<OMPReverseDirective>(
484487
C, /*NumClauses=*/0, /*HasAssociatedStmt=*/true,
485-
TransformedStmtOffset + 1, SourceLocation(), SourceLocation());
488+
TransformedStmtOffset + 1, SourceLocation(), SourceLocation(), NumLoops);
486489
}
487490

488491
OMPInterchangeDirective *OMPInterchangeDirective::Create(

clang/lib/CodeGen/CGHLSLRuntime.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -375,6 +375,7 @@ static llvm::Value *createSPIRVBuiltinLoad(IRBuilder<> &B, llvm::Module &M,
375375
llvm::GlobalVariable::GeneralDynamicTLSModel,
376376
/* AddressSpace */ 7, /* isExternallyInitialized= */ true);
377377
addSPIRVBuiltinDecoration(GV, BuiltInID);
378+
GV->setVisibility(llvm::GlobalValue::HiddenVisibility);
378379
return B.CreateLoad(Ty, GV);
379380
}
380381

clang/lib/Headers/__clang_cuda_intrinsics.h

Lines changed: 0 additions & 284 deletions
Original file line numberDiff line numberDiff line change
@@ -479,290 +479,6 @@ inline __device__ unsigned __funnelshift_rc(unsigned low32, unsigned high32,
479479
return ret;
480480
}
481481

482-
#pragma push_macro("__INTRINSIC_LOAD")
483-
#define __INTRINSIC_LOAD(__FnName, __AsmOp, __DeclType, __TmpType, __AsmType, \
484-
__Clobber) \
485-
inline __device__ __DeclType __FnName(const __DeclType *__ptr) { \
486-
__TmpType __ret; \
487-
asm(__AsmOp " %0, [%1];" : __AsmType(__ret) : "l"(__ptr)__Clobber); \
488-
return (__DeclType)__ret; \
489-
}
490-
491-
#pragma push_macro("__INTRINSIC_LOAD2")
492-
#define __INTRINSIC_LOAD2(__FnName, __AsmOp, __DeclType, __TmpType, __AsmType, \
493-
__Clobber) \
494-
inline __device__ __DeclType __FnName(const __DeclType *__ptr) { \
495-
__DeclType __ret; \
496-
__TmpType __tmp; \
497-
asm(__AsmOp " {%0,%1}, [%2];" \
498-
: __AsmType(__tmp.x), __AsmType(__tmp.y) \
499-
: "l"(__ptr)__Clobber); \
500-
using __ElementType = decltype(__ret.x); \
501-
__ret.x = (__ElementType)(__tmp.x); \
502-
__ret.y = (__ElementType)__tmp.y; \
503-
return __ret; \
504-
}
505-
506-
#pragma push_macro("__INTRINSIC_LOAD4")
507-
#define __INTRINSIC_LOAD4(__FnName, __AsmOp, __DeclType, __TmpType, __AsmType, \
508-
__Clobber) \
509-
inline __device__ __DeclType __FnName(const __DeclType *__ptr) { \
510-
__DeclType __ret; \
511-
__TmpType __tmp; \
512-
asm(__AsmOp " {%0,%1,%2,%3}, [%4];" \
513-
: __AsmType(__tmp.x), __AsmType(__tmp.y), __AsmType(__tmp.z), \
514-
__AsmType(__tmp.w) \
515-
: "l"(__ptr)__Clobber); \
516-
using __ElementType = decltype(__ret.x); \
517-
__ret.x = (__ElementType)__tmp.x; \
518-
__ret.y = (__ElementType)__tmp.y; \
519-
__ret.z = (__ElementType)__tmp.z; \
520-
__ret.w = (__ElementType)__tmp.w; \
521-
return __ret; \
522-
}
523-
524-
__INTRINSIC_LOAD(__ldcg, "ld.global.cg.s8", char, unsigned int, "=r", );
525-
__INTRINSIC_LOAD(__ldcg, "ld.global.cg.s8", signed char, unsigned int, "=r", );
526-
__INTRINSIC_LOAD(__ldcg, "ld.global.cg.s16", short, unsigned short, "=h", );
527-
__INTRINSIC_LOAD(__ldcg, "ld.global.cg.s32", int, unsigned int, "=r", );
528-
__INTRINSIC_LOAD(__ldcg, "ld.global.cg.s64", long long, unsigned long long,
529-
"=l", );
530-
531-
__INTRINSIC_LOAD2(__ldcg, "ld.global.cg.v2.s8", char2, int2, "=r", );
532-
__INTRINSIC_LOAD4(__ldcg, "ld.global.cg.v4.s8", char4, int4, "=r", );
533-
__INTRINSIC_LOAD2(__ldcg, "ld.global.cg.v2.s16", short2, short2, "=h", );
534-
__INTRINSIC_LOAD4(__ldcg, "ld.global.cg.v4.s16", short4, short4, "=h", );
535-
__INTRINSIC_LOAD2(__ldcg, "ld.global.cg.v2.s32", int2, int2, "=r", );
536-
__INTRINSIC_LOAD4(__ldcg, "ld.global.cg.v4.s32", int4, int4, "=r", );
537-
__INTRINSIC_LOAD2(__ldcg, "ld.global.cg.v2.s64 ", longlong2, longlong2, "=l", );
538-
539-
__INTRINSIC_LOAD(__ldcg, "ld.global.cg.u8", unsigned char, unsigned int,
540-
"=r", );
541-
__INTRINSIC_LOAD(__ldcg, "ld.global.cg.u16", unsigned short, unsigned short,
542-
"=h", );
543-
__INTRINSIC_LOAD(__ldcg, "ld.global.cg.u32", unsigned int, unsigned int,
544-
"=r", );
545-
__INTRINSIC_LOAD(__ldcg, "ld.global.cg.u64", unsigned long long,
546-
unsigned long long, "=l", );
547-
548-
__INTRINSIC_LOAD2(__ldcg, "ld.global.cg.v2.u8", uchar2, int2, "=r", );
549-
__INTRINSIC_LOAD4(__ldcg, "ld.global.cg.v4.u8", uchar4, int4, "=r", );
550-
__INTRINSIC_LOAD2(__ldcg, "ld.global.cg.v2.u16", ushort2, ushort2, "=h", );
551-
__INTRINSIC_LOAD4(__ldcg, "ld.global.cg.v4.u16", ushort4, ushort4, "=h", );
552-
__INTRINSIC_LOAD2(__ldcg, "ld.global.cg.v2.u32", uint2, uint2, "=r", );
553-
__INTRINSIC_LOAD4(__ldcg, "ld.global.cg.v4.u32", uint4, uint4, "=r", );
554-
__INTRINSIC_LOAD2(__ldcg, "ld.global.cg.v2.u64", ulonglong2, ulonglong2,
555-
"=l", );
556-
557-
__INTRINSIC_LOAD(__ldcg, "ld.global.cg.f32", float, float, "=f", );
558-
__INTRINSIC_LOAD(__ldcg, "ld.global.cg.f64", double, double, "=d", );
559-
__INTRINSIC_LOAD2(__ldcg, "ld.global.cg.v2.f32", float2, float2, "=f", );
560-
__INTRINSIC_LOAD4(__ldcg, "ld.global.cg.v4.f32", float4, float4, "=f", );
561-
__INTRINSIC_LOAD2(__ldcg, "ld.global.cg.v2.f64", double2, double2, "=d", );
562-
563-
inline __device__ long __ldcg(const long *__ptr) {
564-
unsigned long __ret;
565-
if (sizeof(long) == 8) {
566-
asm("ld.global.cg.s64 %0, [%1];" : "=l"(__ret) : "l"(__ptr));
567-
} else {
568-
asm("ld.global.cg.s32 %0, [%1];" : "=r"(__ret) : "l"(__ptr));
569-
}
570-
return (long)__ret;
571-
}
572-
573-
__INTRINSIC_LOAD(__ldcv, "ld.global.cv.u8", unsigned char, unsigned int,
574-
"=r", : "memory");
575-
__INTRINSIC_LOAD(__ldcv, "ld.global.cv.u16", unsigned short, unsigned short,
576-
"=h", : "memory");
577-
__INTRINSIC_LOAD(__ldcv, "ld.global.cv.u32", unsigned int, unsigned int,
578-
"=r", : "memory");
579-
__INTRINSIC_LOAD(__ldcv, "ld.global.cv.u64", unsigned long long,
580-
unsigned long long, "=l", : "memory");
581-
582-
__INTRINSIC_LOAD(__ldcv, "ld.global.cv.s8", char, unsigned int,
583-
"=r", : "memory");
584-
__INTRINSIC_LOAD(__ldcv, "ld.global.cv.s8", signed char, unsigned int,
585-
"=r", : "memory");
586-
__INTRINSIC_LOAD(__ldcv, "ld.global.cv.s16", short, unsigned short,
587-
"=h", : "memory");
588-
__INTRINSIC_LOAD(__ldcv, "ld.global.cv.s32", int, unsigned int,
589-
"=r", : "memory");
590-
__INTRINSIC_LOAD(__ldcv, "ld.global.cv.s64", long long, unsigned long long,
591-
"=l", : "memory");
592-
593-
__INTRINSIC_LOAD2(__ldcv, "ld.global.cv.v2.u8", uchar2, uint2,
594-
"=r", : "memory");
595-
__INTRINSIC_LOAD4(__ldcv, "ld.global.cv.v4.u8", uchar4, uint4,
596-
"=r", : "memory");
597-
__INTRINSIC_LOAD2(__ldcv, "ld.global.cv.v2.u16", ushort2, ushort2,
598-
"=h", : "memory");
599-
__INTRINSIC_LOAD4(__ldcv, "ld.global.cv.v4.u16", ushort4, ushort4,
600-
"=h", : "memory");
601-
__INTRINSIC_LOAD2(__ldcv, "ld.global.cv.v2.u32", uint2, uint2,
602-
"=r", : "memory");
603-
__INTRINSIC_LOAD4(__ldcv, "ld.global.cv.v4.u32", uint4, uint4,
604-
"=r", : "memory");
605-
__INTRINSIC_LOAD2(__ldcv, "ld.global.cv.v2.u64", ulonglong2, ulonglong2,
606-
"=l", : "memory");
607-
608-
__INTRINSIC_LOAD2(__ldcv, "ld.global.cv.v2.s8", char2, int2, "=r", : "memory");
609-
__INTRINSIC_LOAD4(__ldcv, "ld.global.cv.v4.s8", char4, int4, "=r", : "memory");
610-
__INTRINSIC_LOAD2(__ldcv, "ld.global.cv.v2.s16", short2, short2,
611-
"=h", : "memory");
612-
__INTRINSIC_LOAD4(__ldcv, "ld.global.cv.v4.s16", short4, short4,
613-
"=h", : "memory");
614-
__INTRINSIC_LOAD2(__ldcv, "ld.global.cv.v2.s32", int2, int2, "=r", : "memory");
615-
__INTRINSIC_LOAD4(__ldcv, "ld.global.cv.v4.s32", int4, int4, "=r", : "memory");
616-
__INTRINSIC_LOAD2(__ldcv, "ld.global.cv.v2.s64", longlong2, longlong2,
617-
"=l", : "memory");
618-
619-
__INTRINSIC_LOAD(__ldcv, "ld.global.cv.f32", float, float, "=f", : "memory");
620-
__INTRINSIC_LOAD(__ldcv, "ld.global.cv.f64", double, double, "=d", : "memory");
621-
622-
__INTRINSIC_LOAD2(__ldcv, "ld.global.cv.v2.f32", float2, float2,
623-
"=f", : "memory");
624-
__INTRINSIC_LOAD4(__ldcv, "ld.global.cv.v4.f32", float4, float4,
625-
"=f", : "memory");
626-
__INTRINSIC_LOAD2(__ldcv, "ld.global.cv.v2.f64", double2, double2,
627-
"=d", : "memory");
628-
629-
inline __device__ long __ldcv(const long *__ptr) {
630-
unsigned long __ret;
631-
if (sizeof(long) == 8) {
632-
asm("ld.global.cv.s64 %0, [%1];" : "=l"(__ret) : "l"(__ptr));
633-
} else {
634-
asm("ld.global.cv.s32 %0, [%1];" : "=r"(__ret) : "l"(__ptr));
635-
}
636-
return (long)__ret;
637-
}
638-
639-
__INTRINSIC_LOAD(__ldcs, "ld.global.cs.s8", char, unsigned int, "=r", );
640-
__INTRINSIC_LOAD(__ldcs, "ld.global.cs.s8", signed char, signed int, "=r", );
641-
__INTRINSIC_LOAD(__ldcs, "ld.global.cs.s16", short, unsigned short, "=h", );
642-
__INTRINSIC_LOAD(__ldcs, "ld.global.cs.s32", int, unsigned int, "=r", );
643-
__INTRINSIC_LOAD(__ldcs, "ld.global.cs.s64", long long, unsigned long long,
644-
"=l", );
645-
646-
__INTRINSIC_LOAD2(__ldcs, "ld.global.cs.v2.s8", char2, int2, "=r", );
647-
__INTRINSIC_LOAD4(__ldcs, "ld.global.cs.v4.s8", char4, int4, "=r", );
648-
__INTRINSIC_LOAD2(__ldcs, "ld.global.cs.v2.s16", short2, short2, "=h", );
649-
__INTRINSIC_LOAD4(__ldcs, "ld.global.cs.v4.s16", short4, short4, "=h", );
650-
__INTRINSIC_LOAD2(__ldcs, "ld.global.cs.v2.s32", int2, int2, "=r", );
651-
__INTRINSIC_LOAD4(__ldcs, "ld.global.cs.v4.s32", int4, int4, "=r", );
652-
__INTRINSIC_LOAD2(__ldcs, "ld.global.cs.v2.s64", longlong2, longlong2, "=l", );
653-
654-
__INTRINSIC_LOAD(__ldcs, "ld.global.cs.u8", unsigned char, unsigned int,
655-
"=r", );
656-
__INTRINSIC_LOAD(__ldcs, "ld.global.cs.u16", unsigned short, unsigned short,
657-
"=h", );
658-
__INTRINSIC_LOAD(__ldcs, "ld.global.cs.u32", unsigned int, unsigned int,
659-
"=r", );
660-
__INTRINSIC_LOAD(__ldcs, "ld.global.cs.u64", unsigned long long,
661-
unsigned long long, "=l", );
662-
663-
__INTRINSIC_LOAD2(__ldcs, "ld.global.cs.v2.u8", uchar2, uint2, "=r", );
664-
__INTRINSIC_LOAD4(__ldcs, "ld.global.cs.v4.u8", uchar4, uint4, "=r", );
665-
__INTRINSIC_LOAD2(__ldcs, "ld.global.cs.v2.u16", ushort2, ushort2, "=h", );
666-
__INTRINSIC_LOAD4(__ldcs, "ld.global.cs.v4.u16", ushort4, ushort4, "=h", );
667-
__INTRINSIC_LOAD2(__ldcs, "ld.global.cs.v2.u32", uint2, uint2, "=r", );
668-
__INTRINSIC_LOAD4(__ldcs, "ld.global.cs.v4.u32", uint4, uint4, "=r", );
669-
__INTRINSIC_LOAD2(__ldcs, "ld.global.cs.v2.u64", ulonglong2, ulonglong2,
670-
"=l", );
671-
672-
__INTRINSIC_LOAD(__ldcs, "ld.global.cs.f32", float, float, "=f", );
673-
__INTRINSIC_LOAD(__ldcs, "ld.global.cs.f64", double, double, "=d", );
674-
__INTRINSIC_LOAD2(__ldcs, "ld.global.cs.v2.f32", float2, float2, "=f", );
675-
__INTRINSIC_LOAD4(__ldcs, "ld.global.cs.v4.f32", float4, float4, "=f", );
676-
__INTRINSIC_LOAD2(__ldcs, "ld.global.cs.v2.f64", double2, double2, "=d", );
677-
678-
#pragma pop_macro("__INTRINSIC_LOAD")
679-
#pragma pop_macro("__INTRINSIC_LOAD2")
680-
#pragma pop_macro("__INTRINSIC_LOAD4")
681-
682-
inline __device__ long __ldcs(const long *__ptr) {
683-
unsigned long __ret;
684-
if (sizeof(long) == 8) {
685-
asm("ld.global.cs.s64 %0, [%1];" : "=l"(__ret) : "l"(__ptr));
686-
} else {
687-
asm("ld.global.cs.s32 %0, [%1];" : "=r"(__ret) : "l"(__ptr));
688-
}
689-
return (long)__ret;
690-
}
691-
692-
#pragma push_macro("__INTRINSIC_STORE")
693-
#define __INTRINSIC_STORE(__FnName, __AsmOp, __DeclType, __TmpType, __AsmType) \
694-
inline __device__ void __FnName(__DeclType *__ptr, __DeclType __value) { \
695-
__TmpType __tmp = (__TmpType)__value; \
696-
asm(__AsmOp " [%0], %1;" ::"l"(__ptr), __AsmType(__tmp) : "memory"); \
697-
}
698-
699-
#pragma push_macro("__INTRINSIC_STORE2")
700-
#define __INTRINSIC_STORE2(__FnName, __AsmOp, __DeclType, __TmpType, \
701-
__AsmType) \
702-
inline __device__ void __FnName(__DeclType *__ptr, __DeclType __value) { \
703-
__TmpType __tmp; \
704-
using __ElementType = decltype(__tmp.x); \
705-
__tmp.x = (__ElementType)(__value.x); \
706-
__tmp.y = (__ElementType)(__value.y); \
707-
asm(__AsmOp " [%0], {%1,%2};" ::"l"(__ptr), __AsmType(__tmp.x), \
708-
__AsmType(__tmp.y) \
709-
: "memory"); \
710-
}
711-
712-
#pragma push_macro("__INTRINSIC_STORE4")
713-
#define __INTRINSIC_STORE4(__FnName, __AsmOp, __DeclType, __TmpType, \
714-
__AsmType) \
715-
inline __device__ void __FnName(__DeclType *__ptr, __DeclType __value) { \
716-
__TmpType __tmp; \
717-
using __ElementType = decltype(__tmp.x); \
718-
__tmp.x = (__ElementType)(__value.x); \
719-
__tmp.y = (__ElementType)(__value.y); \
720-
__tmp.z = (__ElementType)(__value.z); \
721-
__tmp.w = (__ElementType)(__value.w); \
722-
asm(__AsmOp " [%0], {%1,%2,%3,%4};" ::"l"(__ptr), __AsmType(__tmp.x), \
723-
__AsmType(__tmp.y), __AsmType(__tmp.z), __AsmType(__tmp.w) \
724-
: "memory"); \
725-
}
726-
727-
__INTRINSIC_STORE(__stwt, "st.global.wt.s8", char, int, "r");
728-
__INTRINSIC_STORE(__stwt, "st.global.wt.s8", signed char, int, "r");
729-
__INTRINSIC_STORE(__stwt, "st.global.wt.s16", short, short, "h");
730-
__INTRINSIC_STORE(__stwt, "st.global.wt.s32", int, int, "r");
731-
__INTRINSIC_STORE(__stwt, "st.global.wt.s64", long long, long long, "l");
732-
733-
__INTRINSIC_STORE2(__stwt, "st.global.wt.v2.s8", char2, int2, "r");
734-
__INTRINSIC_STORE4(__stwt, "st.global.wt.v4.s8", char4, int4, "r");
735-
__INTRINSIC_STORE2(__stwt, "st.global.wt.v2.s16", short2, short2, "h");
736-
__INTRINSIC_STORE4(__stwt, "st.global.wt.v4.s16", short4, short4, "h");
737-
__INTRINSIC_STORE2(__stwt, "st.global.wt.v2.s32", int2, int2, "r");
738-
__INTRINSIC_STORE4(__stwt, "st.global.wt.v4.s32", int4, int4, "r");
739-
__INTRINSIC_STORE2(__stwt, "st.global.wt.v2.s64", longlong2, longlong2, "l");
740-
741-
__INTRINSIC_STORE(__stwt, "st.global.wt.u8", unsigned char, int, "r");
742-
__INTRINSIC_STORE(__stwt, "st.global.wt.u16", unsigned short, unsigned short,
743-
"h");
744-
__INTRINSIC_STORE(__stwt, "st.global.wt.u32", unsigned int, unsigned int, "r");
745-
__INTRINSIC_STORE(__stwt, "st.global.wt.u64", unsigned long long,
746-
unsigned long long, "l");
747-
748-
__INTRINSIC_STORE2(__stwt, "st.global.wt.v2.u8", uchar2, uchar2, "r");
749-
__INTRINSIC_STORE4(__stwt, "st.global.wt.v4.u8", uchar4, uint4, "r");
750-
__INTRINSIC_STORE2(__stwt, "st.global.wt.v2.u16", ushort2, ushort2, "h");
751-
__INTRINSIC_STORE4(__stwt, "st.global.wt.v4.u16", ushort4, ushort4, "h");
752-
__INTRINSIC_STORE2(__stwt, "st.global.wt.v2.u32", uint2, uint2, "r");
753-
__INTRINSIC_STORE4(__stwt, "st.global.wt.v4.u32", uint4, uint4, "r");
754-
__INTRINSIC_STORE2(__stwt, "st.global.wt.v2.u64", ulonglong2, ulonglong2, "l");
755-
756-
__INTRINSIC_STORE(__stwt, "st.global.wt.f32", float, float, "f");
757-
__INTRINSIC_STORE(__stwt, "st.global.wt.f64", double, double, "d");
758-
__INTRINSIC_STORE2(__stwt, "st.global.wt.v2.f32", float2, float2, "f");
759-
__INTRINSIC_STORE4(__stwt, "st.global.wt.v4.f32", float4, float4, "f");
760-
__INTRINSIC_STORE2(__stwt, "st.global.wt.v2.f64", double2, double2, "d");
761-
762-
#pragma pop_macro("__INTRINSIC_STORE")
763-
#pragma pop_macro("__INTRINSIC_STORE2")
764-
#pragma pop_macro("__INTRINSIC_STORE4")
765-
766482
#endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 320
767483

768484
#if CUDA_VERSION >= 11000

0 commit comments

Comments
 (0)