@@ -18,6 +18,8 @@ SPDX-License-Identifier: MIT
18
18
#include < llvmWrapper/Support/Alignment.h>
19
19
#include < llvmWrapper/IR/DerivedTypes.h>
20
20
21
+ #include < cstddef>
22
+
21
23
using namespace llvm ;
22
24
using namespace IGC ;
23
25
@@ -213,6 +215,22 @@ void WIFuncResolution::visitCallInst(CallInst& CI)
213
215
214
216
*************************************************************************************************/
215
217
218
+ #pragma pack(push, 1)
219
+ namespace packed
220
+ {
221
+ #include " implicit_args_struct.h"
222
+ }
223
+ #pragma pack(pop)
224
+
225
+ #undef IMPLICIT_ARGS_STRUCT_H_
226
+ #include " implicit_args_struct.h"
227
+
228
+ // According to the ABI specification, implicit_args struct must be naturally aligned.
229
+ // To ensure that offsets to struct members are compiler-independent, it is necessary to
230
+ // align each struct member's offset to be divisible by the size of that member. This implies
231
+ // inserting an additional padding member in some cases.
232
+ static_assert (sizeof (packed::implicit_args) == sizeof(implicit_args), "Implicit args struct is not properly aligned!");
233
+
216
234
// Structure of side buffer generated by NEO:
217
235
// struct implicit_args {
218
236
// uint8_t struct_size;
@@ -233,6 +251,8 @@ void WIFuncResolution::visitCallInst(CallInst& CI)
233
251
// uint32_t group_count_x;
234
252
// uint32_t group_count_y;
235
253
// uint32_t group_count_z;
254
+ // uint32_t padding0;
255
+ // uint64_t rt_global_buffer_ptr;
236
256
// };
237
257
238
258
// For SIMD8:
@@ -259,46 +279,6 @@ void WIFuncResolution::visitCallInst(CallInst& CI)
259
279
// uint16_t lz[32];
260
280
// };
261
281
262
-
263
- class GLOBAL_STATE_FIELD_OFFSETS
264
- {
265
- public:
266
- // Byte offsets of the fields in the side buffer. Each offset is the previous field's offset plus sizeof() of the previous field's type; no padding is added anywhere in the chain.
267
- static const uint32_t STRUCT_SIZE = 0 ; // offset 0
268
-
269
- static const uint32_t VERSION = STRUCT_SIZE + sizeof (uint8_t ); // offset 1
270
-
271
- static const uint32_t NUM_WORK_DIM = VERSION + sizeof (uint8_t ); // offset 2
272
-
273
- static const uint32_t SIMDSIZE = NUM_WORK_DIM + sizeof (uint8_t ); // offset 3
274
-
275
- static const uint32_t LOCAL_SIZES = SIMDSIZE + sizeof (uint8_t ); // offset 4; start of the three 32-bit local sizes
276
- static const uint32_t LOCAL_SIZE_X = LOCAL_SIZES; // offset 4 (alias of LOCAL_SIZES)
277
- static const uint32_t LOCAL_SIZE_Y = LOCAL_SIZE_X + sizeof (uint32_t ); // offset 8
278
- static const uint32_t LOCAL_SIZE_Z = LOCAL_SIZE_Y + sizeof (uint32_t ); // offset 12
279
-
280
- static const uint32_t GLOBAL_SIZES = LOCAL_SIZE_Z + sizeof (uint32_t ); // offset 16; start of the three 64-bit global sizes
281
- static const uint32_t GLOBAL_SIZE_X = GLOBAL_SIZES; // offset 16 (alias of GLOBAL_SIZES)
282
- static const uint32_t GLOBAL_SIZE_Y = GLOBAL_SIZE_X + sizeof (uint64_t ); // offset 24
283
- static const uint32_t GLOBAL_SIZE_Z = GLOBAL_SIZE_Y + sizeof (uint64_t ); // offset 32
284
-
285
- static const uint32_t PRINTF_BUFFER = GLOBAL_SIZE_Z + sizeof (uint64_t ); // offset 40
286
-
287
- static const uint32_t GLOBAL_OFFSETS = PRINTF_BUFFER + sizeof (uint64_t ); // offset 48; start of the three 64-bit global offsets
288
- static const uint32_t GLOBAL_OFFSET_X = GLOBAL_OFFSETS; // offset 48 (alias of GLOBAL_OFFSETS)
289
- static const uint32_t GLOBAL_OFFSET_Y = GLOBAL_OFFSET_X + sizeof (uint64_t ); // offset 56
290
- static const uint32_t GLOBAL_OFFSET_Z = GLOBAL_OFFSET_Y + sizeof (uint64_t ); // offset 64
291
-
292
- static const uint32_t LOCAL_IDS = GLOBAL_OFFSET_Z + sizeof (uint64_t ); // offset 72
293
-
294
- static const uint32_t GROUP_COUNTS = LOCAL_IDS + sizeof (uint64_t ); // offset 80; start of the three 32-bit group counts
295
- static const uint32_t GROUP_COUNT_X = GROUP_COUNTS; // offset 80 (alias of GROUP_COUNTS)
296
- static const uint32_t GROUP_COUNT_Y = GROUP_COUNT_X + sizeof (uint32_t ); // offset 84
297
- static const uint32_t GROUP_COUNT_Z = GROUP_COUNT_Y + sizeof (uint32_t ); // offset 88
298
-
299
- static const uint32_t TOTAL_SIZE = GROUP_COUNT_Z + sizeof (uint32_t ); // 92: one past the last group-count field
300
- };
301
-
302
282
llvm::Value* LowerImplicitArgIntrinsics::BuildLoadInst (llvm::CallInst& CI, unsigned int Offset, llvm::Type* DataType)
303
283
{
304
284
// This function computes type aligned address that includes Offset.
@@ -357,9 +337,9 @@ llvm::Value* LowerImplicitArgIntrinsics::BuildLoadInst(llvm::CallInst& CI, unsig
357
337
358
338
if (Offset != AlignedOffset)
359
339
{
360
- auto ByteType = Type::getInt8Ty (Builder.getContext () );
361
- auto BitCastToByte = Builder.CreateBitCast (LoadedData, ByteType );
362
- Value* NewVector = UndefValue::get (IGCLLVM::FixedVectorType::get (ByteType , Size));
340
+ auto ByteVectorType = IGCLLVM::FixedVectorType::get (Builder.getInt8Ty (), LoadByteSize );
341
+ auto BitCastToByte = Builder.CreateBitCast (LoadedData, ByteVectorType );
342
+ Value* NewVector = UndefValue::get (IGCLLVM::FixedVectorType::get (Builder. getInt8Ty () , Size));
363
343
for (unsigned int I = Offset; I != (Offset + Size); ++I)
364
344
{
365
345
auto Elem = Builder.CreateExtractElement (BitCastToByte, I - AlignedOffset);
@@ -790,7 +770,7 @@ void LowerImplicitArgIntrinsics::visitCallInst(CallInst& CI)
790
770
791
771
// Get Local ID Base Ptr
792
772
auto DataTypeI64 = Type::getInt64Ty (F->getParent ()->getContext ());
793
- unsigned int Offset = GLOBAL_STATE_FIELD_OFFSETS::LOCAL_IDS ;
773
+ unsigned int Offset = offsetof (implicit_args, local_id_table_ptr) ;
794
774
auto LocalIDBase = BuildLoadInst (CI, Offset, DataTypeI64);
795
775
796
776
// Get local thread id
@@ -842,7 +822,7 @@ void LowerImplicitArgIntrinsics::visitCallInst(CallInst& CI)
842
822
// Assume local size and enqueued local size are the same
843
823
auto ElemTypeD = Type::getInt32Ty (F->getParent ()->getContext ());
844
824
auto VecTyD = IGCLLVM::FixedVectorType::get (ElemTypeD, 3 );
845
- unsigned int Offset = GLOBAL_STATE_FIELD_OFFSETS::LOCAL_SIZE_X ;
825
+ unsigned int Offset = offsetof (implicit_args, local_size_x) ;
846
826
auto LoadInst = BuildLoadInst (CI, Offset, VecTyD);
847
827
V = LoadInst;
848
828
break ;
@@ -851,7 +831,7 @@ void LowerImplicitArgIntrinsics::visitCallInst(CallInst& CI)
851
831
{
852
832
// global_offset is loaded from PayloadHeader[0:2]
853
833
// currently there are no other uses for payload header.
854
- unsigned int Offset = GLOBAL_STATE_FIELD_OFFSETS::GLOBAL_OFFSET_X ;
834
+ unsigned int Offset = offsetof (implicit_args, global_offset_x) ;
855
835
auto ElemTypeD = Type::getInt32Ty (F->getParent ()->getContext ());
856
836
auto VecTyQ = IGCLLVM::FixedVectorType::get (Type::getInt64Ty (F->getParent ()->getContext ()), 3 );
857
837
auto LoadInst = BuildLoadInst (CI, Offset, VecTyQ);
@@ -868,7 +848,7 @@ void LowerImplicitArgIntrinsics::visitCallInst(CallInst& CI)
868
848
case GenISAIntrinsic::GenISA_getGlobalSize:
869
849
case GenISAIntrinsic::GenISA_getStageInGridSize:
870
850
{
871
- unsigned int Offset = GLOBAL_STATE_FIELD_OFFSETS::GLOBAL_SIZE_X ;
851
+ unsigned int Offset = offsetof (implicit_args, global_size_x) ;
872
852
auto VecTyQ = IGCLLVM::FixedVectorType::get (Type::getInt64Ty (F->getParent ()->getContext ()), 3 );
873
853
auto ElemTypeD = Type::getInt32Ty (F->getParent ()->getContext ());
874
854
auto LoadInst = BuildLoadInst (CI, Offset, VecTyQ);
@@ -886,15 +866,15 @@ void LowerImplicitArgIntrinsics::visitCallInst(CallInst& CI)
886
866
{
887
867
auto ElemTypeUD = Type::getInt32Ty (F->getParent ()->getContext ());
888
868
auto VecTyUD = IGCLLVM::FixedVectorType::get (ElemTypeUD, 3 );
889
- unsigned int Offset = GLOBAL_STATE_FIELD_OFFSETS::GROUP_COUNT_X ;
869
+ unsigned int Offset = offsetof (implicit_args, group_count_x) ;
890
870
auto LoadInst = BuildLoadInst (CI, Offset, VecTyUD);
891
871
V = LoadInst;
892
872
break ;
893
873
}
894
874
case GenISAIntrinsic::GenISA_getWorkDim:
895
875
{
896
876
unsigned int Size = 4 ;
897
- unsigned int Offset = GLOBAL_STATE_FIELD_OFFSETS::NUM_WORK_DIM / Size;
877
+ unsigned int Offset = offsetof (implicit_args, num_work_dim) / Size;
898
878
auto TypeUD = Type::getInt32Ty (F->getParent ()->getContext ());
899
879
auto LoadInst = BuildLoadInst (CI, Offset, TypeUD);
900
880
auto LShr = Builder.CreateLShr (LoadInst, (uint64_t )16 );
@@ -906,12 +886,20 @@ void LowerImplicitArgIntrinsics::visitCallInst(CallInst& CI)
906
886
{
907
887
// This function is invoked when expanding printf call to retrieve printf buffer ptr.
908
888
auto DataTypeI64 = Type::getInt64Ty (CI.getFunction ()->getParent ()->getContext ());
909
- unsigned int Offset = GLOBAL_STATE_FIELD_OFFSETS::PRINTF_BUFFER ;
889
+ unsigned int Offset = offsetof (implicit_args, printf_buffer_ptr) ;
910
890
auto Result = BuildLoadInst (CI, Offset, DataTypeI64);
911
891
Result = Builder.CreateIntToPtr (Result, CI.getType ());
912
892
V = Result;
913
893
break ;
914
894
}
895
+ case GenISAIntrinsic::GenISA_getRtGlobalBufferPtr:
896
+ {
897
+ unsigned int Offset = offsetof (implicit_args, rt_global_buffer_ptr);
898
+ auto Result = BuildLoadInst (CI, Offset, Builder.getInt64Ty ());
899
+ Result = Builder.CreateIntToPtr (Result, CI.getType ());
900
+ V = Result;
901
+ break ;
902
+ }
915
903
default :
916
904
break ;
917
905
}
0 commit comments