@@ -156,8 +156,8 @@ void OpenCLPrintfResolution::visitCallInst(CallInst& callInst)
156
156
}
157
157
158
158
StringRef funcName = callInst.getCalledFunction ()->getName ();
159
- if (funcName == OpenCLPrintfAnalysis::OPENCL_PRINTF_FUNCTION_NAME)
160
- {
159
+ if (funcName == OpenCLPrintfAnalysis::OPENCL_PRINTF_FUNCTION_NAME ||
160
+ funcName == OpenCLPrintfAnalysis::BUILTIN_PRINTF_FUNCTION_NAME) {
161
161
m_printfCalls.push_back (&callInst);
162
162
}
163
163
}
@@ -358,12 +358,18 @@ std::string OpenCLPrintfResolution::getPrintfStringsMDNodeName(Function& F)
358
358
return " printf.strings" ;
359
359
}
360
360
361
- static StoreInst* genStoreInternal (Value* Val, Value* Ptr, BasicBlock* InsertAtEnd, DebugLoc DL)
361
+ static StoreInst* genStoreInternal (Value* Val, Value* Ptr, BasicBlock* InsertAtEnd, DebugLoc DL, bool isNontemporal )
362
362
{
363
363
bool isVolatile = false ;
364
364
unsigned Align = 4 ;
365
365
auto SI = new llvm::StoreInst (Val, Ptr, isVolatile, IGCLLVM::getCorrectAlign (Align), InsertAtEnd);
366
366
SI->setDebugLoc (DL);
367
+ if (isNontemporal) {
368
+ Constant *One = ConstantInt::get (Type::getInt32Ty (SI->getContext ()), 1 );
369
+ MDNode *Node =
370
+ MDNode::get (SI->getContext (), ConstantAsMetadata::get (One));
371
+ SI->setMetadata (LLVMContext::MD_nontemporal, Node);
372
+ }
367
373
return SI;
368
374
}
369
375
@@ -419,10 +425,23 @@ void OpenCLPrintfResolution::expandPrintfCall(CallInst& printfCall, Function& F)
419
425
// printf returns -1 if failed |
420
426
return_val = -1; /
421
427
}
428
+
429
+ We also support printf to any provided buffer.
430
+ This is done with a special builtin with the following signature:
431
+ int __builtin_IB_printf_to_buffer(global char* buf, global char* currentOffset, int bufSize, ...);
432
+ buf - pointer to the beginning of the buffer.
433
+ currentOffset - pointer to the location with the current offset that will be atomically incremented.
434
+ In the case of regular printf this offset is on the first DWORD of printfBuffer.
435
+ E.g. in the assert buffer it is on the third DWORD.
436
+ bufSize - total size of the buffer.
437
+ Note: in the case of builtin printf, all the stores will be nontemporal.
438
+
439
+
422
440
----------------------------------------------------------------------
423
441
*/
424
442
MetaDataUtils* MdUtils = getAnalysis<MetaDataUtilsWrapper>().getMetaDataUtils ();
425
443
ImplicitArgs implicitArgs (F, MdUtils);
444
+ bool isPrintfBuiltin = OpenCLPrintfAnalysis::isBuiltinPrintf (printfCall.getCalledFunction ());
426
445
427
446
BasicBlock* currentBBlock = printfCall.getParent ();
428
447
@@ -431,10 +450,14 @@ void OpenCLPrintfResolution::expandPrintfCall(CallInst& printfCall, Function& F)
431
450
preprocessPrintfArgs (printfCall);
432
451
433
452
// writeOffset = atomic_add(bufferPtr, dataSize)
434
- Value* basebufferPtr = implicitArgs.getImplicitArgValue (F, ImplicitArg::PRINTF_BUFFER, MdUtils);
453
+ Value *basebufferPtr = isPrintfBuiltin
454
+ ? printfCall.getArgOperand (0 )
455
+ : implicitArgs.getImplicitArgValue (
456
+ F, ImplicitArg::PRINTF_BUFFER, MdUtils);
435
457
436
458
Value* dataSizeVal = ConstantInt::get (m_int32Type, getTotalDataSize ());
437
- Instruction* writeOffsetStart = genAtomicAdd (basebufferPtr, dataSizeVal, printfCall, " write_offset" );
459
+ Value* currentOffsetPtr = isPrintfBuiltin ? printfCall.getArgOperand (1 ) : basebufferPtr;
460
+ Instruction* writeOffsetStart = genAtomicAdd (currentOffsetPtr, dataSizeVal, printfCall, " write_offset" );
438
461
writeOffsetStart->setDebugLoc (m_DL);
439
462
440
463
Instruction* writeOffset = writeOffsetStart;
@@ -444,7 +467,11 @@ void OpenCLPrintfResolution::expandPrintfCall(CallInst& printfCall, Function& F)
444
467
Instruction* endOffset = BinaryOperator::CreateAdd (writeOffset, dataSizeVal, " end_offset" , &printfCall);
445
468
endOffset->setDebugLoc (m_DL);
446
469
447
- Value* bufferMaxSize = ConstantInt::get (m_int32Type, m_CGContext->m_DriverInfo .getPrintfBufferSize ());
470
+ Value *bufferMaxSize =
471
+ isPrintfBuiltin
472
+ ? printfCall.getArgOperand (2 )
473
+ : ConstantInt::get (m_int32Type,
474
+ m_CGContext->m_DriverInfo .getPrintfBufferSize ());
448
475
449
476
// write_ptr = buffer_ptr + write_offset;
450
477
if (m_ptrSizeIntType != writeOffset->getType ())
@@ -509,7 +536,7 @@ void OpenCLPrintfResolution::expandPrintfCall(CallInst& printfCall, Function& F)
509
536
writeOffsetPtr = CastInst::Create (Instruction::CastOps::IntToPtr, writeOffset,
510
537
m_int32Type->getPointerTo (ADDRESS_SPACE_GLOBAL), " write_offset_ptr" , bblockTrue);
511
538
writeOffsetPtr->setDebugLoc (m_DL);
512
- genStoreInternal (argTypeVal, writeOffsetPtr, bblockTrue, m_DL);
539
+ genStoreInternal (argTypeVal, writeOffsetPtr, bblockTrue, m_DL, isPrintfBuiltin );
513
540
514
541
// write_offset += 4
515
542
writeOffset = BinaryOperator::CreateAdd (writeOffset, constVal4, " write_offset" , bblockTrue);
@@ -521,7 +548,7 @@ void OpenCLPrintfResolution::expandPrintfCall(CallInst& printfCall, Function& F)
521
548
writeOffsetPtr = CastInst::Create (Instruction::CastOps::IntToPtr, writeOffset,
522
549
m_int32Type->getPointerTo (ADDRESS_SPACE_GLOBAL), " write_offset_ptr" , bblockTrue);
523
550
writeOffsetPtr->setDebugLoc (m_DL);
524
- genStoreInternal (vecSizeVal, writeOffsetPtr, bblockTrue, m_DL);
551
+ genStoreInternal (vecSizeVal, writeOffsetPtr, bblockTrue, m_DL, isPrintfBuiltin );
525
552
526
553
// write_offset += 4
527
554
writeOffset = BinaryOperator::CreateAdd (writeOffset, constVal4, " write_offset" , bblockTrue);
@@ -542,7 +569,7 @@ void OpenCLPrintfResolution::expandPrintfCall(CallInst& printfCall, Function& F)
542
569
}
543
570
544
571
// *write_offset = argument[i].value
545
- genStoreInternal (printfArg, writeOffsetPtr, bblockTrue, m_DL);
572
+ genStoreInternal (printfArg, writeOffsetPtr, bblockTrue, m_DL, isPrintfBuiltin );
546
573
547
574
// write_offset += argument[i].size
548
575
Value* offsetInc = ConstantInt::get (m_ptrSizeIntType, getArgTypeSize (dataType, argDesc->vecSize ));
@@ -582,7 +609,7 @@ void OpenCLPrintfResolution::expandPrintfCall(CallInst& printfCall, Function& F)
582
609
" write_offset_ptr" ,
583
610
bblockErrorString);
584
611
writeOffsetPtr->setDebugLoc (m_DL);
585
- genStoreInternal (constValErrStringIdx, writeOffsetPtr, bblockErrorString, m_DL);
612
+ genStoreInternal (constValErrStringIdx, writeOffsetPtr, bblockErrorString, m_DL, isPrintfBuiltin );
586
613
brInst = BranchInst::Create (bblockFalseJoin, bblockErrorString);
587
614
brInst->setDebugLoc (m_DL);
588
615
@@ -680,7 +707,13 @@ Value* OpenCLPrintfResolution::fixupPrintfArg(CallInst& printfCall, Value* arg,
680
707
681
708
void OpenCLPrintfResolution::preprocessPrintfArgs (CallInst& printfCall)
682
709
{
683
- for (int i = 0 , numArgs = IGCLLVM::getNumArgOperands (&printfCall); i < numArgs; ++i)
710
+ int i = 0 ;
711
+ if (OpenCLPrintfAnalysis::isBuiltinPrintf (printfCall.getCalledFunction ())) {
712
+ // The printf builtin function takes the buffer pointer, the current offset pointer and the buffer size as its first three arguments.
713
+ // Skip them here, as we want to collect the arguments starting from the format string.
714
+ i = 3 ;
715
+ }
716
+ for (int numArgs = IGCLLVM::getNumArgOperands (&printfCall); i < numArgs; ++i)
684
717
{
685
718
Value* arg = printfCall.getOperand (i);
686
719
Type* argType = arg->getType ();
0 commit comments