@@ -98,10 +98,6 @@ class ModelState : public BackendModel {
     return enable_jit_executor_pair_;
   }
   bool EnabledInferenceMode() { return enable_inference_mode_; }
-  const std::pair<bool, bool>& EnabledNvfuserPair() const
-  {
-    return enable_nvfuser_pair_;
-  }
   bool EnabledCacheCleaning() { return enable_cache_cleaning_; }

   bool EnabledWeightSharing() { return enable_weight_sharing_; }
@@ -132,16 +128,11 @@ class ModelState : public BackendModel {

   // Flag pairs to indicate if various JIT settings are set and
   // enabled respectively. Defaults to (false, true). Default behavior
-  // is to do nothing if not explicitly set. Tensor fuser flag is
-  // ignored if nvfuser is explicitly set.
+  // is to do nothing if not explicitly set.
   std::pair<bool, bool> enable_tensor_fuser_pair_;
   std::pair<bool, bool> enable_jit_profiling_pair_;
   std::pair<bool, bool> enable_jit_executor_pair_;

-  // Flag pair to indicate whether nvfuser is set and enabled respectively.
-  // Defaults to (false, false).
-  std::pair<bool, bool> enable_nvfuser_pair_;
-
   // Model mapping for shared TorchScript model across all instances on the
   // same device. The key is a pair of isGPU and device index.
   std::map<
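
(Editorial note, not part of the diff: the (is_set, value) pairs above let the backend distinguish "parameter absent from the model config" from "parameter explicitly set to false". A minimal standalone sketch of that convention, with hypothetical names, assuming the {false, true} default documented in the comment:)

    #include <iostream>
    #include <utility>

    // Illustration of the (is_set, value) flag-pair convention:
    // first = whether the parameter appeared in the config,
    // second = the requested value. The default {false, true} means
    // "not set; would enable the feature if it were applied".
    static std::pair<bool, bool> enable_tensor_fuser_pair{false, true};

    void ApplySetting()
    {
      // Only act when the parameter was explicitly set, mirroring the
      // "do nothing if not explicitly set" behavior described above.
      if (enable_tensor_fuser_pair.first) {
        std::cout << "fuser explicitly "
                  << (enable_tensor_fuser_pair.second ? "enabled" : "disabled")
                  << "\n";
      } else {
        std::cout << "fuser not configured; leaving defaults\n";
      }
    }

    int main()
    {
      ApplySetting();                            // not configured
      enable_tensor_fuser_pair = {true, false};  // explicitly disabled
      ApplySetting();
    }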
@@ -233,8 +224,7 @@ ModelState::ModelState(TRITONBACKEND_Model* triton_model)
       enable_inference_mode_(true), enable_cache_cleaning_(false),
       enable_weight_sharing_(false), enable_tensor_fuser_pair_({false, true}),
       enable_jit_profiling_pair_({false, true}),
-      enable_jit_executor_pair_({false, true}),
-      enable_nvfuser_pair_({false, false})
+      enable_jit_executor_pair_({false, true})
 {
 }
@@ -475,29 +465,6 @@ ModelState::ParseParameters()
               " for model instance '" + Name() + "'")
              .c_str());
    }
-
-    // If 'ENABLE_NVFUSER' is not present in 'parameters' then no
-    // update is made to 'enable_nvfuser'.
-    bool enable_nvfuser = false;
-    err = ParseParameter(params, "ENABLE_NVFUSER", &enable_nvfuser);
-    if (err != nullptr) {
-      if (TRITONSERVER_ErrorCode(err) != TRITONSERVER_ERROR_NOT_FOUND) {
-        return err;
-      } else {
-        LOG_MESSAGE(
-            TRITONSERVER_LOG_INFO, (std::string("NvFuser is not specified") +
-                                    " for model instance '" + Name() + "'")
-                                       .c_str());
-        TRITONSERVER_ErrorDelete(err);
-      }
-    } else {
-      enable_nvfuser_pair_ = {true, enable_nvfuser};
-      LOG_MESSAGE(
-          TRITONSERVER_LOG_INFO, (std::string("NvFuser is ") +
-                                  (enable_nvfuser ? "enabled" : "disabled") +
-                                  " for model instance '" + Name() + "'")
-                                     .c_str());
-    }
  }

  return nullptr;
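
(Editorial note, not part of the diff: the deleted block followed the backend's standard pattern for optional parameters: treat TRITONSERVER_ERROR_NOT_FOUND as "keep the default", propagate any other error, and record {true, value} on success. A self-contained sketch of that control flow, with the Triton types replaced by illustrative stand-ins so it compiles on its own; every name here is hypothetical:)

    #include <iostream>
    #include <string>
    #include <utility>

    // Stand-in for the TRITONSERVER error codes (illustrative only).
    enum class ErrCode { kOk, kNotFound, kInvalidArg };

    // Hypothetical parser: reports kNotFound when the key is absent.
    ErrCode ParseBoolParam(const std::string& key, bool* value)
    {
      // Pretend only "ENABLE_TENSOR_FUSER" is present in the config.
      if (key != "ENABLE_TENSOR_FUSER") {
        return ErrCode::kNotFound;
      }
      *value = true;
      return ErrCode::kOk;
    }

    // Mirrors the removed ENABLE_NVFUSER flow: a missing parameter keeps
    // the default pair, a real error propagates, success records
    // {true, value}.
    ErrCode ApplyOptionalFlag(
        const std::string& key, std::pair<bool, bool>* flag_pair)
    {
      bool value = false;
      ErrCode err = ParseBoolParam(key, &value);
      if (err != ErrCode::kOk) {
        if (err != ErrCode::kNotFound) {
          return err;  // unexpected failure: propagate to the caller
        }
        std::cout << key << " is not specified\n";  // keep default
      } else {
        *flag_pair = {true, value};
        std::cout << key << " is " << (value ? "enabled" : "disabled")
                  << "\n";
      }
      return ErrCode::kOk;
    }

    int main()
    {
      std::pair<bool, bool> fuser{false, true};
      ApplyOptionalFlag("ENABLE_TENSOR_FUSER", &fuser);
      ApplyOptionalFlag("ENABLE_NVFUSER", &fuser);  // absent: default kept
    }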
@@ -1552,34 +1519,13 @@ ModelInstanceState::Execute(
        std::get<1>(model_state_->EnabledJitExecutor());
  }

-  // Fuser. Parameter is ignored if NVFuser parameter is explicitly
-  // set (either enabled or disabled). No change is made unless
-  // fuser is explicitly set in parameters.
-  if (!std::get<0>(model_state_->EnabledNvfuserPair()) &&
-      std::get<0>(model_state_->EnabledTensorExprFuser())) {
+  // Fuser. No change is made unless fuser is explicitly set in
+  // parameters.
+  if (std::get<0>(model_state_->EnabledTensorExprFuser())) {
    torch::jit::setTensorExprFuserEnabled(
        std::get<1>(model_state_->EnabledTensorExprFuser()));
  }

-  // NV-Fuser. No change is made unless parameter is explicitly set.
-  if (std::get<0>(model_state_->EnabledNvfuserPair())) {
-    bool is_device_gpu =
-        (device_.is_cuda() ||
-         ((Kind() == TRITONSERVER_INSTANCEGROUPKIND_MODEL) &&
-          (device_cnt_ > 0)));
-    if (std::get<1>(model_state_->EnabledNvfuserPair()) && is_device_gpu) {
-      torch::jit::overrideCanFuseOnCPU(false);
-      torch::jit::overrideCanFuseOnGPU(false);
-      torch::jit::setTensorExprFuserEnabled(false);
-      torch::jit::fuser::cuda::setEnabled(true);
-    } else {
-      torch::jit::overrideCanFuseOnCPU(true);
-      torch::jit::overrideCanFuseOnGPU(true);
-      torch::jit::setTensorExprFuserEnabled(true);
-      torch::jit::fuser::cuda::setEnabled(false);
-    }
-  }
-
  torch::NoGradGuard no_grad;

  // If input is a dictionary, prepare dictionary from 'input_tensors'.
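
(Editorial note, not part of the diff: the removed branch made the two fusers mutually exclusive: enabling NvFuser on a GPU-capable instance first turned off the TensorExpr fuser and both fallback fusers, then enabled the CUDA fuser, while the else branch restored the defaults. A condensed sketch of that logic using the same torch::jit calls the deleted code used; the function name is hypothetical, header paths vary by libtorch version, and it only links against a libtorch build that still ships the NvFuser interface:)

    #include <torch/csrc/jit/codegen/cuda/interface.h>   // fuser::cuda::setEnabled
    #include <torch/csrc/jit/codegen/fuser/interface.h>  // overrideCanFuseOn*
    #include <torch/csrc/jit/passes/tensorexpr_fuser.h>  // setTensorExprFuserEnabled

    // Condensed restatement of the removed NvFuser toggle (hypothetical
    // helper; the torch::jit calls are the ones the deleted code used).
    void SetNvFuser(bool enable_nvfuser_on_gpu)
    {
      if (enable_nvfuser_on_gpu) {
        // NvFuser takes over: disable the competing fusers first.
        torch::jit::overrideCanFuseOnCPU(false);
        torch::jit::overrideCanFuseOnGPU(false);
        torch::jit::setTensorExprFuserEnabled(false);
        torch::jit::fuser::cuda::setEnabled(true);
      } else {
        // Restore the default fusing behavior.
        torch::jit::overrideCanFuseOnCPU(true);
        torch::jit::overrideCanFuseOnGPU(true);
        torch::jit::setTensorExprFuserEnabled(true);
        torch::jit::fuser::cuda::setEnabled(false);
      }
    }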