Commit 48e2e29

mc-nv and tanmayv25 authored

Update main branch post-23.12 release (#121)

* Fix library list (#118)

  Remove nvfuser header

  Co-authored-by: Misha Chornyi <[email protected]>

* Remove nvfuser implementation (#119)

---------

Co-authored-by: Tanmay Verma <[email protected]>

1 parent 304c2e8 · commit 48e2e29

4 files changed: +5 additions, −84 deletions

CMakeLists.txt

Lines changed: 0 additions & 2 deletions

```diff
@@ -138,7 +138,6 @@ set(PT_LIBS
     "libtorch_cuda.so"
     "libtorch_cuda_linalg.so"
     "libtorch_global_deps.so"
-    "libnvfuser_codegen.so"
 )
 
 if (${TRITON_PYTORCH_ENABLE_TORCHVISION})
@@ -214,7 +213,6 @@ if (${TRITON_PYTORCH_DOCKER_BUILD})
     COMMAND docker cp pytorch_backend_ptlib:/usr/local/lib/python3.10/dist-packages/torch/lib/libtorch_cpu.so libtorch_cpu.so
     COMMAND docker cp pytorch_backend_ptlib:/usr/local/lib/python3.10/dist-packages/torch/lib/libtorch_cuda.so libtorch_cuda.so
     COMMAND docker cp pytorch_backend_ptlib:/usr/local/lib/python3.10/dist-packages/torch/lib/libtorch_cuda_linalg.so libtorch_cuda_linalg.so
-    COMMAND docker cp pytorch_backend_ptlib:/usr/local/lib/python3.10/dist-packages/torch/lib/libnvfuser_codegen.so libnvfuser_codegen.so
     COMMAND docker cp pytorch_backend_ptlib:/usr/local/lib/python3.10/dist-packages/torch/lib/libtorch_global_deps.so libtorch_global_deps.so
     COMMAND docker cp pytorch_backend_ptlib:/usr/local/lib/python3.10/dist-packages/torch/lib/libcaffe2_nvrtc.so libcaffe2_nvrtc.so
     COMMAND docker cp pytorch_backend_ptlib:/usr/local/lib/libtorchvision.so libtorchvision.so
```

README.md

Lines changed: 0 additions & 22 deletions

````diff
@@ -144,26 +144,6 @@ key: "INFERENCE_MODE"
 }
 ```
 
-* `ENABLE_NVFUSER`: Boolean flag to enable the NvFuser (CUDA Graph
-  Fuser) optimization for TorchScript models. If not specified, the
-  default PyTorch fuser is used. If `ENABLE_NVFUSER` is specified, the
-  `ENABLE_TENSOR_FUSER` configuration (see below) is ignored.
-
-  Please note that in some models generated using trace in old PyTorch versions might not work
-  correctly with NvFuser. We recommend using scripting and a recent version of PyTorch
-  to generate these models.
-
-  The section of model config file specifying this parameter will look like:
-
-```
-parameters: {
-key: "ENABLE_NVFUSER"
-    value: {
-    string_value: "true"
-    }
-}
-```
-
 * `ENABLE_WEIGHT_SHARING`: Boolean flag to enable model instances on the same device to
   share weights. This optimization should not be used with stateful models. If not specified,
   weight sharing is disabled.
@@ -204,8 +184,6 @@ complex execution modes and dynamic shapes. If not specified, all are enabled by
 
 `ENABLE_JIT_PROFILING`
 
-`ENABLE_TENSOR_FUSER`
-
 ### Support
 
 #### Model Instance Group Kind
````
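For reference, the boolean parameters that remain after this change keep the same model-config shape as the `INFERENCE_MODE` example retained above. A minimal sketch for `ENABLE_JIT_PROFILING` (the value shown is illustrative, not a recommended default):

```
parameters: {
key: "ENABLE_JIT_PROFILING"
    value: {
    string_value: "true"
    }
}
```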

src/libtorch.cc

Lines changed: 5 additions & 59 deletions

```diff
@@ -98,10 +98,6 @@ class ModelState : public BackendModel {
     return enable_jit_executor_pair_;
   }
   bool EnabledInferenceMode() { return enable_inference_mode_; }
-  const std::pair<bool, bool>& EnabledNvfuserPair() const
-  {
-    return enable_nvfuser_pair_;
-  }
   bool EnabledCacheCleaning() { return enable_cache_cleaning_; }
 
   bool EnabledWeightSharing() { return enable_weight_sharing_; }
@@ -132,16 +128,11 @@ class ModelState : public BackendModel {
 
   // Flag pairs to indicate if various JIT settings are set and
   // enabled respectively. Defaults to (false, true). Default behavior
-  // is to do nothing if not explicitly set. Tensor fuser flag is
-  // ignore if nvfuser is explicitly set.
+  // is to do nothing if not explicitly set.
   std::pair<bool, bool> enable_tensor_fuser_pair_;
   std::pair<bool, bool> enable_jit_profiling_pair_;
   std::pair<bool, bool> enable_jit_executor_pair_;
 
-  // Flag pair to indicate whether nvfuser is set and enabled respectively.
-  // Defaults to (false, false).
-  std::pair<bool, bool> enable_nvfuser_pair_;
-
   // Model mapping for shared TorchScript model across all instances on the
   // same device. The key is a pair of isGPU and device index.
   std::map<
@@ -233,8 +224,7 @@ ModelState::ModelState(TRITONBACKEND_Model* triton_model)
       enable_inference_mode_(true), enable_cache_cleaning_(false),
       enable_weight_sharing_(false), enable_tensor_fuser_pair_({false, true}),
       enable_jit_profiling_pair_({false, true}),
-      enable_jit_executor_pair_({false, true}),
-      enable_nvfuser_pair_({false, false})
+      enable_jit_executor_pair_({false, true})
 {
 }
 
@@ -475,29 +465,6 @@ ModelState::ParseParameters()
                                 " for model instance '" + Name() + "'")
                                    .c_str());
     }
-
-    // If 'ENABLE_NVFUSER' is not present in 'parameters' then no
-    // update is made to 'enable_nvfuser'.
-    bool enable_nvfuser = false;
-    err = ParseParameter(params, "ENABLE_NVFUSER", &enable_nvfuser);
-    if (err != nullptr) {
-      if (TRITONSERVER_ErrorCode(err) != TRITONSERVER_ERROR_NOT_FOUND) {
-        return err;
-      } else {
-        LOG_MESSAGE(
-            TRITONSERVER_LOG_INFO, (std::string("NvFuser is not specified") +
-                                    " for model instance '" + Name() + "'")
-                                       .c_str());
-        TRITONSERVER_ErrorDelete(err);
-      }
-    } else {
-      enable_nvfuser_pair_ = {true, enable_nvfuser};
-      LOG_MESSAGE(
-          TRITONSERVER_LOG_INFO, (std::string("NvFuser is ") +
-                                  (enable_nvfuser ? "enabled" : "disabled") +
-                                  " for model instance '" + Name() + "'")
-                                     .c_str());
-    }
   }
 
   return nullptr;
```
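The deleted block followed the backend's usual parse-or-skip convention, which the surviving flags (such as `ENABLE_TENSOR_FUSER`) still use: each setting is stored as a (set, enabled) pair, and an absent parameter leaves the default untouched. A minimal self-contained sketch of that convention, with a hypothetical `ParseBoolParameter` standing in for the backend's `ParseParameter`/`TRITONSERVER_Error` plumbing:

```cpp
#include <iostream>
#include <map>
#include <optional>
#include <string>
#include <utility>

// Hypothetical stand-in for the backend's ParseParameter(): look up a
// boolean parameter by key; an absent key is reported as std::nullopt
// rather than as a TRITONSERVER_ERROR_NOT_FOUND error.
std::optional<bool> ParseBoolParameter(
    const std::map<std::string, std::string>& params, const std::string& key)
{
  auto it = params.find(key);
  if (it == params.end()) {
    return std::nullopt;  // parameter absent: caller keeps its default
  }
  return it->second == "true";
}

int main()
{
  // (set, enabled) pair defaulting to (false, true), the same convention
  // as enable_tensor_fuser_pair_: do nothing unless explicitly set.
  std::pair<bool, bool> enable_tensor_fuser_pair{false, true};

  const std::map<std::string, std::string> params = {
      {"ENABLE_TENSOR_FUSER", "false"}};

  if (auto value = ParseBoolParameter(params, "ENABLE_TENSOR_FUSER")) {
    enable_tensor_fuser_pair = {true, *value};
  }

  std::cout << "set=" << enable_tensor_fuser_pair.first
            << " enabled=" << enable_tensor_fuser_pair.second << '\n';
}
```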
```diff
@@ -1552,34 +1519,13 @@ ModelInstanceState::Execute(
         std::get<1>(model_state_->EnabledJitExecutor());
   }
 
-  // Fuser. Parameter is ignored if NVFuser parameter is explicitly
-  // set (either enabled or disabled). No change is made unless
-  // fuser is explicitly set in parameters.
-  if (!std::get<0>(model_state_->EnabledNvfuserPair()) &&
-      std::get<0>(model_state_->EnabledTensorExprFuser())) {
+  // Fuser. No change is made unless fuser is explicitly set in
+  // parameters.
+  if (std::get<0>(model_state_->EnabledTensorExprFuser())) {
     torch::jit::setTensorExprFuserEnabled(
         std::get<1>(model_state_->EnabledTensorExprFuser()));
   }
 
-  // NV-Fuser. No change is made unless parameter is explicitly set.
-  if (std::get<0>(model_state_->EnabledNvfuserPair())) {
-    bool is_device_gpu =
-        (device_.is_cuda() ||
-         ((Kind() == TRITONSERVER_INSTANCEGROUPKIND_MODEL) &&
-          (device_cnt_ > 0)));
-    if (std::get<1>(model_state_->EnabledNvfuserPair()) && is_device_gpu) {
-      torch::jit::overrideCanFuseOnCPU(false);
-      torch::jit::overrideCanFuseOnGPU(false);
-      torch::jit::setTensorExprFuserEnabled(false);
-      torch::jit::fuser::cuda::setEnabled(true);
-    } else {
-      torch::jit::overrideCanFuseOnCPU(true);
-      torch::jit::overrideCanFuseOnGPU(true);
-      torch::jit::setTensorExprFuserEnabled(true);
-      torch::jit::fuser::cuda::setEnabled(false);
-    }
-  }
-
   torch::NoGradGuard no_grad;
 
   // If input is a dictionary, prepare dictionary from 'input_tensors'.
```
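With the NvFuser branch removed, the fuser handling in `Execute()` reduces to a single global switch. A minimal sketch of the surviving logic (the free function `ApplyFuserSetting` is illustrative; the backend does this inline):

```cpp
#include <torch/csrc/jit/passes/tensorexpr_fuser.h>

#include <utility>

// Mirror of the simplified Execute() path: the global TensorExpr fuser
// switch is touched only when ENABLE_TENSOR_FUSER was explicitly set in
// the model configuration ((set, enabled) pair convention).
void ApplyFuserSetting(const std::pair<bool, bool>& enable_tensor_fuser_pair)
{
  if (enable_tensor_fuser_pair.first) {
    torch::jit::setTensorExprFuserEnabled(enable_tensor_fuser_pair.second);
  }
}
```

The removed branch additionally flipped `torch::jit::overrideCanFuseOnCPU`/`overrideCanFuseOnGPU` and `torch::jit::fuser::cuda::setEnabled`; none of those overrides are needed once NvFuser support is dropped.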

src/libtorch_utils.h

Lines changed: 0 additions & 1 deletion

```diff
@@ -35,7 +35,6 @@
 #pragma warning(push, 0)
 #include <torch/csrc/jit/codegen/cuda/interface.h>
 #include <torch/csrc/jit/codegen/fuser/interface.h>
-#include <torch/csrc/jit/passes/cuda_graph_fuser.h>
 #include <torch/csrc/jit/passes/tensorexpr_fuser.h>
 #include <torch/csrc/jit/runtime/graph_executor.h>
 #include <torch/script.h>  // One-stop header for TorchScript
```
