Commit 46bb485

docs(//cpp/ptq): Last comment cleanup
Signed-off-by: Naren Dasan <[email protected]>
Signed-off-by: Naren Dasan <[email protected]>
1 parent 6facbf1 commit 46bb485
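
This commit switches the inline comments in the PTQ example from // to /// (the triple-slash form that Doxygen-style tooling picks up), presumably so the walkthrough text in cpp/ptq/main.cpp can be pulled into the generated documentation; it also drops one stale commented-out line.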

File tree

1 file changed (+14, -15 lines)

cpp/ptq/main.cpp

Lines changed: 14 additions & 15 deletions
@@ -20,15 +20,15 @@ int main(int argc, const char* argv[]) {
 
     torch::jit::script::Module mod;
     try {
-        // Deserialize the ScriptModule from a file using torch::jit::load().
+        /// Deserialize the ScriptModule from a file using torch::jit::load().
         mod = torch::jit::load(argv[1]);
     }
     catch (const c10::Error& e) {
         std::cerr << "error loading the model\n";
         return -1;
     }
 
-    // Create the calibration dataset
+    /// Create the calibration dataset
     const std::string data_dir = std::string(argv[2]);
     auto calibration_dataset = datasets::CIFAR10(data_dir, datasets::CIFAR10::Mode::kTest)
                                    .use_subset(320)
@@ -42,24 +42,23 @@ int main(int argc, const char* argv[]) {
     std::string calibration_cache_file = "/tmp/vgg16_TRT_ptq_calibration.cache";
 
     auto calibrator = trtorch::ptq::make_int8_calibrator(std::move(calibration_dataloader), calibration_cache_file, true);
-    //auto calibrator = trtorch::ptq::make_int8_cache_calibrator(calibration_cache_file);
 
 
     std::vector<std::vector<int64_t>> input_shape = {{32, 3, 32, 32}};
-    // Configure settings for compilation
+    /// Configure settings for compilation
     auto extra_info = trtorch::ExtraInfo({input_shape});
-    // Set operating precision to INT8
+    /// Set operating precision to INT8
     extra_info.op_precision = torch::kI8;
-    // Use the TensorRT Entropy Calibrator
+    /// Use the TensorRT Entropy Calibrator
     extra_info.ptq_calibrator = calibrator;
-    // Set max batch size for the engine
+    /// Set max batch size for the engine
     extra_info.max_batch_size = 32;
-    // Set a larger workspace
+    /// Set a larger workspace
     extra_info.workspace_size = 1 << 28;
 
     mod.eval();
 
-    // Dataloader moved into calibrator so need another for inference
+    /// Dataloader moved into calibrator so need another for inference
     auto eval_dataset = datasets::CIFAR10(data_dir, datasets::CIFAR10::Mode::kTest)
                             .map(torch::data::transforms::Normalize<>({0.4914, 0.4822, 0.4465},
                                                                       {0.2023, 0.1994, 0.2010}))
@@ -68,7 +67,7 @@ int main(int argc, const char* argv[]) {
                                 .batch_size(32)
                                 .workers(2));
 
-    // Check the FP32 accuracy in JIT
+    /// Check the FP32 accuracy in JIT
     float correct = 0.0, total = 0.0;
     for (auto batch : *eval_dataloader) {
         auto images = batch.data.to(torch::kCUDA);
@@ -82,19 +81,19 @@ int main(int argc, const char* argv[]) {
     }
     std::cout << "Accuracy of JIT model on test set: " << 100 * (correct / total) << "%" << std::endl;
 
-    // Compile Graph
+    /// Compile Graph
    std::cout << "Compiling and quantizing module" << std::endl;
    auto trt_mod = trtorch::CompileGraph(mod, extra_info);
 
-    // Check the INT8 accuracy in TRT
+    /// Check the INT8 accuracy in TRT
     correct = 0.0;
     total = 0.0;
     for (auto batch : *eval_dataloader) {
         auto images = batch.data.to(torch::kCUDA);
         auto targets = batch.target.to(torch::kCUDA);
 
         if (images.sizes()[0] < 32) {
-            // To handle smaller batches util Optimization profiles work with Int8
+            /// To handle smaller batches util Optimization profiles work with Int8
             auto diff = 32 - images.sizes()[0];
             auto img_padding = torch::zeros({diff, 3, 32, 32}, {torch::kCUDA});
             auto target_padding = torch::zeros({diff}, {torch::kCUDA});
@@ -107,7 +106,7 @@ int main(int argc, const char* argv[]) {
         predictions = predictions.reshape(predictions.sizes()[0]);
 
         if (predictions.sizes()[0] != targets.sizes()[0]) {
-            // To handle smaller batches util Optimization profiles work with Int8
+            /// To handle smaller batches util Optimization profiles work with Int8
             predictions = predictions.slice(0, 0, targets.sizes()[0]);
         }
 
@@ -116,7 +115,7 @@ int main(int argc, const char* argv[]) {
     }
     std::cout << "Accuracy of quantized model on test set: " << 100 * (correct / total) << "%" << std::endl;
 
-    // Time execution in INT8
+    /// Time execution in JIT-FP32 and TRT-INT8
     auto execution_timer = timers::PreciseCPUTimer();
     auto images = (*(*eval_dataloader).begin()).data.to(torch::kCUDA);
 
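
The only line deleted outright (second hunk) is the commented-out cache-based calibrator. For context, a minimal sketch of what that alternative path might look like on a second run, built only from the trtorch::ptq and trtorch calls visible in this diff; the #include paths and the reuse of mod are assumptions, not part of the commit:

// Sketch, not part of the commit: reuse a previously written calibration
// cache instead of re-running calibration over the CIFAR10 dataloader.
#include "trtorch/ptq.h"      // header paths assumed
#include "trtorch/trtorch.h"

std::string calibration_cache_file = "/tmp/vgg16_TRT_ptq_calibration.cache";
/// Cache-only calibrator: reads stored scales, needs no dataloader
auto calibrator = trtorch::ptq::make_int8_cache_calibrator(calibration_cache_file);

std::vector<std::vector<int64_t>> input_shape = {{32, 3, 32, 32}};
auto extra_info = trtorch::ExtraInfo({input_shape});
extra_info.op_precision = torch::kI8;   /// compile for INT8
extra_info.ptq_calibrator = calibrator; /// hand TensorRT the cached scales
extra_info.max_batch_size = 32;
extra_info.workspace_size = 1 << 28;
/// mod is the torch::jit::script::Module loaded earlier in main.cpp
auto trt_mod = trtorch::CompileGraph(mod, extra_info);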

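Hunks 4 and 5 both touch the workaround for batches smaller than max_batch_size: the engine is built for a fixed batch of 32 (INT8 did not yet work with optimization profiles), so short batches are zero-padded on the way in and the extra predictions sliced off on the way out. The torch::cat and forward steps sit outside the diff context; a plausible completion of the pattern, with those lines assumed:

/// Sketch around the diff context; the torch::cat and forward calls are
/// assumptions (they are not shown in this commit).
if (images.sizes()[0] < 32) {
    /// Zero-pad the batch up to the fixed engine batch size of 32
    auto diff = 32 - images.sizes()[0];
    auto img_padding = torch::zeros({diff, 3, 32, 32}, {torch::kCUDA});
    auto target_padding = torch::zeros({diff}, {torch::kCUDA});
    images = torch::cat({images, img_padding}, 0);
    targets = torch::cat({targets, target_padding}, 0);
}

auto outputs = trt_mod.forward({images});
auto predictions = std::get<1>(torch::max(outputs.toTensor(), 1, false));
predictions = predictions.reshape(predictions.sizes()[0]);

if (predictions.sizes()[0] != targets.sizes()[0]) {
    /// Drop the predictions that correspond to the zero padding
    predictions = predictions.slice(0, 0, targets.sizes()[0]);
}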