pytorch
diff --git a/‎.circleci/config.yml
Lines changed: 2 additions & 2 deletions b/‎.circleci/config.yml
Lines changed: 2 additions & 2 deletions
diff --git a/‎.pre-commit-config.yaml
Lines changed: 2 additions & 0 deletions b/‎.pre-commit-config.yaml
Lines changed: 2 additions & 0 deletions
diff --git a/‎README.md
Lines changed: 1 addition & 0 deletions b/‎README.md
Lines changed: 1 addition & 0 deletions
diff --git a/‎WORKSPACE
Lines changed: 10 additions & 10 deletions b/‎WORKSPACE
Lines changed: 10 additions & 10 deletions
diff --git a/‎core/conversion/converters/converter_util.cpp
Lines changed: 22 additions & 5 deletions b/‎core/conversion/converters/converter_util.cpp
Lines changed: 22 additions & 5 deletions
diff --git a/‎core/conversion/converters/converter_util.h
Lines changed: 9 additions & 3 deletions b/‎core/conversion/converters/converter_util.h
Lines changed: 9 additions & 3 deletions
diff --git a/‎core/conversion/converters/impl/cast.cpp
Lines changed: 4 additions & 4 deletions b/‎core/conversion/converters/impl/cast.cpp
Lines changed: 4 additions & 4 deletions
diff --git a/‎core/conversion/converters/impl/element_wise.cpp
Lines changed: 2 additions & 1 deletion b/‎core/conversion/converters/impl/element_wise.cpp
Lines changed: 2 additions & 1 deletion
diff --git a/‎core/conversion/converters/impl/reduce.cpp
Lines changed: 15 additions & 0 deletions b/‎core/conversion/converters/impl/reduce.cpp
Lines changed: 15 additions & 0 deletions
diff --git a/‎core/conversion/converters/impl/topk.cpp
Lines changed: 16 additions & 2 deletions b/‎core/conversion/converters/impl/topk.cpp
Lines changed: 16 additions & 2 deletions
diff --git a/‎cpp/CMakeLists.txt
Lines changed: 3 additions & 3 deletions b/‎cpp/CMakeLists.txt
Lines changed: 3 additions & 3 deletions
@@ -428,7 +428,7 @@ commands:
       - run:
           name: Test torch
           command: |
-            python3 -c "import torch; print(torch.cuda.is_available()); print(torch.cuda.device_count())"
+            python3 -c "import torch; print(torch.cuda.is_available()); print(torch.cuda.device_count()); print(torch.__version__)"
 
       - run:
           name: Get torch-tensorrt version information
@@ -988,7 +988,7 @@ parameters:
   # Nightly platform config
   torch-build:
     type: string
-    default: "1.13.0"
+    default: "1.14.0.dev20221114+cu117"
   torch-build-index:
     type: string
     default: "https://download.pytorch.org/whl/nightly/cu117"
 
@@ -6,6 +6,8 @@ repos:
       - id: check-yaml
       - id: trailing-whitespace
       - id: check-added-large-files
+        args:
+          - --maxkb=1000
       - id: check-vcs-permalinks
       - id: check-merge-conflict
       - id: mixed-line-ending
 
@@ -1,6 +1,7 @@
 # Torch-TensorRT
 
 [![Documentation](https://img.shields.io/badge/docs-master-brightgreen)](https://nvidia.github.io/Torch-TensorRT/)
+[![CircleCI](https://circleci.com/gh/pytorch/TensorRT.svg?style=svg)](https://app.circleci.com/pipelines/github/pytorch/TensorRT)
 
 > Ahead of Time (AOT) compiling for PyTorch JIT and FX
 
 
@@ -56,17 +56,17 @@ new_local_repository(
 http_archive(
     name = "libtorch",
     build_file = "@//third_party/libtorch:BUILD",
-    sha256 = "0a013dceedb252f4965b666a2ad772d962135597db5889bd5d43644697c17dbc",
+    sha256 = "b565c662435fd58ec295fa0791388ea52ad0f5fd33517b2d7c0fdcc91b6db531",
     strip_prefix = "libtorch",
-    urls = ["https://download.pytorch.org/libtorch/cu117/libtorch-cxx11-abi-shared-with-deps-1.13.0%2Bcu117.zip"],
+    urls = ["https://download.pytorch.org/libtorch/nightly/cu117/libtorch-cxx11-abi-shared-with-deps-1.14.0.dev20221114%2Bcu117.zip"],
 )
 
 http_archive(
     name = "libtorch_pre_cxx11_abi",
     build_file = "@//third_party/libtorch:BUILD",
-    sha256 = "cdbd43985ad9d5886793d5dc455d665cf3fd4b4617ef1094479678ff210ed0af",
+    sha256 = "fbb37446c33b05c1e26256c09f6ffb46cea1f6ff9ee2ad5b79b146d09023b0c1",
     strip_prefix = "libtorch",
-    urls = ["https://download.pytorch.org/libtorch/cu117/libtorch-shared-with-deps-1.13.0%2Bcu117.zip"],
+    urls = ["https://download.pytorch.org/libtorch/nightly/cu117/libtorch-shared-with-deps-1.14.0.dev20221114%2Bcu117.zip"],
 )
 
 # Download these tarballs manually from the NVIDIA website
@@ -76,20 +76,20 @@ http_archive(
 http_archive(
     name = "cudnn",
     build_file = "@//third_party/cudnn/archive:BUILD",
-    sha256 = "ec96d2376d81fca42bdd3d4c3d705a99b29a065bab57f920561c763e29c67d01",
-    strip_prefix = "cudnn-linux-x86_64-8.4.1.50_cuda11.6-archive",
+    sha256 = "5454a6fd94f008728caae9adad993c4e85ef36302e26bce43bea7d458a5e7b6d",
+    strip_prefix = "cudnn-linux-x86_64-8.5.0.96_cuda11-archive",
     urls = [
-        "https://developer.nvidia.com/compute/cudnn/secure/8.4.1/local_installers/11.6/cudnn-linux-x86_64-8.4.1.50_cuda11.6-archive.tar.xz",
+        "https://developer.nvidia.com/compute/cudnn/secure/8.5.0/local_installers/11.7/cudnn-linux-x86_64-8.5.0.96_cuda11-archive.tar.xz",
     ],
 )
 
 http_archive(
     name = "tensorrt",
     build_file = "@//third_party/tensorrt/archive:BUILD",
-    sha256 = "8d7c2085c1639dcc73875048c23598a8526ce3089136876e31d90258e49e4f61",
-    strip_prefix = "TensorRT-8.4.3.1",
+    sha256 = "39cc7f077057d1363794e8ff51c4cf21a5dbeccf1116b0020ba0dae0f3063076",
+    strip_prefix = "TensorRT-8.5.1.7",
     urls = [
-        "https://developer.nvidia.com/compute/machine-learning/tensorrt/secure/8.4.3/tars/tensorrt-8.4.3.1.linux.x86_64-gnu.cuda-11.6.cudnn8.4.tar.gz",
+        "https://developer.nvidia.com/compute/machine-learning/tensorrt/secure/8.5.1/tars/TensorRT-8.5.1.7.Linux.x86_64-gnu.cuda-11.8.cudnn8.6.tar.gz",
     ],
 )
 
 
@@ -13,7 +13,8 @@ nvinfer1::ITensor* addPadding(
     nvinfer1::ITensor* tensor,
     int nDim,
     bool trailing,
-    bool use_zeros) {
+    bool use_zeros,
+    const std::string& name) {
   const auto dims = tensor->getDimensions();
 
   if (dims.nbDims < nDim) {
@@ -27,7 +28,11 @@ nvinfer1::ITensor* addPadding(
     TORCHTRT_CHECK(shuffle_layer, "Unable to create shuffle layer");
     shuffle_layer->setReshapeDimensions(newDims);
     shuffle_layer->setZeroIsPlaceholder(use_zeros);
-    shuffle_layer->setName((util::node_info(n) + " [Reshape to " + util::toStr(newDims) + ']').c_str());
+    if (name.size()) {
+      shuffle_layer->setName(name.c_str());
+    } else {
+      shuffle_layer->setName((util::node_info(n) + " [Reshape to " + util::toStr(newDims) + ']').c_str());
+    }
     return shuffle_layer->getOutput(0);
   } else {
     return tensor;
@@ -40,7 +45,8 @@ nvinfer1::ITensor* addUnpadding(
     nvinfer1::ITensor* tensor,
     int nDim,
     bool trailing,
-    bool use_zeros) {
+    bool use_zeros,
+    const std::string& name) {
   const auto dims = tensor->getDimensions();
   if (dims.nbDims > nDim) {
     auto newDims = dims;
@@ -52,7 +58,11 @@ nvinfer1::ITensor* addUnpadding(
     TORCHTRT_CHECK(shuffle_layer, "Unable to create shuffle layer");
     shuffle_layer->setReshapeDimensions(newDims);
     shuffle_layer->setZeroIsPlaceholder(use_zeros);
-    shuffle_layer->setName((util::node_info(n) + " [Reshape to " + util::toStr(newDims) + "]").c_str());
+    if (name.size()) {
+      shuffle_layer->setName(name.c_str());
+    } else {
+      shuffle_layer->setName((util::node_info(n) + " [Reshape to " + util::toStr(newDims) + ']').c_str());
+    }
     return shuffle_layer->getOutput(0);
   } else {
     return tensor;
@@ -195,7 +205,11 @@ nvinfer1::ITensor* applyIdentityOp(ConversionCtx* ctx, nvinfer1::ITensor* tensor
   return id_out_tensor;
 }
 
-nvinfer1::ITensor* castITensor(ConversionCtx* ctx, nvinfer1::ITensor* tensor, nvinfer1::DataType dtype) {
+nvinfer1::ITensor* castITensor(
+    ConversionCtx* ctx,
+    nvinfer1::ITensor* tensor,
+    nvinfer1::DataType dtype,
+    const std::string& layer_name_prefix) {
   if (tensor->getType() != dtype) {
     std::ostringstream tensor_id;
     tensor_id << reinterpret_cast<int*>(tensor);
@@ -209,6 +223,9 @@ nvinfer1::ITensor* castITensor(ConversionCtx* ctx, nvinfer1::ITensor* tensor, nv
     LOG_DEBUG(ctx->logger, "Casting ITensor " << tensor_id.str() << " from " << tensor->getType() << " to " << dtype);
 
     std::stringstream ss;
+    if (layer_name_prefix.size()) {
+      ss << layer_name_prefix << " ";
+    }
     ss << "[Cast ITensor " << tensor_id.str() << " from " << tensor->getType() << " to " << dtype << "]";
     id_layer->setName(ss.str().c_str());
     return casted_tensor;
 
@@ -22,7 +22,8 @@ nvinfer1::ITensor* addPadding(
     nvinfer1::ITensor* tensor,
     int nDim,
     bool trailing = true,
-    bool use_zeros = true);
+    bool use_zeros = true,
+    const std::string& name = "");
 
 // If nDim < tensor size, adds shuffle layer to un-pad tensor (at the end if trailing) and returns (nDim-dimensional)
 // shuffle layer's output. Otherwise, does nothing and passes tensor through. use_zeros controls whether we should be
@@ -33,7 +34,8 @@ nvinfer1::ITensor* addUnpadding(
     nvinfer1::ITensor* tensor,
     int nDim,
     bool trailing = true,
-    bool use_zeros = true);
+    bool use_zeros = true,
+    const std::string& name = "");
 
 // TODO: Change add_elementwise schema to output nvinfer1::ITensor* instead of nvinfer1::ILayer*,
 // for consistency with other utils. Need to change schema and usage in all calling contexts
@@ -54,7 +56,11 @@ nvinfer1::ITensor* add_abs(
 nvinfer1::ITensor* applyIdentityOp(ConversionCtx* ctx, nvinfer1::ITensor* tensor, const std::string& name);
 
 // If an ITensor is of a type not dtype, add an Identity layer to cast it to dtype
-nvinfer1::ITensor* castITensor(ConversionCtx* ctx, nvinfer1::ITensor* tensor, nvinfer1::DataType dtype);
+nvinfer1::ITensor* castITensor(
+    ConversionCtx* ctx,
+    nvinfer1::ITensor* tensor,
+    nvinfer1::DataType dtype,
+    const std::string& layer_name_prefix = "");
 
 // Freeze an at::Tensor in a IConstant layer
 nvinfer1::ITensor* tensor_to_const(ConversionCtx* ctx, at::Tensor t, const std::string& name = std::string());
 
@@ -26,7 +26,7 @@ auto cast_registrations TORCHTRT_UNUSED =
                } else {
                  trt_dtype = util::ScalarTypeToTRTDataType(static_cast<at::ScalarType>(output_dtype));
                }
-               auto casted_itensor = castITensor(ctx, self, trt_dtype);
+               auto casted_itensor = castITensor(ctx, self, trt_dtype, util::node_info(n));
                auto output = ctx->AssociateValueAndTensor(n->outputs()[0], casted_itensor);
                LOG_DEBUG("[aten::to.dtype] Output tensor shape: " << output->getDimensions());
 
@@ -48,7 +48,7 @@ auto cast_registrations TORCHTRT_UNUSED =
                } else {
                  trt_dtype = util::ScalarTypeToTRTDataType(static_cast<at::ScalarType>(output_dtype));
                }
-               auto casted_itensor = castITensor(ctx, self, trt_dtype);
+               auto casted_itensor = castITensor(ctx, self, trt_dtype, util::node_info(n));
                auto output = ctx->AssociateValueAndTensor(n->outputs()[0], casted_itensor);
                LOG_DEBUG("[aten::to.device] Output tensor shape: " << output->getDimensions());
 
@@ -59,7 +59,7 @@ auto cast_registrations TORCHTRT_UNUSED =
              [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
                auto self = args[0].ITensorOrFreeze(ctx);
                nvinfer1::DataType other_dtype = args[1].ITensorOrFreeze(ctx)->getType();
-               auto casted_itensor = castITensor(ctx, self, other_dtype);
+               auto casted_itensor = castITensor(ctx, self, other_dtype, util::node_info(n));
                auto output = ctx->AssociateValueAndTensor(n->outputs()[0], casted_itensor);
                LOG_DEBUG("[aten::to.other] Output tensor shape: " << output->getDimensions());
 
@@ -77,7 +77,7 @@ auto cast_registrations TORCHTRT_UNUSED =
 
                auto output_dtype = args[2].unwrapToScalar().to<int64_t>();
                auto trt_dtype = util::ScalarTypeToTRTDataType(static_cast<at::ScalarType>(output_dtype));
-               auto casted_itensor = castITensor(ctx, self, trt_dtype);
+               auto casted_itensor = castITensor(ctx, self, trt_dtype, util::node_info(n));
                auto output = ctx->AssociateValueAndTensor(n->outputs()[0], casted_itensor);
                LOG_DEBUG("[aten::to.prim_Device] Output tensor shape: " << output->getDimensions());
 
 
@@ -325,7 +325,8 @@ auto element_wise_registrations TORCHTRT_UNUSED =
                      add_elementwise(ctx, nvinfer1::ElementWiseOperation::kFLOOR_DIV, self, other, util::node_info(n));
                } else if (rounding_mode == "trunc") {
                  // trunc = floor(abs(div)) * sign(div)
-                 auto tmp_div = add_elementwise(ctx, nvinfer1::ElementWiseOperation::kDIV, self, other, "tmp_div");
+                 auto tmp_div = add_elementwise(
+                     ctx, nvinfer1::ElementWiseOperation::kDIV, self, other, util::node_info(n) + "_tmp_div");
                  auto abs = add_abs(ctx, n, tmp_div->getOutput(0), util::node_info(n) + "_absolute_val");
 
                  // In this case, we allow the floor unary on non-TRT Unary types, as it is needed for this
 
@@ -72,6 +72,13 @@ auto reduce_registrations TORCHTRT_UNUSED =
                auto in_dims = util::toVec(in_tensor->getDimensions());
                LOG_WARNING("Sum Converter disregards dtype");
 
+               if (in_tensor->getType() == nvinfer1::DataType::kBOOL) {
+                 LOG_DEBUG(
+                     "Found type  " << in_tensor->getType() << " in aten::sum, casting to "
+                                    << nvinfer1::DataType::kINT32 << " for compatibility.");
+                 in_tensor = castITensor(ctx, in_tensor, nvinfer1::DataType::kINT32);
+               }
+
                uint32_t axis_mask = (uint32_t)(((uint64_t)1 << in_dims.size()) - 1);
 
                auto sum_layer = ctx->net->addReduce(*in_tensor, nvinfer1::ReduceOperation::kSUM, axis_mask, false);
@@ -113,6 +120,14 @@ auto reduce_registrations TORCHTRT_UNUSED =
                LOG_DEBUG("Keep dims: " << keepdim);
 
                LOG_WARNING("Sum converter disregards dtype");
+
+               if (in_tensor->getType() == nvinfer1::DataType::kBOOL) {
+                 LOG_DEBUG(
+                     "Found type  " << in_tensor->getType() << " in aten::sum, casting to "
+                                    << nvinfer1::DataType::kINT32 << " for compatibility.");
+                 in_tensor = castITensor(ctx, in_tensor, nvinfer1::DataType::kINT32);
+               }
+
                auto sum_layer = ctx->net->addReduce(*in_tensor, nvinfer1::ReduceOperation::kSUM, axis_mask, keepdim);
 
                TORCHTRT_CHECK(sum_layer, "Unable to create sum layer from node: " << *n);
 
@@ -39,15 +39,29 @@ auto topk_registrations TORCHTRT_UNUSED = RegisterNodeConversionPatterns().patte
 
        LOG_DEBUG("Output topk reduce dim: " << dim);
 
+       // The topk layer requires at least 2 input dimensions
+       auto nbDims = self->getDimensions().nbDims;
+       if (nbDims == 1) {
+         self = addPadding(ctx, n, self, 2, true, true);
+       }
+
        auto TopKOperation = largest ? (nvinfer1::TopKOperation::kMAX) : (nvinfer1::TopKOperation::kMIN);
 
        auto new_layer = ctx->net->addTopK(*self, TopKOperation, k, shiftDim);
 
        TORCHTRT_CHECK(new_layer, "Unable to create topk layer from node: " << *n);
 
-       auto out0 = ctx->AssociateValueAndTensor(n->outputs()[0], new_layer->getOutput(0));
-       auto out1 = ctx->AssociateValueAndTensor(n->outputs()[1], new_layer->getOutput(1));
+       auto values = new_layer->getOutput(0);
+       auto indices = new_layer->getOutput(1);
+
+       // If we expanded the input, squeeze the outputs
+       if (nbDims == 1) {
+         values = addUnpadding(ctx, n, values, 1, true, true, util::node_info(n) + "_squeeze_values");
+         indices = addUnpadding(ctx, n, indices, 1, true, true, util::node_info(n) + "_squeeze_indices");
+       }
 
+       auto out0 = ctx->AssociateValueAndTensor(n->outputs()[0], values);
+       auto out1 = ctx->AssociateValueAndTensor(n->outputs()[1], indices);
        LOG_DEBUG("Output tensor(0) shape: " << out0->getDimensions());
        LOG_DEBUG("Output tensor(1) shape: " << out1->getDimensions());
 
 
@@ -24,7 +24,7 @@ target_sources(${lib_name}
 
 target_link_libraries(${lib_name}
     PUBLIC
-        torch
+        "${TORCH_LIBRARIES}"
         TensorRT::nvinfer
         core
 )
@@ -71,7 +71,7 @@ target_sources(${torchtrt_lib_name}
 target_link_libraries(${torchtrt_lib_name}
     PUBLIC
         TensorRT::TensorRT
-        torch
+        "${TORCH_LIBRARIES}"
     PRIVATE
         torch_tensorrt
         core
@@ -111,7 +111,7 @@ target_sources(${runtime_lib_name}
 target_link_libraries(${runtime_lib_name}
     PUBLIC
         TensorRT::TensorRT
-        torch
+        "${TORCH_LIBRARIES}"
     PRIVATE
         core_runtime
         core_plugins