
Commit 33e76dc

scatter reduce decomposition
1 parent ca4b263 commit 33e76dc

File tree

3 files changed: +508 -1 lines changed

py/torch_tensorrt/dynamo/conversion/impl/elementwise/ops.py

Lines changed: 6 additions & 1 deletion
@@ -1,7 +1,6 @@
 from typing import Optional, Union
 
 import numpy as np
-from torch_tensorrt.dynamo.conversion.impl.shape import get_shape_with_dynamic_shape
 import tensorrt as trt
 import torch
 import torch_tensorrt.dynamo.conversion.impl as impl
@@ -19,6 +18,7 @@
 from torch_tensorrt.dynamo.conversion.impl.elementwise.base import (
     convert_binary_elementwise,
 )
+from torch_tensorrt.dynamo.conversion.impl.shape import get_shape_with_dynamic_shape
 from torch_tensorrt.dynamo.conversion.impl.unary import atan, sign
 from torch_tensorrt.dynamo.conversion.impl.unary.base import convert_unary
 from torch_tensorrt.fx.converters.converter_utils import broadcast
@@ -67,6 +67,11 @@ def trunc_div(
         prod_output,
     )
 
+    # Cast sign_output back to int32 for trunc_div.
+    # Required for scatter_reduce_.two(reduce='mean'), where trunc_div would cast
+    # the result to float32 while the TRTInterpreter expects int32.
+    if isinstance(sign_output, TRTTensor) and sign_output.dtype == trt.float32:
+        sign_output = cast_trt_tensor(ctx, sign_output, trt.int32, name)
+
     # Convert constant input into ITensor for UnaryOperation
     if not isinstance(input, trt.tensorrt.ITensor):
         input = get_trt_tensor(ctx, input, f"{name}_input")

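For reference, a minimal eager-mode sketch (not part of this commit) of the dtype behavior the new cast preserves: torch.div with rounding_mode="trunc" keeps integer operands in an integer dtype, which is what the TRTInterpreter expects on the scatter_reduce mean path after trunc_div temporarily promotes the sign output to float32. The tensor values below are illustrative only.

import torch

# Trunc division on int32 tensors stays int32 in eager mode; casting sign_output
# back to trt.int32 in the converter mirrors this dtype behavior.
a = torch.tensor([7, -7, 5], dtype=torch.int32)
b = torch.tensor([2, 2, 3], dtype=torch.int32)
q = torch.div(a, b, rounding_mode="trunc")
print(q, q.dtype)  # tensor([ 3, -3,  1], dtype=torch.int32) torch.int32
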
py/torch_tensorrt/dynamo/lowering/_decompositions.py

Lines changed: 94 additions & 0 deletions
@@ -1,4 +1,5 @@
 import logging
+from enum import Enum, auto
 from typing import Any, Callable, Dict, List, Optional
 
 import torch
@@ -243,6 +244,99 @@ def empty_strided_decomposition(*args, **kwargs) -> torch.Tensor:
     )
 
 
+# Enum class for the reduce operations of scatter_reduce
+class reduceOperation(Enum):
+    SUM = ("Sum reduce operation", lambda x, y: torch.add(x, y))
+    PROD = ("Product reduce operation", lambda x, y: torch.mul(x, y))
+    MEAN = ("Mean reduce operation", lambda x, y: torch.add(x, y))
+    AMAX = ("Amax reduce operation", lambda x, y: torch.amax(x, y))
+    AMIN = ("Amin reduce operation", lambda x, y: torch.amin(x, y))
+
+    def __new__(cls, description, func):
+        obj = object.__new__(cls)
+        obj._value_ = auto()  # assign a unique value based on the number of members
+        obj.description = description
+        obj.func = func
+        return obj
+
+    def reduce_operation_with_scatter(
+        self, operation_lhs, initial_tensor, dim, index_tensor, src_tensor
+    ):
+        scatter_tensor = None
+        if self == reduceOperation.SUM or self == reduceOperation.MEAN:
+            scatter_tensor = torch.zeros_like(initial_tensor)
+        elif self == reduceOperation.PROD:
+            scatter_tensor = torch.ones_like(initial_tensor)
+        elif self == reduceOperation.AMIN or self == reduceOperation.AMAX:
+            scatter_tensor = initial_tensor
+        else:
+            # This case would not be encountered from torch itself
+            print("Invalid Operation for Reduce op!!")
+
+        operation_rhs = torch.scatter(scatter_tensor, dim, index_tensor, src_tensor)
+        device = to_torch_device(default_device())
+        operation_lhs = operation_lhs.to(device)
+        operation_rhs = operation_rhs.to(device)
+        return self.func(operation_lhs, operation_rhs)
+
+
+@register_torch_trt_decomposition(
+    torch.ops.aten.scatter_reduce.two, registry=TORCH_TRT_DECOMPOSITIONS
+)
+def scatter_reduce_decomposition(
+    input_tensor: torch.Tensor,
+    dim: int,
+    index: torch.Tensor,
+    src_tensor: torch.Tensor,
+    reduce: str,
+) -> torch.Tensor:
+    scatter_loop_tensor = input_tensor
+    # required for the mean reduce operation
+    scatter_count_tensor = torch.zeros_like(input_tensor)
+    src_shape = list(src_tensor.shape)
+    src_dim = src_shape[dim]
+
+    for i in range(0, src_dim):
+        src_slice = torch.select(src_tensor, dim, i)
+        index_slice = torch.select(index, dim, i)
+        # unsqueeze src and index in dim
+        src_slice = torch.unsqueeze(src_slice, dim)
+        index_slice = torch.unsqueeze(index_slice, dim)
+        device = to_torch_device(default_device())
+
+        # move the tensors to the default device
+        scatter_loop_tensor = scatter_loop_tensor.to(device)
+        index_slice = index_slice.to(device)
+        src_slice = src_slice.to(device)
+        if reduce == "sum":
+            reduceOp = reduceOperation.SUM
+        elif reduce == "prod":
+            reduceOp = reduceOperation.PROD
+        elif reduce == "mean":
+            reduceOp = reduceOperation.MEAN
+            scatter_count_tensor = reduceOp.reduce_operation_with_scatter(
+                scatter_count_tensor,
+                input_tensor,
+                dim,
+                index_slice,
+                torch.ones_like(src_slice),
+            )
+        elif reduce == "amax":
+            reduceOp = reduceOperation.AMAX
+        elif reduce == "amin":
+            reduceOp = reduceOperation.AMIN
+        scatter_loop_tensor = reduceOp.reduce_operation_with_scatter(
+            scatter_loop_tensor, input_tensor, dim, index_slice, src_slice
+        )
+    if reduce == "mean":
+        scatter_loop_tensor = torch.div(
+            scatter_loop_tensor,
+            torch.add(scatter_count_tensor, torch.ones_like(scatter_count_tensor)),
+            rounding_mode="trunc",
+        )
+    return scatter_loop_tensor
+
+
 def get_decompositions(
     enable_experimental_decompositions: bool = False,
 ) -> Dict[OpOverload, Callable[[Any], Any]]:

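For context, a minimal eager-mode sketch (not part of this commit) of the op this decomposition lowers. The decomposition seeds its accumulator with the original input tensor, which corresponds to include_self=True (the default) in the eager API; the shapes and values below are illustrative only.

import torch

# Reference semantics: scatter src into input along dim, combining colliding
# writes with the chosen reduction ("sum", "prod", "mean", "amax", "amin").
input_tensor = torch.zeros(3, 5, dtype=torch.int64)
src = torch.arange(1, 11).reshape(2, 5)
index = torch.tensor([[0, 1, 2, 0, 1], [2, 0, 1, 1, 2]])
out = torch.scatter_reduce(input_tensor, 0, index, src, reduce="sum", include_self=True)
print(out)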