Skip to content

feat: add support for image grayscaling✨ #944

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ Classes
* pre_post_processing.steps.vision.ConvertImageToBGR
* pre_post_processing.steps.vision.DrawBoundingBoxes
* pre_post_processing.steps.vision.FloatToImageBytes
* pre_post_processing.steps.vision.Grayscale
* pre_post_processing.steps.vision.ImageBytesToFloat
* pre_post_processing.steps.vision.LetterBox
* pre_post_processing.steps.vision.Normalize
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,22 @@ Classes

* pre_post_processing.step.Step

`Grayscale(layout: str = 'BGR', name: Optional[str] = None)`
: Convert an image to grayscale.

Input data can be uint8 or float.

Input shape: {height, width, 3}
Output shape is the same.

Args:
layout: Optional channel layout. "BGR" and "RGB" are supported. Defaults to "BGR".
name: Optional name of step. Defaults to 'Grayscale'.

### Ancestors (in MRO)

* pre_post_processing.step.Step

`ImageBytesToFloat(rescale_factor: float = 0.00392156862745098, name: Optional[str] = None)`
: Convert uint8 or float values in range 0..255 to floating point values in range 0..1

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Documentation was generated with pdoc3 (`pip install pdoc3`).
Documentation was generated with pdoc3 (`pip install pdoc3==0.10.0`).
From the parent directory:
`python -m pdoc pre_post_processing -o ./pre_post_processing/docs --filter pre_post_processing`

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,64 @@ def _create_graph_for_step(self, graph: onnx.GraphProto, onnx_opset: int):
#
# Pre-processing
#

class Grayscale(Step):
    """Convert a 3-channel image to grayscale.

    Every pixel is replaced by its ITU-R 601-2 luma value. The gray value is replicated
    across all 3 channels so the image keeps its shape.

    Input data can be uint8 or float.
    Output data has the same type as the input. uint8 results are rounded to the nearest
    integer. float results are not rounded, so images scaled to the range 0..1 keep their
    precision.

    Input shape: {height, width, 3}
    Output shape is the same.
    """

    def __init__(self, layout: str = "BGR", name: Optional[str] = None):
        """
        Args:
            layout: Optional channel layout. "BGR" and "RGB" are supported. Defaults to "BGR".
            name: Optional name of step. Defaults to 'Grayscale'.
        """
        super().__init__(["image"], ["grayscale_image"], name)
        assert layout == "RGB" or layout == "BGR", f"layout must be 'RGB' or 'BGR'. Got {layout!r}"
        self._layout = layout

    def _create_graph_for_step(self, graph: onnx.GraphProto, onnx_opset: int) -> onnx.GraphProto:
        """Build the ONNX graph that performs the grayscale conversion.

        Args:
            graph: Graph to read the type and shape of this step's input from.
            onnx_opset: ONNX opset the pipeline is being built with. Not read by this step.

        Returns:
            Graph with one input and one output, both typed like the step's input.
        """
        input_type_str, input_shape_str = self._get_input_type_and_shape_strs(graph, 0)
        assert (
            input_type_str == "uint8" or input_type_str == "float"
        ), f"Grayscale input must be uint8 or float. Got {input_type_str}"
        assert (
            len(input_shape_str.split(",")) == 3
        ), f"Grayscale input must have 3 dimensions (height, width, channels). Got [{input_shape_str}]"

        # do ITU-R 601-2 luma transform. Weights adapted from:
        # see: https://docs.opencv.org/3.4/de/d25/imgproc_color_conversions.html
        # The weights are listed in the same order as the channels of the chosen layout.
        cm_str = "0.114,0.587,0.299" if self._layout == "BGR" else "0.299,0.587,0.114"

        input_name = self.input_names[0]
        output_name = self.output_names[0]

        if input_type_str == "uint8":
            # The weighted sum is computed in float, so round it and cast back to uint8.
            tiled_name = "X_gray_3_channel"
            to_output_type = (
                f"X_gray_3_rounded = Round({tiled_name})\n"
                f"{output_name} = Cast <to={onnx.TensorProto.UINT8}> (X_gray_3_rounded)"
            )
        else:
            # float input: the tiled result is already in the output type. Rounding here
            # would destroy images whose values are scaled to 0..1.
            tiled_name = output_name
            to_output_type = ""

        # NOTE: ReduceSum receives `axes` as an input (not an attribute), which is the form
        # used by ONNX opset 13 and later.
        grayscaling_graph = onnx.parser.parse_graph(
            f"""
            grayscale ({input_type_str}[height, width, 3] {input_name})
                => ({input_type_str}[height, width, 3] {output_name})
            {{
                axes = Constant <value = int64[1] {{2}}>()

                # repeat counts per axis: replicate the single gray channel 3 times
                repeat_dims = Constant <value = int64[3] {{1, 1, 3}}>()

                const_node_b = Constant <value = float[3] {{ {cm_str} }}>()

                # cast to float (some ops like Sum require floats)
                X_float = Cast <to={onnx.TensorProto.FLOAT}> ({input_name})
                X_mult_b = Mul(const_node_b, X_float)
                X_channel_gray = ReduceSum(X_mult_b, axes)

                {tiled_name} = Tile (X_channel_gray, repeat_dims)
                {to_output_type}
            }}
            """
        )
        return grayscaling_graph


class Resize(Step):
"""
Resize input data. Aspect ratio is maintained.
Expand Down
47 changes: 47 additions & 0 deletions test/test_tools_add_pre_post_processing_to_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -408,6 +408,53 @@ def test_qatask_with_tokenizer(self):

self.assertEqual(result[0][0], ref_output[0][0])

def _create_pipeline_and_run_for_grayscale(self, output_model: Path, layout: str = "RGB"):
    """Prepend a Grayscale pre-processing step to a pass-through model and save the result.

    Args:
        output_model: Path the combined ONNX model is written to.
        layout: Channel layout handed to the Grayscale step.
    """
    import onnx

    opset_version = 16

    # Minimal model whose output is simply its input, so only the pre-processing is exercised.
    identity_graph = onnx.parser.parse_graph("""\
        identity (uint8[h,w,c] image_in)
            => (uint8[h,w,c] image_out)
        {
            image_out = Identity(image_in)
        }
        """)

    opset_id = onnx.helper.make_operatorsetid("", opset_version)
    base_model = onnx.helper.make_model_gen_version(
        identity_graph,
        opset_imports=[opset_id],
        ir_version=onnx.helper.find_min_ir_version_for([opset_id]),
    )

    make_value = pre_post_processing.utils.create_named_value
    pipeline_inputs = [make_value("image", onnx.TensorProto.UINT8, ["height", "width", 3])]

    processor = PrePostProcessor(pipeline_inputs, opset_version)
    processor.add_pre_processing([Grayscale(layout=layout)])

    onnx.save_model(processor.run(base_model), output_model)

def test_grayscale_step_rgb(self):
    """Grayscale on an RGB image yields identical channels that match Pillow's "L" conversion."""
    model_path = (self.temp4onnx / "identity.onnx").resolve()
    self._create_pipeline_and_run_for_grayscale(model_path, layout="RGB")

    rgb_image = Image.open(Path(test_data_dir) / "../pineapple.jpg").convert("RGB")

    session = ort.InferenceSession(str(model_path), providers=["CPUExecutionProvider"])
    outputs = session.run(None, {"image": np.asarray(rgb_image)})
    grayscaled_image = outputs[0]

    # every pixel must hold the same value in all of its channels
    channel_max = np.max(grayscaled_image, axis=-1)
    channel_min = np.min(grayscaled_image, axis=-1)
    self.assertTrue((channel_max == channel_min).all())

    # the onnxruntime-extensions result must match Pillow's grayscale conversion
    # to within one intensity level
    pillow_gray = np.array(rgb_image.convert("L"))[:, :, np.newaxis]
    np.testing.assert_allclose(
        np.repeat(pillow_gray, 3, axis=2),
        grayscaled_image,
        atol=1,
    )

# Corner Case
def test_debug_step(self):
import onnx
Expand Down